From 7c0303ff7e67b637c47d8afee533ca9e2a02359b Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Mon, 26 Apr 2021 02:57:31 +0900 Subject: [PATCH 001/142] crypto: arm - generate *.S by Perl at build time instead of shipping them Generate *.S by Perl like arch/{mips,x86}/crypto/Makefile. Signed-off-by: Masahiro Yamada Signed-off-by: Herbert Xu --- arch/arm/crypto/Makefile | 8 +- arch/arm/crypto/poly1305-core.S_shipped | 1158 ---------- arch/arm/crypto/sha256-core.S_shipped | 2816 ----------------------- arch/arm/crypto/sha512-core.S_shipped | 1869 --------------- 4 files changed, 3 insertions(+), 5848 deletions(-) delete mode 100644 arch/arm/crypto/poly1305-core.S_shipped delete mode 100644 arch/arm/crypto/sha256-core.S_shipped delete mode 100644 arch/arm/crypto/sha512-core.S_shipped diff --git a/arch/arm/crypto/Makefile b/arch/arm/crypto/Makefile index 8f26c454ea12e..51f160c61740f 100644 --- a/arch/arm/crypto/Makefile +++ b/arch/arm/crypto/Makefile @@ -45,19 +45,17 @@ poly1305-arm-y := poly1305-core.o poly1305-glue.o nhpoly1305-neon-y := nh-neon-core.o nhpoly1305-neon-glue.o curve25519-neon-y := curve25519-core.o curve25519-glue.o -ifdef REGENERATE_ARM_CRYPTO quiet_cmd_perl = PERL $@ cmd_perl = $(PERL) $(<) > $(@) -$(src)/poly1305-core.S_shipped: $(src)/poly1305-armv4.pl +$(obj)/poly1305-core.S: $(src)/poly1305-armv4.pl $(call cmd,perl) -$(src)/sha256-core.S_shipped: $(src)/sha256-armv4.pl +$(obj)/sha256-core.S: $(src)/sha256-armv4.pl $(call cmd,perl) -$(src)/sha512-core.S_shipped: $(src)/sha512-armv4.pl +$(obj)/sha512-core.S: $(src)/sha512-armv4.pl $(call cmd,perl) -endif clean-files += poly1305-core.S sha256-core.S sha512-core.S diff --git a/arch/arm/crypto/poly1305-core.S_shipped b/arch/arm/crypto/poly1305-core.S_shipped deleted file mode 100644 index 37b71d9902932..0000000000000 --- a/arch/arm/crypto/poly1305-core.S_shipped +++ /dev/null @@ -1,1158 +0,0 @@ -#ifndef __KERNEL__ -# include "arm_arch.h" -#else -# define __ARM_ARCH__ __LINUX_ARM_ARCH__ -# define __ARM_MAX_ARCH__ __LINUX_ARM_ARCH__ -# define poly1305_init poly1305_init_arm -# define poly1305_blocks poly1305_blocks_arm -# define poly1305_emit poly1305_emit_arm -.globl poly1305_blocks_neon -#endif - -#if defined(__thumb2__) -.syntax unified -.thumb -#else -.code 32 -#endif - -.text - -.globl poly1305_emit -.globl poly1305_blocks -.globl poly1305_init -.type poly1305_init,%function -.align 5 -poly1305_init: -.Lpoly1305_init: - stmdb sp!,{r4-r11} - - eor r3,r3,r3 - cmp r1,#0 - str r3,[r0,#0] @ zero hash value - str r3,[r0,#4] - str r3,[r0,#8] - str r3,[r0,#12] - str r3,[r0,#16] - str r3,[r0,#36] @ clear is_base2_26 - add r0,r0,#20 - -#ifdef __thumb2__ - it eq -#endif - moveq r0,#0 - beq .Lno_key - -#if __ARM_MAX_ARCH__>=7 - mov r3,#-1 - str r3,[r0,#28] @ impossible key power value -# ifndef __KERNEL__ - adr r11,.Lpoly1305_init - ldr r12,.LOPENSSL_armcap -# endif -#endif - ldrb r4,[r1,#0] - mov r10,#0x0fffffff - ldrb r5,[r1,#1] - and r3,r10,#-4 @ 0x0ffffffc - ldrb r6,[r1,#2] - ldrb r7,[r1,#3] - orr r4,r4,r5,lsl#8 - ldrb r5,[r1,#4] - orr r4,r4,r6,lsl#16 - ldrb r6,[r1,#5] - orr r4,r4,r7,lsl#24 - ldrb r7,[r1,#6] - and r4,r4,r10 - -#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) -# if !defined(_WIN32) - ldr r12,[r11,r12] @ OPENSSL_armcap_P -# endif -# if defined(__APPLE__) || defined(_WIN32) - ldr r12,[r12] -# endif -#endif - ldrb r8,[r1,#7] - orr r5,r5,r6,lsl#8 - ldrb r6,[r1,#8] - orr r5,r5,r7,lsl#16 - ldrb r7,[r1,#9] - orr r5,r5,r8,lsl#24 - ldrb r8,[r1,#10] - and r5,r5,r3 - -#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) - tst 
r12,#ARMV7_NEON @ check for NEON -# ifdef __thumb2__ - adr r9,.Lpoly1305_blocks_neon - adr r11,.Lpoly1305_blocks - it ne - movne r11,r9 - adr r12,.Lpoly1305_emit - orr r11,r11,#1 @ thumb-ify addresses - orr r12,r12,#1 -# else - add r12,r11,#(.Lpoly1305_emit-.Lpoly1305_init) - ite eq - addeq r11,r11,#(.Lpoly1305_blocks-.Lpoly1305_init) - addne r11,r11,#(.Lpoly1305_blocks_neon-.Lpoly1305_init) -# endif -#endif - ldrb r9,[r1,#11] - orr r6,r6,r7,lsl#8 - ldrb r7,[r1,#12] - orr r6,r6,r8,lsl#16 - ldrb r8,[r1,#13] - orr r6,r6,r9,lsl#24 - ldrb r9,[r1,#14] - and r6,r6,r3 - - ldrb r10,[r1,#15] - orr r7,r7,r8,lsl#8 - str r4,[r0,#0] - orr r7,r7,r9,lsl#16 - str r5,[r0,#4] - orr r7,r7,r10,lsl#24 - str r6,[r0,#8] - and r7,r7,r3 - str r7,[r0,#12] -#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) - stmia r2,{r11,r12} @ fill functions table - mov r0,#1 -#else - mov r0,#0 -#endif -.Lno_key: - ldmia sp!,{r4-r11} -#if __ARM_ARCH__>=5 - bx lr @ bx lr -#else - tst lr,#1 - moveq pc,lr @ be binary compatible with V4, yet - .word 0xe12fff1e @ interoperable with Thumb ISA:-) -#endif -.size poly1305_init,.-poly1305_init -.type poly1305_blocks,%function -.align 5 -poly1305_blocks: -.Lpoly1305_blocks: - stmdb sp!,{r3-r11,lr} - - ands r2,r2,#-16 - beq .Lno_data - - add r2,r2,r1 @ end pointer - sub sp,sp,#32 - -#if __ARM_ARCH__<7 - ldmia r0,{r4-r12} @ load context - add r0,r0,#20 - str r2,[sp,#16] @ offload stuff - str r0,[sp,#12] -#else - ldr lr,[r0,#36] @ is_base2_26 - ldmia r0!,{r4-r8} @ load hash value - str r2,[sp,#16] @ offload stuff - str r0,[sp,#12] - - adds r9,r4,r5,lsl#26 @ base 2^26 -> base 2^32 - mov r10,r5,lsr#6 - adcs r10,r10,r6,lsl#20 - mov r11,r6,lsr#12 - adcs r11,r11,r7,lsl#14 - mov r12,r7,lsr#18 - adcs r12,r12,r8,lsl#8 - mov r2,#0 - teq lr,#0 - str r2,[r0,#16] @ clear is_base2_26 - adc r2,r2,r8,lsr#24 - - itttt ne - movne r4,r9 @ choose between radixes - movne r5,r10 - movne r6,r11 - movne r7,r12 - ldmia r0,{r9-r12} @ load key - it ne - movne r8,r2 -#endif - - mov lr,r1 - cmp r3,#0 - str r10,[sp,#20] - str r11,[sp,#24] - str r12,[sp,#28] - b .Loop - -.align 4 -.Loop: -#if __ARM_ARCH__<7 - ldrb r0,[lr],#16 @ load input -# ifdef __thumb2__ - it hi -# endif - addhi r8,r8,#1 @ 1<<128 - ldrb r1,[lr,#-15] - ldrb r2,[lr,#-14] - ldrb r3,[lr,#-13] - orr r1,r0,r1,lsl#8 - ldrb r0,[lr,#-12] - orr r2,r1,r2,lsl#16 - ldrb r1,[lr,#-11] - orr r3,r2,r3,lsl#24 - ldrb r2,[lr,#-10] - adds r4,r4,r3 @ accumulate input - - ldrb r3,[lr,#-9] - orr r1,r0,r1,lsl#8 - ldrb r0,[lr,#-8] - orr r2,r1,r2,lsl#16 - ldrb r1,[lr,#-7] - orr r3,r2,r3,lsl#24 - ldrb r2,[lr,#-6] - adcs r5,r5,r3 - - ldrb r3,[lr,#-5] - orr r1,r0,r1,lsl#8 - ldrb r0,[lr,#-4] - orr r2,r1,r2,lsl#16 - ldrb r1,[lr,#-3] - orr r3,r2,r3,lsl#24 - ldrb r2,[lr,#-2] - adcs r6,r6,r3 - - ldrb r3,[lr,#-1] - orr r1,r0,r1,lsl#8 - str lr,[sp,#8] @ offload input pointer - orr r2,r1,r2,lsl#16 - add r10,r10,r10,lsr#2 - orr r3,r2,r3,lsl#24 -#else - ldr r0,[lr],#16 @ load input - it hi - addhi r8,r8,#1 @ padbit - ldr r1,[lr,#-12] - ldr r2,[lr,#-8] - ldr r3,[lr,#-4] -# ifdef __ARMEB__ - rev r0,r0 - rev r1,r1 - rev r2,r2 - rev r3,r3 -# endif - adds r4,r4,r0 @ accumulate input - str lr,[sp,#8] @ offload input pointer - adcs r5,r5,r1 - add r10,r10,r10,lsr#2 - adcs r6,r6,r2 -#endif - add r11,r11,r11,lsr#2 - adcs r7,r7,r3 - add r12,r12,r12,lsr#2 - - umull r2,r3,r5,r9 - adc r8,r8,#0 - umull r0,r1,r4,r9 - umlal r2,r3,r8,r10 - umlal r0,r1,r7,r10 - ldr r10,[sp,#20] @ reload r10 - umlal r2,r3,r6,r12 - umlal r0,r1,r5,r12 - umlal r2,r3,r7,r11 - umlal r0,r1,r6,r11 - umlal r2,r3,r4,r10 - str r0,[sp,#0] @ 
future r4 - mul r0,r11,r8 - ldr r11,[sp,#24] @ reload r11 - adds r2,r2,r1 @ d1+=d0>>32 - eor r1,r1,r1 - adc lr,r3,#0 @ future r6 - str r2,[sp,#4] @ future r5 - - mul r2,r12,r8 - eor r3,r3,r3 - umlal r0,r1,r7,r12 - ldr r12,[sp,#28] @ reload r12 - umlal r2,r3,r7,r9 - umlal r0,r1,r6,r9 - umlal r2,r3,r6,r10 - umlal r0,r1,r5,r10 - umlal r2,r3,r5,r11 - umlal r0,r1,r4,r11 - umlal r2,r3,r4,r12 - ldr r4,[sp,#0] - mul r8,r9,r8 - ldr r5,[sp,#4] - - adds r6,lr,r0 @ d2+=d1>>32 - ldr lr,[sp,#8] @ reload input pointer - adc r1,r1,#0 - adds r7,r2,r1 @ d3+=d2>>32 - ldr r0,[sp,#16] @ reload end pointer - adc r3,r3,#0 - add r8,r8,r3 @ h4+=d3>>32 - - and r1,r8,#-4 - and r8,r8,#3 - add r1,r1,r1,lsr#2 @ *=5 - adds r4,r4,r1 - adcs r5,r5,#0 - adcs r6,r6,#0 - adcs r7,r7,#0 - adc r8,r8,#0 - - cmp r0,lr @ done yet? - bhi .Loop - - ldr r0,[sp,#12] - add sp,sp,#32 - stmdb r0,{r4-r8} @ store the result - -.Lno_data: -#if __ARM_ARCH__>=5 - ldmia sp!,{r3-r11,pc} -#else - ldmia sp!,{r3-r11,lr} - tst lr,#1 - moveq pc,lr @ be binary compatible with V4, yet - .word 0xe12fff1e @ interoperable with Thumb ISA:-) -#endif -.size poly1305_blocks,.-poly1305_blocks -.type poly1305_emit,%function -.align 5 -poly1305_emit: -.Lpoly1305_emit: - stmdb sp!,{r4-r11} - - ldmia r0,{r3-r7} - -#if __ARM_ARCH__>=7 - ldr ip,[r0,#36] @ is_base2_26 - - adds r8,r3,r4,lsl#26 @ base 2^26 -> base 2^32 - mov r9,r4,lsr#6 - adcs r9,r9,r5,lsl#20 - mov r10,r5,lsr#12 - adcs r10,r10,r6,lsl#14 - mov r11,r6,lsr#18 - adcs r11,r11,r7,lsl#8 - mov r0,#0 - adc r0,r0,r7,lsr#24 - - tst ip,ip - itttt ne - movne r3,r8 - movne r4,r9 - movne r5,r10 - movne r6,r11 - it ne - movne r7,r0 -#endif - - adds r8,r3,#5 @ compare to modulus - adcs r9,r4,#0 - adcs r10,r5,#0 - adcs r11,r6,#0 - adc r0,r7,#0 - tst r0,#4 @ did it carry/borrow? - -#ifdef __thumb2__ - it ne -#endif - movne r3,r8 - ldr r8,[r2,#0] -#ifdef __thumb2__ - it ne -#endif - movne r4,r9 - ldr r9,[r2,#4] -#ifdef __thumb2__ - it ne -#endif - movne r5,r10 - ldr r10,[r2,#8] -#ifdef __thumb2__ - it ne -#endif - movne r6,r11 - ldr r11,[r2,#12] - - adds r3,r3,r8 - adcs r4,r4,r9 - adcs r5,r5,r10 - adc r6,r6,r11 - -#if __ARM_ARCH__>=7 -# ifdef __ARMEB__ - rev r3,r3 - rev r4,r4 - rev r5,r5 - rev r6,r6 -# endif - str r3,[r1,#0] - str r4,[r1,#4] - str r5,[r1,#8] - str r6,[r1,#12] -#else - strb r3,[r1,#0] - mov r3,r3,lsr#8 - strb r4,[r1,#4] - mov r4,r4,lsr#8 - strb r5,[r1,#8] - mov r5,r5,lsr#8 - strb r6,[r1,#12] - mov r6,r6,lsr#8 - - strb r3,[r1,#1] - mov r3,r3,lsr#8 - strb r4,[r1,#5] - mov r4,r4,lsr#8 - strb r5,[r1,#9] - mov r5,r5,lsr#8 - strb r6,[r1,#13] - mov r6,r6,lsr#8 - - strb r3,[r1,#2] - mov r3,r3,lsr#8 - strb r4,[r1,#6] - mov r4,r4,lsr#8 - strb r5,[r1,#10] - mov r5,r5,lsr#8 - strb r6,[r1,#14] - mov r6,r6,lsr#8 - - strb r3,[r1,#3] - strb r4,[r1,#7] - strb r5,[r1,#11] - strb r6,[r1,#15] -#endif - ldmia sp!,{r4-r11} -#if __ARM_ARCH__>=5 - bx lr @ bx lr -#else - tst lr,#1 - moveq pc,lr @ be binary compatible with V4, yet - .word 0xe12fff1e @ interoperable with Thumb ISA:-) -#endif -.size poly1305_emit,.-poly1305_emit -#if __ARM_MAX_ARCH__>=7 -.fpu neon - -.type poly1305_init_neon,%function -.align 5 -poly1305_init_neon: -.Lpoly1305_init_neon: - ldr r3,[r0,#48] @ first table element - cmp r3,#-1 @ is value impossible? 
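
The poly1305_emit code above folds the five 26-bit limbs back into four
32-bit words, reduces modulo 2^130 - 5 by conditionally selecting h + 5
(keeping only the low 128 bits when the addition carries past bit 130),
and finally adds the 128-bit nonce modulo 2^128. A minimal C sketch of
that sequence, with hypothetical names (the kernel's real glue lives in
poly1305-glue.c, not here):

    #include <stdint.h>
    #include <string.h>

    /* Sketch only: mirrors the adds/adcs chains in poly1305_emit. */
    static void poly1305_emit_sketch(const uint32_t h[5], uint8_t mac[16],
                                     const uint32_t nonce[4])
    {
        uint32_t w[4], g[4], c, gc;
        uint64_t t;
        int i;

        /* base 2^26 -> base 2^32; limbs may carry a few extra bits */
        t = (uint64_t)h[0] + ((uint64_t)h[1] << 26); w[0] = (uint32_t)t;
        t = (t >> 32)      + ((uint64_t)h[2] << 20); w[1] = (uint32_t)t;
        t = (t >> 32)      + ((uint64_t)h[3] << 14); w[2] = (uint32_t)t;
        t = (t >> 32)      + ((uint64_t)h[4] <<  8); w[3] = (uint32_t)t;
        c = (uint32_t)(t >> 32);          /* bits 128 and up */

        /* h >= 2^130 - 5 exactly when h + 5 carries into bit 130 */
        t = (uint64_t)w[0] + 5; g[0] = (uint32_t)t;
        for (i = 1; i < 4; i++) {
            t = (t >> 32) + w[i];
            g[i] = (uint32_t)t;
        }
        gc = c + (uint32_t)(t >> 32);
        if (gc & 4)                       /* the "tst r0,#4" above */
            memcpy(w, g, sizeof(w));

        /* mac = (h + nonce) mod 2^128, stored little-endian */
        t = 0;
        for (i = 0; i < 4; i++) {
            t += (uint64_t)w[i] + nonce[i];
            mac[4 * i + 0] = (uint8_t)t;
            mac[4 * i + 1] = (uint8_t)(t >> 8);
            mac[4 * i + 2] = (uint8_t)(t >> 16);
            mac[4 * i + 3] = (uint8_t)(t >> 24);
            t >>= 32;
        }
    }
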
- bne .Lno_init_neon - - ldr r4,[r0,#20] @ load key base 2^32 - ldr r5,[r0,#24] - ldr r6,[r0,#28] - ldr r7,[r0,#32] - - and r2,r4,#0x03ffffff @ base 2^32 -> base 2^26 - mov r3,r4,lsr#26 - mov r4,r5,lsr#20 - orr r3,r3,r5,lsl#6 - mov r5,r6,lsr#14 - orr r4,r4,r6,lsl#12 - mov r6,r7,lsr#8 - orr r5,r5,r7,lsl#18 - and r3,r3,#0x03ffffff - and r4,r4,#0x03ffffff - and r5,r5,#0x03ffffff - - vdup.32 d0,r2 @ r^1 in both lanes - add r2,r3,r3,lsl#2 @ *5 - vdup.32 d1,r3 - add r3,r4,r4,lsl#2 - vdup.32 d2,r2 - vdup.32 d3,r4 - add r4,r5,r5,lsl#2 - vdup.32 d4,r3 - vdup.32 d5,r5 - add r5,r6,r6,lsl#2 - vdup.32 d6,r4 - vdup.32 d7,r6 - vdup.32 d8,r5 - - mov r5,#2 @ counter - -.Lsquare_neon: - @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ - @ d0 = h0*r0 + h4*5*r1 + h3*5*r2 + h2*5*r3 + h1*5*r4 - @ d1 = h1*r0 + h0*r1 + h4*5*r2 + h3*5*r3 + h2*5*r4 - @ d2 = h2*r0 + h1*r1 + h0*r2 + h4*5*r3 + h3*5*r4 - @ d3 = h3*r0 + h2*r1 + h1*r2 + h0*r3 + h4*5*r4 - @ d4 = h4*r0 + h3*r1 + h2*r2 + h1*r3 + h0*r4 - - vmull.u32 q5,d0,d0[1] - vmull.u32 q6,d1,d0[1] - vmull.u32 q7,d3,d0[1] - vmull.u32 q8,d5,d0[1] - vmull.u32 q9,d7,d0[1] - - vmlal.u32 q5,d7,d2[1] - vmlal.u32 q6,d0,d1[1] - vmlal.u32 q7,d1,d1[1] - vmlal.u32 q8,d3,d1[1] - vmlal.u32 q9,d5,d1[1] - - vmlal.u32 q5,d5,d4[1] - vmlal.u32 q6,d7,d4[1] - vmlal.u32 q8,d1,d3[1] - vmlal.u32 q7,d0,d3[1] - vmlal.u32 q9,d3,d3[1] - - vmlal.u32 q5,d3,d6[1] - vmlal.u32 q8,d0,d5[1] - vmlal.u32 q6,d5,d6[1] - vmlal.u32 q7,d7,d6[1] - vmlal.u32 q9,d1,d5[1] - - vmlal.u32 q8,d7,d8[1] - vmlal.u32 q5,d1,d8[1] - vmlal.u32 q6,d3,d8[1] - vmlal.u32 q7,d5,d8[1] - vmlal.u32 q9,d0,d7[1] - - @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ - @ lazy reduction as discussed in "NEON crypto" by D.J. Bernstein - @ and P. Schwabe - @ - @ H0>>+H1>>+H2>>+H3>>+H4 - @ H3>>+H4>>*5+H0>>+H1 - @ - @ Trivia. - @ - @ Result of multiplication of n-bit number by m-bit number is - @ n+m bits wide. However! Even though 2^n is a n+1-bit number, - @ m-bit number multiplied by 2^n is still n+m bits wide. - @ - @ Sum of two n-bit numbers is n+1 bits wide, sum of three - n+2, - @ and so is sum of four. Sum of 2^m n-m-bit numbers and n-bit - @ one is n+1 bits wide. - @ - @ >>+ denotes Hnext += Hn>>26, Hn &= 0x3ffffff. This means that - @ H0, H2, H3 are guaranteed to be 26 bits wide, while H1 and H4 - @ can be 27. However! In cases when their width exceeds 26 bits - @ they are limited by 2^26+2^6. This in turn means that *sum* - @ of the products with these values can still be viewed as sum - @ of 52-bit numbers as long as the amount of addends is not a - @ power of 2. For example, - @ - @ H4 = H4*R0 + H3*R1 + H2*R2 + H1*R3 + H0 * R4, - @ - @ which can't be larger than 5 * (2^26 + 2^6) * (2^26 + 2^6), or - @ 5 * (2^52 + 2*2^32 + 2^12), which in turn is smaller than - @ 8 * (2^52) or 2^55. However, the value is then multiplied by - @ by 5, so we should be looking at 5 * 5 * (2^52 + 2^33 + 2^12), - @ which is less than 32 * (2^52) or 2^57. And when processing - @ data we are looking at triple as many addends... - @ - @ In key setup procedure pre-reduced H0 is limited by 5*4+1 and - @ 5*H4 - by 5*5 52-bit addends, or 57 bits. But when hashing the - @ input H0 is limited by (5*4+1)*3 addends, or 58 bits, while - @ 5*H4 by 5*5*3, or 59[!] bits. How is this relevant? vmlal.u32 - @ instruction accepts 2x32-bit input and writes 2x64-bit result. - @ This means that result of reduction have to be compressed upon - @ loop wrap-around. 
This can be done in the process of reduction - @ to minimize amount of instructions [as well as amount of - @ 128-bit instructions, which benefits low-end processors], but - @ one has to watch for H2 (which is narrower than H0) and 5*H4 - @ not being wider than 58 bits, so that result of right shift - @ by 26 bits fits in 32 bits. This is also useful on x86, - @ because it allows to use paddd in place for paddq, which - @ benefits Atom, where paddq is ridiculously slow. - - vshr.u64 q15,q8,#26 - vmovn.i64 d16,q8 - vshr.u64 q4,q5,#26 - vmovn.i64 d10,q5 - vadd.i64 q9,q9,q15 @ h3 -> h4 - vbic.i32 d16,#0xfc000000 @ &=0x03ffffff - vadd.i64 q6,q6,q4 @ h0 -> h1 - vbic.i32 d10,#0xfc000000 - - vshrn.u64 d30,q9,#26 - vmovn.i64 d18,q9 - vshr.u64 q4,q6,#26 - vmovn.i64 d12,q6 - vadd.i64 q7,q7,q4 @ h1 -> h2 - vbic.i32 d18,#0xfc000000 - vbic.i32 d12,#0xfc000000 - - vadd.i32 d10,d10,d30 - vshl.u32 d30,d30,#2 - vshrn.u64 d8,q7,#26 - vmovn.i64 d14,q7 - vadd.i32 d10,d10,d30 @ h4 -> h0 - vadd.i32 d16,d16,d8 @ h2 -> h3 - vbic.i32 d14,#0xfc000000 - - vshr.u32 d30,d10,#26 - vbic.i32 d10,#0xfc000000 - vshr.u32 d8,d16,#26 - vbic.i32 d16,#0xfc000000 - vadd.i32 d12,d12,d30 @ h0 -> h1 - vadd.i32 d18,d18,d8 @ h3 -> h4 - - subs r5,r5,#1 - beq .Lsquare_break_neon - - add r6,r0,#(48+0*9*4) - add r7,r0,#(48+1*9*4) - - vtrn.32 d0,d10 @ r^2:r^1 - vtrn.32 d3,d14 - vtrn.32 d5,d16 - vtrn.32 d1,d12 - vtrn.32 d7,d18 - - vshl.u32 d4,d3,#2 @ *5 - vshl.u32 d6,d5,#2 - vshl.u32 d2,d1,#2 - vshl.u32 d8,d7,#2 - vadd.i32 d4,d4,d3 - vadd.i32 d2,d2,d1 - vadd.i32 d6,d6,d5 - vadd.i32 d8,d8,d7 - - vst4.32 {d0[0],d1[0],d2[0],d3[0]},[r6]! - vst4.32 {d0[1],d1[1],d2[1],d3[1]},[r7]! - vst4.32 {d4[0],d5[0],d6[0],d7[0]},[r6]! - vst4.32 {d4[1],d5[1],d6[1],d7[1]},[r7]! - vst1.32 {d8[0]},[r6,:32] - vst1.32 {d8[1]},[r7,:32] - - b .Lsquare_neon - -.align 4 -.Lsquare_break_neon: - add r6,r0,#(48+2*4*9) - add r7,r0,#(48+3*4*9) - - vmov d0,d10 @ r^4:r^3 - vshl.u32 d2,d12,#2 @ *5 - vmov d1,d12 - vshl.u32 d4,d14,#2 - vmov d3,d14 - vshl.u32 d6,d16,#2 - vmov d5,d16 - vshl.u32 d8,d18,#2 - vmov d7,d18 - vadd.i32 d2,d2,d12 - vadd.i32 d4,d4,d14 - vadd.i32 d6,d6,d16 - vadd.i32 d8,d8,d18 - - vst4.32 {d0[0],d1[0],d2[0],d3[0]},[r6]! - vst4.32 {d0[1],d1[1],d2[1],d3[1]},[r7]! - vst4.32 {d4[0],d5[0],d6[0],d7[0]},[r6]! - vst4.32 {d4[1],d5[1],d6[1],d7[1]},[r7]! - vst1.32 {d8[0]},[r6] - vst1.32 {d8[1]},[r7] - -.Lno_init_neon: - bx lr @ bx lr -.size poly1305_init_neon,.-poly1305_init_neon - -.type poly1305_blocks_neon,%function -.align 5 -poly1305_blocks_neon: -.Lpoly1305_blocks_neon: - ldr ip,[r0,#36] @ is_base2_26 - - cmp r2,#64 - blo .Lpoly1305_blocks - - stmdb sp!,{r4-r7} - vstmdb sp!,{d8-d15} @ ABI specification says so - - tst ip,ip @ is_base2_26? 
- bne .Lbase2_26_neon - - stmdb sp!,{r1-r3,lr} - bl .Lpoly1305_init_neon - - ldr r4,[r0,#0] @ load hash value base 2^32 - ldr r5,[r0,#4] - ldr r6,[r0,#8] - ldr r7,[r0,#12] - ldr ip,[r0,#16] - - and r2,r4,#0x03ffffff @ base 2^32 -> base 2^26 - mov r3,r4,lsr#26 - veor d10,d10,d10 - mov r4,r5,lsr#20 - orr r3,r3,r5,lsl#6 - veor d12,d12,d12 - mov r5,r6,lsr#14 - orr r4,r4,r6,lsl#12 - veor d14,d14,d14 - mov r6,r7,lsr#8 - orr r5,r5,r7,lsl#18 - veor d16,d16,d16 - and r3,r3,#0x03ffffff - orr r6,r6,ip,lsl#24 - veor d18,d18,d18 - and r4,r4,#0x03ffffff - mov r1,#1 - and r5,r5,#0x03ffffff - str r1,[r0,#36] @ set is_base2_26 - - vmov.32 d10[0],r2 - vmov.32 d12[0],r3 - vmov.32 d14[0],r4 - vmov.32 d16[0],r5 - vmov.32 d18[0],r6 - adr r5,.Lzeros - - ldmia sp!,{r1-r3,lr} - b .Lhash_loaded - -.align 4 -.Lbase2_26_neon: - @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ - @ load hash value - - veor d10,d10,d10 - veor d12,d12,d12 - veor d14,d14,d14 - veor d16,d16,d16 - veor d18,d18,d18 - vld4.32 {d10[0],d12[0],d14[0],d16[0]},[r0]! - adr r5,.Lzeros - vld1.32 {d18[0]},[r0] - sub r0,r0,#16 @ rewind - -.Lhash_loaded: - add r4,r1,#32 - mov r3,r3,lsl#24 - tst r2,#31 - beq .Leven - - vld4.32 {d20[0],d22[0],d24[0],d26[0]},[r1]! - vmov.32 d28[0],r3 - sub r2,r2,#16 - add r4,r1,#32 - -# ifdef __ARMEB__ - vrev32.8 q10,q10 - vrev32.8 q13,q13 - vrev32.8 q11,q11 - vrev32.8 q12,q12 -# endif - vsri.u32 d28,d26,#8 @ base 2^32 -> base 2^26 - vshl.u32 d26,d26,#18 - - vsri.u32 d26,d24,#14 - vshl.u32 d24,d24,#12 - vadd.i32 d29,d28,d18 @ add hash value and move to #hi - - vbic.i32 d26,#0xfc000000 - vsri.u32 d24,d22,#20 - vshl.u32 d22,d22,#6 - - vbic.i32 d24,#0xfc000000 - vsri.u32 d22,d20,#26 - vadd.i32 d27,d26,d16 - - vbic.i32 d20,#0xfc000000 - vbic.i32 d22,#0xfc000000 - vadd.i32 d25,d24,d14 - - vadd.i32 d21,d20,d10 - vadd.i32 d23,d22,d12 - - mov r7,r5 - add r6,r0,#48 - - cmp r2,r2 - b .Long_tail - -.align 4 -.Leven: - subs r2,r2,#64 - it lo - movlo r4,r5 - - vmov.i32 q14,#1<<24 @ padbit, yes, always - vld4.32 {d20,d22,d24,d26},[r1] @ inp[0:1] - add r1,r1,#64 - vld4.32 {d21,d23,d25,d27},[r4] @ inp[2:3] (or 0) - add r4,r4,#64 - itt hi - addhi r7,r0,#(48+1*9*4) - addhi r6,r0,#(48+3*9*4) - -# ifdef __ARMEB__ - vrev32.8 q10,q10 - vrev32.8 q13,q13 - vrev32.8 q11,q11 - vrev32.8 q12,q12 -# endif - vsri.u32 q14,q13,#8 @ base 2^32 -> base 2^26 - vshl.u32 q13,q13,#18 - - vsri.u32 q13,q12,#14 - vshl.u32 q12,q12,#12 - - vbic.i32 q13,#0xfc000000 - vsri.u32 q12,q11,#20 - vshl.u32 q11,q11,#6 - - vbic.i32 q12,#0xfc000000 - vsri.u32 q11,q10,#26 - - vbic.i32 q10,#0xfc000000 - vbic.i32 q11,#0xfc000000 - - bls .Lskip_loop - - vld4.32 {d0[1],d1[1],d2[1],d3[1]},[r7]! @ load r^2 - vld4.32 {d0[0],d1[0],d2[0],d3[0]},[r6]! @ load r^4 - vld4.32 {d4[1],d5[1],d6[1],d7[1]},[r7]! - vld4.32 {d4[0],d5[0],d6[0],d7[0]},[r6]! - b .Loop_neon - -.align 5 -.Loop_neon: - @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ - @ ((inp[0]*r^4+inp[2]*r^2+inp[4])*r^4+inp[6]*r^2 - @ ((inp[1]*r^4+inp[3]*r^2+inp[5])*r^3+inp[7]*r - @ ___________________/ - @ ((inp[0]*r^4+inp[2]*r^2+inp[4])*r^4+inp[6]*r^2+inp[8])*r^2 - @ ((inp[1]*r^4+inp[3]*r^2+inp[5])*r^4+inp[7]*r^2+inp[9])*r - @ ___________________/ ____________________/ - @ - @ Note that we start with inp[2:3]*r^2. This is because it - @ doesn't depend on reduction in previous iteration. 
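
In scalar terms, the two NEON lanes evaluate the even- and odd-indexed
blocks separately and are merged with one extra multiply by r^2 and r.
A self-contained toy check of that identity (illustrative only: a small
prime stands in for 2^130 - 5, and the hypothetical madd() is one
Poly1305 block step):

    #include <assert.h>
    #include <stdint.h>

    #define P 1000003ULL            /* toy prime standing in for 2^130 - 5 */

    static uint64_t madd(uint64_t h, uint64_t m, uint64_t r)
    {
        return (h + m) % P * r % P; /* h = (h + m) * r mod P */
    }

    int main(void)
    {
        uint64_t r = 12345, H = 777;        /* arbitrary toy values */
        uint64_t m[4] = { 11, 22, 33, 44 }; /* four message blocks  */
        uint64_t h = H, r2 = r * r % P;
        int i;

        /* serial Horner form: ((((H+m0)r + m1)r + m2)r + m3)r */
        for (i = 0; i < 4; i++)
            h = madd(h, m[i], r);

        /* two-lane form, as in .Loop_neon above */
        uint64_t lane0 = (madd(H, m[0], r2) + m[2]) % P; /* (H+m0)r^2 + m2 */
        uint64_t lane1 = (m[1] * r2 + m[3]) % P;         /* m1*r^2    + m3 */
        uint64_t v = (lane0 * r2 + lane1 * r) % P;

        assert(h == v);  /* both equal (H+m0)r^4 + m1*r^3 + m2*r^2 + m3*r */
        return 0;
    }
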
- @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ - @ d4 = h4*r0 + h3*r1 + h2*r2 + h1*r3 + h0*r4 - @ d3 = h3*r0 + h2*r1 + h1*r2 + h0*r3 + h4*5*r4 - @ d2 = h2*r0 + h1*r1 + h0*r2 + h4*5*r3 + h3*5*r4 - @ d1 = h1*r0 + h0*r1 + h4*5*r2 + h3*5*r3 + h2*5*r4 - @ d0 = h0*r0 + h4*5*r1 + h3*5*r2 + h2*5*r3 + h1*5*r4 - - @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ - @ inp[2:3]*r^2 - - vadd.i32 d24,d24,d14 @ accumulate inp[0:1] - vmull.u32 q7,d25,d0[1] - vadd.i32 d20,d20,d10 - vmull.u32 q5,d21,d0[1] - vadd.i32 d26,d26,d16 - vmull.u32 q8,d27,d0[1] - vmlal.u32 q7,d23,d1[1] - vadd.i32 d22,d22,d12 - vmull.u32 q6,d23,d0[1] - - vadd.i32 d28,d28,d18 - vmull.u32 q9,d29,d0[1] - subs r2,r2,#64 - vmlal.u32 q5,d29,d2[1] - it lo - movlo r4,r5 - vmlal.u32 q8,d25,d1[1] - vld1.32 d8[1],[r7,:32] - vmlal.u32 q6,d21,d1[1] - vmlal.u32 q9,d27,d1[1] - - vmlal.u32 q5,d27,d4[1] - vmlal.u32 q8,d23,d3[1] - vmlal.u32 q9,d25,d3[1] - vmlal.u32 q6,d29,d4[1] - vmlal.u32 q7,d21,d3[1] - - vmlal.u32 q8,d21,d5[1] - vmlal.u32 q5,d25,d6[1] - vmlal.u32 q9,d23,d5[1] - vmlal.u32 q6,d27,d6[1] - vmlal.u32 q7,d29,d6[1] - - vmlal.u32 q8,d29,d8[1] - vmlal.u32 q5,d23,d8[1] - vmlal.u32 q9,d21,d7[1] - vmlal.u32 q6,d25,d8[1] - vmlal.u32 q7,d27,d8[1] - - vld4.32 {d21,d23,d25,d27},[r4] @ inp[2:3] (or 0) - add r4,r4,#64 - - @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ - @ (hash+inp[0:1])*r^4 and accumulate - - vmlal.u32 q8,d26,d0[0] - vmlal.u32 q5,d20,d0[0] - vmlal.u32 q9,d28,d0[0] - vmlal.u32 q6,d22,d0[0] - vmlal.u32 q7,d24,d0[0] - vld1.32 d8[0],[r6,:32] - - vmlal.u32 q8,d24,d1[0] - vmlal.u32 q5,d28,d2[0] - vmlal.u32 q9,d26,d1[0] - vmlal.u32 q6,d20,d1[0] - vmlal.u32 q7,d22,d1[0] - - vmlal.u32 q8,d22,d3[0] - vmlal.u32 q5,d26,d4[0] - vmlal.u32 q9,d24,d3[0] - vmlal.u32 q6,d28,d4[0] - vmlal.u32 q7,d20,d3[0] - - vmlal.u32 q8,d20,d5[0] - vmlal.u32 q5,d24,d6[0] - vmlal.u32 q9,d22,d5[0] - vmlal.u32 q6,d26,d6[0] - vmlal.u32 q8,d28,d8[0] - - vmlal.u32 q7,d28,d6[0] - vmlal.u32 q5,d22,d8[0] - vmlal.u32 q9,d20,d7[0] - vmov.i32 q14,#1<<24 @ padbit, yes, always - vmlal.u32 q6,d24,d8[0] - vmlal.u32 q7,d26,d8[0] - - vld4.32 {d20,d22,d24,d26},[r1] @ inp[0:1] - add r1,r1,#64 -# ifdef __ARMEB__ - vrev32.8 q10,q10 - vrev32.8 q11,q11 - vrev32.8 q12,q12 - vrev32.8 q13,q13 -# endif - - @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ - @ lazy reduction interleaved with base 2^32 -> base 2^26 of - @ inp[0:3] previously loaded to q10-q13 and smashed to q10-q14. 
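
A scalar model of the lazy reduction performed by the vshr/vbic/vadd
sequence below (a sketch, assuming five 64-bit accumulators d[0..4] as
produced by the vmlal chains; the carries happen in the same order, up
to the NEON interleaving, and limbs are left at 26 bits plus a little
slack rather than fully reduced):

    #include <stdint.h>

    #define MASK26 0x3ffffffULL

    static void lazy_reduce_sketch(uint64_t d[5])
    {
        uint64_t c;

        c = d[3] >> 26; d[3] &= MASK26; d[4] += c;            /* h3 -> h4 */
        c = d[0] >> 26; d[0] &= MASK26; d[1] += c;            /* h0 -> h1 */
        /* 2^130 == 5 mod 2^130-5, so the top carry wraps times 5;
         * c + (c << 2) is the vshl #2 / vadd pair used below */
        c = d[4] >> 26; d[4] &= MASK26; d[0] += c + (c << 2); /* h4 -> h0 */
        c = d[1] >> 26; d[1] &= MASK26; d[2] += c;            /* h1 -> h2 */
        c = d[2] >> 26; d[2] &= MASK26; d[3] += c;            /* h2 -> h3 */
        c = d[0] >> 26; d[0] &= MASK26; d[1] += c;            /* h0 -> h1 */
        c = d[3] >> 26; d[3] &= MASK26; d[4] += c;            /* h3 -> h4 */
    }
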
- - vshr.u64 q15,q8,#26 - vmovn.i64 d16,q8 - vshr.u64 q4,q5,#26 - vmovn.i64 d10,q5 - vadd.i64 q9,q9,q15 @ h3 -> h4 - vbic.i32 d16,#0xfc000000 - vsri.u32 q14,q13,#8 @ base 2^32 -> base 2^26 - vadd.i64 q6,q6,q4 @ h0 -> h1 - vshl.u32 q13,q13,#18 - vbic.i32 d10,#0xfc000000 - - vshrn.u64 d30,q9,#26 - vmovn.i64 d18,q9 - vshr.u64 q4,q6,#26 - vmovn.i64 d12,q6 - vadd.i64 q7,q7,q4 @ h1 -> h2 - vsri.u32 q13,q12,#14 - vbic.i32 d18,#0xfc000000 - vshl.u32 q12,q12,#12 - vbic.i32 d12,#0xfc000000 - - vadd.i32 d10,d10,d30 - vshl.u32 d30,d30,#2 - vbic.i32 q13,#0xfc000000 - vshrn.u64 d8,q7,#26 - vmovn.i64 d14,q7 - vaddl.u32 q5,d10,d30 @ h4 -> h0 [widen for a sec] - vsri.u32 q12,q11,#20 - vadd.i32 d16,d16,d8 @ h2 -> h3 - vshl.u32 q11,q11,#6 - vbic.i32 d14,#0xfc000000 - vbic.i32 q12,#0xfc000000 - - vshrn.u64 d30,q5,#26 @ re-narrow - vmovn.i64 d10,q5 - vsri.u32 q11,q10,#26 - vbic.i32 q10,#0xfc000000 - vshr.u32 d8,d16,#26 - vbic.i32 d16,#0xfc000000 - vbic.i32 d10,#0xfc000000 - vadd.i32 d12,d12,d30 @ h0 -> h1 - vadd.i32 d18,d18,d8 @ h3 -> h4 - vbic.i32 q11,#0xfc000000 - - bhi .Loop_neon - -.Lskip_loop: - @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ - @ multiply (inp[0:1]+hash) or inp[2:3] by r^2:r^1 - - add r7,r0,#(48+0*9*4) - add r6,r0,#(48+1*9*4) - adds r2,r2,#32 - it ne - movne r2,#0 - bne .Long_tail - - vadd.i32 d25,d24,d14 @ add hash value and move to #hi - vadd.i32 d21,d20,d10 - vadd.i32 d27,d26,d16 - vadd.i32 d23,d22,d12 - vadd.i32 d29,d28,d18 - -.Long_tail: - vld4.32 {d0[1],d1[1],d2[1],d3[1]},[r7]! @ load r^1 - vld4.32 {d0[0],d1[0],d2[0],d3[0]},[r6]! @ load r^2 - - vadd.i32 d24,d24,d14 @ can be redundant - vmull.u32 q7,d25,d0 - vadd.i32 d20,d20,d10 - vmull.u32 q5,d21,d0 - vadd.i32 d26,d26,d16 - vmull.u32 q8,d27,d0 - vadd.i32 d22,d22,d12 - vmull.u32 q6,d23,d0 - vadd.i32 d28,d28,d18 - vmull.u32 q9,d29,d0 - - vmlal.u32 q5,d29,d2 - vld4.32 {d4[1],d5[1],d6[1],d7[1]},[r7]! - vmlal.u32 q8,d25,d1 - vld4.32 {d4[0],d5[0],d6[0],d7[0]},[r6]! - vmlal.u32 q6,d21,d1 - vmlal.u32 q9,d27,d1 - vmlal.u32 q7,d23,d1 - - vmlal.u32 q8,d23,d3 - vld1.32 d8[1],[r7,:32] - vmlal.u32 q5,d27,d4 - vld1.32 d8[0],[r6,:32] - vmlal.u32 q9,d25,d3 - vmlal.u32 q6,d29,d4 - vmlal.u32 q7,d21,d3 - - vmlal.u32 q8,d21,d5 - it ne - addne r7,r0,#(48+2*9*4) - vmlal.u32 q5,d25,d6 - it ne - addne r6,r0,#(48+3*9*4) - vmlal.u32 q9,d23,d5 - vmlal.u32 q6,d27,d6 - vmlal.u32 q7,d29,d6 - - vmlal.u32 q8,d29,d8 - vorn q0,q0,q0 @ all-ones, can be redundant - vmlal.u32 q5,d23,d8 - vshr.u64 q0,q0,#38 - vmlal.u32 q9,d21,d7 - vmlal.u32 q6,d25,d8 - vmlal.u32 q7,d27,d8 - - beq .Lshort_tail - - @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ - @ (hash+inp[0:1])*r^4:r^3 and accumulate - - vld4.32 {d0[1],d1[1],d2[1],d3[1]},[r7]! @ load r^3 - vld4.32 {d0[0],d1[0],d2[0],d3[0]},[r6]! @ load r^4 - - vmlal.u32 q7,d24,d0 - vmlal.u32 q5,d20,d0 - vmlal.u32 q8,d26,d0 - vmlal.u32 q6,d22,d0 - vmlal.u32 q9,d28,d0 - - vmlal.u32 q5,d28,d2 - vld4.32 {d4[1],d5[1],d6[1],d7[1]},[r7]! - vmlal.u32 q8,d24,d1 - vld4.32 {d4[0],d5[0],d6[0],d7[0]},[r6]! 
- vmlal.u32 q6,d20,d1 - vmlal.u32 q9,d26,d1 - vmlal.u32 q7,d22,d1 - - vmlal.u32 q8,d22,d3 - vld1.32 d8[1],[r7,:32] - vmlal.u32 q5,d26,d4 - vld1.32 d8[0],[r6,:32] - vmlal.u32 q9,d24,d3 - vmlal.u32 q6,d28,d4 - vmlal.u32 q7,d20,d3 - - vmlal.u32 q8,d20,d5 - vmlal.u32 q5,d24,d6 - vmlal.u32 q9,d22,d5 - vmlal.u32 q6,d26,d6 - vmlal.u32 q7,d28,d6 - - vmlal.u32 q8,d28,d8 - vorn q0,q0,q0 @ all-ones - vmlal.u32 q5,d22,d8 - vshr.u64 q0,q0,#38 - vmlal.u32 q9,d20,d7 - vmlal.u32 q6,d24,d8 - vmlal.u32 q7,d26,d8 - -.Lshort_tail: - @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ - @ horizontal addition - - vadd.i64 d16,d16,d17 - vadd.i64 d10,d10,d11 - vadd.i64 d18,d18,d19 - vadd.i64 d12,d12,d13 - vadd.i64 d14,d14,d15 - - @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ - @ lazy reduction, but without narrowing - - vshr.u64 q15,q8,#26 - vand.i64 q8,q8,q0 - vshr.u64 q4,q5,#26 - vand.i64 q5,q5,q0 - vadd.i64 q9,q9,q15 @ h3 -> h4 - vadd.i64 q6,q6,q4 @ h0 -> h1 - - vshr.u64 q15,q9,#26 - vand.i64 q9,q9,q0 - vshr.u64 q4,q6,#26 - vand.i64 q6,q6,q0 - vadd.i64 q7,q7,q4 @ h1 -> h2 - - vadd.i64 q5,q5,q15 - vshl.u64 q15,q15,#2 - vshr.u64 q4,q7,#26 - vand.i64 q7,q7,q0 - vadd.i64 q5,q5,q15 @ h4 -> h0 - vadd.i64 q8,q8,q4 @ h2 -> h3 - - vshr.u64 q15,q5,#26 - vand.i64 q5,q5,q0 - vshr.u64 q4,q8,#26 - vand.i64 q8,q8,q0 - vadd.i64 q6,q6,q15 @ h0 -> h1 - vadd.i64 q9,q9,q4 @ h3 -> h4 - - cmp r2,#0 - bne .Leven - - @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ - @ store hash value - - vst4.32 {d10[0],d12[0],d14[0],d16[0]},[r0]! - vst1.32 {d18[0]},[r0] - - vldmia sp!,{d8-d15} @ epilogue - ldmia sp!,{r4-r7} - bx lr @ bx lr -.size poly1305_blocks_neon,.-poly1305_blocks_neon - -.align 5 -.Lzeros: -.long 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 -#ifndef __KERNEL__ -.LOPENSSL_armcap: -# ifdef _WIN32 -.word OPENSSL_armcap_P -# else -.word OPENSSL_armcap_P-.Lpoly1305_init -# endif -.comm OPENSSL_armcap_P,4,4 -.hidden OPENSSL_armcap_P -#endif -#endif -.asciz "Poly1305 for ARMv4/NEON, CRYPTOGAMS by @dot-asm" -.align 2 diff --git a/arch/arm/crypto/sha256-core.S_shipped b/arch/arm/crypto/sha256-core.S_shipped deleted file mode 100644 index 6363014a50d79..0000000000000 --- a/arch/arm/crypto/sha256-core.S_shipped +++ /dev/null @@ -1,2816 +0,0 @@ -@ SPDX-License-Identifier: GPL-2.0 - -@ This code is taken from the OpenSSL project but the author (Andy Polyakov) -@ has relicensed it under the GPLv2. Therefore this program is free software; -@ you can redistribute it and/or modify it under the terms of the GNU General -@ Public License version 2 as published by the Free Software Foundation. -@ -@ The original headers, including the original license headers, are -@ included below for completeness. - -@ ==================================================================== -@ Written by Andy Polyakov for the OpenSSL -@ project. The module is, however, dual licensed under OpenSSL and -@ CRYPTOGAMS licenses depending on where you obtain it. For further -@ details see https://www.openssl.org/~appro/cryptogams/. -@ ==================================================================== - -@ SHA256 block procedure for ARMv4. May 2007. - -@ Performance is ~2x better than gcc 3.4 generated code and in "abso- -@ lute" terms is ~2250 cycles per 64-byte block or ~35 cycles per -@ byte [on single-issue Xscale PXA250 core]. - -@ July 2010. -@ -@ Rescheduling for dual-issue pipeline resulted in 22% improvement on -@ Cortex A8 core and ~20 cycles per processed byte. - -@ February 2011. 
-@ -@ Profiler-assisted and platform-specific optimization resulted in 16% -@ improvement on Cortex A8 core and ~15.4 cycles per processed byte. - -@ September 2013. -@ -@ Add NEON implementation. On Cortex A8 it was measured to process one -@ byte in 12.5 cycles or 23% faster than integer-only code. Snapdragon -@ S4 does it in 12.5 cycles too, but it's 50% faster than integer-only -@ code (meaning that latter performs sub-optimally, nothing was done -@ about it). - -@ May 2014. -@ -@ Add ARMv8 code path performing at 2.0 cpb on Apple A7. - -#ifndef __KERNEL__ -# include "arm_arch.h" -#else -# define __ARM_ARCH__ __LINUX_ARM_ARCH__ -# define __ARM_MAX_ARCH__ 7 -#endif - -.text -#if __ARM_ARCH__<7 -.code 32 -#else -.syntax unified -# ifdef __thumb2__ -.thumb -# else -.code 32 -# endif -#endif - -.type K256,%object -.align 5 -K256: -.word 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 -.word 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 -.word 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 -.word 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 -.word 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc -.word 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da -.word 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 -.word 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 -.word 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 -.word 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 -.word 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 -.word 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 -.word 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 -.word 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 -.word 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 -.word 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 -.size K256,.-K256 -.word 0 @ terminator -#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) -.LOPENSSL_armcap: -.word OPENSSL_armcap_P-sha256_block_data_order -#endif -.align 5 - -.global sha256_block_data_order -.type sha256_block_data_order,%function -sha256_block_data_order: -.Lsha256_block_data_order: -#if __ARM_ARCH__<7 - sub r3,pc,#8 @ sha256_block_data_order -#else - adr r3,.Lsha256_block_data_order -#endif -#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) - ldr r12,.LOPENSSL_armcap - ldr r12,[r3,r12] @ OPENSSL_armcap_P - tst r12,#ARMV8_SHA256 - bne .LARMv8 - tst r12,#ARMV7_NEON - bne .LNEON -#endif - add r2,r1,r2,lsl#6 @ len to point at the end of inp - stmdb sp!,{r0,r1,r2,r4-r11,lr} - ldmia r0,{r4,r5,r6,r7,r8,r9,r10,r11} - sub r14,r3,#256+32 @ K256 - sub sp,sp,#16*4 @ alloca(X[16]) -.Loop: -# if __ARM_ARCH__>=7 - ldr r2,[r1],#4 -# else - ldrb r2,[r1,#3] -# endif - eor r3,r5,r6 @ magic - eor r12,r12,r12 -#if __ARM_ARCH__>=7 - @ ldr r2,[r1],#4 @ 0 -# if 0==15 - str r1,[sp,#17*4] @ make room for r1 -# endif - eor r0,r8,r8,ror#5 - add r4,r4,r12 @ h+=Maj(a,b,c) from the past - eor r0,r0,r8,ror#19 @ Sigma1(e) -# ifndef __ARMEB__ - rev r2,r2 -# endif -#else - @ ldrb r2,[r1,#3] @ 0 - add r4,r4,r12 @ h+=Maj(a,b,c) from the past - ldrb r12,[r1,#2] - ldrb r0,[r1,#1] - orr r2,r2,r12,lsl#8 - ldrb r12,[r1],#4 - orr r2,r2,r0,lsl#16 -# if 0==15 - str r1,[sp,#17*4] @ make room for r1 -# endif - eor r0,r8,r8,ror#5 - orr r2,r2,r12,lsl#24 - eor r0,r0,r8,ror#19 @ Sigma1(e) -#endif - ldr r12,[r14],#4 @ *K256++ - add r11,r11,r2 @ h+=X[i] - str r2,[sp,#0*4] - eor r2,r9,r10 - add r11,r11,r0,ror#6 @ h+=Sigma1(e) - and r2,r2,r8 - add r11,r11,r12 @ h+=K256[i] - eor r2,r2,r10 @ Ch(e,f,g) - eor r0,r4,r4,ror#11 - add r11,r11,r2 @ h+=Ch(e,f,g) -#if 0==31 - and r12,r12,#0xff - cmp r12,#0xf2 @ done? 
-#endif -#if 0<15 -# if __ARM_ARCH__>=7 - ldr r2,[r1],#4 @ prefetch -# else - ldrb r2,[r1,#3] -# endif - eor r12,r4,r5 @ a^b, b^c in next round -#else - ldr r2,[sp,#2*4] @ from future BODY_16_xx - eor r12,r4,r5 @ a^b, b^c in next round - ldr r1,[sp,#15*4] @ from future BODY_16_xx -#endif - eor r0,r0,r4,ror#20 @ Sigma0(a) - and r3,r3,r12 @ (b^c)&=(a^b) - add r7,r7,r11 @ d+=h - eor r3,r3,r5 @ Maj(a,b,c) - add r11,r11,r0,ror#2 @ h+=Sigma0(a) - @ add r11,r11,r3 @ h+=Maj(a,b,c) -#if __ARM_ARCH__>=7 - @ ldr r2,[r1],#4 @ 1 -# if 1==15 - str r1,[sp,#17*4] @ make room for r1 -# endif - eor r0,r7,r7,ror#5 - add r11,r11,r3 @ h+=Maj(a,b,c) from the past - eor r0,r0,r7,ror#19 @ Sigma1(e) -# ifndef __ARMEB__ - rev r2,r2 -# endif -#else - @ ldrb r2,[r1,#3] @ 1 - add r11,r11,r3 @ h+=Maj(a,b,c) from the past - ldrb r3,[r1,#2] - ldrb r0,[r1,#1] - orr r2,r2,r3,lsl#8 - ldrb r3,[r1],#4 - orr r2,r2,r0,lsl#16 -# if 1==15 - str r1,[sp,#17*4] @ make room for r1 -# endif - eor r0,r7,r7,ror#5 - orr r2,r2,r3,lsl#24 - eor r0,r0,r7,ror#19 @ Sigma1(e) -#endif - ldr r3,[r14],#4 @ *K256++ - add r10,r10,r2 @ h+=X[i] - str r2,[sp,#1*4] - eor r2,r8,r9 - add r10,r10,r0,ror#6 @ h+=Sigma1(e) - and r2,r2,r7 - add r10,r10,r3 @ h+=K256[i] - eor r2,r2,r9 @ Ch(e,f,g) - eor r0,r11,r11,ror#11 - add r10,r10,r2 @ h+=Ch(e,f,g) -#if 1==31 - and r3,r3,#0xff - cmp r3,#0xf2 @ done? -#endif -#if 1<15 -# if __ARM_ARCH__>=7 - ldr r2,[r1],#4 @ prefetch -# else - ldrb r2,[r1,#3] -# endif - eor r3,r11,r4 @ a^b, b^c in next round -#else - ldr r2,[sp,#3*4] @ from future BODY_16_xx - eor r3,r11,r4 @ a^b, b^c in next round - ldr r1,[sp,#0*4] @ from future BODY_16_xx -#endif - eor r0,r0,r11,ror#20 @ Sigma0(a) - and r12,r12,r3 @ (b^c)&=(a^b) - add r6,r6,r10 @ d+=h - eor r12,r12,r4 @ Maj(a,b,c) - add r10,r10,r0,ror#2 @ h+=Sigma0(a) - @ add r10,r10,r12 @ h+=Maj(a,b,c) -#if __ARM_ARCH__>=7 - @ ldr r2,[r1],#4 @ 2 -# if 2==15 - str r1,[sp,#17*4] @ make room for r1 -# endif - eor r0,r6,r6,ror#5 - add r10,r10,r12 @ h+=Maj(a,b,c) from the past - eor r0,r0,r6,ror#19 @ Sigma1(e) -# ifndef __ARMEB__ - rev r2,r2 -# endif -#else - @ ldrb r2,[r1,#3] @ 2 - add r10,r10,r12 @ h+=Maj(a,b,c) from the past - ldrb r12,[r1,#2] - ldrb r0,[r1,#1] - orr r2,r2,r12,lsl#8 - ldrb r12,[r1],#4 - orr r2,r2,r0,lsl#16 -# if 2==15 - str r1,[sp,#17*4] @ make room for r1 -# endif - eor r0,r6,r6,ror#5 - orr r2,r2,r12,lsl#24 - eor r0,r0,r6,ror#19 @ Sigma1(e) -#endif - ldr r12,[r14],#4 @ *K256++ - add r9,r9,r2 @ h+=X[i] - str r2,[sp,#2*4] - eor r2,r7,r8 - add r9,r9,r0,ror#6 @ h+=Sigma1(e) - and r2,r2,r6 - add r9,r9,r12 @ h+=K256[i] - eor r2,r2,r8 @ Ch(e,f,g) - eor r0,r10,r10,ror#11 - add r9,r9,r2 @ h+=Ch(e,f,g) -#if 2==31 - and r12,r12,#0xff - cmp r12,#0xf2 @ done? 
-#endif -#if 2<15 -# if __ARM_ARCH__>=7 - ldr r2,[r1],#4 @ prefetch -# else - ldrb r2,[r1,#3] -# endif - eor r12,r10,r11 @ a^b, b^c in next round -#else - ldr r2,[sp,#4*4] @ from future BODY_16_xx - eor r12,r10,r11 @ a^b, b^c in next round - ldr r1,[sp,#1*4] @ from future BODY_16_xx -#endif - eor r0,r0,r10,ror#20 @ Sigma0(a) - and r3,r3,r12 @ (b^c)&=(a^b) - add r5,r5,r9 @ d+=h - eor r3,r3,r11 @ Maj(a,b,c) - add r9,r9,r0,ror#2 @ h+=Sigma0(a) - @ add r9,r9,r3 @ h+=Maj(a,b,c) -#if __ARM_ARCH__>=7 - @ ldr r2,[r1],#4 @ 3 -# if 3==15 - str r1,[sp,#17*4] @ make room for r1 -# endif - eor r0,r5,r5,ror#5 - add r9,r9,r3 @ h+=Maj(a,b,c) from the past - eor r0,r0,r5,ror#19 @ Sigma1(e) -# ifndef __ARMEB__ - rev r2,r2 -# endif -#else - @ ldrb r2,[r1,#3] @ 3 - add r9,r9,r3 @ h+=Maj(a,b,c) from the past - ldrb r3,[r1,#2] - ldrb r0,[r1,#1] - orr r2,r2,r3,lsl#8 - ldrb r3,[r1],#4 - orr r2,r2,r0,lsl#16 -# if 3==15 - str r1,[sp,#17*4] @ make room for r1 -# endif - eor r0,r5,r5,ror#5 - orr r2,r2,r3,lsl#24 - eor r0,r0,r5,ror#19 @ Sigma1(e) -#endif - ldr r3,[r14],#4 @ *K256++ - add r8,r8,r2 @ h+=X[i] - str r2,[sp,#3*4] - eor r2,r6,r7 - add r8,r8,r0,ror#6 @ h+=Sigma1(e) - and r2,r2,r5 - add r8,r8,r3 @ h+=K256[i] - eor r2,r2,r7 @ Ch(e,f,g) - eor r0,r9,r9,ror#11 - add r8,r8,r2 @ h+=Ch(e,f,g) -#if 3==31 - and r3,r3,#0xff - cmp r3,#0xf2 @ done? -#endif -#if 3<15 -# if __ARM_ARCH__>=7 - ldr r2,[r1],#4 @ prefetch -# else - ldrb r2,[r1,#3] -# endif - eor r3,r9,r10 @ a^b, b^c in next round -#else - ldr r2,[sp,#5*4] @ from future BODY_16_xx - eor r3,r9,r10 @ a^b, b^c in next round - ldr r1,[sp,#2*4] @ from future BODY_16_xx -#endif - eor r0,r0,r9,ror#20 @ Sigma0(a) - and r12,r12,r3 @ (b^c)&=(a^b) - add r4,r4,r8 @ d+=h - eor r12,r12,r10 @ Maj(a,b,c) - add r8,r8,r0,ror#2 @ h+=Sigma0(a) - @ add r8,r8,r12 @ h+=Maj(a,b,c) -#if __ARM_ARCH__>=7 - @ ldr r2,[r1],#4 @ 4 -# if 4==15 - str r1,[sp,#17*4] @ make room for r1 -# endif - eor r0,r4,r4,ror#5 - add r8,r8,r12 @ h+=Maj(a,b,c) from the past - eor r0,r0,r4,ror#19 @ Sigma1(e) -# ifndef __ARMEB__ - rev r2,r2 -# endif -#else - @ ldrb r2,[r1,#3] @ 4 - add r8,r8,r12 @ h+=Maj(a,b,c) from the past - ldrb r12,[r1,#2] - ldrb r0,[r1,#1] - orr r2,r2,r12,lsl#8 - ldrb r12,[r1],#4 - orr r2,r2,r0,lsl#16 -# if 4==15 - str r1,[sp,#17*4] @ make room for r1 -# endif - eor r0,r4,r4,ror#5 - orr r2,r2,r12,lsl#24 - eor r0,r0,r4,ror#19 @ Sigma1(e) -#endif - ldr r12,[r14],#4 @ *K256++ - add r7,r7,r2 @ h+=X[i] - str r2,[sp,#4*4] - eor r2,r5,r6 - add r7,r7,r0,ror#6 @ h+=Sigma1(e) - and r2,r2,r4 - add r7,r7,r12 @ h+=K256[i] - eor r2,r2,r6 @ Ch(e,f,g) - eor r0,r8,r8,ror#11 - add r7,r7,r2 @ h+=Ch(e,f,g) -#if 4==31 - and r12,r12,#0xff - cmp r12,#0xf2 @ done? 
-#endif -#if 4<15 -# if __ARM_ARCH__>=7 - ldr r2,[r1],#4 @ prefetch -# else - ldrb r2,[r1,#3] -# endif - eor r12,r8,r9 @ a^b, b^c in next round -#else - ldr r2,[sp,#6*4] @ from future BODY_16_xx - eor r12,r8,r9 @ a^b, b^c in next round - ldr r1,[sp,#3*4] @ from future BODY_16_xx -#endif - eor r0,r0,r8,ror#20 @ Sigma0(a) - and r3,r3,r12 @ (b^c)&=(a^b) - add r11,r11,r7 @ d+=h - eor r3,r3,r9 @ Maj(a,b,c) - add r7,r7,r0,ror#2 @ h+=Sigma0(a) - @ add r7,r7,r3 @ h+=Maj(a,b,c) -#if __ARM_ARCH__>=7 - @ ldr r2,[r1],#4 @ 5 -# if 5==15 - str r1,[sp,#17*4] @ make room for r1 -# endif - eor r0,r11,r11,ror#5 - add r7,r7,r3 @ h+=Maj(a,b,c) from the past - eor r0,r0,r11,ror#19 @ Sigma1(e) -# ifndef __ARMEB__ - rev r2,r2 -# endif -#else - @ ldrb r2,[r1,#3] @ 5 - add r7,r7,r3 @ h+=Maj(a,b,c) from the past - ldrb r3,[r1,#2] - ldrb r0,[r1,#1] - orr r2,r2,r3,lsl#8 - ldrb r3,[r1],#4 - orr r2,r2,r0,lsl#16 -# if 5==15 - str r1,[sp,#17*4] @ make room for r1 -# endif - eor r0,r11,r11,ror#5 - orr r2,r2,r3,lsl#24 - eor r0,r0,r11,ror#19 @ Sigma1(e) -#endif - ldr r3,[r14],#4 @ *K256++ - add r6,r6,r2 @ h+=X[i] - str r2,[sp,#5*4] - eor r2,r4,r5 - add r6,r6,r0,ror#6 @ h+=Sigma1(e) - and r2,r2,r11 - add r6,r6,r3 @ h+=K256[i] - eor r2,r2,r5 @ Ch(e,f,g) - eor r0,r7,r7,ror#11 - add r6,r6,r2 @ h+=Ch(e,f,g) -#if 5==31 - and r3,r3,#0xff - cmp r3,#0xf2 @ done? -#endif -#if 5<15 -# if __ARM_ARCH__>=7 - ldr r2,[r1],#4 @ prefetch -# else - ldrb r2,[r1,#3] -# endif - eor r3,r7,r8 @ a^b, b^c in next round -#else - ldr r2,[sp,#7*4] @ from future BODY_16_xx - eor r3,r7,r8 @ a^b, b^c in next round - ldr r1,[sp,#4*4] @ from future BODY_16_xx -#endif - eor r0,r0,r7,ror#20 @ Sigma0(a) - and r12,r12,r3 @ (b^c)&=(a^b) - add r10,r10,r6 @ d+=h - eor r12,r12,r8 @ Maj(a,b,c) - add r6,r6,r0,ror#2 @ h+=Sigma0(a) - @ add r6,r6,r12 @ h+=Maj(a,b,c) -#if __ARM_ARCH__>=7 - @ ldr r2,[r1],#4 @ 6 -# if 6==15 - str r1,[sp,#17*4] @ make room for r1 -# endif - eor r0,r10,r10,ror#5 - add r6,r6,r12 @ h+=Maj(a,b,c) from the past - eor r0,r0,r10,ror#19 @ Sigma1(e) -# ifndef __ARMEB__ - rev r2,r2 -# endif -#else - @ ldrb r2,[r1,#3] @ 6 - add r6,r6,r12 @ h+=Maj(a,b,c) from the past - ldrb r12,[r1,#2] - ldrb r0,[r1,#1] - orr r2,r2,r12,lsl#8 - ldrb r12,[r1],#4 - orr r2,r2,r0,lsl#16 -# if 6==15 - str r1,[sp,#17*4] @ make room for r1 -# endif - eor r0,r10,r10,ror#5 - orr r2,r2,r12,lsl#24 - eor r0,r0,r10,ror#19 @ Sigma1(e) -#endif - ldr r12,[r14],#4 @ *K256++ - add r5,r5,r2 @ h+=X[i] - str r2,[sp,#6*4] - eor r2,r11,r4 - add r5,r5,r0,ror#6 @ h+=Sigma1(e) - and r2,r2,r10 - add r5,r5,r12 @ h+=K256[i] - eor r2,r2,r4 @ Ch(e,f,g) - eor r0,r6,r6,ror#11 - add r5,r5,r2 @ h+=Ch(e,f,g) -#if 6==31 - and r12,r12,#0xff - cmp r12,#0xf2 @ done? 
-#endif -#if 6<15 -# if __ARM_ARCH__>=7 - ldr r2,[r1],#4 @ prefetch -# else - ldrb r2,[r1,#3] -# endif - eor r12,r6,r7 @ a^b, b^c in next round -#else - ldr r2,[sp,#8*4] @ from future BODY_16_xx - eor r12,r6,r7 @ a^b, b^c in next round - ldr r1,[sp,#5*4] @ from future BODY_16_xx -#endif - eor r0,r0,r6,ror#20 @ Sigma0(a) - and r3,r3,r12 @ (b^c)&=(a^b) - add r9,r9,r5 @ d+=h - eor r3,r3,r7 @ Maj(a,b,c) - add r5,r5,r0,ror#2 @ h+=Sigma0(a) - @ add r5,r5,r3 @ h+=Maj(a,b,c) -#if __ARM_ARCH__>=7 - @ ldr r2,[r1],#4 @ 7 -# if 7==15 - str r1,[sp,#17*4] @ make room for r1 -# endif - eor r0,r9,r9,ror#5 - add r5,r5,r3 @ h+=Maj(a,b,c) from the past - eor r0,r0,r9,ror#19 @ Sigma1(e) -# ifndef __ARMEB__ - rev r2,r2 -# endif -#else - @ ldrb r2,[r1,#3] @ 7 - add r5,r5,r3 @ h+=Maj(a,b,c) from the past - ldrb r3,[r1,#2] - ldrb r0,[r1,#1] - orr r2,r2,r3,lsl#8 - ldrb r3,[r1],#4 - orr r2,r2,r0,lsl#16 -# if 7==15 - str r1,[sp,#17*4] @ make room for r1 -# endif - eor r0,r9,r9,ror#5 - orr r2,r2,r3,lsl#24 - eor r0,r0,r9,ror#19 @ Sigma1(e) -#endif - ldr r3,[r14],#4 @ *K256++ - add r4,r4,r2 @ h+=X[i] - str r2,[sp,#7*4] - eor r2,r10,r11 - add r4,r4,r0,ror#6 @ h+=Sigma1(e) - and r2,r2,r9 - add r4,r4,r3 @ h+=K256[i] - eor r2,r2,r11 @ Ch(e,f,g) - eor r0,r5,r5,ror#11 - add r4,r4,r2 @ h+=Ch(e,f,g) -#if 7==31 - and r3,r3,#0xff - cmp r3,#0xf2 @ done? -#endif -#if 7<15 -# if __ARM_ARCH__>=7 - ldr r2,[r1],#4 @ prefetch -# else - ldrb r2,[r1,#3] -# endif - eor r3,r5,r6 @ a^b, b^c in next round -#else - ldr r2,[sp,#9*4] @ from future BODY_16_xx - eor r3,r5,r6 @ a^b, b^c in next round - ldr r1,[sp,#6*4] @ from future BODY_16_xx -#endif - eor r0,r0,r5,ror#20 @ Sigma0(a) - and r12,r12,r3 @ (b^c)&=(a^b) - add r8,r8,r4 @ d+=h - eor r12,r12,r6 @ Maj(a,b,c) - add r4,r4,r0,ror#2 @ h+=Sigma0(a) - @ add r4,r4,r12 @ h+=Maj(a,b,c) -#if __ARM_ARCH__>=7 - @ ldr r2,[r1],#4 @ 8 -# if 8==15 - str r1,[sp,#17*4] @ make room for r1 -# endif - eor r0,r8,r8,ror#5 - add r4,r4,r12 @ h+=Maj(a,b,c) from the past - eor r0,r0,r8,ror#19 @ Sigma1(e) -# ifndef __ARMEB__ - rev r2,r2 -# endif -#else - @ ldrb r2,[r1,#3] @ 8 - add r4,r4,r12 @ h+=Maj(a,b,c) from the past - ldrb r12,[r1,#2] - ldrb r0,[r1,#1] - orr r2,r2,r12,lsl#8 - ldrb r12,[r1],#4 - orr r2,r2,r0,lsl#16 -# if 8==15 - str r1,[sp,#17*4] @ make room for r1 -# endif - eor r0,r8,r8,ror#5 - orr r2,r2,r12,lsl#24 - eor r0,r0,r8,ror#19 @ Sigma1(e) -#endif - ldr r12,[r14],#4 @ *K256++ - add r11,r11,r2 @ h+=X[i] - str r2,[sp,#8*4] - eor r2,r9,r10 - add r11,r11,r0,ror#6 @ h+=Sigma1(e) - and r2,r2,r8 - add r11,r11,r12 @ h+=K256[i] - eor r2,r2,r10 @ Ch(e,f,g) - eor r0,r4,r4,ror#11 - add r11,r11,r2 @ h+=Ch(e,f,g) -#if 8==31 - and r12,r12,#0xff - cmp r12,#0xf2 @ done? 
-#endif -#if 8<15 -# if __ARM_ARCH__>=7 - ldr r2,[r1],#4 @ prefetch -# else - ldrb r2,[r1,#3] -# endif - eor r12,r4,r5 @ a^b, b^c in next round -#else - ldr r2,[sp,#10*4] @ from future BODY_16_xx - eor r12,r4,r5 @ a^b, b^c in next round - ldr r1,[sp,#7*4] @ from future BODY_16_xx -#endif - eor r0,r0,r4,ror#20 @ Sigma0(a) - and r3,r3,r12 @ (b^c)&=(a^b) - add r7,r7,r11 @ d+=h - eor r3,r3,r5 @ Maj(a,b,c) - add r11,r11,r0,ror#2 @ h+=Sigma0(a) - @ add r11,r11,r3 @ h+=Maj(a,b,c) -#if __ARM_ARCH__>=7 - @ ldr r2,[r1],#4 @ 9 -# if 9==15 - str r1,[sp,#17*4] @ make room for r1 -# endif - eor r0,r7,r7,ror#5 - add r11,r11,r3 @ h+=Maj(a,b,c) from the past - eor r0,r0,r7,ror#19 @ Sigma1(e) -# ifndef __ARMEB__ - rev r2,r2 -# endif -#else - @ ldrb r2,[r1,#3] @ 9 - add r11,r11,r3 @ h+=Maj(a,b,c) from the past - ldrb r3,[r1,#2] - ldrb r0,[r1,#1] - orr r2,r2,r3,lsl#8 - ldrb r3,[r1],#4 - orr r2,r2,r0,lsl#16 -# if 9==15 - str r1,[sp,#17*4] @ make room for r1 -# endif - eor r0,r7,r7,ror#5 - orr r2,r2,r3,lsl#24 - eor r0,r0,r7,ror#19 @ Sigma1(e) -#endif - ldr r3,[r14],#4 @ *K256++ - add r10,r10,r2 @ h+=X[i] - str r2,[sp,#9*4] - eor r2,r8,r9 - add r10,r10,r0,ror#6 @ h+=Sigma1(e) - and r2,r2,r7 - add r10,r10,r3 @ h+=K256[i] - eor r2,r2,r9 @ Ch(e,f,g) - eor r0,r11,r11,ror#11 - add r10,r10,r2 @ h+=Ch(e,f,g) -#if 9==31 - and r3,r3,#0xff - cmp r3,#0xf2 @ done? -#endif -#if 9<15 -# if __ARM_ARCH__>=7 - ldr r2,[r1],#4 @ prefetch -# else - ldrb r2,[r1,#3] -# endif - eor r3,r11,r4 @ a^b, b^c in next round -#else - ldr r2,[sp,#11*4] @ from future BODY_16_xx - eor r3,r11,r4 @ a^b, b^c in next round - ldr r1,[sp,#8*4] @ from future BODY_16_xx -#endif - eor r0,r0,r11,ror#20 @ Sigma0(a) - and r12,r12,r3 @ (b^c)&=(a^b) - add r6,r6,r10 @ d+=h - eor r12,r12,r4 @ Maj(a,b,c) - add r10,r10,r0,ror#2 @ h+=Sigma0(a) - @ add r10,r10,r12 @ h+=Maj(a,b,c) -#if __ARM_ARCH__>=7 - @ ldr r2,[r1],#4 @ 10 -# if 10==15 - str r1,[sp,#17*4] @ make room for r1 -# endif - eor r0,r6,r6,ror#5 - add r10,r10,r12 @ h+=Maj(a,b,c) from the past - eor r0,r0,r6,ror#19 @ Sigma1(e) -# ifndef __ARMEB__ - rev r2,r2 -# endif -#else - @ ldrb r2,[r1,#3] @ 10 - add r10,r10,r12 @ h+=Maj(a,b,c) from the past - ldrb r12,[r1,#2] - ldrb r0,[r1,#1] - orr r2,r2,r12,lsl#8 - ldrb r12,[r1],#4 - orr r2,r2,r0,lsl#16 -# if 10==15 - str r1,[sp,#17*4] @ make room for r1 -# endif - eor r0,r6,r6,ror#5 - orr r2,r2,r12,lsl#24 - eor r0,r0,r6,ror#19 @ Sigma1(e) -#endif - ldr r12,[r14],#4 @ *K256++ - add r9,r9,r2 @ h+=X[i] - str r2,[sp,#10*4] - eor r2,r7,r8 - add r9,r9,r0,ror#6 @ h+=Sigma1(e) - and r2,r2,r6 - add r9,r9,r12 @ h+=K256[i] - eor r2,r2,r8 @ Ch(e,f,g) - eor r0,r10,r10,ror#11 - add r9,r9,r2 @ h+=Ch(e,f,g) -#if 10==31 - and r12,r12,#0xff - cmp r12,#0xf2 @ done? 
-#endif -#if 10<15 -# if __ARM_ARCH__>=7 - ldr r2,[r1],#4 @ prefetch -# else - ldrb r2,[r1,#3] -# endif - eor r12,r10,r11 @ a^b, b^c in next round -#else - ldr r2,[sp,#12*4] @ from future BODY_16_xx - eor r12,r10,r11 @ a^b, b^c in next round - ldr r1,[sp,#9*4] @ from future BODY_16_xx -#endif - eor r0,r0,r10,ror#20 @ Sigma0(a) - and r3,r3,r12 @ (b^c)&=(a^b) - add r5,r5,r9 @ d+=h - eor r3,r3,r11 @ Maj(a,b,c) - add r9,r9,r0,ror#2 @ h+=Sigma0(a) - @ add r9,r9,r3 @ h+=Maj(a,b,c) -#if __ARM_ARCH__>=7 - @ ldr r2,[r1],#4 @ 11 -# if 11==15 - str r1,[sp,#17*4] @ make room for r1 -# endif - eor r0,r5,r5,ror#5 - add r9,r9,r3 @ h+=Maj(a,b,c) from the past - eor r0,r0,r5,ror#19 @ Sigma1(e) -# ifndef __ARMEB__ - rev r2,r2 -# endif -#else - @ ldrb r2,[r1,#3] @ 11 - add r9,r9,r3 @ h+=Maj(a,b,c) from the past - ldrb r3,[r1,#2] - ldrb r0,[r1,#1] - orr r2,r2,r3,lsl#8 - ldrb r3,[r1],#4 - orr r2,r2,r0,lsl#16 -# if 11==15 - str r1,[sp,#17*4] @ make room for r1 -# endif - eor r0,r5,r5,ror#5 - orr r2,r2,r3,lsl#24 - eor r0,r0,r5,ror#19 @ Sigma1(e) -#endif - ldr r3,[r14],#4 @ *K256++ - add r8,r8,r2 @ h+=X[i] - str r2,[sp,#11*4] - eor r2,r6,r7 - add r8,r8,r0,ror#6 @ h+=Sigma1(e) - and r2,r2,r5 - add r8,r8,r3 @ h+=K256[i] - eor r2,r2,r7 @ Ch(e,f,g) - eor r0,r9,r9,ror#11 - add r8,r8,r2 @ h+=Ch(e,f,g) -#if 11==31 - and r3,r3,#0xff - cmp r3,#0xf2 @ done? -#endif -#if 11<15 -# if __ARM_ARCH__>=7 - ldr r2,[r1],#4 @ prefetch -# else - ldrb r2,[r1,#3] -# endif - eor r3,r9,r10 @ a^b, b^c in next round -#else - ldr r2,[sp,#13*4] @ from future BODY_16_xx - eor r3,r9,r10 @ a^b, b^c in next round - ldr r1,[sp,#10*4] @ from future BODY_16_xx -#endif - eor r0,r0,r9,ror#20 @ Sigma0(a) - and r12,r12,r3 @ (b^c)&=(a^b) - add r4,r4,r8 @ d+=h - eor r12,r12,r10 @ Maj(a,b,c) - add r8,r8,r0,ror#2 @ h+=Sigma0(a) - @ add r8,r8,r12 @ h+=Maj(a,b,c) -#if __ARM_ARCH__>=7 - @ ldr r2,[r1],#4 @ 12 -# if 12==15 - str r1,[sp,#17*4] @ make room for r1 -# endif - eor r0,r4,r4,ror#5 - add r8,r8,r12 @ h+=Maj(a,b,c) from the past - eor r0,r0,r4,ror#19 @ Sigma1(e) -# ifndef __ARMEB__ - rev r2,r2 -# endif -#else - @ ldrb r2,[r1,#3] @ 12 - add r8,r8,r12 @ h+=Maj(a,b,c) from the past - ldrb r12,[r1,#2] - ldrb r0,[r1,#1] - orr r2,r2,r12,lsl#8 - ldrb r12,[r1],#4 - orr r2,r2,r0,lsl#16 -# if 12==15 - str r1,[sp,#17*4] @ make room for r1 -# endif - eor r0,r4,r4,ror#5 - orr r2,r2,r12,lsl#24 - eor r0,r0,r4,ror#19 @ Sigma1(e) -#endif - ldr r12,[r14],#4 @ *K256++ - add r7,r7,r2 @ h+=X[i] - str r2,[sp,#12*4] - eor r2,r5,r6 - add r7,r7,r0,ror#6 @ h+=Sigma1(e) - and r2,r2,r4 - add r7,r7,r12 @ h+=K256[i] - eor r2,r2,r6 @ Ch(e,f,g) - eor r0,r8,r8,ror#11 - add r7,r7,r2 @ h+=Ch(e,f,g) -#if 12==31 - and r12,r12,#0xff - cmp r12,#0xf2 @ done? 
-#endif -#if 12<15 -# if __ARM_ARCH__>=7 - ldr r2,[r1],#4 @ prefetch -# else - ldrb r2,[r1,#3] -# endif - eor r12,r8,r9 @ a^b, b^c in next round -#else - ldr r2,[sp,#14*4] @ from future BODY_16_xx - eor r12,r8,r9 @ a^b, b^c in next round - ldr r1,[sp,#11*4] @ from future BODY_16_xx -#endif - eor r0,r0,r8,ror#20 @ Sigma0(a) - and r3,r3,r12 @ (b^c)&=(a^b) - add r11,r11,r7 @ d+=h - eor r3,r3,r9 @ Maj(a,b,c) - add r7,r7,r0,ror#2 @ h+=Sigma0(a) - @ add r7,r7,r3 @ h+=Maj(a,b,c) -#if __ARM_ARCH__>=7 - @ ldr r2,[r1],#4 @ 13 -# if 13==15 - str r1,[sp,#17*4] @ make room for r1 -# endif - eor r0,r11,r11,ror#5 - add r7,r7,r3 @ h+=Maj(a,b,c) from the past - eor r0,r0,r11,ror#19 @ Sigma1(e) -# ifndef __ARMEB__ - rev r2,r2 -# endif -#else - @ ldrb r2,[r1,#3] @ 13 - add r7,r7,r3 @ h+=Maj(a,b,c) from the past - ldrb r3,[r1,#2] - ldrb r0,[r1,#1] - orr r2,r2,r3,lsl#8 - ldrb r3,[r1],#4 - orr r2,r2,r0,lsl#16 -# if 13==15 - str r1,[sp,#17*4] @ make room for r1 -# endif - eor r0,r11,r11,ror#5 - orr r2,r2,r3,lsl#24 - eor r0,r0,r11,ror#19 @ Sigma1(e) -#endif - ldr r3,[r14],#4 @ *K256++ - add r6,r6,r2 @ h+=X[i] - str r2,[sp,#13*4] - eor r2,r4,r5 - add r6,r6,r0,ror#6 @ h+=Sigma1(e) - and r2,r2,r11 - add r6,r6,r3 @ h+=K256[i] - eor r2,r2,r5 @ Ch(e,f,g) - eor r0,r7,r7,ror#11 - add r6,r6,r2 @ h+=Ch(e,f,g) -#if 13==31 - and r3,r3,#0xff - cmp r3,#0xf2 @ done? -#endif -#if 13<15 -# if __ARM_ARCH__>=7 - ldr r2,[r1],#4 @ prefetch -# else - ldrb r2,[r1,#3] -# endif - eor r3,r7,r8 @ a^b, b^c in next round -#else - ldr r2,[sp,#15*4] @ from future BODY_16_xx - eor r3,r7,r8 @ a^b, b^c in next round - ldr r1,[sp,#12*4] @ from future BODY_16_xx -#endif - eor r0,r0,r7,ror#20 @ Sigma0(a) - and r12,r12,r3 @ (b^c)&=(a^b) - add r10,r10,r6 @ d+=h - eor r12,r12,r8 @ Maj(a,b,c) - add r6,r6,r0,ror#2 @ h+=Sigma0(a) - @ add r6,r6,r12 @ h+=Maj(a,b,c) -#if __ARM_ARCH__>=7 - @ ldr r2,[r1],#4 @ 14 -# if 14==15 - str r1,[sp,#17*4] @ make room for r1 -# endif - eor r0,r10,r10,ror#5 - add r6,r6,r12 @ h+=Maj(a,b,c) from the past - eor r0,r0,r10,ror#19 @ Sigma1(e) -# ifndef __ARMEB__ - rev r2,r2 -# endif -#else - @ ldrb r2,[r1,#3] @ 14 - add r6,r6,r12 @ h+=Maj(a,b,c) from the past - ldrb r12,[r1,#2] - ldrb r0,[r1,#1] - orr r2,r2,r12,lsl#8 - ldrb r12,[r1],#4 - orr r2,r2,r0,lsl#16 -# if 14==15 - str r1,[sp,#17*4] @ make room for r1 -# endif - eor r0,r10,r10,ror#5 - orr r2,r2,r12,lsl#24 - eor r0,r0,r10,ror#19 @ Sigma1(e) -#endif - ldr r12,[r14],#4 @ *K256++ - add r5,r5,r2 @ h+=X[i] - str r2,[sp,#14*4] - eor r2,r11,r4 - add r5,r5,r0,ror#6 @ h+=Sigma1(e) - and r2,r2,r10 - add r5,r5,r12 @ h+=K256[i] - eor r2,r2,r4 @ Ch(e,f,g) - eor r0,r6,r6,ror#11 - add r5,r5,r2 @ h+=Ch(e,f,g) -#if 14==31 - and r12,r12,#0xff - cmp r12,#0xf2 @ done? 
-#endif -#if 14<15 -# if __ARM_ARCH__>=7 - ldr r2,[r1],#4 @ prefetch -# else - ldrb r2,[r1,#3] -# endif - eor r12,r6,r7 @ a^b, b^c in next round -#else - ldr r2,[sp,#0*4] @ from future BODY_16_xx - eor r12,r6,r7 @ a^b, b^c in next round - ldr r1,[sp,#13*4] @ from future BODY_16_xx -#endif - eor r0,r0,r6,ror#20 @ Sigma0(a) - and r3,r3,r12 @ (b^c)&=(a^b) - add r9,r9,r5 @ d+=h - eor r3,r3,r7 @ Maj(a,b,c) - add r5,r5,r0,ror#2 @ h+=Sigma0(a) - @ add r5,r5,r3 @ h+=Maj(a,b,c) -#if __ARM_ARCH__>=7 - @ ldr r2,[r1],#4 @ 15 -# if 15==15 - str r1,[sp,#17*4] @ make room for r1 -# endif - eor r0,r9,r9,ror#5 - add r5,r5,r3 @ h+=Maj(a,b,c) from the past - eor r0,r0,r9,ror#19 @ Sigma1(e) -# ifndef __ARMEB__ - rev r2,r2 -# endif -#else - @ ldrb r2,[r1,#3] @ 15 - add r5,r5,r3 @ h+=Maj(a,b,c) from the past - ldrb r3,[r1,#2] - ldrb r0,[r1,#1] - orr r2,r2,r3,lsl#8 - ldrb r3,[r1],#4 - orr r2,r2,r0,lsl#16 -# if 15==15 - str r1,[sp,#17*4] @ make room for r1 -# endif - eor r0,r9,r9,ror#5 - orr r2,r2,r3,lsl#24 - eor r0,r0,r9,ror#19 @ Sigma1(e) -#endif - ldr r3,[r14],#4 @ *K256++ - add r4,r4,r2 @ h+=X[i] - str r2,[sp,#15*4] - eor r2,r10,r11 - add r4,r4,r0,ror#6 @ h+=Sigma1(e) - and r2,r2,r9 - add r4,r4,r3 @ h+=K256[i] - eor r2,r2,r11 @ Ch(e,f,g) - eor r0,r5,r5,ror#11 - add r4,r4,r2 @ h+=Ch(e,f,g) -#if 15==31 - and r3,r3,#0xff - cmp r3,#0xf2 @ done? -#endif -#if 15<15 -# if __ARM_ARCH__>=7 - ldr r2,[r1],#4 @ prefetch -# else - ldrb r2,[r1,#3] -# endif - eor r3,r5,r6 @ a^b, b^c in next round -#else - ldr r2,[sp,#1*4] @ from future BODY_16_xx - eor r3,r5,r6 @ a^b, b^c in next round - ldr r1,[sp,#14*4] @ from future BODY_16_xx -#endif - eor r0,r0,r5,ror#20 @ Sigma0(a) - and r12,r12,r3 @ (b^c)&=(a^b) - add r8,r8,r4 @ d+=h - eor r12,r12,r6 @ Maj(a,b,c) - add r4,r4,r0,ror#2 @ h+=Sigma0(a) - @ add r4,r4,r12 @ h+=Maj(a,b,c) -.Lrounds_16_xx: - @ ldr r2,[sp,#1*4] @ 16 - @ ldr r1,[sp,#14*4] - mov r0,r2,ror#7 - add r4,r4,r12 @ h+=Maj(a,b,c) from the past - mov r12,r1,ror#17 - eor r0,r0,r2,ror#18 - eor r12,r12,r1,ror#19 - eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) - ldr r2,[sp,#0*4] - eor r12,r12,r1,lsr#10 @ sigma1(X[i+14]) - ldr r1,[sp,#9*4] - - add r12,r12,r0 - eor r0,r8,r8,ror#5 @ from BODY_00_15 - add r2,r2,r12 - eor r0,r0,r8,ror#19 @ Sigma1(e) - add r2,r2,r1 @ X[i] - ldr r12,[r14],#4 @ *K256++ - add r11,r11,r2 @ h+=X[i] - str r2,[sp,#0*4] - eor r2,r9,r10 - add r11,r11,r0,ror#6 @ h+=Sigma1(e) - and r2,r2,r8 - add r11,r11,r12 @ h+=K256[i] - eor r2,r2,r10 @ Ch(e,f,g) - eor r0,r4,r4,ror#11 - add r11,r11,r2 @ h+=Ch(e,f,g) -#if 16==31 - and r12,r12,#0xff - cmp r12,#0xf2 @ done? 
-#endif -#if 16<15 -# if __ARM_ARCH__>=7 - ldr r2,[r1],#4 @ prefetch -# else - ldrb r2,[r1,#3] -# endif - eor r12,r4,r5 @ a^b, b^c in next round -#else - ldr r2,[sp,#2*4] @ from future BODY_16_xx - eor r12,r4,r5 @ a^b, b^c in next round - ldr r1,[sp,#15*4] @ from future BODY_16_xx -#endif - eor r0,r0,r4,ror#20 @ Sigma0(a) - and r3,r3,r12 @ (b^c)&=(a^b) - add r7,r7,r11 @ d+=h - eor r3,r3,r5 @ Maj(a,b,c) - add r11,r11,r0,ror#2 @ h+=Sigma0(a) - @ add r11,r11,r3 @ h+=Maj(a,b,c) - @ ldr r2,[sp,#2*4] @ 17 - @ ldr r1,[sp,#15*4] - mov r0,r2,ror#7 - add r11,r11,r3 @ h+=Maj(a,b,c) from the past - mov r3,r1,ror#17 - eor r0,r0,r2,ror#18 - eor r3,r3,r1,ror#19 - eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) - ldr r2,[sp,#1*4] - eor r3,r3,r1,lsr#10 @ sigma1(X[i+14]) - ldr r1,[sp,#10*4] - - add r3,r3,r0 - eor r0,r7,r7,ror#5 @ from BODY_00_15 - add r2,r2,r3 - eor r0,r0,r7,ror#19 @ Sigma1(e) - add r2,r2,r1 @ X[i] - ldr r3,[r14],#4 @ *K256++ - add r10,r10,r2 @ h+=X[i] - str r2,[sp,#1*4] - eor r2,r8,r9 - add r10,r10,r0,ror#6 @ h+=Sigma1(e) - and r2,r2,r7 - add r10,r10,r3 @ h+=K256[i] - eor r2,r2,r9 @ Ch(e,f,g) - eor r0,r11,r11,ror#11 - add r10,r10,r2 @ h+=Ch(e,f,g) -#if 17==31 - and r3,r3,#0xff - cmp r3,#0xf2 @ done? -#endif -#if 17<15 -# if __ARM_ARCH__>=7 - ldr r2,[r1],#4 @ prefetch -# else - ldrb r2,[r1,#3] -# endif - eor r3,r11,r4 @ a^b, b^c in next round -#else - ldr r2,[sp,#3*4] @ from future BODY_16_xx - eor r3,r11,r4 @ a^b, b^c in next round - ldr r1,[sp,#0*4] @ from future BODY_16_xx -#endif - eor r0,r0,r11,ror#20 @ Sigma0(a) - and r12,r12,r3 @ (b^c)&=(a^b) - add r6,r6,r10 @ d+=h - eor r12,r12,r4 @ Maj(a,b,c) - add r10,r10,r0,ror#2 @ h+=Sigma0(a) - @ add r10,r10,r12 @ h+=Maj(a,b,c) - @ ldr r2,[sp,#3*4] @ 18 - @ ldr r1,[sp,#0*4] - mov r0,r2,ror#7 - add r10,r10,r12 @ h+=Maj(a,b,c) from the past - mov r12,r1,ror#17 - eor r0,r0,r2,ror#18 - eor r12,r12,r1,ror#19 - eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) - ldr r2,[sp,#2*4] - eor r12,r12,r1,lsr#10 @ sigma1(X[i+14]) - ldr r1,[sp,#11*4] - - add r12,r12,r0 - eor r0,r6,r6,ror#5 @ from BODY_00_15 - add r2,r2,r12 - eor r0,r0,r6,ror#19 @ Sigma1(e) - add r2,r2,r1 @ X[i] - ldr r12,[r14],#4 @ *K256++ - add r9,r9,r2 @ h+=X[i] - str r2,[sp,#2*4] - eor r2,r7,r8 - add r9,r9,r0,ror#6 @ h+=Sigma1(e) - and r2,r2,r6 - add r9,r9,r12 @ h+=K256[i] - eor r2,r2,r8 @ Ch(e,f,g) - eor r0,r10,r10,ror#11 - add r9,r9,r2 @ h+=Ch(e,f,g) -#if 18==31 - and r12,r12,#0xff - cmp r12,#0xf2 @ done? 
-#endif -#if 18<15 -# if __ARM_ARCH__>=7 - ldr r2,[r1],#4 @ prefetch -# else - ldrb r2,[r1,#3] -# endif - eor r12,r10,r11 @ a^b, b^c in next round -#else - ldr r2,[sp,#4*4] @ from future BODY_16_xx - eor r12,r10,r11 @ a^b, b^c in next round - ldr r1,[sp,#1*4] @ from future BODY_16_xx -#endif - eor r0,r0,r10,ror#20 @ Sigma0(a) - and r3,r3,r12 @ (b^c)&=(a^b) - add r5,r5,r9 @ d+=h - eor r3,r3,r11 @ Maj(a,b,c) - add r9,r9,r0,ror#2 @ h+=Sigma0(a) - @ add r9,r9,r3 @ h+=Maj(a,b,c) - @ ldr r2,[sp,#4*4] @ 19 - @ ldr r1,[sp,#1*4] - mov r0,r2,ror#7 - add r9,r9,r3 @ h+=Maj(a,b,c) from the past - mov r3,r1,ror#17 - eor r0,r0,r2,ror#18 - eor r3,r3,r1,ror#19 - eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) - ldr r2,[sp,#3*4] - eor r3,r3,r1,lsr#10 @ sigma1(X[i+14]) - ldr r1,[sp,#12*4] - - add r3,r3,r0 - eor r0,r5,r5,ror#5 @ from BODY_00_15 - add r2,r2,r3 - eor r0,r0,r5,ror#19 @ Sigma1(e) - add r2,r2,r1 @ X[i] - ldr r3,[r14],#4 @ *K256++ - add r8,r8,r2 @ h+=X[i] - str r2,[sp,#3*4] - eor r2,r6,r7 - add r8,r8,r0,ror#6 @ h+=Sigma1(e) - and r2,r2,r5 - add r8,r8,r3 @ h+=K256[i] - eor r2,r2,r7 @ Ch(e,f,g) - eor r0,r9,r9,ror#11 - add r8,r8,r2 @ h+=Ch(e,f,g) -#if 19==31 - and r3,r3,#0xff - cmp r3,#0xf2 @ done? -#endif -#if 19<15 -# if __ARM_ARCH__>=7 - ldr r2,[r1],#4 @ prefetch -# else - ldrb r2,[r1,#3] -# endif - eor r3,r9,r10 @ a^b, b^c in next round -#else - ldr r2,[sp,#5*4] @ from future BODY_16_xx - eor r3,r9,r10 @ a^b, b^c in next round - ldr r1,[sp,#2*4] @ from future BODY_16_xx -#endif - eor r0,r0,r9,ror#20 @ Sigma0(a) - and r12,r12,r3 @ (b^c)&=(a^b) - add r4,r4,r8 @ d+=h - eor r12,r12,r10 @ Maj(a,b,c) - add r8,r8,r0,ror#2 @ h+=Sigma0(a) - @ add r8,r8,r12 @ h+=Maj(a,b,c) - @ ldr r2,[sp,#5*4] @ 20 - @ ldr r1,[sp,#2*4] - mov r0,r2,ror#7 - add r8,r8,r12 @ h+=Maj(a,b,c) from the past - mov r12,r1,ror#17 - eor r0,r0,r2,ror#18 - eor r12,r12,r1,ror#19 - eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) - ldr r2,[sp,#4*4] - eor r12,r12,r1,lsr#10 @ sigma1(X[i+14]) - ldr r1,[sp,#13*4] - - add r12,r12,r0 - eor r0,r4,r4,ror#5 @ from BODY_00_15 - add r2,r2,r12 - eor r0,r0,r4,ror#19 @ Sigma1(e) - add r2,r2,r1 @ X[i] - ldr r12,[r14],#4 @ *K256++ - add r7,r7,r2 @ h+=X[i] - str r2,[sp,#4*4] - eor r2,r5,r6 - add r7,r7,r0,ror#6 @ h+=Sigma1(e) - and r2,r2,r4 - add r7,r7,r12 @ h+=K256[i] - eor r2,r2,r6 @ Ch(e,f,g) - eor r0,r8,r8,ror#11 - add r7,r7,r2 @ h+=Ch(e,f,g) -#if 20==31 - and r12,r12,#0xff - cmp r12,#0xf2 @ done? 
-#endif -#if 20<15 -# if __ARM_ARCH__>=7 - ldr r2,[r1],#4 @ prefetch -# else - ldrb r2,[r1,#3] -# endif - eor r12,r8,r9 @ a^b, b^c in next round -#else - ldr r2,[sp,#6*4] @ from future BODY_16_xx - eor r12,r8,r9 @ a^b, b^c in next round - ldr r1,[sp,#3*4] @ from future BODY_16_xx -#endif - eor r0,r0,r8,ror#20 @ Sigma0(a) - and r3,r3,r12 @ (b^c)&=(a^b) - add r11,r11,r7 @ d+=h - eor r3,r3,r9 @ Maj(a,b,c) - add r7,r7,r0,ror#2 @ h+=Sigma0(a) - @ add r7,r7,r3 @ h+=Maj(a,b,c) - @ ldr r2,[sp,#6*4] @ 21 - @ ldr r1,[sp,#3*4] - mov r0,r2,ror#7 - add r7,r7,r3 @ h+=Maj(a,b,c) from the past - mov r3,r1,ror#17 - eor r0,r0,r2,ror#18 - eor r3,r3,r1,ror#19 - eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) - ldr r2,[sp,#5*4] - eor r3,r3,r1,lsr#10 @ sigma1(X[i+14]) - ldr r1,[sp,#14*4] - - add r3,r3,r0 - eor r0,r11,r11,ror#5 @ from BODY_00_15 - add r2,r2,r3 - eor r0,r0,r11,ror#19 @ Sigma1(e) - add r2,r2,r1 @ X[i] - ldr r3,[r14],#4 @ *K256++ - add r6,r6,r2 @ h+=X[i] - str r2,[sp,#5*4] - eor r2,r4,r5 - add r6,r6,r0,ror#6 @ h+=Sigma1(e) - and r2,r2,r11 - add r6,r6,r3 @ h+=K256[i] - eor r2,r2,r5 @ Ch(e,f,g) - eor r0,r7,r7,ror#11 - add r6,r6,r2 @ h+=Ch(e,f,g) -#if 21==31 - and r3,r3,#0xff - cmp r3,#0xf2 @ done? -#endif -#if 21<15 -# if __ARM_ARCH__>=7 - ldr r2,[r1],#4 @ prefetch -# else - ldrb r2,[r1,#3] -# endif - eor r3,r7,r8 @ a^b, b^c in next round -#else - ldr r2,[sp,#7*4] @ from future BODY_16_xx - eor r3,r7,r8 @ a^b, b^c in next round - ldr r1,[sp,#4*4] @ from future BODY_16_xx -#endif - eor r0,r0,r7,ror#20 @ Sigma0(a) - and r12,r12,r3 @ (b^c)&=(a^b) - add r10,r10,r6 @ d+=h - eor r12,r12,r8 @ Maj(a,b,c) - add r6,r6,r0,ror#2 @ h+=Sigma0(a) - @ add r6,r6,r12 @ h+=Maj(a,b,c) - @ ldr r2,[sp,#7*4] @ 22 - @ ldr r1,[sp,#4*4] - mov r0,r2,ror#7 - add r6,r6,r12 @ h+=Maj(a,b,c) from the past - mov r12,r1,ror#17 - eor r0,r0,r2,ror#18 - eor r12,r12,r1,ror#19 - eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) - ldr r2,[sp,#6*4] - eor r12,r12,r1,lsr#10 @ sigma1(X[i+14]) - ldr r1,[sp,#15*4] - - add r12,r12,r0 - eor r0,r10,r10,ror#5 @ from BODY_00_15 - add r2,r2,r12 - eor r0,r0,r10,ror#19 @ Sigma1(e) - add r2,r2,r1 @ X[i] - ldr r12,[r14],#4 @ *K256++ - add r5,r5,r2 @ h+=X[i] - str r2,[sp,#6*4] - eor r2,r11,r4 - add r5,r5,r0,ror#6 @ h+=Sigma1(e) - and r2,r2,r10 - add r5,r5,r12 @ h+=K256[i] - eor r2,r2,r4 @ Ch(e,f,g) - eor r0,r6,r6,ror#11 - add r5,r5,r2 @ h+=Ch(e,f,g) -#if 22==31 - and r12,r12,#0xff - cmp r12,#0xf2 @ done? 
-#endif -#if 22<15 -# if __ARM_ARCH__>=7 - ldr r2,[r1],#4 @ prefetch -# else - ldrb r2,[r1,#3] -# endif - eor r12,r6,r7 @ a^b, b^c in next round -#else - ldr r2,[sp,#8*4] @ from future BODY_16_xx - eor r12,r6,r7 @ a^b, b^c in next round - ldr r1,[sp,#5*4] @ from future BODY_16_xx -#endif - eor r0,r0,r6,ror#20 @ Sigma0(a) - and r3,r3,r12 @ (b^c)&=(a^b) - add r9,r9,r5 @ d+=h - eor r3,r3,r7 @ Maj(a,b,c) - add r5,r5,r0,ror#2 @ h+=Sigma0(a) - @ add r5,r5,r3 @ h+=Maj(a,b,c) - @ ldr r2,[sp,#8*4] @ 23 - @ ldr r1,[sp,#5*4] - mov r0,r2,ror#7 - add r5,r5,r3 @ h+=Maj(a,b,c) from the past - mov r3,r1,ror#17 - eor r0,r0,r2,ror#18 - eor r3,r3,r1,ror#19 - eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) - ldr r2,[sp,#7*4] - eor r3,r3,r1,lsr#10 @ sigma1(X[i+14]) - ldr r1,[sp,#0*4] - - add r3,r3,r0 - eor r0,r9,r9,ror#5 @ from BODY_00_15 - add r2,r2,r3 - eor r0,r0,r9,ror#19 @ Sigma1(e) - add r2,r2,r1 @ X[i] - ldr r3,[r14],#4 @ *K256++ - add r4,r4,r2 @ h+=X[i] - str r2,[sp,#7*4] - eor r2,r10,r11 - add r4,r4,r0,ror#6 @ h+=Sigma1(e) - and r2,r2,r9 - add r4,r4,r3 @ h+=K256[i] - eor r2,r2,r11 @ Ch(e,f,g) - eor r0,r5,r5,ror#11 - add r4,r4,r2 @ h+=Ch(e,f,g) -#if 23==31 - and r3,r3,#0xff - cmp r3,#0xf2 @ done? -#endif -#if 23<15 -# if __ARM_ARCH__>=7 - ldr r2,[r1],#4 @ prefetch -# else - ldrb r2,[r1,#3] -# endif - eor r3,r5,r6 @ a^b, b^c in next round -#else - ldr r2,[sp,#9*4] @ from future BODY_16_xx - eor r3,r5,r6 @ a^b, b^c in next round - ldr r1,[sp,#6*4] @ from future BODY_16_xx -#endif - eor r0,r0,r5,ror#20 @ Sigma0(a) - and r12,r12,r3 @ (b^c)&=(a^b) - add r8,r8,r4 @ d+=h - eor r12,r12,r6 @ Maj(a,b,c) - add r4,r4,r0,ror#2 @ h+=Sigma0(a) - @ add r4,r4,r12 @ h+=Maj(a,b,c) - @ ldr r2,[sp,#9*4] @ 24 - @ ldr r1,[sp,#6*4] - mov r0,r2,ror#7 - add r4,r4,r12 @ h+=Maj(a,b,c) from the past - mov r12,r1,ror#17 - eor r0,r0,r2,ror#18 - eor r12,r12,r1,ror#19 - eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) - ldr r2,[sp,#8*4] - eor r12,r12,r1,lsr#10 @ sigma1(X[i+14]) - ldr r1,[sp,#1*4] - - add r12,r12,r0 - eor r0,r8,r8,ror#5 @ from BODY_00_15 - add r2,r2,r12 - eor r0,r0,r8,ror#19 @ Sigma1(e) - add r2,r2,r1 @ X[i] - ldr r12,[r14],#4 @ *K256++ - add r11,r11,r2 @ h+=X[i] - str r2,[sp,#8*4] - eor r2,r9,r10 - add r11,r11,r0,ror#6 @ h+=Sigma1(e) - and r2,r2,r8 - add r11,r11,r12 @ h+=K256[i] - eor r2,r2,r10 @ Ch(e,f,g) - eor r0,r4,r4,ror#11 - add r11,r11,r2 @ h+=Ch(e,f,g) -#if 24==31 - and r12,r12,#0xff - cmp r12,#0xf2 @ done? 
-#endif -#if 24<15 -# if __ARM_ARCH__>=7 - ldr r2,[r1],#4 @ prefetch -# else - ldrb r2,[r1,#3] -# endif - eor r12,r4,r5 @ a^b, b^c in next round -#else - ldr r2,[sp,#10*4] @ from future BODY_16_xx - eor r12,r4,r5 @ a^b, b^c in next round - ldr r1,[sp,#7*4] @ from future BODY_16_xx -#endif - eor r0,r0,r4,ror#20 @ Sigma0(a) - and r3,r3,r12 @ (b^c)&=(a^b) - add r7,r7,r11 @ d+=h - eor r3,r3,r5 @ Maj(a,b,c) - add r11,r11,r0,ror#2 @ h+=Sigma0(a) - @ add r11,r11,r3 @ h+=Maj(a,b,c) - @ ldr r2,[sp,#10*4] @ 25 - @ ldr r1,[sp,#7*4] - mov r0,r2,ror#7 - add r11,r11,r3 @ h+=Maj(a,b,c) from the past - mov r3,r1,ror#17 - eor r0,r0,r2,ror#18 - eor r3,r3,r1,ror#19 - eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) - ldr r2,[sp,#9*4] - eor r3,r3,r1,lsr#10 @ sigma1(X[i+14]) - ldr r1,[sp,#2*4] - - add r3,r3,r0 - eor r0,r7,r7,ror#5 @ from BODY_00_15 - add r2,r2,r3 - eor r0,r0,r7,ror#19 @ Sigma1(e) - add r2,r2,r1 @ X[i] - ldr r3,[r14],#4 @ *K256++ - add r10,r10,r2 @ h+=X[i] - str r2,[sp,#9*4] - eor r2,r8,r9 - add r10,r10,r0,ror#6 @ h+=Sigma1(e) - and r2,r2,r7 - add r10,r10,r3 @ h+=K256[i] - eor r2,r2,r9 @ Ch(e,f,g) - eor r0,r11,r11,ror#11 - add r10,r10,r2 @ h+=Ch(e,f,g) -#if 25==31 - and r3,r3,#0xff - cmp r3,#0xf2 @ done? -#endif -#if 25<15 -# if __ARM_ARCH__>=7 - ldr r2,[r1],#4 @ prefetch -# else - ldrb r2,[r1,#3] -# endif - eor r3,r11,r4 @ a^b, b^c in next round -#else - ldr r2,[sp,#11*4] @ from future BODY_16_xx - eor r3,r11,r4 @ a^b, b^c in next round - ldr r1,[sp,#8*4] @ from future BODY_16_xx -#endif - eor r0,r0,r11,ror#20 @ Sigma0(a) - and r12,r12,r3 @ (b^c)&=(a^b) - add r6,r6,r10 @ d+=h - eor r12,r12,r4 @ Maj(a,b,c) - add r10,r10,r0,ror#2 @ h+=Sigma0(a) - @ add r10,r10,r12 @ h+=Maj(a,b,c) - @ ldr r2,[sp,#11*4] @ 26 - @ ldr r1,[sp,#8*4] - mov r0,r2,ror#7 - add r10,r10,r12 @ h+=Maj(a,b,c) from the past - mov r12,r1,ror#17 - eor r0,r0,r2,ror#18 - eor r12,r12,r1,ror#19 - eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) - ldr r2,[sp,#10*4] - eor r12,r12,r1,lsr#10 @ sigma1(X[i+14]) - ldr r1,[sp,#3*4] - - add r12,r12,r0 - eor r0,r6,r6,ror#5 @ from BODY_00_15 - add r2,r2,r12 - eor r0,r0,r6,ror#19 @ Sigma1(e) - add r2,r2,r1 @ X[i] - ldr r12,[r14],#4 @ *K256++ - add r9,r9,r2 @ h+=X[i] - str r2,[sp,#10*4] - eor r2,r7,r8 - add r9,r9,r0,ror#6 @ h+=Sigma1(e) - and r2,r2,r6 - add r9,r9,r12 @ h+=K256[i] - eor r2,r2,r8 @ Ch(e,f,g) - eor r0,r10,r10,ror#11 - add r9,r9,r2 @ h+=Ch(e,f,g) -#if 26==31 - and r12,r12,#0xff - cmp r12,#0xf2 @ done? 
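
The message-schedule expansion is interleaved with the rounds above: the "from future BODY_16_xx" loads fetch X[i+1] and X[i+14] one round early to hide load latency, and the sixteen live schedule words sit in a rolling window at [sp]. The recurrence as a C sketch (illustrative names):

#include <stdint.h>

static inline uint32_t ror32(uint32_t x, unsigned int n)
{
	return (x >> n) | (x << (32 - n));
}

/* W[t] += sigma0(W[t-15]) + sigma1(W[t-2]) + W[t-7], indices taken
 * mod 16 in the rolling window (t+1 = t-15, t+14 = t-2, t+9 = t-7). */
static uint32_t sha256_expand(uint32_t W[16], unsigned int t)
{
	uint32_t x1  = W[(t +  1) & 15];
	uint32_t x14 = W[(t + 14) & 15];
	uint32_t s0  = ror32(x1,   7) ^ ror32(x1,  18) ^ (x1  >>  3);
	uint32_t s1  = ror32(x14, 17) ^ ror32(x14, 19) ^ (x14 >> 10);

	W[t & 15] += s0 + s1 + W[(t + 9) & 15];
	return W[t & 15];
}
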
-#endif -#if 26<15 -# if __ARM_ARCH__>=7 - ldr r2,[r1],#4 @ prefetch -# else - ldrb r2,[r1,#3] -# endif - eor r12,r10,r11 @ a^b, b^c in next round -#else - ldr r2,[sp,#12*4] @ from future BODY_16_xx - eor r12,r10,r11 @ a^b, b^c in next round - ldr r1,[sp,#9*4] @ from future BODY_16_xx -#endif - eor r0,r0,r10,ror#20 @ Sigma0(a) - and r3,r3,r12 @ (b^c)&=(a^b) - add r5,r5,r9 @ d+=h - eor r3,r3,r11 @ Maj(a,b,c) - add r9,r9,r0,ror#2 @ h+=Sigma0(a) - @ add r9,r9,r3 @ h+=Maj(a,b,c) - @ ldr r2,[sp,#12*4] @ 27 - @ ldr r1,[sp,#9*4] - mov r0,r2,ror#7 - add r9,r9,r3 @ h+=Maj(a,b,c) from the past - mov r3,r1,ror#17 - eor r0,r0,r2,ror#18 - eor r3,r3,r1,ror#19 - eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) - ldr r2,[sp,#11*4] - eor r3,r3,r1,lsr#10 @ sigma1(X[i+14]) - ldr r1,[sp,#4*4] - - add r3,r3,r0 - eor r0,r5,r5,ror#5 @ from BODY_00_15 - add r2,r2,r3 - eor r0,r0,r5,ror#19 @ Sigma1(e) - add r2,r2,r1 @ X[i] - ldr r3,[r14],#4 @ *K256++ - add r8,r8,r2 @ h+=X[i] - str r2,[sp,#11*4] - eor r2,r6,r7 - add r8,r8,r0,ror#6 @ h+=Sigma1(e) - and r2,r2,r5 - add r8,r8,r3 @ h+=K256[i] - eor r2,r2,r7 @ Ch(e,f,g) - eor r0,r9,r9,ror#11 - add r8,r8,r2 @ h+=Ch(e,f,g) -#if 27==31 - and r3,r3,#0xff - cmp r3,#0xf2 @ done? -#endif -#if 27<15 -# if __ARM_ARCH__>=7 - ldr r2,[r1],#4 @ prefetch -# else - ldrb r2,[r1,#3] -# endif - eor r3,r9,r10 @ a^b, b^c in next round -#else - ldr r2,[sp,#13*4] @ from future BODY_16_xx - eor r3,r9,r10 @ a^b, b^c in next round - ldr r1,[sp,#10*4] @ from future BODY_16_xx -#endif - eor r0,r0,r9,ror#20 @ Sigma0(a) - and r12,r12,r3 @ (b^c)&=(a^b) - add r4,r4,r8 @ d+=h - eor r12,r12,r10 @ Maj(a,b,c) - add r8,r8,r0,ror#2 @ h+=Sigma0(a) - @ add r8,r8,r12 @ h+=Maj(a,b,c) - @ ldr r2,[sp,#13*4] @ 28 - @ ldr r1,[sp,#10*4] - mov r0,r2,ror#7 - add r8,r8,r12 @ h+=Maj(a,b,c) from the past - mov r12,r1,ror#17 - eor r0,r0,r2,ror#18 - eor r12,r12,r1,ror#19 - eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) - ldr r2,[sp,#12*4] - eor r12,r12,r1,lsr#10 @ sigma1(X[i+14]) - ldr r1,[sp,#5*4] - - add r12,r12,r0 - eor r0,r4,r4,ror#5 @ from BODY_00_15 - add r2,r2,r12 - eor r0,r0,r4,ror#19 @ Sigma1(e) - add r2,r2,r1 @ X[i] - ldr r12,[r14],#4 @ *K256++ - add r7,r7,r2 @ h+=X[i] - str r2,[sp,#12*4] - eor r2,r5,r6 - add r7,r7,r0,ror#6 @ h+=Sigma1(e) - and r2,r2,r4 - add r7,r7,r12 @ h+=K256[i] - eor r2,r2,r6 @ Ch(e,f,g) - eor r0,r8,r8,ror#11 - add r7,r7,r2 @ h+=Ch(e,f,g) -#if 28==31 - and r12,r12,#0xff - cmp r12,#0xf2 @ done? 
-#endif -#if 28<15 -# if __ARM_ARCH__>=7 - ldr r2,[r1],#4 @ prefetch -# else - ldrb r2,[r1,#3] -# endif - eor r12,r8,r9 @ a^b, b^c in next round -#else - ldr r2,[sp,#14*4] @ from future BODY_16_xx - eor r12,r8,r9 @ a^b, b^c in next round - ldr r1,[sp,#11*4] @ from future BODY_16_xx -#endif - eor r0,r0,r8,ror#20 @ Sigma0(a) - and r3,r3,r12 @ (b^c)&=(a^b) - add r11,r11,r7 @ d+=h - eor r3,r3,r9 @ Maj(a,b,c) - add r7,r7,r0,ror#2 @ h+=Sigma0(a) - @ add r7,r7,r3 @ h+=Maj(a,b,c) - @ ldr r2,[sp,#14*4] @ 29 - @ ldr r1,[sp,#11*4] - mov r0,r2,ror#7 - add r7,r7,r3 @ h+=Maj(a,b,c) from the past - mov r3,r1,ror#17 - eor r0,r0,r2,ror#18 - eor r3,r3,r1,ror#19 - eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) - ldr r2,[sp,#13*4] - eor r3,r3,r1,lsr#10 @ sigma1(X[i+14]) - ldr r1,[sp,#6*4] - - add r3,r3,r0 - eor r0,r11,r11,ror#5 @ from BODY_00_15 - add r2,r2,r3 - eor r0,r0,r11,ror#19 @ Sigma1(e) - add r2,r2,r1 @ X[i] - ldr r3,[r14],#4 @ *K256++ - add r6,r6,r2 @ h+=X[i] - str r2,[sp,#13*4] - eor r2,r4,r5 - add r6,r6,r0,ror#6 @ h+=Sigma1(e) - and r2,r2,r11 - add r6,r6,r3 @ h+=K256[i] - eor r2,r2,r5 @ Ch(e,f,g) - eor r0,r7,r7,ror#11 - add r6,r6,r2 @ h+=Ch(e,f,g) -#if 29==31 - and r3,r3,#0xff - cmp r3,#0xf2 @ done? -#endif -#if 29<15 -# if __ARM_ARCH__>=7 - ldr r2,[r1],#4 @ prefetch -# else - ldrb r2,[r1,#3] -# endif - eor r3,r7,r8 @ a^b, b^c in next round -#else - ldr r2,[sp,#15*4] @ from future BODY_16_xx - eor r3,r7,r8 @ a^b, b^c in next round - ldr r1,[sp,#12*4] @ from future BODY_16_xx -#endif - eor r0,r0,r7,ror#20 @ Sigma0(a) - and r12,r12,r3 @ (b^c)&=(a^b) - add r10,r10,r6 @ d+=h - eor r12,r12,r8 @ Maj(a,b,c) - add r6,r6,r0,ror#2 @ h+=Sigma0(a) - @ add r6,r6,r12 @ h+=Maj(a,b,c) - @ ldr r2,[sp,#15*4] @ 30 - @ ldr r1,[sp,#12*4] - mov r0,r2,ror#7 - add r6,r6,r12 @ h+=Maj(a,b,c) from the past - mov r12,r1,ror#17 - eor r0,r0,r2,ror#18 - eor r12,r12,r1,ror#19 - eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) - ldr r2,[sp,#14*4] - eor r12,r12,r1,lsr#10 @ sigma1(X[i+14]) - ldr r1,[sp,#7*4] - - add r12,r12,r0 - eor r0,r10,r10,ror#5 @ from BODY_00_15 - add r2,r2,r12 - eor r0,r0,r10,ror#19 @ Sigma1(e) - add r2,r2,r1 @ X[i] - ldr r12,[r14],#4 @ *K256++ - add r5,r5,r2 @ h+=X[i] - str r2,[sp,#14*4] - eor r2,r11,r4 - add r5,r5,r0,ror#6 @ h+=Sigma1(e) - and r2,r2,r10 - add r5,r5,r12 @ h+=K256[i] - eor r2,r2,r4 @ Ch(e,f,g) - eor r0,r6,r6,ror#11 - add r5,r5,r2 @ h+=Ch(e,f,g) -#if 30==31 - and r12,r12,#0xff - cmp r12,#0xf2 @ done? 
-#endif -#if 30<15 -# if __ARM_ARCH__>=7 - ldr r2,[r1],#4 @ prefetch -# else - ldrb r2,[r1,#3] -# endif - eor r12,r6,r7 @ a^b, b^c in next round -#else - ldr r2,[sp,#0*4] @ from future BODY_16_xx - eor r12,r6,r7 @ a^b, b^c in next round - ldr r1,[sp,#13*4] @ from future BODY_16_xx -#endif - eor r0,r0,r6,ror#20 @ Sigma0(a) - and r3,r3,r12 @ (b^c)&=(a^b) - add r9,r9,r5 @ d+=h - eor r3,r3,r7 @ Maj(a,b,c) - add r5,r5,r0,ror#2 @ h+=Sigma0(a) - @ add r5,r5,r3 @ h+=Maj(a,b,c) - @ ldr r2,[sp,#0*4] @ 31 - @ ldr r1,[sp,#13*4] - mov r0,r2,ror#7 - add r5,r5,r3 @ h+=Maj(a,b,c) from the past - mov r3,r1,ror#17 - eor r0,r0,r2,ror#18 - eor r3,r3,r1,ror#19 - eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) - ldr r2,[sp,#15*4] - eor r3,r3,r1,lsr#10 @ sigma1(X[i+14]) - ldr r1,[sp,#8*4] - - add r3,r3,r0 - eor r0,r9,r9,ror#5 @ from BODY_00_15 - add r2,r2,r3 - eor r0,r0,r9,ror#19 @ Sigma1(e) - add r2,r2,r1 @ X[i] - ldr r3,[r14],#4 @ *K256++ - add r4,r4,r2 @ h+=X[i] - str r2,[sp,#15*4] - eor r2,r10,r11 - add r4,r4,r0,ror#6 @ h+=Sigma1(e) - and r2,r2,r9 - add r4,r4,r3 @ h+=K256[i] - eor r2,r2,r11 @ Ch(e,f,g) - eor r0,r5,r5,ror#11 - add r4,r4,r2 @ h+=Ch(e,f,g) -#if 31==31 - and r3,r3,#0xff - cmp r3,#0xf2 @ done? -#endif -#if 31<15 -# if __ARM_ARCH__>=7 - ldr r2,[r1],#4 @ prefetch -# else - ldrb r2,[r1,#3] -# endif - eor r3,r5,r6 @ a^b, b^c in next round -#else - ldr r2,[sp,#1*4] @ from future BODY_16_xx - eor r3,r5,r6 @ a^b, b^c in next round - ldr r1,[sp,#14*4] @ from future BODY_16_xx -#endif - eor r0,r0,r5,ror#20 @ Sigma0(a) - and r12,r12,r3 @ (b^c)&=(a^b) - add r8,r8,r4 @ d+=h - eor r12,r12,r6 @ Maj(a,b,c) - add r4,r4,r0,ror#2 @ h+=Sigma0(a) - @ add r4,r4,r12 @ h+=Maj(a,b,c) -#if __ARM_ARCH__>=7 - ite eq @ Thumb2 thing, sanity check in ARM -#endif - ldreq r3,[sp,#16*4] @ pull ctx - bne .Lrounds_16_xx - - add r4,r4,r12 @ h+=Maj(a,b,c) from the past - ldr r0,[r3,#0] - ldr r2,[r3,#4] - ldr r12,[r3,#8] - add r4,r4,r0 - ldr r0,[r3,#12] - add r5,r5,r2 - ldr r2,[r3,#16] - add r6,r6,r12 - ldr r12,[r3,#20] - add r7,r7,r0 - ldr r0,[r3,#24] - add r8,r8,r2 - ldr r2,[r3,#28] - add r9,r9,r12 - ldr r1,[sp,#17*4] @ pull inp - ldr r12,[sp,#18*4] @ pull inp+len - add r10,r10,r0 - add r11,r11,r2 - stmia r3,{r4,r5,r6,r7,r8,r9,r10,r11} - cmp r1,r12 - sub r14,r14,#256 @ rewind Ktbl - bne .Loop - - add sp,sp,#19*4 @ destroy frame -#if __ARM_ARCH__>=5 - ldmia sp!,{r4-r11,pc} -#else - ldmia sp!,{r4-r11,lr} - tst lr,#1 - moveq pc,lr @ be binary compatible with V4, yet - .word 0xe12fff1e @ interoperable with Thumb ISA:-) -#endif -.size sha256_block_data_order,.-sha256_block_data_order -#if __ARM_MAX_ARCH__>=7 -.arch armv7-a -.fpu neon - -.global sha256_block_data_order_neon -.type sha256_block_data_order_neon,%function -.align 4 -sha256_block_data_order_neon: -.LNEON: - stmdb sp!,{r4-r12,lr} - - sub r11,sp,#16*4+16 - adr r14,.Lsha256_block_data_order - sub r14,r14,#.Lsha256_block_data_order-K256 - bic r11,r11,#15 @ align for 128-bit stores - mov r12,sp - mov sp,r11 @ alloca - add r2,r1,r2,lsl#6 @ len to point at the end of inp - - vld1.8 {q0},[r1]! - vld1.8 {q1},[r1]! - vld1.8 {q2},[r1]! - vld1.8 {q3},[r1]! - vld1.32 {q8},[r14,:128]! - vld1.32 {q9},[r14,:128]! - vld1.32 {q10},[r14,:128]! - vld1.32 {q11},[r14,:128]! - vrev32.8 q0,q0 @ yes, even on - str r0,[sp,#64] - vrev32.8 q1,q1 @ big-endian - str r1,[sp,#68] - mov r1,sp - vrev32.8 q2,q2 - str r2,[sp,#72] - vrev32.8 q3,q3 - str r12,[sp,#76] @ save original sp - vadd.i32 q8,q8,q0 - vadd.i32 q9,q9,q1 - vst1.32 {q8},[r1,:128]! - vadd.i32 q10,q10,q2 - vst1.32 {q9},[r1,:128]! 
- vadd.i32 q11,q11,q3 - vst1.32 {q10},[r1,:128]! - vst1.32 {q11},[r1,:128]! - - ldmia r0,{r4-r11} - sub r1,r1,#64 - ldr r2,[sp,#0] - eor r12,r12,r12 - eor r3,r5,r6 - b .L_00_48 - -.align 4 -.L_00_48: - vext.8 q8,q0,q1,#4 - add r11,r11,r2 - eor r2,r9,r10 - eor r0,r8,r8,ror#5 - vext.8 q9,q2,q3,#4 - add r4,r4,r12 - and r2,r2,r8 - eor r12,r0,r8,ror#19 - vshr.u32 q10,q8,#7 - eor r0,r4,r4,ror#11 - eor r2,r2,r10 - vadd.i32 q0,q0,q9 - add r11,r11,r12,ror#6 - eor r12,r4,r5 - vshr.u32 q9,q8,#3 - eor r0,r0,r4,ror#20 - add r11,r11,r2 - vsli.32 q10,q8,#25 - ldr r2,[sp,#4] - and r3,r3,r12 - vshr.u32 q11,q8,#18 - add r7,r7,r11 - add r11,r11,r0,ror#2 - eor r3,r3,r5 - veor q9,q9,q10 - add r10,r10,r2 - vsli.32 q11,q8,#14 - eor r2,r8,r9 - eor r0,r7,r7,ror#5 - vshr.u32 d24,d7,#17 - add r11,r11,r3 - and r2,r2,r7 - veor q9,q9,q11 - eor r3,r0,r7,ror#19 - eor r0,r11,r11,ror#11 - vsli.32 d24,d7,#15 - eor r2,r2,r9 - add r10,r10,r3,ror#6 - vshr.u32 d25,d7,#10 - eor r3,r11,r4 - eor r0,r0,r11,ror#20 - vadd.i32 q0,q0,q9 - add r10,r10,r2 - ldr r2,[sp,#8] - veor d25,d25,d24 - and r12,r12,r3 - add r6,r6,r10 - vshr.u32 d24,d7,#19 - add r10,r10,r0,ror#2 - eor r12,r12,r4 - vsli.32 d24,d7,#13 - add r9,r9,r2 - eor r2,r7,r8 - veor d25,d25,d24 - eor r0,r6,r6,ror#5 - add r10,r10,r12 - vadd.i32 d0,d0,d25 - and r2,r2,r6 - eor r12,r0,r6,ror#19 - vshr.u32 d24,d0,#17 - eor r0,r10,r10,ror#11 - eor r2,r2,r8 - vsli.32 d24,d0,#15 - add r9,r9,r12,ror#6 - eor r12,r10,r11 - vshr.u32 d25,d0,#10 - eor r0,r0,r10,ror#20 - add r9,r9,r2 - veor d25,d25,d24 - ldr r2,[sp,#12] - and r3,r3,r12 - vshr.u32 d24,d0,#19 - add r5,r5,r9 - add r9,r9,r0,ror#2 - eor r3,r3,r11 - vld1.32 {q8},[r14,:128]! - add r8,r8,r2 - vsli.32 d24,d0,#13 - eor r2,r6,r7 - eor r0,r5,r5,ror#5 - veor d25,d25,d24 - add r9,r9,r3 - and r2,r2,r5 - vadd.i32 d1,d1,d25 - eor r3,r0,r5,ror#19 - eor r0,r9,r9,ror#11 - vadd.i32 q8,q8,q0 - eor r2,r2,r7 - add r8,r8,r3,ror#6 - eor r3,r9,r10 - eor r0,r0,r9,ror#20 - add r8,r8,r2 - ldr r2,[sp,#16] - and r12,r12,r3 - add r4,r4,r8 - vst1.32 {q8},[r1,:128]! - add r8,r8,r0,ror#2 - eor r12,r12,r10 - vext.8 q8,q1,q2,#4 - add r7,r7,r2 - eor r2,r5,r6 - eor r0,r4,r4,ror#5 - vext.8 q9,q3,q0,#4 - add r8,r8,r12 - and r2,r2,r4 - eor r12,r0,r4,ror#19 - vshr.u32 q10,q8,#7 - eor r0,r8,r8,ror#11 - eor r2,r2,r6 - vadd.i32 q1,q1,q9 - add r7,r7,r12,ror#6 - eor r12,r8,r9 - vshr.u32 q9,q8,#3 - eor r0,r0,r8,ror#20 - add r7,r7,r2 - vsli.32 q10,q8,#25 - ldr r2,[sp,#20] - and r3,r3,r12 - vshr.u32 q11,q8,#18 - add r11,r11,r7 - add r7,r7,r0,ror#2 - eor r3,r3,r9 - veor q9,q9,q10 - add r6,r6,r2 - vsli.32 q11,q8,#14 - eor r2,r4,r5 - eor r0,r11,r11,ror#5 - vshr.u32 d24,d1,#17 - add r7,r7,r3 - and r2,r2,r11 - veor q9,q9,q11 - eor r3,r0,r11,ror#19 - eor r0,r7,r7,ror#11 - vsli.32 d24,d1,#15 - eor r2,r2,r5 - add r6,r6,r3,ror#6 - vshr.u32 d25,d1,#10 - eor r3,r7,r8 - eor r0,r0,r7,ror#20 - vadd.i32 q1,q1,q9 - add r6,r6,r2 - ldr r2,[sp,#24] - veor d25,d25,d24 - and r12,r12,r3 - add r10,r10,r6 - vshr.u32 d24,d1,#19 - add r6,r6,r0,ror#2 - eor r12,r12,r8 - vsli.32 d24,d1,#13 - add r5,r5,r2 - eor r2,r11,r4 - veor d25,d25,d24 - eor r0,r10,r10,ror#5 - add r6,r6,r12 - vadd.i32 d2,d2,d25 - and r2,r2,r10 - eor r12,r0,r10,ror#19 - vshr.u32 d24,d2,#17 - eor r0,r6,r6,ror#11 - eor r2,r2,r4 - vsli.32 d24,d2,#15 - add r5,r5,r12,ror#6 - eor r12,r6,r7 - vshr.u32 d25,d2,#10 - eor r0,r0,r6,ror#20 - add r5,r5,r2 - veor d25,d25,d24 - ldr r2,[sp,#28] - and r3,r3,r12 - vshr.u32 d24,d2,#19 - add r9,r9,r5 - add r5,r5,r0,ror#2 - eor r3,r3,r7 - vld1.32 {q8},[r14,:128]! 
- add r4,r4,r2 - vsli.32 d24,d2,#13 - eor r2,r10,r11 - eor r0,r9,r9,ror#5 - veor d25,d25,d24 - add r5,r5,r3 - and r2,r2,r9 - vadd.i32 d3,d3,d25 - eor r3,r0,r9,ror#19 - eor r0,r5,r5,ror#11 - vadd.i32 q8,q8,q1 - eor r2,r2,r11 - add r4,r4,r3,ror#6 - eor r3,r5,r6 - eor r0,r0,r5,ror#20 - add r4,r4,r2 - ldr r2,[sp,#32] - and r12,r12,r3 - add r8,r8,r4 - vst1.32 {q8},[r1,:128]! - add r4,r4,r0,ror#2 - eor r12,r12,r6 - vext.8 q8,q2,q3,#4 - add r11,r11,r2 - eor r2,r9,r10 - eor r0,r8,r8,ror#5 - vext.8 q9,q0,q1,#4 - add r4,r4,r12 - and r2,r2,r8 - eor r12,r0,r8,ror#19 - vshr.u32 q10,q8,#7 - eor r0,r4,r4,ror#11 - eor r2,r2,r10 - vadd.i32 q2,q2,q9 - add r11,r11,r12,ror#6 - eor r12,r4,r5 - vshr.u32 q9,q8,#3 - eor r0,r0,r4,ror#20 - add r11,r11,r2 - vsli.32 q10,q8,#25 - ldr r2,[sp,#36] - and r3,r3,r12 - vshr.u32 q11,q8,#18 - add r7,r7,r11 - add r11,r11,r0,ror#2 - eor r3,r3,r5 - veor q9,q9,q10 - add r10,r10,r2 - vsli.32 q11,q8,#14 - eor r2,r8,r9 - eor r0,r7,r7,ror#5 - vshr.u32 d24,d3,#17 - add r11,r11,r3 - and r2,r2,r7 - veor q9,q9,q11 - eor r3,r0,r7,ror#19 - eor r0,r11,r11,ror#11 - vsli.32 d24,d3,#15 - eor r2,r2,r9 - add r10,r10,r3,ror#6 - vshr.u32 d25,d3,#10 - eor r3,r11,r4 - eor r0,r0,r11,ror#20 - vadd.i32 q2,q2,q9 - add r10,r10,r2 - ldr r2,[sp,#40] - veor d25,d25,d24 - and r12,r12,r3 - add r6,r6,r10 - vshr.u32 d24,d3,#19 - add r10,r10,r0,ror#2 - eor r12,r12,r4 - vsli.32 d24,d3,#13 - add r9,r9,r2 - eor r2,r7,r8 - veor d25,d25,d24 - eor r0,r6,r6,ror#5 - add r10,r10,r12 - vadd.i32 d4,d4,d25 - and r2,r2,r6 - eor r12,r0,r6,ror#19 - vshr.u32 d24,d4,#17 - eor r0,r10,r10,ror#11 - eor r2,r2,r8 - vsli.32 d24,d4,#15 - add r9,r9,r12,ror#6 - eor r12,r10,r11 - vshr.u32 d25,d4,#10 - eor r0,r0,r10,ror#20 - add r9,r9,r2 - veor d25,d25,d24 - ldr r2,[sp,#44] - and r3,r3,r12 - vshr.u32 d24,d4,#19 - add r5,r5,r9 - add r9,r9,r0,ror#2 - eor r3,r3,r11 - vld1.32 {q8},[r14,:128]! - add r8,r8,r2 - vsli.32 d24,d4,#13 - eor r2,r6,r7 - eor r0,r5,r5,ror#5 - veor d25,d25,d24 - add r9,r9,r3 - and r2,r2,r5 - vadd.i32 d5,d5,d25 - eor r3,r0,r5,ror#19 - eor r0,r9,r9,ror#11 - vadd.i32 q8,q8,q2 - eor r2,r2,r7 - add r8,r8,r3,ror#6 - eor r3,r9,r10 - eor r0,r0,r9,ror#20 - add r8,r8,r2 - ldr r2,[sp,#48] - and r12,r12,r3 - add r4,r4,r8 - vst1.32 {q8},[r1,:128]! 
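
NEON has no vector rotate instruction, so every ROTR in the vectorized schedule above is a vshr.u32 (supplying the high bits) paired with vsli.32, shift-left-and-insert (supplying the low bits): the #7/#25 and #18/#14 pairs are ror-by-7 and ror-by-18. The same idiom with GCC/Clang NEON intrinsics, as a sketch (assumes arm_neon.h and a NEON-enabled target):

#include <arm_neon.h>

/* sigma0() on four schedule words at once: ror(x,n) synthesized as
 * (x >> n) with (x << (32-n)) inserted, like the vshr/vsli pairs. */
static uint32x4_t sigma0_x4(uint32x4_t x)
{
	uint32x4_t r7  = vsliq_n_u32(vshrq_n_u32(x, 7),  x, 25);
	uint32x4_t r18 = vsliq_n_u32(vshrq_n_u32(x, 18), x, 14);

	return veorq_u32(veorq_u32(r7, r18), vshrq_n_u32(x, 3));
}
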
- add r8,r8,r0,ror#2 - eor r12,r12,r10 - vext.8 q8,q3,q0,#4 - add r7,r7,r2 - eor r2,r5,r6 - eor r0,r4,r4,ror#5 - vext.8 q9,q1,q2,#4 - add r8,r8,r12 - and r2,r2,r4 - eor r12,r0,r4,ror#19 - vshr.u32 q10,q8,#7 - eor r0,r8,r8,ror#11 - eor r2,r2,r6 - vadd.i32 q3,q3,q9 - add r7,r7,r12,ror#6 - eor r12,r8,r9 - vshr.u32 q9,q8,#3 - eor r0,r0,r8,ror#20 - add r7,r7,r2 - vsli.32 q10,q8,#25 - ldr r2,[sp,#52] - and r3,r3,r12 - vshr.u32 q11,q8,#18 - add r11,r11,r7 - add r7,r7,r0,ror#2 - eor r3,r3,r9 - veor q9,q9,q10 - add r6,r6,r2 - vsli.32 q11,q8,#14 - eor r2,r4,r5 - eor r0,r11,r11,ror#5 - vshr.u32 d24,d5,#17 - add r7,r7,r3 - and r2,r2,r11 - veor q9,q9,q11 - eor r3,r0,r11,ror#19 - eor r0,r7,r7,ror#11 - vsli.32 d24,d5,#15 - eor r2,r2,r5 - add r6,r6,r3,ror#6 - vshr.u32 d25,d5,#10 - eor r3,r7,r8 - eor r0,r0,r7,ror#20 - vadd.i32 q3,q3,q9 - add r6,r6,r2 - ldr r2,[sp,#56] - veor d25,d25,d24 - and r12,r12,r3 - add r10,r10,r6 - vshr.u32 d24,d5,#19 - add r6,r6,r0,ror#2 - eor r12,r12,r8 - vsli.32 d24,d5,#13 - add r5,r5,r2 - eor r2,r11,r4 - veor d25,d25,d24 - eor r0,r10,r10,ror#5 - add r6,r6,r12 - vadd.i32 d6,d6,d25 - and r2,r2,r10 - eor r12,r0,r10,ror#19 - vshr.u32 d24,d6,#17 - eor r0,r6,r6,ror#11 - eor r2,r2,r4 - vsli.32 d24,d6,#15 - add r5,r5,r12,ror#6 - eor r12,r6,r7 - vshr.u32 d25,d6,#10 - eor r0,r0,r6,ror#20 - add r5,r5,r2 - veor d25,d25,d24 - ldr r2,[sp,#60] - and r3,r3,r12 - vshr.u32 d24,d6,#19 - add r9,r9,r5 - add r5,r5,r0,ror#2 - eor r3,r3,r7 - vld1.32 {q8},[r14,:128]! - add r4,r4,r2 - vsli.32 d24,d6,#13 - eor r2,r10,r11 - eor r0,r9,r9,ror#5 - veor d25,d25,d24 - add r5,r5,r3 - and r2,r2,r9 - vadd.i32 d7,d7,d25 - eor r3,r0,r9,ror#19 - eor r0,r5,r5,ror#11 - vadd.i32 q8,q8,q3 - eor r2,r2,r11 - add r4,r4,r3,ror#6 - eor r3,r5,r6 - eor r0,r0,r5,ror#20 - add r4,r4,r2 - ldr r2,[r14] - and r12,r12,r3 - add r8,r8,r4 - vst1.32 {q8},[r1,:128]! - add r4,r4,r0,ror#2 - eor r12,r12,r6 - teq r2,#0 @ check for K256 terminator - ldr r2,[sp,#0] - sub r1,r1,#64 - bne .L_00_48 - - ldr r1,[sp,#68] - ldr r0,[sp,#72] - sub r14,r14,#256 @ rewind r14 - teq r1,r0 - it eq - subeq r1,r1,#64 @ avoid SEGV - vld1.8 {q0},[r1]! @ load next input block - vld1.8 {q1},[r1]! - vld1.8 {q2},[r1]! - vld1.8 {q3},[r1]! - it ne - strne r1,[sp,#68] - mov r1,sp - add r11,r11,r2 - eor r2,r9,r10 - eor r0,r8,r8,ror#5 - add r4,r4,r12 - vld1.32 {q8},[r14,:128]! 
- and r2,r2,r8 - eor r12,r0,r8,ror#19 - eor r0,r4,r4,ror#11 - eor r2,r2,r10 - vrev32.8 q0,q0 - add r11,r11,r12,ror#6 - eor r12,r4,r5 - eor r0,r0,r4,ror#20 - add r11,r11,r2 - vadd.i32 q8,q8,q0 - ldr r2,[sp,#4] - and r3,r3,r12 - add r7,r7,r11 - add r11,r11,r0,ror#2 - eor r3,r3,r5 - add r10,r10,r2 - eor r2,r8,r9 - eor r0,r7,r7,ror#5 - add r11,r11,r3 - and r2,r2,r7 - eor r3,r0,r7,ror#19 - eor r0,r11,r11,ror#11 - eor r2,r2,r9 - add r10,r10,r3,ror#6 - eor r3,r11,r4 - eor r0,r0,r11,ror#20 - add r10,r10,r2 - ldr r2,[sp,#8] - and r12,r12,r3 - add r6,r6,r10 - add r10,r10,r0,ror#2 - eor r12,r12,r4 - add r9,r9,r2 - eor r2,r7,r8 - eor r0,r6,r6,ror#5 - add r10,r10,r12 - and r2,r2,r6 - eor r12,r0,r6,ror#19 - eor r0,r10,r10,ror#11 - eor r2,r2,r8 - add r9,r9,r12,ror#6 - eor r12,r10,r11 - eor r0,r0,r10,ror#20 - add r9,r9,r2 - ldr r2,[sp,#12] - and r3,r3,r12 - add r5,r5,r9 - add r9,r9,r0,ror#2 - eor r3,r3,r11 - add r8,r8,r2 - eor r2,r6,r7 - eor r0,r5,r5,ror#5 - add r9,r9,r3 - and r2,r2,r5 - eor r3,r0,r5,ror#19 - eor r0,r9,r9,ror#11 - eor r2,r2,r7 - add r8,r8,r3,ror#6 - eor r3,r9,r10 - eor r0,r0,r9,ror#20 - add r8,r8,r2 - ldr r2,[sp,#16] - and r12,r12,r3 - add r4,r4,r8 - add r8,r8,r0,ror#2 - eor r12,r12,r10 - vst1.32 {q8},[r1,:128]! - add r7,r7,r2 - eor r2,r5,r6 - eor r0,r4,r4,ror#5 - add r8,r8,r12 - vld1.32 {q8},[r14,:128]! - and r2,r2,r4 - eor r12,r0,r4,ror#19 - eor r0,r8,r8,ror#11 - eor r2,r2,r6 - vrev32.8 q1,q1 - add r7,r7,r12,ror#6 - eor r12,r8,r9 - eor r0,r0,r8,ror#20 - add r7,r7,r2 - vadd.i32 q8,q8,q1 - ldr r2,[sp,#20] - and r3,r3,r12 - add r11,r11,r7 - add r7,r7,r0,ror#2 - eor r3,r3,r9 - add r6,r6,r2 - eor r2,r4,r5 - eor r0,r11,r11,ror#5 - add r7,r7,r3 - and r2,r2,r11 - eor r3,r0,r11,ror#19 - eor r0,r7,r7,ror#11 - eor r2,r2,r5 - add r6,r6,r3,ror#6 - eor r3,r7,r8 - eor r0,r0,r7,ror#20 - add r6,r6,r2 - ldr r2,[sp,#24] - and r12,r12,r3 - add r10,r10,r6 - add r6,r6,r0,ror#2 - eor r12,r12,r8 - add r5,r5,r2 - eor r2,r11,r4 - eor r0,r10,r10,ror#5 - add r6,r6,r12 - and r2,r2,r10 - eor r12,r0,r10,ror#19 - eor r0,r6,r6,ror#11 - eor r2,r2,r4 - add r5,r5,r12,ror#6 - eor r12,r6,r7 - eor r0,r0,r6,ror#20 - add r5,r5,r2 - ldr r2,[sp,#28] - and r3,r3,r12 - add r9,r9,r5 - add r5,r5,r0,ror#2 - eor r3,r3,r7 - add r4,r4,r2 - eor r2,r10,r11 - eor r0,r9,r9,ror#5 - add r5,r5,r3 - and r2,r2,r9 - eor r3,r0,r9,ror#19 - eor r0,r5,r5,ror#11 - eor r2,r2,r11 - add r4,r4,r3,ror#6 - eor r3,r5,r6 - eor r0,r0,r5,ror#20 - add r4,r4,r2 - ldr r2,[sp,#32] - and r12,r12,r3 - add r8,r8,r4 - add r4,r4,r0,ror#2 - eor r12,r12,r6 - vst1.32 {q8},[r1,:128]! - add r11,r11,r2 - eor r2,r9,r10 - eor r0,r8,r8,ror#5 - add r4,r4,r12 - vld1.32 {q8},[r14,:128]! 
- and r2,r2,r8 - eor r12,r0,r8,ror#19 - eor r0,r4,r4,ror#11 - eor r2,r2,r10 - vrev32.8 q2,q2 - add r11,r11,r12,ror#6 - eor r12,r4,r5 - eor r0,r0,r4,ror#20 - add r11,r11,r2 - vadd.i32 q8,q8,q2 - ldr r2,[sp,#36] - and r3,r3,r12 - add r7,r7,r11 - add r11,r11,r0,ror#2 - eor r3,r3,r5 - add r10,r10,r2 - eor r2,r8,r9 - eor r0,r7,r7,ror#5 - add r11,r11,r3 - and r2,r2,r7 - eor r3,r0,r7,ror#19 - eor r0,r11,r11,ror#11 - eor r2,r2,r9 - add r10,r10,r3,ror#6 - eor r3,r11,r4 - eor r0,r0,r11,ror#20 - add r10,r10,r2 - ldr r2,[sp,#40] - and r12,r12,r3 - add r6,r6,r10 - add r10,r10,r0,ror#2 - eor r12,r12,r4 - add r9,r9,r2 - eor r2,r7,r8 - eor r0,r6,r6,ror#5 - add r10,r10,r12 - and r2,r2,r6 - eor r12,r0,r6,ror#19 - eor r0,r10,r10,ror#11 - eor r2,r2,r8 - add r9,r9,r12,ror#6 - eor r12,r10,r11 - eor r0,r0,r10,ror#20 - add r9,r9,r2 - ldr r2,[sp,#44] - and r3,r3,r12 - add r5,r5,r9 - add r9,r9,r0,ror#2 - eor r3,r3,r11 - add r8,r8,r2 - eor r2,r6,r7 - eor r0,r5,r5,ror#5 - add r9,r9,r3 - and r2,r2,r5 - eor r3,r0,r5,ror#19 - eor r0,r9,r9,ror#11 - eor r2,r2,r7 - add r8,r8,r3,ror#6 - eor r3,r9,r10 - eor r0,r0,r9,ror#20 - add r8,r8,r2 - ldr r2,[sp,#48] - and r12,r12,r3 - add r4,r4,r8 - add r8,r8,r0,ror#2 - eor r12,r12,r10 - vst1.32 {q8},[r1,:128]! - add r7,r7,r2 - eor r2,r5,r6 - eor r0,r4,r4,ror#5 - add r8,r8,r12 - vld1.32 {q8},[r14,:128]! - and r2,r2,r4 - eor r12,r0,r4,ror#19 - eor r0,r8,r8,ror#11 - eor r2,r2,r6 - vrev32.8 q3,q3 - add r7,r7,r12,ror#6 - eor r12,r8,r9 - eor r0,r0,r8,ror#20 - add r7,r7,r2 - vadd.i32 q8,q8,q3 - ldr r2,[sp,#52] - and r3,r3,r12 - add r11,r11,r7 - add r7,r7,r0,ror#2 - eor r3,r3,r9 - add r6,r6,r2 - eor r2,r4,r5 - eor r0,r11,r11,ror#5 - add r7,r7,r3 - and r2,r2,r11 - eor r3,r0,r11,ror#19 - eor r0,r7,r7,ror#11 - eor r2,r2,r5 - add r6,r6,r3,ror#6 - eor r3,r7,r8 - eor r0,r0,r7,ror#20 - add r6,r6,r2 - ldr r2,[sp,#56] - and r12,r12,r3 - add r10,r10,r6 - add r6,r6,r0,ror#2 - eor r12,r12,r8 - add r5,r5,r2 - eor r2,r11,r4 - eor r0,r10,r10,ror#5 - add r6,r6,r12 - and r2,r2,r10 - eor r12,r0,r10,ror#19 - eor r0,r6,r6,ror#11 - eor r2,r2,r4 - add r5,r5,r12,ror#6 - eor r12,r6,r7 - eor r0,r0,r6,ror#20 - add r5,r5,r2 - ldr r2,[sp,#60] - and r3,r3,r12 - add r9,r9,r5 - add r5,r5,r0,ror#2 - eor r3,r3,r7 - add r4,r4,r2 - eor r2,r10,r11 - eor r0,r9,r9,ror#5 - add r5,r5,r3 - and r2,r2,r9 - eor r3,r0,r9,ror#19 - eor r0,r5,r5,ror#11 - eor r2,r2,r11 - add r4,r4,r3,ror#6 - eor r3,r5,r6 - eor r0,r0,r5,ror#20 - add r4,r4,r2 - ldr r2,[sp,#64] - and r12,r12,r3 - add r8,r8,r4 - add r4,r4,r0,ror#2 - eor r12,r12,r6 - vst1.32 {q8},[r1,:128]! 
- ldr r0,[r2,#0] - add r4,r4,r12 @ h+=Maj(a,b,c) from the past - ldr r12,[r2,#4] - ldr r3,[r2,#8] - ldr r1,[r2,#12] - add r4,r4,r0 @ accumulate - ldr r0,[r2,#16] - add r5,r5,r12 - ldr r12,[r2,#20] - add r6,r6,r3 - ldr r3,[r2,#24] - add r7,r7,r1 - ldr r1,[r2,#28] - add r8,r8,r0 - str r4,[r2],#4 - add r9,r9,r12 - str r5,[r2],#4 - add r10,r10,r3 - str r6,[r2],#4 - add r11,r11,r1 - str r7,[r2],#4 - stmia r2,{r8-r11} - - ittte ne - movne r1,sp - ldrne r2,[sp,#0] - eorne r12,r12,r12 - ldreq sp,[sp,#76] @ restore original sp - itt ne - eorne r3,r5,r6 - bne .L_00_48 - - ldmia sp!,{r4-r12,pc} -.size sha256_block_data_order_neon,.-sha256_block_data_order_neon -#endif -#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) - -# ifdef __thumb2__ -# define INST(a,b,c,d) .byte c,d|0xc,a,b -# else -# define INST(a,b,c,d) .byte a,b,c,d -# endif - -.type sha256_block_data_order_armv8,%function -.align 5 -sha256_block_data_order_armv8: -.LARMv8: - vld1.32 {q0,q1},[r0] -# ifdef __thumb2__ - adr r3,.LARMv8 - sub r3,r3,#.LARMv8-K256 -# else - adrl r3,K256 -# endif - add r2,r1,r2,lsl#6 @ len to point at the end of inp - -.Loop_v8: - vld1.8 {q8-q9},[r1]! - vld1.8 {q10-q11},[r1]! - vld1.32 {q12},[r3]! - vrev32.8 q8,q8 - vrev32.8 q9,q9 - vrev32.8 q10,q10 - vrev32.8 q11,q11 - vmov q14,q0 @ offload - vmov q15,q1 - teq r1,r2 - vld1.32 {q13},[r3]! - vadd.i32 q12,q12,q8 - INST(0xe2,0x03,0xfa,0xf3) @ sha256su0 q8,q9 - vmov q2,q0 - INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12 - INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12 - INST(0xe6,0x0c,0x64,0xf3) @ sha256su1 q8,q10,q11 - vld1.32 {q12},[r3]! - vadd.i32 q13,q13,q9 - INST(0xe4,0x23,0xfa,0xf3) @ sha256su0 q9,q10 - vmov q2,q0 - INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13 - INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13 - INST(0xe0,0x2c,0x66,0xf3) @ sha256su1 q9,q11,q8 - vld1.32 {q13},[r3]! - vadd.i32 q12,q12,q10 - INST(0xe6,0x43,0xfa,0xf3) @ sha256su0 q10,q11 - vmov q2,q0 - INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12 - INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12 - INST(0xe2,0x4c,0x60,0xf3) @ sha256su1 q10,q8,q9 - vld1.32 {q12},[r3]! - vadd.i32 q13,q13,q11 - INST(0xe0,0x63,0xfa,0xf3) @ sha256su0 q11,q8 - vmov q2,q0 - INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13 - INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13 - INST(0xe4,0x6c,0x62,0xf3) @ sha256su1 q11,q9,q10 - vld1.32 {q13},[r3]! - vadd.i32 q12,q12,q8 - INST(0xe2,0x03,0xfa,0xf3) @ sha256su0 q8,q9 - vmov q2,q0 - INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12 - INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12 - INST(0xe6,0x0c,0x64,0xf3) @ sha256su1 q8,q10,q11 - vld1.32 {q12},[r3]! - vadd.i32 q13,q13,q9 - INST(0xe4,0x23,0xfa,0xf3) @ sha256su0 q9,q10 - vmov q2,q0 - INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13 - INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13 - INST(0xe0,0x2c,0x66,0xf3) @ sha256su1 q9,q11,q8 - vld1.32 {q13},[r3]! - vadd.i32 q12,q12,q10 - INST(0xe6,0x43,0xfa,0xf3) @ sha256su0 q10,q11 - vmov q2,q0 - INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12 - INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12 - INST(0xe2,0x4c,0x60,0xf3) @ sha256su1 q10,q8,q9 - vld1.32 {q12},[r3]! - vadd.i32 q13,q13,q11 - INST(0xe0,0x63,0xfa,0xf3) @ sha256su0 q11,q8 - vmov q2,q0 - INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13 - INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13 - INST(0xe4,0x6c,0x62,0xf3) @ sha256su1 q11,q9,q10 - vld1.32 {q13},[r3]! 
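
The INST() byte sequences above hand-encode the ARMv8 sha256h, sha256h2, sha256su0 and sha256su1 instructions (with the Thumb-2 byte swizzle) so the file assembles even where the assembler predates the Crypto Extensions; note the whole path is compiled out for __KERNEL__ builds. With a toolchain that does know the instructions, one such group maps onto the ACLE intrinsics; a sketch, assuming arm_neon.h with the crypto extensions enabled and illustrative names:

#include <arm_neon.h>

/* One quad-round group from the loop above: wk holds W[i..i+3] +
 * K[i..i+3] (the vadd.i32 q12/q13 results). */
static void sha256_quad(uint32x4_t *abcd, uint32x4_t *efgh, uint32x4_t wk)
{
	uint32x4_t tmp = *abcd;				/* vmov q2,q0 */

	*abcd = vsha256hq_u32(*abcd, *efgh, wk);	/* sha256h    */
	*efgh = vsha256h2q_u32(*efgh, tmp, wk);		/* sha256h2   */
}

/* Matching schedule step: sha256su0 then sha256su1. */
static uint32x4_t sha256_sched(uint32x4_t w0, uint32x4_t w1,
			       uint32x4_t w2, uint32x4_t w3)
{
	return vsha256su1q_u32(vsha256su0q_u32(w0, w1), w2, w3);
}
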
- vadd.i32 q12,q12,q8 - INST(0xe2,0x03,0xfa,0xf3) @ sha256su0 q8,q9 - vmov q2,q0 - INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12 - INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12 - INST(0xe6,0x0c,0x64,0xf3) @ sha256su1 q8,q10,q11 - vld1.32 {q12},[r3]! - vadd.i32 q13,q13,q9 - INST(0xe4,0x23,0xfa,0xf3) @ sha256su0 q9,q10 - vmov q2,q0 - INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13 - INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13 - INST(0xe0,0x2c,0x66,0xf3) @ sha256su1 q9,q11,q8 - vld1.32 {q13},[r3]! - vadd.i32 q12,q12,q10 - INST(0xe6,0x43,0xfa,0xf3) @ sha256su0 q10,q11 - vmov q2,q0 - INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12 - INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12 - INST(0xe2,0x4c,0x60,0xf3) @ sha256su1 q10,q8,q9 - vld1.32 {q12},[r3]! - vadd.i32 q13,q13,q11 - INST(0xe0,0x63,0xfa,0xf3) @ sha256su0 q11,q8 - vmov q2,q0 - INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13 - INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13 - INST(0xe4,0x6c,0x62,0xf3) @ sha256su1 q11,q9,q10 - vld1.32 {q13},[r3]! - vadd.i32 q12,q12,q8 - vmov q2,q0 - INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12 - INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12 - - vld1.32 {q12},[r3]! - vadd.i32 q13,q13,q9 - vmov q2,q0 - INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13 - INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13 - - vld1.32 {q13},[r3] - vadd.i32 q12,q12,q10 - sub r3,r3,#256-16 @ rewind - vmov q2,q0 - INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12 - INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12 - - vadd.i32 q13,q13,q11 - vmov q2,q0 - INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13 - INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13 - - vadd.i32 q0,q0,q14 - vadd.i32 q1,q1,q15 - it ne - bne .Loop_v8 - - vst1.32 {q0,q1},[r0] - - bx lr @ bx lr -.size sha256_block_data_order_armv8,.-sha256_block_data_order_armv8 -#endif -.asciz "SHA256 block transform for ARMv4/NEON/ARMv8, CRYPTOGAMS by " -.align 2 -#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) -.comm OPENSSL_armcap_P,4,4 -#endif diff --git a/arch/arm/crypto/sha512-core.S_shipped b/arch/arm/crypto/sha512-core.S_shipped deleted file mode 100644 index 03014624f2ab5..0000000000000 --- a/arch/arm/crypto/sha512-core.S_shipped +++ /dev/null @@ -1,1869 +0,0 @@ -@ SPDX-License-Identifier: GPL-2.0 - -@ This code is taken from the OpenSSL project but the author (Andy Polyakov) -@ has relicensed it under the GPLv2. Therefore this program is free software; -@ you can redistribute it and/or modify it under the terms of the GNU General -@ Public License version 2 as published by the Free Software Foundation. -@ -@ The original headers, including the original license headers, are -@ included below for completeness. - -@ ==================================================================== -@ Written by Andy Polyakov for the OpenSSL -@ project. The module is, however, dual licensed under OpenSSL and -@ CRYPTOGAMS licenses depending on where you obtain it. For further -@ details see https://www.openssl.org/~appro/cryptogams/. -@ ==================================================================== - -@ SHA512 block procedure for ARMv4. September 2007. - -@ This code is ~4.5 (four and a half) times faster than code generated -@ by gcc 3.4 and it spends ~72 clock cycles per byte [on single-issue -@ Xscale PXA250 core]. -@ -@ July 2010. -@ -@ Rescheduling for dual-issue pipeline resulted in 6% improvement on -@ Cortex A8 core and ~40 cycles per processed byte. - -@ February 2011. 
-@
-@ Profiler-assisted and platform-specific optimization resulted in 7%
-@ improvement on Cortex A8 core and ~38 cycles per byte.
-
-@ March 2011.
-@
-@ Add NEON implementation. On Cortex A8 it was measured to process
-@ one byte in 23.3 cycles or ~60% faster than integer-only code.
-
-@ August 2012.
-@
-@ Improve NEON performance by 12% on Snapdragon S4. In absolute
-@ terms it's 22.6 cycles per byte, which is a disappointing result.
-@ Technical writers asserted that the 3-way S4 pipeline can sustain
-@ multiple NEON instructions per cycle, but dual NEON issue could
-@ not be observed, see https://www.openssl.org/~appro/Snapdragon-S4.html
-@ for further details. On a side note, Cortex-A15 processes one byte in
-@ 16 cycles.
-
-@ Byte order [in]dependence. =========================================
-@
-@ Originally the caller was expected to maintain specific *dword* order in
-@ h[0-7], namely with most significant dword at *lower* address, which
-@ was reflected in the two parameters below as 0 and 4. Now the caller is
-@ expected to maintain native byte order for whole 64-bit values.
-#ifndef __KERNEL__
-# include "arm_arch.h"
-# define VFP_ABI_PUSH vstmdb sp!,{d8-d15}
-# define VFP_ABI_POP vldmia sp!,{d8-d15}
-#else
-# define __ARM_ARCH__ __LINUX_ARM_ARCH__
-# define __ARM_MAX_ARCH__ 7
-# define VFP_ABI_PUSH
-# define VFP_ABI_POP
-#endif
-
-#ifdef __ARMEL__
-# define LO 0
-# define HI 4
-# define WORD64(hi0,lo0,hi1,lo1) .word lo0,hi0, lo1,hi1
-#else
-# define HI 0
-# define LO 4
-# define WORD64(hi0,lo0,hi1,lo1) .word hi0,lo0, hi1,lo1
-#endif
-
-.text
-#if __ARM_ARCH__<7
-.code 32
-#else
-.syntax unified
-# ifdef __thumb2__
-.thumb
-# else
-.code 32
-# endif
-#endif
-
-.type K512,%object
-.align 5
-K512:
-WORD64(0x428a2f98,0xd728ae22, 0x71374491,0x23ef65cd)
-WORD64(0xb5c0fbcf,0xec4d3b2f, 0xe9b5dba5,0x8189dbbc)
-WORD64(0x3956c25b,0xf348b538, 0x59f111f1,0xb605d019)
-WORD64(0x923f82a4,0xaf194f9b, 0xab1c5ed5,0xda6d8118)
-WORD64(0xd807aa98,0xa3030242, 0x12835b01,0x45706fbe)
-WORD64(0x243185be,0x4ee4b28c, 0x550c7dc3,0xd5ffb4e2)
-WORD64(0x72be5d74,0xf27b896f, 0x80deb1fe,0x3b1696b1)
-WORD64(0x9bdc06a7,0x25c71235, 0xc19bf174,0xcf692694)
-WORD64(0xe49b69c1,0x9ef14ad2, 0xefbe4786,0x384f25e3)
-WORD64(0x0fc19dc6,0x8b8cd5b5, 0x240ca1cc,0x77ac9c65)
-WORD64(0x2de92c6f,0x592b0275, 0x4a7484aa,0x6ea6e483)
-WORD64(0x5cb0a9dc,0xbd41fbd4, 0x76f988da,0x831153b5)
-WORD64(0x983e5152,0xee66dfab, 0xa831c66d,0x2db43210)
-WORD64(0xb00327c8,0x98fb213f, 0xbf597fc7,0xbeef0ee4)
-WORD64(0xc6e00bf3,0x3da88fc2, 0xd5a79147,0x930aa725)
-WORD64(0x06ca6351,0xe003826f, 0x14292967,0x0a0e6e70)
-WORD64(0x27b70a85,0x46d22ffc, 0x2e1b2138,0x5c26c926)
-WORD64(0x4d2c6dfc,0x5ac42aed, 0x53380d13,0x9d95b3df)
-WORD64(0x650a7354,0x8baf63de, 0x766a0abb,0x3c77b2a8)
-WORD64(0x81c2c92e,0x47edaee6, 0x92722c85,0x1482353b)
-WORD64(0xa2bfe8a1,0x4cf10364, 0xa81a664b,0xbc423001)
-WORD64(0xc24b8b70,0xd0f89791, 0xc76c51a3,0x0654be30)
-WORD64(0xd192e819,0xd6ef5218, 0xd6990624,0x5565a910)
-WORD64(0xf40e3585,0x5771202a, 0x106aa070,0x32bbd1b8)
-WORD64(0x19a4c116,0xb8d2d0c8, 0x1e376c08,0x5141ab53)
-WORD64(0x2748774c,0xdf8eeb99, 0x34b0bcb5,0xe19b48a8)
-WORD64(0x391c0cb3,0xc5c95a63, 0x4ed8aa4a,0xe3418acb)
-WORD64(0x5b9cca4f,0x7763e373, 0x682e6ff3,0xd6b2b8a3)
-WORD64(0x748f82ee,0x5defb2fc, 0x78a5636f,0x43172f60)
-WORD64(0x84c87814,0xa1f0ab72, 0x8cc70208,0x1a6439ec)
-WORD64(0x90befffa,0x23631e28, 0xa4506ceb,0xde82bde9)
-WORD64(0xbef9a3f7,0xb2c67915, 0xc67178f2,0xe372532b)
-WORD64(0xca273ece,0xea26619c, 0xd186b8c7,0x21c0c207)
-WORD64(0xeada7dd6,0xcde0eb1e, 0xf57d4f7f,0xee6ed178)
-WORD64(0x06f067aa,0x72176fba, 0x0a637dc5,0xa2c898a6) -WORD64(0x113f9804,0xbef90dae, 0x1b710b35,0x131c471b) -WORD64(0x28db77f5,0x23047d84, 0x32caab7b,0x40c72493) -WORD64(0x3c9ebe0a,0x15c9bebc, 0x431d67c4,0x9c100d4c) -WORD64(0x4cc5d4be,0xcb3e42b6, 0x597f299c,0xfc657e2a) -WORD64(0x5fcb6fab,0x3ad6faec, 0x6c44198c,0x4a475817) -.size K512,.-K512 -#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) -.LOPENSSL_armcap: -.word OPENSSL_armcap_P-sha512_block_data_order -.skip 32-4 -#else -.skip 32 -#endif - -.global sha512_block_data_order -.type sha512_block_data_order,%function -sha512_block_data_order: -.Lsha512_block_data_order: -#if __ARM_ARCH__<7 - sub r3,pc,#8 @ sha512_block_data_order -#else - adr r3,.Lsha512_block_data_order -#endif -#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) - ldr r12,.LOPENSSL_armcap - ldr r12,[r3,r12] @ OPENSSL_armcap_P - tst r12,#1 - bne .LNEON -#endif - add r2,r1,r2,lsl#7 @ len to point at the end of inp - stmdb sp!,{r4-r12,lr} - sub r14,r3,#672 @ K512 - sub sp,sp,#9*8 - - ldr r7,[r0,#32+LO] - ldr r8,[r0,#32+HI] - ldr r9, [r0,#48+LO] - ldr r10, [r0,#48+HI] - ldr r11, [r0,#56+LO] - ldr r12, [r0,#56+HI] -.Loop: - str r9, [sp,#48+0] - str r10, [sp,#48+4] - str r11, [sp,#56+0] - str r12, [sp,#56+4] - ldr r5,[r0,#0+LO] - ldr r6,[r0,#0+HI] - ldr r3,[r0,#8+LO] - ldr r4,[r0,#8+HI] - ldr r9, [r0,#16+LO] - ldr r10, [r0,#16+HI] - ldr r11, [r0,#24+LO] - ldr r12, [r0,#24+HI] - str r3,[sp,#8+0] - str r4,[sp,#8+4] - str r9, [sp,#16+0] - str r10, [sp,#16+4] - str r11, [sp,#24+0] - str r12, [sp,#24+4] - ldr r3,[r0,#40+LO] - ldr r4,[r0,#40+HI] - str r3,[sp,#40+0] - str r4,[sp,#40+4] - -.L00_15: -#if __ARM_ARCH__<7 - ldrb r3,[r1,#7] - ldrb r9, [r1,#6] - ldrb r10, [r1,#5] - ldrb r11, [r1,#4] - ldrb r4,[r1,#3] - ldrb r12, [r1,#2] - orr r3,r3,r9,lsl#8 - ldrb r9, [r1,#1] - orr r3,r3,r10,lsl#16 - ldrb r10, [r1],#8 - orr r3,r3,r11,lsl#24 - orr r4,r4,r12,lsl#8 - orr r4,r4,r9,lsl#16 - orr r4,r4,r10,lsl#24 -#else - ldr r3,[r1,#4] - ldr r4,[r1],#8 -#ifdef __ARMEL__ - rev r3,r3 - rev r4,r4 -#endif -#endif - @ Sigma1(x) (ROTR((x),14) ^ ROTR((x),18) ^ ROTR((x),41)) - @ LO lo>>14^hi<<18 ^ lo>>18^hi<<14 ^ hi>>9^lo<<23 - @ HI hi>>14^lo<<18 ^ hi>>18^lo<<14 ^ lo>>9^hi<<23 - mov r9,r7,lsr#14 - str r3,[sp,#64+0] - mov r10,r8,lsr#14 - str r4,[sp,#64+4] - eor r9,r9,r8,lsl#18 - ldr r11,[sp,#56+0] @ h.lo - eor r10,r10,r7,lsl#18 - ldr r12,[sp,#56+4] @ h.hi - eor r9,r9,r7,lsr#18 - eor r10,r10,r8,lsr#18 - eor r9,r9,r8,lsl#14 - eor r10,r10,r7,lsl#14 - eor r9,r9,r8,lsr#9 - eor r10,r10,r7,lsr#9 - eor r9,r9,r7,lsl#23 - eor r10,r10,r8,lsl#23 @ Sigma1(e) - adds r3,r3,r9 - ldr r9,[sp,#40+0] @ f.lo - adc r4,r4,r10 @ T += Sigma1(e) - ldr r10,[sp,#40+4] @ f.hi - adds r3,r3,r11 - ldr r11,[sp,#48+0] @ g.lo - adc r4,r4,r12 @ T += h - ldr r12,[sp,#48+4] @ g.hi - - eor r9,r9,r11 - str r7,[sp,#32+0] - eor r10,r10,r12 - str r8,[sp,#32+4] - and r9,r9,r7 - str r5,[sp,#0+0] - and r10,r10,r8 - str r6,[sp,#0+4] - eor r9,r9,r11 - ldr r11,[r14,#LO] @ K[i].lo - eor r10,r10,r12 @ Ch(e,f,g) - ldr r12,[r14,#HI] @ K[i].hi - - adds r3,r3,r9 - ldr r7,[sp,#24+0] @ d.lo - adc r4,r4,r10 @ T += Ch(e,f,g) - ldr r8,[sp,#24+4] @ d.hi - adds r3,r3,r11 - and r9,r11,#0xff - adc r4,r4,r12 @ T += K[i] - adds r7,r7,r3 - ldr r11,[sp,#8+0] @ b.lo - adc r8,r8,r4 @ d += T - teq r9,#148 - - ldr r12,[sp,#16+0] @ c.lo -#if __ARM_ARCH__>=7 - it eq @ Thumb2 thing, sanity check in ARM -#endif - orreq r14,r14,#1 - @ Sigma0(x) (ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39)) - @ LO lo>>28^hi<<4 ^ hi>>2^lo<<30 ^ hi>>7^lo<<25 - @ HI hi>>28^lo<<4 ^ lo>>2^hi<<30 ^ lo>>7^hi<<25 
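
Everything in this routine runs on 32-bit halves: each 64-bit state word is a lo/hi register pair, and each 64-bit rotation becomes the half-word shift recipe spelled out in the LO/HI comments (a rotation by 41 >= 32 is why the third term of Sigma1 crosses halves). The same recipe in C, as a sketch with an illustrative u64pair type:

#include <stdint.h>

/* One 64-bit state word as the lo/hi pair of 32-bit registers. */
struct u64pair { uint32_t lo, hi; };

/* Sigma1(x) = ROTR(x,14) ^ ROTR(x,18) ^ ROTR(x,41), per halves. */
static struct u64pair Sigma1(struct u64pair x)
{
	struct u64pair r;

	r.lo = (x.lo >> 14 | x.hi << 18) ^ (x.lo >> 18 | x.hi << 14) ^
	       (x.hi >>  9 | x.lo << 23);
	r.hi = (x.hi >> 14 | x.lo << 18) ^ (x.hi >> 18 | x.lo << 14) ^
	       (x.lo >>  9 | x.hi << 23);
	return r;
}
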
- mov r9,r5,lsr#28 - mov r10,r6,lsr#28 - eor r9,r9,r6,lsl#4 - eor r10,r10,r5,lsl#4 - eor r9,r9,r6,lsr#2 - eor r10,r10,r5,lsr#2 - eor r9,r9,r5,lsl#30 - eor r10,r10,r6,lsl#30 - eor r9,r9,r6,lsr#7 - eor r10,r10,r5,lsr#7 - eor r9,r9,r5,lsl#25 - eor r10,r10,r6,lsl#25 @ Sigma0(a) - adds r3,r3,r9 - and r9,r5,r11 - adc r4,r4,r10 @ T += Sigma0(a) - - ldr r10,[sp,#8+4] @ b.hi - orr r5,r5,r11 - ldr r11,[sp,#16+4] @ c.hi - and r5,r5,r12 - and r12,r6,r10 - orr r6,r6,r10 - orr r5,r5,r9 @ Maj(a,b,c).lo - and r6,r6,r11 - adds r5,r5,r3 - orr r6,r6,r12 @ Maj(a,b,c).hi - sub sp,sp,#8 - adc r6,r6,r4 @ h += T - tst r14,#1 - add r14,r14,#8 - tst r14,#1 - beq .L00_15 - ldr r9,[sp,#184+0] - ldr r10,[sp,#184+4] - bic r14,r14,#1 -.L16_79: - @ sigma0(x) (ROTR((x),1) ^ ROTR((x),8) ^ ((x)>>7)) - @ LO lo>>1^hi<<31 ^ lo>>8^hi<<24 ^ lo>>7^hi<<25 - @ HI hi>>1^lo<<31 ^ hi>>8^lo<<24 ^ hi>>7 - mov r3,r9,lsr#1 - ldr r11,[sp,#80+0] - mov r4,r10,lsr#1 - ldr r12,[sp,#80+4] - eor r3,r3,r10,lsl#31 - eor r4,r4,r9,lsl#31 - eor r3,r3,r9,lsr#8 - eor r4,r4,r10,lsr#8 - eor r3,r3,r10,lsl#24 - eor r4,r4,r9,lsl#24 - eor r3,r3,r9,lsr#7 - eor r4,r4,r10,lsr#7 - eor r3,r3,r10,lsl#25 - - @ sigma1(x) (ROTR((x),19) ^ ROTR((x),61) ^ ((x)>>6)) - @ LO lo>>19^hi<<13 ^ hi>>29^lo<<3 ^ lo>>6^hi<<26 - @ HI hi>>19^lo<<13 ^ lo>>29^hi<<3 ^ hi>>6 - mov r9,r11,lsr#19 - mov r10,r12,lsr#19 - eor r9,r9,r12,lsl#13 - eor r10,r10,r11,lsl#13 - eor r9,r9,r12,lsr#29 - eor r10,r10,r11,lsr#29 - eor r9,r9,r11,lsl#3 - eor r10,r10,r12,lsl#3 - eor r9,r9,r11,lsr#6 - eor r10,r10,r12,lsr#6 - ldr r11,[sp,#120+0] - eor r9,r9,r12,lsl#26 - - ldr r12,[sp,#120+4] - adds r3,r3,r9 - ldr r9,[sp,#192+0] - adc r4,r4,r10 - - ldr r10,[sp,#192+4] - adds r3,r3,r11 - adc r4,r4,r12 - adds r3,r3,r9 - adc r4,r4,r10 - @ Sigma1(x) (ROTR((x),14) ^ ROTR((x),18) ^ ROTR((x),41)) - @ LO lo>>14^hi<<18 ^ lo>>18^hi<<14 ^ hi>>9^lo<<23 - @ HI hi>>14^lo<<18 ^ hi>>18^lo<<14 ^ lo>>9^hi<<23 - mov r9,r7,lsr#14 - str r3,[sp,#64+0] - mov r10,r8,lsr#14 - str r4,[sp,#64+4] - eor r9,r9,r8,lsl#18 - ldr r11,[sp,#56+0] @ h.lo - eor r10,r10,r7,lsl#18 - ldr r12,[sp,#56+4] @ h.hi - eor r9,r9,r7,lsr#18 - eor r10,r10,r8,lsr#18 - eor r9,r9,r8,lsl#14 - eor r10,r10,r7,lsl#14 - eor r9,r9,r8,lsr#9 - eor r10,r10,r7,lsr#9 - eor r9,r9,r7,lsl#23 - eor r10,r10,r8,lsl#23 @ Sigma1(e) - adds r3,r3,r9 - ldr r9,[sp,#40+0] @ f.lo - adc r4,r4,r10 @ T += Sigma1(e) - ldr r10,[sp,#40+4] @ f.hi - adds r3,r3,r11 - ldr r11,[sp,#48+0] @ g.lo - adc r4,r4,r12 @ T += h - ldr r12,[sp,#48+4] @ g.hi - - eor r9,r9,r11 - str r7,[sp,#32+0] - eor r10,r10,r12 - str r8,[sp,#32+4] - and r9,r9,r7 - str r5,[sp,#0+0] - and r10,r10,r8 - str r6,[sp,#0+4] - eor r9,r9,r11 - ldr r11,[r14,#LO] @ K[i].lo - eor r10,r10,r12 @ Ch(e,f,g) - ldr r12,[r14,#HI] @ K[i].hi - - adds r3,r3,r9 - ldr r7,[sp,#24+0] @ d.lo - adc r4,r4,r10 @ T += Ch(e,f,g) - ldr r8,[sp,#24+4] @ d.hi - adds r3,r3,r11 - and r9,r11,#0xff - adc r4,r4,r12 @ T += K[i] - adds r7,r7,r3 - ldr r11,[sp,#8+0] @ b.lo - adc r8,r8,r4 @ d += T - teq r9,#23 - - ldr r12,[sp,#16+0] @ c.lo -#if __ARM_ARCH__>=7 - it eq @ Thumb2 thing, sanity check in ARM -#endif - orreq r14,r14,#1 - @ Sigma0(x) (ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39)) - @ LO lo>>28^hi<<4 ^ hi>>2^lo<<30 ^ hi>>7^lo<<25 - @ HI hi>>28^lo<<4 ^ lo>>2^hi<<30 ^ lo>>7^hi<<25 - mov r9,r5,lsr#28 - mov r10,r6,lsr#28 - eor r9,r9,r6,lsl#4 - eor r10,r10,r5,lsl#4 - eor r9,r9,r6,lsr#2 - eor r10,r10,r5,lsr#2 - eor r9,r9,r5,lsl#30 - eor r10,r10,r6,lsl#30 - eor r9,r9,r6,lsr#7 - eor r10,r10,r5,lsr#7 - eor r9,r9,r5,lsl#25 - eor r10,r10,r6,lsl#25 @ Sigma0(a) - adds r3,r3,r9 - 
and r9,r5,r11 - adc r4,r4,r10 @ T += Sigma0(a) - - ldr r10,[sp,#8+4] @ b.hi - orr r5,r5,r11 - ldr r11,[sp,#16+4] @ c.hi - and r5,r5,r12 - and r12,r6,r10 - orr r6,r6,r10 - orr r5,r5,r9 @ Maj(a,b,c).lo - and r6,r6,r11 - adds r5,r5,r3 - orr r6,r6,r12 @ Maj(a,b,c).hi - sub sp,sp,#8 - adc r6,r6,r4 @ h += T - tst r14,#1 - add r14,r14,#8 -#if __ARM_ARCH__>=7 - ittt eq @ Thumb2 thing, sanity check in ARM -#endif - ldreq r9,[sp,#184+0] - ldreq r10,[sp,#184+4] - beq .L16_79 - bic r14,r14,#1 - - ldr r3,[sp,#8+0] - ldr r4,[sp,#8+4] - ldr r9, [r0,#0+LO] - ldr r10, [r0,#0+HI] - ldr r11, [r0,#8+LO] - ldr r12, [r0,#8+HI] - adds r9,r5,r9 - str r9, [r0,#0+LO] - adc r10,r6,r10 - str r10, [r0,#0+HI] - adds r11,r3,r11 - str r11, [r0,#8+LO] - adc r12,r4,r12 - str r12, [r0,#8+HI] - - ldr r5,[sp,#16+0] - ldr r6,[sp,#16+4] - ldr r3,[sp,#24+0] - ldr r4,[sp,#24+4] - ldr r9, [r0,#16+LO] - ldr r10, [r0,#16+HI] - ldr r11, [r0,#24+LO] - ldr r12, [r0,#24+HI] - adds r9,r5,r9 - str r9, [r0,#16+LO] - adc r10,r6,r10 - str r10, [r0,#16+HI] - adds r11,r3,r11 - str r11, [r0,#24+LO] - adc r12,r4,r12 - str r12, [r0,#24+HI] - - ldr r3,[sp,#40+0] - ldr r4,[sp,#40+4] - ldr r9, [r0,#32+LO] - ldr r10, [r0,#32+HI] - ldr r11, [r0,#40+LO] - ldr r12, [r0,#40+HI] - adds r7,r7,r9 - str r7,[r0,#32+LO] - adc r8,r8,r10 - str r8,[r0,#32+HI] - adds r11,r3,r11 - str r11, [r0,#40+LO] - adc r12,r4,r12 - str r12, [r0,#40+HI] - - ldr r5,[sp,#48+0] - ldr r6,[sp,#48+4] - ldr r3,[sp,#56+0] - ldr r4,[sp,#56+4] - ldr r9, [r0,#48+LO] - ldr r10, [r0,#48+HI] - ldr r11, [r0,#56+LO] - ldr r12, [r0,#56+HI] - adds r9,r5,r9 - str r9, [r0,#48+LO] - adc r10,r6,r10 - str r10, [r0,#48+HI] - adds r11,r3,r11 - str r11, [r0,#56+LO] - adc r12,r4,r12 - str r12, [r0,#56+HI] - - add sp,sp,#640 - sub r14,r14,#640 - - teq r1,r2 - bne .Loop - - add sp,sp,#8*9 @ destroy frame -#if __ARM_ARCH__>=5 - ldmia sp!,{r4-r12,pc} -#else - ldmia sp!,{r4-r12,lr} - tst lr,#1 - moveq pc,lr @ be binary compatible with V4, yet - .word 0xe12fff1e @ interoperable with Thumb ISA:-) -#endif -.size sha512_block_data_order,.-sha512_block_data_order -#if __ARM_MAX_ARCH__>=7 -.arch armv7-a -.fpu neon - -.global sha512_block_data_order_neon -.type sha512_block_data_order_neon,%function -.align 4 -sha512_block_data_order_neon: -.LNEON: - dmb @ errata #451034 on early Cortex A8 - add r2,r1,r2,lsl#7 @ len to point at the end of inp - VFP_ABI_PUSH - adr r3,.Lsha512_block_data_order - sub r3,r3,.Lsha512_block_data_order-K512 - vldmia r0,{d16-d23} @ load context -.Loop_neon: - vshr.u64 d24,d20,#14 @ 0 -#if 0<16 - vld1.64 {d0},[r1]! @ handles unaligned -#endif - vshr.u64 d25,d20,#18 -#if 0>0 - vadd.i64 d16,d30 @ h+=Maj from the past -#endif - vshr.u64 d26,d20,#41 - vld1.64 {d28},[r3,:64]! @ K[i++] - vsli.64 d24,d20,#50 - vsli.64 d25,d20,#46 - vmov d29,d20 - vsli.64 d26,d20,#23 -#if 0<16 && defined(__ARMEL__) - vrev64.8 d0,d0 -#endif - veor d25,d24 - vbsl d29,d21,d22 @ Ch(e,f,g) - vshr.u64 d24,d16,#28 - veor d26,d25 @ Sigma1(e) - vadd.i64 d27,d29,d23 - vshr.u64 d25,d16,#34 - vsli.64 d24,d16,#36 - vadd.i64 d27,d26 - vshr.u64 d26,d16,#39 - vadd.i64 d28,d0 - vsli.64 d25,d16,#30 - veor d30,d16,d17 - vsli.64 d26,d16,#25 - veor d23,d24,d25 - vadd.i64 d27,d28 - vbsl d30,d18,d17 @ Maj(a,b,c) - veor d23,d26 @ Sigma0(a) - vadd.i64 d19,d27 - vadd.i64 d30,d27 - @ vadd.i64 d23,d30 - vshr.u64 d24,d19,#14 @ 1 -#if 1<16 - vld1.64 {d1},[r1]! @ handles unaligned -#endif - vshr.u64 d25,d19,#18 -#if 1>0 - vadd.i64 d23,d30 @ h+=Maj from the past -#endif - vshr.u64 d26,d19,#41 - vld1.64 {d28},[r3,:64]! 
@ K[i++] - vsli.64 d24,d19,#50 - vsli.64 d25,d19,#46 - vmov d29,d19 - vsli.64 d26,d19,#23 -#if 1<16 && defined(__ARMEL__) - vrev64.8 d1,d1 -#endif - veor d25,d24 - vbsl d29,d20,d21 @ Ch(e,f,g) - vshr.u64 d24,d23,#28 - veor d26,d25 @ Sigma1(e) - vadd.i64 d27,d29,d22 - vshr.u64 d25,d23,#34 - vsli.64 d24,d23,#36 - vadd.i64 d27,d26 - vshr.u64 d26,d23,#39 - vadd.i64 d28,d1 - vsli.64 d25,d23,#30 - veor d30,d23,d16 - vsli.64 d26,d23,#25 - veor d22,d24,d25 - vadd.i64 d27,d28 - vbsl d30,d17,d16 @ Maj(a,b,c) - veor d22,d26 @ Sigma0(a) - vadd.i64 d18,d27 - vadd.i64 d30,d27 - @ vadd.i64 d22,d30 - vshr.u64 d24,d18,#14 @ 2 -#if 2<16 - vld1.64 {d2},[r1]! @ handles unaligned -#endif - vshr.u64 d25,d18,#18 -#if 2>0 - vadd.i64 d22,d30 @ h+=Maj from the past -#endif - vshr.u64 d26,d18,#41 - vld1.64 {d28},[r3,:64]! @ K[i++] - vsli.64 d24,d18,#50 - vsli.64 d25,d18,#46 - vmov d29,d18 - vsli.64 d26,d18,#23 -#if 2<16 && defined(__ARMEL__) - vrev64.8 d2,d2 -#endif - veor d25,d24 - vbsl d29,d19,d20 @ Ch(e,f,g) - vshr.u64 d24,d22,#28 - veor d26,d25 @ Sigma1(e) - vadd.i64 d27,d29,d21 - vshr.u64 d25,d22,#34 - vsli.64 d24,d22,#36 - vadd.i64 d27,d26 - vshr.u64 d26,d22,#39 - vadd.i64 d28,d2 - vsli.64 d25,d22,#30 - veor d30,d22,d23 - vsli.64 d26,d22,#25 - veor d21,d24,d25 - vadd.i64 d27,d28 - vbsl d30,d16,d23 @ Maj(a,b,c) - veor d21,d26 @ Sigma0(a) - vadd.i64 d17,d27 - vadd.i64 d30,d27 - @ vadd.i64 d21,d30 - vshr.u64 d24,d17,#14 @ 3 -#if 3<16 - vld1.64 {d3},[r1]! @ handles unaligned -#endif - vshr.u64 d25,d17,#18 -#if 3>0 - vadd.i64 d21,d30 @ h+=Maj from the past -#endif - vshr.u64 d26,d17,#41 - vld1.64 {d28},[r3,:64]! @ K[i++] - vsli.64 d24,d17,#50 - vsli.64 d25,d17,#46 - vmov d29,d17 - vsli.64 d26,d17,#23 -#if 3<16 && defined(__ARMEL__) - vrev64.8 d3,d3 -#endif - veor d25,d24 - vbsl d29,d18,d19 @ Ch(e,f,g) - vshr.u64 d24,d21,#28 - veor d26,d25 @ Sigma1(e) - vadd.i64 d27,d29,d20 - vshr.u64 d25,d21,#34 - vsli.64 d24,d21,#36 - vadd.i64 d27,d26 - vshr.u64 d26,d21,#39 - vadd.i64 d28,d3 - vsli.64 d25,d21,#30 - veor d30,d21,d22 - vsli.64 d26,d21,#25 - veor d20,d24,d25 - vadd.i64 d27,d28 - vbsl d30,d23,d22 @ Maj(a,b,c) - veor d20,d26 @ Sigma0(a) - vadd.i64 d16,d27 - vadd.i64 d30,d27 - @ vadd.i64 d20,d30 - vshr.u64 d24,d16,#14 @ 4 -#if 4<16 - vld1.64 {d4},[r1]! @ handles unaligned -#endif - vshr.u64 d25,d16,#18 -#if 4>0 - vadd.i64 d20,d30 @ h+=Maj from the past -#endif - vshr.u64 d26,d16,#41 - vld1.64 {d28},[r3,:64]! @ K[i++] - vsli.64 d24,d16,#50 - vsli.64 d25,d16,#46 - vmov d29,d16 - vsli.64 d26,d16,#23 -#if 4<16 && defined(__ARMEL__) - vrev64.8 d4,d4 -#endif - veor d25,d24 - vbsl d29,d17,d18 @ Ch(e,f,g) - vshr.u64 d24,d20,#28 - veor d26,d25 @ Sigma1(e) - vadd.i64 d27,d29,d19 - vshr.u64 d25,d20,#34 - vsli.64 d24,d20,#36 - vadd.i64 d27,d26 - vshr.u64 d26,d20,#39 - vadd.i64 d28,d4 - vsli.64 d25,d20,#30 - veor d30,d20,d21 - vsli.64 d26,d20,#25 - veor d19,d24,d25 - vadd.i64 d27,d28 - vbsl d30,d22,d21 @ Maj(a,b,c) - veor d19,d26 @ Sigma0(a) - vadd.i64 d23,d27 - vadd.i64 d30,d27 - @ vadd.i64 d19,d30 - vshr.u64 d24,d23,#14 @ 5 -#if 5<16 - vld1.64 {d5},[r1]! @ handles unaligned -#endif - vshr.u64 d25,d23,#18 -#if 5>0 - vadd.i64 d19,d30 @ h+=Maj from the past -#endif - vshr.u64 d26,d23,#41 - vld1.64 {d28},[r3,:64]! 
@ K[i++] - vsli.64 d24,d23,#50 - vsli.64 d25,d23,#46 - vmov d29,d23 - vsli.64 d26,d23,#23 -#if 5<16 && defined(__ARMEL__) - vrev64.8 d5,d5 -#endif - veor d25,d24 - vbsl d29,d16,d17 @ Ch(e,f,g) - vshr.u64 d24,d19,#28 - veor d26,d25 @ Sigma1(e) - vadd.i64 d27,d29,d18 - vshr.u64 d25,d19,#34 - vsli.64 d24,d19,#36 - vadd.i64 d27,d26 - vshr.u64 d26,d19,#39 - vadd.i64 d28,d5 - vsli.64 d25,d19,#30 - veor d30,d19,d20 - vsli.64 d26,d19,#25 - veor d18,d24,d25 - vadd.i64 d27,d28 - vbsl d30,d21,d20 @ Maj(a,b,c) - veor d18,d26 @ Sigma0(a) - vadd.i64 d22,d27 - vadd.i64 d30,d27 - @ vadd.i64 d18,d30 - vshr.u64 d24,d22,#14 @ 6 -#if 6<16 - vld1.64 {d6},[r1]! @ handles unaligned -#endif - vshr.u64 d25,d22,#18 -#if 6>0 - vadd.i64 d18,d30 @ h+=Maj from the past -#endif - vshr.u64 d26,d22,#41 - vld1.64 {d28},[r3,:64]! @ K[i++] - vsli.64 d24,d22,#50 - vsli.64 d25,d22,#46 - vmov d29,d22 - vsli.64 d26,d22,#23 -#if 6<16 && defined(__ARMEL__) - vrev64.8 d6,d6 -#endif - veor d25,d24 - vbsl d29,d23,d16 @ Ch(e,f,g) - vshr.u64 d24,d18,#28 - veor d26,d25 @ Sigma1(e) - vadd.i64 d27,d29,d17 - vshr.u64 d25,d18,#34 - vsli.64 d24,d18,#36 - vadd.i64 d27,d26 - vshr.u64 d26,d18,#39 - vadd.i64 d28,d6 - vsli.64 d25,d18,#30 - veor d30,d18,d19 - vsli.64 d26,d18,#25 - veor d17,d24,d25 - vadd.i64 d27,d28 - vbsl d30,d20,d19 @ Maj(a,b,c) - veor d17,d26 @ Sigma0(a) - vadd.i64 d21,d27 - vadd.i64 d30,d27 - @ vadd.i64 d17,d30 - vshr.u64 d24,d21,#14 @ 7 -#if 7<16 - vld1.64 {d7},[r1]! @ handles unaligned -#endif - vshr.u64 d25,d21,#18 -#if 7>0 - vadd.i64 d17,d30 @ h+=Maj from the past -#endif - vshr.u64 d26,d21,#41 - vld1.64 {d28},[r3,:64]! @ K[i++] - vsli.64 d24,d21,#50 - vsli.64 d25,d21,#46 - vmov d29,d21 - vsli.64 d26,d21,#23 -#if 7<16 && defined(__ARMEL__) - vrev64.8 d7,d7 -#endif - veor d25,d24 - vbsl d29,d22,d23 @ Ch(e,f,g) - vshr.u64 d24,d17,#28 - veor d26,d25 @ Sigma1(e) - vadd.i64 d27,d29,d16 - vshr.u64 d25,d17,#34 - vsli.64 d24,d17,#36 - vadd.i64 d27,d26 - vshr.u64 d26,d17,#39 - vadd.i64 d28,d7 - vsli.64 d25,d17,#30 - veor d30,d17,d18 - vsli.64 d26,d17,#25 - veor d16,d24,d25 - vadd.i64 d27,d28 - vbsl d30,d19,d18 @ Maj(a,b,c) - veor d16,d26 @ Sigma0(a) - vadd.i64 d20,d27 - vadd.i64 d30,d27 - @ vadd.i64 d16,d30 - vshr.u64 d24,d20,#14 @ 8 -#if 8<16 - vld1.64 {d8},[r1]! @ handles unaligned -#endif - vshr.u64 d25,d20,#18 -#if 8>0 - vadd.i64 d16,d30 @ h+=Maj from the past -#endif - vshr.u64 d26,d20,#41 - vld1.64 {d28},[r3,:64]! @ K[i++] - vsli.64 d24,d20,#50 - vsli.64 d25,d20,#46 - vmov d29,d20 - vsli.64 d26,d20,#23 -#if 8<16 && defined(__ARMEL__) - vrev64.8 d8,d8 -#endif - veor d25,d24 - vbsl d29,d21,d22 @ Ch(e,f,g) - vshr.u64 d24,d16,#28 - veor d26,d25 @ Sigma1(e) - vadd.i64 d27,d29,d23 - vshr.u64 d25,d16,#34 - vsli.64 d24,d16,#36 - vadd.i64 d27,d26 - vshr.u64 d26,d16,#39 - vadd.i64 d28,d8 - vsli.64 d25,d16,#30 - veor d30,d16,d17 - vsli.64 d26,d16,#25 - veor d23,d24,d25 - vadd.i64 d27,d28 - vbsl d30,d18,d17 @ Maj(a,b,c) - veor d23,d26 @ Sigma0(a) - vadd.i64 d19,d27 - vadd.i64 d30,d27 - @ vadd.i64 d23,d30 - vshr.u64 d24,d19,#14 @ 9 -#if 9<16 - vld1.64 {d9},[r1]! @ handles unaligned -#endif - vshr.u64 d25,d19,#18 -#if 9>0 - vadd.i64 d23,d30 @ h+=Maj from the past -#endif - vshr.u64 d26,d19,#41 - vld1.64 {d28},[r3,:64]! 
@ K[i++] - vsli.64 d24,d19,#50 - vsli.64 d25,d19,#46 - vmov d29,d19 - vsli.64 d26,d19,#23 -#if 9<16 && defined(__ARMEL__) - vrev64.8 d9,d9 -#endif - veor d25,d24 - vbsl d29,d20,d21 @ Ch(e,f,g) - vshr.u64 d24,d23,#28 - veor d26,d25 @ Sigma1(e) - vadd.i64 d27,d29,d22 - vshr.u64 d25,d23,#34 - vsli.64 d24,d23,#36 - vadd.i64 d27,d26 - vshr.u64 d26,d23,#39 - vadd.i64 d28,d9 - vsli.64 d25,d23,#30 - veor d30,d23,d16 - vsli.64 d26,d23,#25 - veor d22,d24,d25 - vadd.i64 d27,d28 - vbsl d30,d17,d16 @ Maj(a,b,c) - veor d22,d26 @ Sigma0(a) - vadd.i64 d18,d27 - vadd.i64 d30,d27 - @ vadd.i64 d22,d30 - vshr.u64 d24,d18,#14 @ 10 -#if 10<16 - vld1.64 {d10},[r1]! @ handles unaligned -#endif - vshr.u64 d25,d18,#18 -#if 10>0 - vadd.i64 d22,d30 @ h+=Maj from the past -#endif - vshr.u64 d26,d18,#41 - vld1.64 {d28},[r3,:64]! @ K[i++] - vsli.64 d24,d18,#50 - vsli.64 d25,d18,#46 - vmov d29,d18 - vsli.64 d26,d18,#23 -#if 10<16 && defined(__ARMEL__) - vrev64.8 d10,d10 -#endif - veor d25,d24 - vbsl d29,d19,d20 @ Ch(e,f,g) - vshr.u64 d24,d22,#28 - veor d26,d25 @ Sigma1(e) - vadd.i64 d27,d29,d21 - vshr.u64 d25,d22,#34 - vsli.64 d24,d22,#36 - vadd.i64 d27,d26 - vshr.u64 d26,d22,#39 - vadd.i64 d28,d10 - vsli.64 d25,d22,#30 - veor d30,d22,d23 - vsli.64 d26,d22,#25 - veor d21,d24,d25 - vadd.i64 d27,d28 - vbsl d30,d16,d23 @ Maj(a,b,c) - veor d21,d26 @ Sigma0(a) - vadd.i64 d17,d27 - vadd.i64 d30,d27 - @ vadd.i64 d21,d30 - vshr.u64 d24,d17,#14 @ 11 -#if 11<16 - vld1.64 {d11},[r1]! @ handles unaligned -#endif - vshr.u64 d25,d17,#18 -#if 11>0 - vadd.i64 d21,d30 @ h+=Maj from the past -#endif - vshr.u64 d26,d17,#41 - vld1.64 {d28},[r3,:64]! @ K[i++] - vsli.64 d24,d17,#50 - vsli.64 d25,d17,#46 - vmov d29,d17 - vsli.64 d26,d17,#23 -#if 11<16 && defined(__ARMEL__) - vrev64.8 d11,d11 -#endif - veor d25,d24 - vbsl d29,d18,d19 @ Ch(e,f,g) - vshr.u64 d24,d21,#28 - veor d26,d25 @ Sigma1(e) - vadd.i64 d27,d29,d20 - vshr.u64 d25,d21,#34 - vsli.64 d24,d21,#36 - vadd.i64 d27,d26 - vshr.u64 d26,d21,#39 - vadd.i64 d28,d11 - vsli.64 d25,d21,#30 - veor d30,d21,d22 - vsli.64 d26,d21,#25 - veor d20,d24,d25 - vadd.i64 d27,d28 - vbsl d30,d23,d22 @ Maj(a,b,c) - veor d20,d26 @ Sigma0(a) - vadd.i64 d16,d27 - vadd.i64 d30,d27 - @ vadd.i64 d20,d30 - vshr.u64 d24,d16,#14 @ 12 -#if 12<16 - vld1.64 {d12},[r1]! @ handles unaligned -#endif - vshr.u64 d25,d16,#18 -#if 12>0 - vadd.i64 d20,d30 @ h+=Maj from the past -#endif - vshr.u64 d26,d16,#41 - vld1.64 {d28},[r3,:64]! @ K[i++] - vsli.64 d24,d16,#50 - vsli.64 d25,d16,#46 - vmov d29,d16 - vsli.64 d26,d16,#23 -#if 12<16 && defined(__ARMEL__) - vrev64.8 d12,d12 -#endif - veor d25,d24 - vbsl d29,d17,d18 @ Ch(e,f,g) - vshr.u64 d24,d20,#28 - veor d26,d25 @ Sigma1(e) - vadd.i64 d27,d29,d19 - vshr.u64 d25,d20,#34 - vsli.64 d24,d20,#36 - vadd.i64 d27,d26 - vshr.u64 d26,d20,#39 - vadd.i64 d28,d12 - vsli.64 d25,d20,#30 - veor d30,d20,d21 - vsli.64 d26,d20,#25 - veor d19,d24,d25 - vadd.i64 d27,d28 - vbsl d30,d22,d21 @ Maj(a,b,c) - veor d19,d26 @ Sigma0(a) - vadd.i64 d23,d27 - vadd.i64 d30,d27 - @ vadd.i64 d19,d30 - vshr.u64 d24,d23,#14 @ 13 -#if 13<16 - vld1.64 {d13},[r1]! @ handles unaligned -#endif - vshr.u64 d25,d23,#18 -#if 13>0 - vadd.i64 d19,d30 @ h+=Maj from the past -#endif - vshr.u64 d26,d23,#41 - vld1.64 {d28},[r3,:64]! 
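
In this NEON path each SHA-512 boolean function is a single vbsl, the bitwise select (m & t) | (~m & f): with e as the mask, vbsl of f and g gives Ch(e,f,g); with a^b as the mask, vbsl of c and b picks c wherever a and b disagree and b (equal to a there) where they agree, which is Maj(a,b,c). An intrinsics rendering, as a sketch:

#include <arm_neon.h>

/* Ch(e,f,g) = (e & f) | (~e & g): vmov d29,e then vbsl d29,f,g. */
static uint64x1_t ch(uint64x1_t e, uint64x1_t f, uint64x1_t g)
{
	return vbsl_u64(e, f, g);
}

/* Maj(a,b,c) via select on a^b: veor d30,a,b then vbsl d30,c,b. */
static uint64x1_t maj(uint64x1_t a, uint64x1_t b, uint64x1_t c)
{
	return vbsl_u64(veor_u64(a, b), c, b);
}
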
@ K[i++] - vsli.64 d24,d23,#50 - vsli.64 d25,d23,#46 - vmov d29,d23 - vsli.64 d26,d23,#23 -#if 13<16 && defined(__ARMEL__) - vrev64.8 d13,d13 -#endif - veor d25,d24 - vbsl d29,d16,d17 @ Ch(e,f,g) - vshr.u64 d24,d19,#28 - veor d26,d25 @ Sigma1(e) - vadd.i64 d27,d29,d18 - vshr.u64 d25,d19,#34 - vsli.64 d24,d19,#36 - vadd.i64 d27,d26 - vshr.u64 d26,d19,#39 - vadd.i64 d28,d13 - vsli.64 d25,d19,#30 - veor d30,d19,d20 - vsli.64 d26,d19,#25 - veor d18,d24,d25 - vadd.i64 d27,d28 - vbsl d30,d21,d20 @ Maj(a,b,c) - veor d18,d26 @ Sigma0(a) - vadd.i64 d22,d27 - vadd.i64 d30,d27 - @ vadd.i64 d18,d30 - vshr.u64 d24,d22,#14 @ 14 -#if 14<16 - vld1.64 {d14},[r1]! @ handles unaligned -#endif - vshr.u64 d25,d22,#18 -#if 14>0 - vadd.i64 d18,d30 @ h+=Maj from the past -#endif - vshr.u64 d26,d22,#41 - vld1.64 {d28},[r3,:64]! @ K[i++] - vsli.64 d24,d22,#50 - vsli.64 d25,d22,#46 - vmov d29,d22 - vsli.64 d26,d22,#23 -#if 14<16 && defined(__ARMEL__) - vrev64.8 d14,d14 -#endif - veor d25,d24 - vbsl d29,d23,d16 @ Ch(e,f,g) - vshr.u64 d24,d18,#28 - veor d26,d25 @ Sigma1(e) - vadd.i64 d27,d29,d17 - vshr.u64 d25,d18,#34 - vsli.64 d24,d18,#36 - vadd.i64 d27,d26 - vshr.u64 d26,d18,#39 - vadd.i64 d28,d14 - vsli.64 d25,d18,#30 - veor d30,d18,d19 - vsli.64 d26,d18,#25 - veor d17,d24,d25 - vadd.i64 d27,d28 - vbsl d30,d20,d19 @ Maj(a,b,c) - veor d17,d26 @ Sigma0(a) - vadd.i64 d21,d27 - vadd.i64 d30,d27 - @ vadd.i64 d17,d30 - vshr.u64 d24,d21,#14 @ 15 -#if 15<16 - vld1.64 {d15},[r1]! @ handles unaligned -#endif - vshr.u64 d25,d21,#18 -#if 15>0 - vadd.i64 d17,d30 @ h+=Maj from the past -#endif - vshr.u64 d26,d21,#41 - vld1.64 {d28},[r3,:64]! @ K[i++] - vsli.64 d24,d21,#50 - vsli.64 d25,d21,#46 - vmov d29,d21 - vsli.64 d26,d21,#23 -#if 15<16 && defined(__ARMEL__) - vrev64.8 d15,d15 -#endif - veor d25,d24 - vbsl d29,d22,d23 @ Ch(e,f,g) - vshr.u64 d24,d17,#28 - veor d26,d25 @ Sigma1(e) - vadd.i64 d27,d29,d16 - vshr.u64 d25,d17,#34 - vsli.64 d24,d17,#36 - vadd.i64 d27,d26 - vshr.u64 d26,d17,#39 - vadd.i64 d28,d15 - vsli.64 d25,d17,#30 - veor d30,d17,d18 - vsli.64 d26,d17,#25 - veor d16,d24,d25 - vadd.i64 d27,d28 - vbsl d30,d19,d18 @ Maj(a,b,c) - veor d16,d26 @ Sigma0(a) - vadd.i64 d20,d27 - vadd.i64 d30,d27 - @ vadd.i64 d16,d30 - mov r12,#4 -.L16_79_neon: - subs r12,#1 - vshr.u64 q12,q7,#19 - vshr.u64 q13,q7,#61 - vadd.i64 d16,d30 @ h+=Maj from the past - vshr.u64 q15,q7,#6 - vsli.64 q12,q7,#45 - vext.8 q14,q0,q1,#8 @ X[i+1] - vsli.64 q13,q7,#3 - veor q15,q12 - vshr.u64 q12,q14,#1 - veor q15,q13 @ sigma1(X[i+14]) - vshr.u64 q13,q14,#8 - vadd.i64 q0,q15 - vshr.u64 q15,q14,#7 - vsli.64 q12,q14,#63 - vsli.64 q13,q14,#56 - vext.8 q14,q4,q5,#8 @ X[i+9] - veor q15,q12 - vshr.u64 d24,d20,#14 @ from NEON_00_15 - vadd.i64 q0,q14 - vshr.u64 d25,d20,#18 @ from NEON_00_15 - veor q15,q13 @ sigma0(X[i+1]) - vshr.u64 d26,d20,#41 @ from NEON_00_15 - vadd.i64 q0,q15 - vld1.64 {d28},[r3,:64]! @ K[i++] - vsli.64 d24,d20,#50 - vsli.64 d25,d20,#46 - vmov d29,d20 - vsli.64 d26,d20,#23 -#if 16<16 && defined(__ARMEL__) - vrev64.8 , -#endif - veor d25,d24 - vbsl d29,d21,d22 @ Ch(e,f,g) - vshr.u64 d24,d16,#28 - veor d26,d25 @ Sigma1(e) - vadd.i64 d27,d29,d23 - vshr.u64 d25,d16,#34 - vsli.64 d24,d16,#36 - vadd.i64 d27,d26 - vshr.u64 d26,d16,#39 - vadd.i64 d28,d0 - vsli.64 d25,d16,#30 - veor d30,d16,d17 - vsli.64 d26,d16,#25 - veor d23,d24,d25 - vadd.i64 d27,d28 - vbsl d30,d18,d17 @ Maj(a,b,c) - veor d23,d26 @ Sigma0(a) - vadd.i64 d19,d27 - vadd.i64 d30,d27 - @ vadd.i64 d23,d30 - vshr.u64 d24,d19,#14 @ 17 -#if 17<16 - vld1.64 {d1},[r1]! 
@ handles unaligned -#endif - vshr.u64 d25,d19,#18 -#if 17>0 - vadd.i64 d23,d30 @ h+=Maj from the past -#endif - vshr.u64 d26,d19,#41 - vld1.64 {d28},[r3,:64]! @ K[i++] - vsli.64 d24,d19,#50 - vsli.64 d25,d19,#46 - vmov d29,d19 - vsli.64 d26,d19,#23 -#if 17<16 && defined(__ARMEL__) - vrev64.8 , -#endif - veor d25,d24 - vbsl d29,d20,d21 @ Ch(e,f,g) - vshr.u64 d24,d23,#28 - veor d26,d25 @ Sigma1(e) - vadd.i64 d27,d29,d22 - vshr.u64 d25,d23,#34 - vsli.64 d24,d23,#36 - vadd.i64 d27,d26 - vshr.u64 d26,d23,#39 - vadd.i64 d28,d1 - vsli.64 d25,d23,#30 - veor d30,d23,d16 - vsli.64 d26,d23,#25 - veor d22,d24,d25 - vadd.i64 d27,d28 - vbsl d30,d17,d16 @ Maj(a,b,c) - veor d22,d26 @ Sigma0(a) - vadd.i64 d18,d27 - vadd.i64 d30,d27 - @ vadd.i64 d22,d30 - vshr.u64 q12,q0,#19 - vshr.u64 q13,q0,#61 - vadd.i64 d22,d30 @ h+=Maj from the past - vshr.u64 q15,q0,#6 - vsli.64 q12,q0,#45 - vext.8 q14,q1,q2,#8 @ X[i+1] - vsli.64 q13,q0,#3 - veor q15,q12 - vshr.u64 q12,q14,#1 - veor q15,q13 @ sigma1(X[i+14]) - vshr.u64 q13,q14,#8 - vadd.i64 q1,q15 - vshr.u64 q15,q14,#7 - vsli.64 q12,q14,#63 - vsli.64 q13,q14,#56 - vext.8 q14,q5,q6,#8 @ X[i+9] - veor q15,q12 - vshr.u64 d24,d18,#14 @ from NEON_00_15 - vadd.i64 q1,q14 - vshr.u64 d25,d18,#18 @ from NEON_00_15 - veor q15,q13 @ sigma0(X[i+1]) - vshr.u64 d26,d18,#41 @ from NEON_00_15 - vadd.i64 q1,q15 - vld1.64 {d28},[r3,:64]! @ K[i++] - vsli.64 d24,d18,#50 - vsli.64 d25,d18,#46 - vmov d29,d18 - vsli.64 d26,d18,#23 -#if 18<16 && defined(__ARMEL__) - vrev64.8 , -#endif - veor d25,d24 - vbsl d29,d19,d20 @ Ch(e,f,g) - vshr.u64 d24,d22,#28 - veor d26,d25 @ Sigma1(e) - vadd.i64 d27,d29,d21 - vshr.u64 d25,d22,#34 - vsli.64 d24,d22,#36 - vadd.i64 d27,d26 - vshr.u64 d26,d22,#39 - vadd.i64 d28,d2 - vsli.64 d25,d22,#30 - veor d30,d22,d23 - vsli.64 d26,d22,#25 - veor d21,d24,d25 - vadd.i64 d27,d28 - vbsl d30,d16,d23 @ Maj(a,b,c) - veor d21,d26 @ Sigma0(a) - vadd.i64 d17,d27 - vadd.i64 d30,d27 - @ vadd.i64 d21,d30 - vshr.u64 d24,d17,#14 @ 19 -#if 19<16 - vld1.64 {d3},[r1]! @ handles unaligned -#endif - vshr.u64 d25,d17,#18 -#if 19>0 - vadd.i64 d21,d30 @ h+=Maj from the past -#endif - vshr.u64 d26,d17,#41 - vld1.64 {d28},[r3,:64]! @ K[i++] - vsli.64 d24,d17,#50 - vsli.64 d25,d17,#46 - vmov d29,d17 - vsli.64 d26,d17,#23 -#if 19<16 && defined(__ARMEL__) - vrev64.8 , -#endif - veor d25,d24 - vbsl d29,d18,d19 @ Ch(e,f,g) - vshr.u64 d24,d21,#28 - veor d26,d25 @ Sigma1(e) - vadd.i64 d27,d29,d20 - vshr.u64 d25,d21,#34 - vsli.64 d24,d21,#36 - vadd.i64 d27,d26 - vshr.u64 d26,d21,#39 - vadd.i64 d28,d3 - vsli.64 d25,d21,#30 - veor d30,d21,d22 - vsli.64 d26,d21,#25 - veor d20,d24,d25 - vadd.i64 d27,d28 - vbsl d30,d23,d22 @ Maj(a,b,c) - veor d20,d26 @ Sigma0(a) - vadd.i64 d16,d27 - vadd.i64 d30,d27 - @ vadd.i64 d20,d30 - vshr.u64 q12,q1,#19 - vshr.u64 q13,q1,#61 - vadd.i64 d20,d30 @ h+=Maj from the past - vshr.u64 q15,q1,#6 - vsli.64 q12,q1,#45 - vext.8 q14,q2,q3,#8 @ X[i+1] - vsli.64 q13,q1,#3 - veor q15,q12 - vshr.u64 q12,q14,#1 - veor q15,q13 @ sigma1(X[i+14]) - vshr.u64 q13,q14,#8 - vadd.i64 q2,q15 - vshr.u64 q15,q14,#7 - vsli.64 q12,q14,#63 - vsli.64 q13,q14,#56 - vext.8 q14,q6,q7,#8 @ X[i+9] - veor q15,q12 - vshr.u64 d24,d16,#14 @ from NEON_00_15 - vadd.i64 q2,q14 - vshr.u64 d25,d16,#18 @ from NEON_00_15 - veor q15,q13 @ sigma0(X[i+1]) - vshr.u64 d26,d16,#41 @ from NEON_00_15 - vadd.i64 q2,q15 - vld1.64 {d28},[r3,:64]! 
@ K[i++] - vsli.64 d24,d16,#50 - vsli.64 d25,d16,#46 - vmov d29,d16 - vsli.64 d26,d16,#23 -#if 20<16 && defined(__ARMEL__) - vrev64.8 , -#endif - veor d25,d24 - vbsl d29,d17,d18 @ Ch(e,f,g) - vshr.u64 d24,d20,#28 - veor d26,d25 @ Sigma1(e) - vadd.i64 d27,d29,d19 - vshr.u64 d25,d20,#34 - vsli.64 d24,d20,#36 - vadd.i64 d27,d26 - vshr.u64 d26,d20,#39 - vadd.i64 d28,d4 - vsli.64 d25,d20,#30 - veor d30,d20,d21 - vsli.64 d26,d20,#25 - veor d19,d24,d25 - vadd.i64 d27,d28 - vbsl d30,d22,d21 @ Maj(a,b,c) - veor d19,d26 @ Sigma0(a) - vadd.i64 d23,d27 - vadd.i64 d30,d27 - @ vadd.i64 d19,d30 - vshr.u64 d24,d23,#14 @ 21 -#if 21<16 - vld1.64 {d5},[r1]! @ handles unaligned -#endif - vshr.u64 d25,d23,#18 -#if 21>0 - vadd.i64 d19,d30 @ h+=Maj from the past -#endif - vshr.u64 d26,d23,#41 - vld1.64 {d28},[r3,:64]! @ K[i++] - vsli.64 d24,d23,#50 - vsli.64 d25,d23,#46 - vmov d29,d23 - vsli.64 d26,d23,#23 -#if 21<16 && defined(__ARMEL__) - vrev64.8 , -#endif - veor d25,d24 - vbsl d29,d16,d17 @ Ch(e,f,g) - vshr.u64 d24,d19,#28 - veor d26,d25 @ Sigma1(e) - vadd.i64 d27,d29,d18 - vshr.u64 d25,d19,#34 - vsli.64 d24,d19,#36 - vadd.i64 d27,d26 - vshr.u64 d26,d19,#39 - vadd.i64 d28,d5 - vsli.64 d25,d19,#30 - veor d30,d19,d20 - vsli.64 d26,d19,#25 - veor d18,d24,d25 - vadd.i64 d27,d28 - vbsl d30,d21,d20 @ Maj(a,b,c) - veor d18,d26 @ Sigma0(a) - vadd.i64 d22,d27 - vadd.i64 d30,d27 - @ vadd.i64 d18,d30 - vshr.u64 q12,q2,#19 - vshr.u64 q13,q2,#61 - vadd.i64 d18,d30 @ h+=Maj from the past - vshr.u64 q15,q2,#6 - vsli.64 q12,q2,#45 - vext.8 q14,q3,q4,#8 @ X[i+1] - vsli.64 q13,q2,#3 - veor q15,q12 - vshr.u64 q12,q14,#1 - veor q15,q13 @ sigma1(X[i+14]) - vshr.u64 q13,q14,#8 - vadd.i64 q3,q15 - vshr.u64 q15,q14,#7 - vsli.64 q12,q14,#63 - vsli.64 q13,q14,#56 - vext.8 q14,q7,q0,#8 @ X[i+9] - veor q15,q12 - vshr.u64 d24,d22,#14 @ from NEON_00_15 - vadd.i64 q3,q14 - vshr.u64 d25,d22,#18 @ from NEON_00_15 - veor q15,q13 @ sigma0(X[i+1]) - vshr.u64 d26,d22,#41 @ from NEON_00_15 - vadd.i64 q3,q15 - vld1.64 {d28},[r3,:64]! @ K[i++] - vsli.64 d24,d22,#50 - vsli.64 d25,d22,#46 - vmov d29,d22 - vsli.64 d26,d22,#23 -#if 22<16 && defined(__ARMEL__) - vrev64.8 , -#endif - veor d25,d24 - vbsl d29,d23,d16 @ Ch(e,f,g) - vshr.u64 d24,d18,#28 - veor d26,d25 @ Sigma1(e) - vadd.i64 d27,d29,d17 - vshr.u64 d25,d18,#34 - vsli.64 d24,d18,#36 - vadd.i64 d27,d26 - vshr.u64 d26,d18,#39 - vadd.i64 d28,d6 - vsli.64 d25,d18,#30 - veor d30,d18,d19 - vsli.64 d26,d18,#25 - veor d17,d24,d25 - vadd.i64 d27,d28 - vbsl d30,d20,d19 @ Maj(a,b,c) - veor d17,d26 @ Sigma0(a) - vadd.i64 d21,d27 - vadd.i64 d30,d27 - @ vadd.i64 d17,d30 - vshr.u64 d24,d21,#14 @ 23 -#if 23<16 - vld1.64 {d7},[r1]! @ handles unaligned -#endif - vshr.u64 d25,d21,#18 -#if 23>0 - vadd.i64 d17,d30 @ h+=Maj from the past -#endif - vshr.u64 d26,d21,#41 - vld1.64 {d28},[r3,:64]! 
@ K[i++] - vsli.64 d24,d21,#50 - vsli.64 d25,d21,#46 - vmov d29,d21 - vsli.64 d26,d21,#23 -#if 23<16 && defined(__ARMEL__) - vrev64.8 , -#endif - veor d25,d24 - vbsl d29,d22,d23 @ Ch(e,f,g) - vshr.u64 d24,d17,#28 - veor d26,d25 @ Sigma1(e) - vadd.i64 d27,d29,d16 - vshr.u64 d25,d17,#34 - vsli.64 d24,d17,#36 - vadd.i64 d27,d26 - vshr.u64 d26,d17,#39 - vadd.i64 d28,d7 - vsli.64 d25,d17,#30 - veor d30,d17,d18 - vsli.64 d26,d17,#25 - veor d16,d24,d25 - vadd.i64 d27,d28 - vbsl d30,d19,d18 @ Maj(a,b,c) - veor d16,d26 @ Sigma0(a) - vadd.i64 d20,d27 - vadd.i64 d30,d27 - @ vadd.i64 d16,d30 - vshr.u64 q12,q3,#19 - vshr.u64 q13,q3,#61 - vadd.i64 d16,d30 @ h+=Maj from the past - vshr.u64 q15,q3,#6 - vsli.64 q12,q3,#45 - vext.8 q14,q4,q5,#8 @ X[i+1] - vsli.64 q13,q3,#3 - veor q15,q12 - vshr.u64 q12,q14,#1 - veor q15,q13 @ sigma1(X[i+14]) - vshr.u64 q13,q14,#8 - vadd.i64 q4,q15 - vshr.u64 q15,q14,#7 - vsli.64 q12,q14,#63 - vsli.64 q13,q14,#56 - vext.8 q14,q0,q1,#8 @ X[i+9] - veor q15,q12 - vshr.u64 d24,d20,#14 @ from NEON_00_15 - vadd.i64 q4,q14 - vshr.u64 d25,d20,#18 @ from NEON_00_15 - veor q15,q13 @ sigma0(X[i+1]) - vshr.u64 d26,d20,#41 @ from NEON_00_15 - vadd.i64 q4,q15 - vld1.64 {d28},[r3,:64]! @ K[i++] - vsli.64 d24,d20,#50 - vsli.64 d25,d20,#46 - vmov d29,d20 - vsli.64 d26,d20,#23 -#if 24<16 && defined(__ARMEL__) - vrev64.8 , -#endif - veor d25,d24 - vbsl d29,d21,d22 @ Ch(e,f,g) - vshr.u64 d24,d16,#28 - veor d26,d25 @ Sigma1(e) - vadd.i64 d27,d29,d23 - vshr.u64 d25,d16,#34 - vsli.64 d24,d16,#36 - vadd.i64 d27,d26 - vshr.u64 d26,d16,#39 - vadd.i64 d28,d8 - vsli.64 d25,d16,#30 - veor d30,d16,d17 - vsli.64 d26,d16,#25 - veor d23,d24,d25 - vadd.i64 d27,d28 - vbsl d30,d18,d17 @ Maj(a,b,c) - veor d23,d26 @ Sigma0(a) - vadd.i64 d19,d27 - vadd.i64 d30,d27 - @ vadd.i64 d23,d30 - vshr.u64 d24,d19,#14 @ 25 -#if 25<16 - vld1.64 {d9},[r1]! @ handles unaligned -#endif - vshr.u64 d25,d19,#18 -#if 25>0 - vadd.i64 d23,d30 @ h+=Maj from the past -#endif - vshr.u64 d26,d19,#41 - vld1.64 {d28},[r3,:64]! @ K[i++] - vsli.64 d24,d19,#50 - vsli.64 d25,d19,#46 - vmov d29,d19 - vsli.64 d26,d19,#23 -#if 25<16 && defined(__ARMEL__) - vrev64.8 , -#endif - veor d25,d24 - vbsl d29,d20,d21 @ Ch(e,f,g) - vshr.u64 d24,d23,#28 - veor d26,d25 @ Sigma1(e) - vadd.i64 d27,d29,d22 - vshr.u64 d25,d23,#34 - vsli.64 d24,d23,#36 - vadd.i64 d27,d26 - vshr.u64 d26,d23,#39 - vadd.i64 d28,d9 - vsli.64 d25,d23,#30 - veor d30,d23,d16 - vsli.64 d26,d23,#25 - veor d22,d24,d25 - vadd.i64 d27,d28 - vbsl d30,d17,d16 @ Maj(a,b,c) - veor d22,d26 @ Sigma0(a) - vadd.i64 d18,d27 - vadd.i64 d30,d27 - @ vadd.i64 d22,d30 - vshr.u64 q12,q4,#19 - vshr.u64 q13,q4,#61 - vadd.i64 d22,d30 @ h+=Maj from the past - vshr.u64 q15,q4,#6 - vsli.64 q12,q4,#45 - vext.8 q14,q5,q6,#8 @ X[i+1] - vsli.64 q13,q4,#3 - veor q15,q12 - vshr.u64 q12,q14,#1 - veor q15,q13 @ sigma1(X[i+14]) - vshr.u64 q13,q14,#8 - vadd.i64 q5,q15 - vshr.u64 q15,q14,#7 - vsli.64 q12,q14,#63 - vsli.64 q13,q14,#56 - vext.8 q14,q1,q2,#8 @ X[i+9] - veor q15,q12 - vshr.u64 d24,d18,#14 @ from NEON_00_15 - vadd.i64 q5,q14 - vshr.u64 d25,d18,#18 @ from NEON_00_15 - veor q15,q13 @ sigma0(X[i+1]) - vshr.u64 d26,d18,#41 @ from NEON_00_15 - vadd.i64 q5,q15 - vld1.64 {d28},[r3,:64]! 
@ K[i++] - vsli.64 d24,d18,#50 - vsli.64 d25,d18,#46 - vmov d29,d18 - vsli.64 d26,d18,#23 -#if 26<16 && defined(__ARMEL__) - vrev64.8 , -#endif - veor d25,d24 - vbsl d29,d19,d20 @ Ch(e,f,g) - vshr.u64 d24,d22,#28 - veor d26,d25 @ Sigma1(e) - vadd.i64 d27,d29,d21 - vshr.u64 d25,d22,#34 - vsli.64 d24,d22,#36 - vadd.i64 d27,d26 - vshr.u64 d26,d22,#39 - vadd.i64 d28,d10 - vsli.64 d25,d22,#30 - veor d30,d22,d23 - vsli.64 d26,d22,#25 - veor d21,d24,d25 - vadd.i64 d27,d28 - vbsl d30,d16,d23 @ Maj(a,b,c) - veor d21,d26 @ Sigma0(a) - vadd.i64 d17,d27 - vadd.i64 d30,d27 - @ vadd.i64 d21,d30 - vshr.u64 d24,d17,#14 @ 27 -#if 27<16 - vld1.64 {d11},[r1]! @ handles unaligned -#endif - vshr.u64 d25,d17,#18 -#if 27>0 - vadd.i64 d21,d30 @ h+=Maj from the past -#endif - vshr.u64 d26,d17,#41 - vld1.64 {d28},[r3,:64]! @ K[i++] - vsli.64 d24,d17,#50 - vsli.64 d25,d17,#46 - vmov d29,d17 - vsli.64 d26,d17,#23 -#if 27<16 && defined(__ARMEL__) - vrev64.8 , -#endif - veor d25,d24 - vbsl d29,d18,d19 @ Ch(e,f,g) - vshr.u64 d24,d21,#28 - veor d26,d25 @ Sigma1(e) - vadd.i64 d27,d29,d20 - vshr.u64 d25,d21,#34 - vsli.64 d24,d21,#36 - vadd.i64 d27,d26 - vshr.u64 d26,d21,#39 - vadd.i64 d28,d11 - vsli.64 d25,d21,#30 - veor d30,d21,d22 - vsli.64 d26,d21,#25 - veor d20,d24,d25 - vadd.i64 d27,d28 - vbsl d30,d23,d22 @ Maj(a,b,c) - veor d20,d26 @ Sigma0(a) - vadd.i64 d16,d27 - vadd.i64 d30,d27 - @ vadd.i64 d20,d30 - vshr.u64 q12,q5,#19 - vshr.u64 q13,q5,#61 - vadd.i64 d20,d30 @ h+=Maj from the past - vshr.u64 q15,q5,#6 - vsli.64 q12,q5,#45 - vext.8 q14,q6,q7,#8 @ X[i+1] - vsli.64 q13,q5,#3 - veor q15,q12 - vshr.u64 q12,q14,#1 - veor q15,q13 @ sigma1(X[i+14]) - vshr.u64 q13,q14,#8 - vadd.i64 q6,q15 - vshr.u64 q15,q14,#7 - vsli.64 q12,q14,#63 - vsli.64 q13,q14,#56 - vext.8 q14,q2,q3,#8 @ X[i+9] - veor q15,q12 - vshr.u64 d24,d16,#14 @ from NEON_00_15 - vadd.i64 q6,q14 - vshr.u64 d25,d16,#18 @ from NEON_00_15 - veor q15,q13 @ sigma0(X[i+1]) - vshr.u64 d26,d16,#41 @ from NEON_00_15 - vadd.i64 q6,q15 - vld1.64 {d28},[r3,:64]! @ K[i++] - vsli.64 d24,d16,#50 - vsli.64 d25,d16,#46 - vmov d29,d16 - vsli.64 d26,d16,#23 -#if 28<16 && defined(__ARMEL__) - vrev64.8 , -#endif - veor d25,d24 - vbsl d29,d17,d18 @ Ch(e,f,g) - vshr.u64 d24,d20,#28 - veor d26,d25 @ Sigma1(e) - vadd.i64 d27,d29,d19 - vshr.u64 d25,d20,#34 - vsli.64 d24,d20,#36 - vadd.i64 d27,d26 - vshr.u64 d26,d20,#39 - vadd.i64 d28,d12 - vsli.64 d25,d20,#30 - veor d30,d20,d21 - vsli.64 d26,d20,#25 - veor d19,d24,d25 - vadd.i64 d27,d28 - vbsl d30,d22,d21 @ Maj(a,b,c) - veor d19,d26 @ Sigma0(a) - vadd.i64 d23,d27 - vadd.i64 d30,d27 - @ vadd.i64 d19,d30 - vshr.u64 d24,d23,#14 @ 29 -#if 29<16 - vld1.64 {d13},[r1]! @ handles unaligned -#endif - vshr.u64 d25,d23,#18 -#if 29>0 - vadd.i64 d19,d30 @ h+=Maj from the past -#endif - vshr.u64 d26,d23,#41 - vld1.64 {d28},[r3,:64]! 
@ K[i++]
-	vsli.64	d24,d23,#50
-	vsli.64	d25,d23,#46
-	vmov	d29,d23
-	vsli.64	d26,d23,#23
-#if 29<16 && defined(__ARMEL__)
-	vrev64.8	,
-#endif
-	veor	d25,d24
-	vbsl	d29,d16,d17		@ Ch(e,f,g)
-	vshr.u64	d24,d19,#28
-	veor	d26,d25			@ Sigma1(e)
-	vadd.i64	d27,d29,d18
-	vshr.u64	d25,d19,#34
-	vsli.64	d24,d19,#36
-	vadd.i64	d27,d26
-	vshr.u64	d26,d19,#39
-	vadd.i64	d28,d13
-	vsli.64	d25,d19,#30
-	veor	d30,d19,d20
-	vsli.64	d26,d19,#25
-	veor	d18,d24,d25
-	vadd.i64	d27,d28
-	vbsl	d30,d21,d20		@ Maj(a,b,c)
-	veor	d18,d26			@ Sigma0(a)
-	vadd.i64	d22,d27
-	vadd.i64	d30,d27
-	@ vadd.i64	d18,d30
-	vshr.u64	q12,q6,#19
-	vshr.u64	q13,q6,#61
-	vadd.i64	d18,d30	@ h+=Maj from the past
-	vshr.u64	q15,q6,#6
-	vsli.64	q12,q6,#45
-	vext.8	q14,q7,q0,#8	@ X[i+1]
-	vsli.64	q13,q6,#3
-	veor	q15,q12
-	vshr.u64	q12,q14,#1
-	veor	q15,q13				@ sigma1(X[i+14])
-	vshr.u64	q13,q14,#8
-	vadd.i64	q7,q15
-	vshr.u64	q15,q14,#7
-	vsli.64	q12,q14,#63
-	vsli.64	q13,q14,#56
-	vext.8	q14,q3,q4,#8	@ X[i+9]
-	veor	q15,q12
-	vshr.u64	d24,d22,#14	@ from NEON_00_15
-	vadd.i64	q7,q14
-	vshr.u64	d25,d22,#18	@ from NEON_00_15
-	veor	q15,q13				@ sigma0(X[i+1])
-	vshr.u64	d26,d22,#41	@ from NEON_00_15
-	vadd.i64	q7,q15
-	vld1.64	{d28},[r3,:64]!	@ K[i++]
-	vsli.64	d24,d22,#50
-	vsli.64	d25,d22,#46
-	vmov	d29,d22
-	vsli.64	d26,d22,#23
-#if 30<16 && defined(__ARMEL__)
-	vrev64.8	,
-#endif
-	veor	d25,d24
-	vbsl	d29,d23,d16		@ Ch(e,f,g)
-	vshr.u64	d24,d18,#28
-	veor	d26,d25			@ Sigma1(e)
-	vadd.i64	d27,d29,d17
-	vshr.u64	d25,d18,#34
-	vsli.64	d24,d18,#36
-	vadd.i64	d27,d26
-	vshr.u64	d26,d18,#39
-	vadd.i64	d28,d14
-	vsli.64	d25,d18,#30
-	veor	d30,d18,d19
-	vsli.64	d26,d18,#25
-	veor	d17,d24,d25
-	vadd.i64	d27,d28
-	vbsl	d30,d20,d19		@ Maj(a,b,c)
-	veor	d17,d26			@ Sigma0(a)
-	vadd.i64	d21,d27
-	vadd.i64	d30,d27
-	@ vadd.i64	d17,d30
-	vshr.u64	d24,d21,#14	@ 31
-#if 31<16
-	vld1.64	{d15},[r1]!	@ handles unaligned
-#endif
-	vshr.u64	d25,d21,#18
-#if 31>0
-	vadd.i64	d17,d30	@ h+=Maj from the past
-#endif
-	vshr.u64	d26,d21,#41
-	vld1.64	{d28},[r3,:64]!	@ K[i++]
-	vsli.64	d24,d21,#50
-	vsli.64	d25,d21,#46
-	vmov	d29,d21
-	vsli.64	d26,d21,#23
-#if 31<16 && defined(__ARMEL__)
-	vrev64.8	,
-#endif
-	veor	d25,d24
-	vbsl	d29,d22,d23		@ Ch(e,f,g)
-	vshr.u64	d24,d17,#28
-	veor	d26,d25			@ Sigma1(e)
-	vadd.i64	d27,d29,d16
-	vshr.u64	d25,d17,#34
-	vsli.64	d24,d17,#36
-	vadd.i64	d27,d26
-	vshr.u64	d26,d17,#39
-	vadd.i64	d28,d15
-	vsli.64	d25,d17,#30
-	veor	d30,d17,d18
-	vsli.64	d26,d17,#25
-	veor	d16,d24,d25
-	vadd.i64	d27,d28
-	vbsl	d30,d19,d18		@ Maj(a,b,c)
-	veor	d16,d26			@ Sigma0(a)
-	vadd.i64	d20,d27
-	vadd.i64	d30,d27
-	@ vadd.i64	d16,d30
-	bne	.L16_79_neon
-
-	vadd.i64	d16,d30		@ h+=Maj from the past
-	vldmia	r0,{d24-d31}	@ load context to temp
-	vadd.i64	q8,q12		@ vectorized accumulate
-	vadd.i64	q9,q13
-	vadd.i64	q10,q14
-	vadd.i64	q11,q15
-	vstmia	r0,{d16-d23}	@ save context
-	teq	r1,r2
-	sub	r3,#640	@ rewind K512
-	bne	.Loop_neon
-
-	VFP_ABI_POP
-	bx	lr				@ .word 0xe12fff1e
-.size	sha512_block_data_order_neon,.-sha512_block_data_order_neon
-#endif
-.asciz	"SHA512 block transform for ARMv4/NEON, CRYPTOGAMS by "
-.align	2
-#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
-.comm	OPENSSL_armcap_P,4,4
-#endif

From 8116138cbfcee80b1bf9b57073278dcd86b44656 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada
Date: Mon, 26 Apr 2021 02:57:32 +0900
Subject: [PATCH 002/142] crypto: arm - use a pattern rule for generating *.S
 files

Unify similar build rules.
Signed-off-by: Masahiro Yamada
Signed-off-by: Herbert Xu
---
 arch/arm/crypto/Makefile | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/arch/arm/crypto/Makefile b/arch/arm/crypto/Makefile
index 51f160c61740f..eafa898ba6a73 100644
--- a/arch/arm/crypto/Makefile
+++ b/arch/arm/crypto/Makefile
@@ -48,13 +48,7 @@ curve25519-neon-y := curve25519-core.o curve25519-glue.o
 quiet_cmd_perl = PERL $@
       cmd_perl = $(PERL) $(<) > $(@)
 
-$(obj)/poly1305-core.S: $(src)/poly1305-armv4.pl
-	$(call cmd,perl)
-
-$(obj)/sha256-core.S: $(src)/sha256-armv4.pl
-	$(call cmd,perl)
-
-$(obj)/sha512-core.S: $(src)/sha512-armv4.pl
+$(obj)/%-core.S: $(src)/%-armv4.pl
 	$(call cmd,perl)
 
 clean-files += poly1305-core.S sha256-core.S sha512-core.S

From 12dd461ebd1941afe821539419685ff9dea3a31d Mon Sep 17 00:00:00 2001
From: Masahiro Yamada
Date: Mon, 26 Apr 2021 02:57:33 +0900
Subject: [PATCH 003/142] crypto: arm64 - generate *.S by Perl at build time
 instead of shipping them

Generate *.S by Perl like arch/{mips,x86}/crypto/Makefile.

Signed-off-by: Masahiro Yamada
Signed-off-by: Herbert Xu
---
 arch/arm64/crypto/Makefile                |    9 +-
 arch/arm64/crypto/poly1305-core.S_shipped |  835 ---------
 arch/arm64/crypto/sha256-core.S_shipped   | 2069 ---------------------
 arch/arm64/crypto/sha512-core.S_shipped   | 1093 -----------
 4 files changed, 3 insertions(+), 4003 deletions(-)
 delete mode 100644 arch/arm64/crypto/poly1305-core.S_shipped
 delete mode 100644 arch/arm64/crypto/sha256-core.S_shipped
 delete mode 100644 arch/arm64/crypto/sha512-core.S_shipped

diff --git a/arch/arm64/crypto/Makefile b/arch/arm64/crypto/Makefile
index d0901e610df3b..592e52a08c623 100644
--- a/arch/arm64/crypto/Makefile
+++ b/arch/arm64/crypto/Makefile
@@ -68,19 +68,16 @@ CFLAGS_aes-glue-ce.o := -DUSE_V8_CRYPTO_EXTENSIONS
 $(obj)/aes-glue-%.o: $(src)/aes-glue.c FORCE
 	$(call if_changed_rule,cc_o_c)
 
-ifdef REGENERATE_ARM64_CRYPTO
 quiet_cmd_perlasm = PERLASM $@
       cmd_perlasm = $(PERL) $(<) void $(@)
 
-$(src)/poly1305-core.S_shipped: $(src)/poly1305-armv8.pl
+$(obj)/poly1305-core.S: $(src)/poly1305-armv8.pl
 	$(call cmd,perlasm)
 
-$(src)/sha256-core.S_shipped: $(src)/sha512-armv8.pl
+$(obj)/sha256-core.S: $(src)/sha512-armv8.pl
 	$(call cmd,perlasm)
 
-$(src)/sha512-core.S_shipped: $(src)/sha512-armv8.pl
+$(obj)/sha512-core.S: $(src)/sha512-armv8.pl
 	$(call cmd,perlasm)
-endif
-
 clean-files += poly1305-core.S sha256-core.S sha512-core.S

diff --git a/arch/arm64/crypto/poly1305-core.S_shipped b/arch/arm64/crypto/poly1305-core.S_shipped
deleted file mode 100644
index fb2822abf63aa..0000000000000
--- a/arch/arm64/crypto/poly1305-core.S_shipped
+++ /dev/null
@@ -1,835 +0,0 @@
-#ifndef __KERNEL__
-# include "arm_arch.h"
-.extern	OPENSSL_armcap_P
-#endif
-
-.text
-
-// forward "declarations" are required for Apple
-.globl	poly1305_blocks
-.globl	poly1305_emit
-
-.globl	poly1305_init
-.type	poly1305_init,%function
-.align	5
-poly1305_init:
-	cmp	x1,xzr
-	stp	xzr,xzr,[x0]		// zero hash value
-	stp	xzr,xzr,[x0,#16]	// [along with is_base2_26]
-
-	csel	x0,xzr,x0,eq
-	b.eq	.Lno_key
-
-#ifndef	__KERNEL__
-	adrp	x17,OPENSSL_armcap_P
-	ldr	w17,[x17,#:lo12:OPENSSL_armcap_P]
-#endif
-
-	ldp	x7,x8,[x1]		// load key
-	mov	x9,#0xfffffffc0fffffff
-	movk	x9,#0x0fff,lsl#48
-#ifdef	__AARCH64EB__
-	rev	x7,x7			// flip bytes
-	rev	x8,x8
-#endif
-	and	x7,x7,x9		// &=0ffffffc0fffffff
-	and	x9,x9,#-4
-	and	x8,x8,x9		// &=0ffffffc0ffffffc
-	mov	w9,#-1
-	stp	x7,x8,[x0,#32]	// save key value
-	str	w9,[x0,#48]	// impossible key power value
-
-#ifndef	__KERNEL__
-	tst	w17,#ARMV7_NEON
-
-	adr
x12,.Lpoly1305_blocks - adr x7,.Lpoly1305_blocks_neon - adr x13,.Lpoly1305_emit - - csel x12,x12,x7,eq - -# ifdef __ILP32__ - stp w12,w13,[x2] -# else - stp x12,x13,[x2] -# endif -#endif - mov x0,#1 -.Lno_key: - ret -.size poly1305_init,.-poly1305_init - -.type poly1305_blocks,%function -.align 5 -poly1305_blocks: -.Lpoly1305_blocks: - ands x2,x2,#-16 - b.eq .Lno_data - - ldp x4,x5,[x0] // load hash value - ldp x6,x17,[x0,#16] // [along with is_base2_26] - ldp x7,x8,[x0,#32] // load key value - -#ifdef __AARCH64EB__ - lsr x12,x4,#32 - mov w13,w4 - lsr x14,x5,#32 - mov w15,w5 - lsr x16,x6,#32 -#else - mov w12,w4 - lsr x13,x4,#32 - mov w14,w5 - lsr x15,x5,#32 - mov w16,w6 -#endif - - add x12,x12,x13,lsl#26 // base 2^26 -> base 2^64 - lsr x13,x14,#12 - adds x12,x12,x14,lsl#52 - add x13,x13,x15,lsl#14 - adc x13,x13,xzr - lsr x14,x16,#24 - adds x13,x13,x16,lsl#40 - adc x14,x14,xzr - - cmp x17,#0 // is_base2_26? - add x9,x8,x8,lsr#2 // s1 = r1 + (r1 >> 2) - csel x4,x4,x12,eq // choose between radixes - csel x5,x5,x13,eq - csel x6,x6,x14,eq - -.Loop: - ldp x10,x11,[x1],#16 // load input - sub x2,x2,#16 -#ifdef __AARCH64EB__ - rev x10,x10 - rev x11,x11 -#endif - adds x4,x4,x10 // accumulate input - adcs x5,x5,x11 - - mul x12,x4,x7 // h0*r0 - adc x6,x6,x3 - umulh x13,x4,x7 - - mul x10,x5,x9 // h1*5*r1 - umulh x11,x5,x9 - - adds x12,x12,x10 - mul x10,x4,x8 // h0*r1 - adc x13,x13,x11 - umulh x14,x4,x8 - - adds x13,x13,x10 - mul x10,x5,x7 // h1*r0 - adc x14,x14,xzr - umulh x11,x5,x7 - - adds x13,x13,x10 - mul x10,x6,x9 // h2*5*r1 - adc x14,x14,x11 - mul x11,x6,x7 // h2*r0 - - adds x13,x13,x10 - adc x14,x14,x11 - - and x10,x14,#-4 // final reduction - and x6,x14,#3 - add x10,x10,x14,lsr#2 - adds x4,x12,x10 - adcs x5,x13,xzr - adc x6,x6,xzr - - cbnz x2,.Loop - - stp x4,x5,[x0] // store hash value - stp x6,xzr,[x0,#16] // [and clear is_base2_26] - -.Lno_data: - ret -.size poly1305_blocks,.-poly1305_blocks - -.type poly1305_emit,%function -.align 5 -poly1305_emit: -.Lpoly1305_emit: - ldp x4,x5,[x0] // load hash base 2^64 - ldp x6,x7,[x0,#16] // [along with is_base2_26] - ldp x10,x11,[x2] // load nonce - -#ifdef __AARCH64EB__ - lsr x12,x4,#32 - mov w13,w4 - lsr x14,x5,#32 - mov w15,w5 - lsr x16,x6,#32 -#else - mov w12,w4 - lsr x13,x4,#32 - mov w14,w5 - lsr x15,x5,#32 - mov w16,w6 -#endif - - add x12,x12,x13,lsl#26 // base 2^26 -> base 2^64 - lsr x13,x14,#12 - adds x12,x12,x14,lsl#52 - add x13,x13,x15,lsl#14 - adc x13,x13,xzr - lsr x14,x16,#24 - adds x13,x13,x16,lsl#40 - adc x14,x14,xzr - - cmp x7,#0 // is_base2_26? 
- csel x4,x4,x12,eq // choose between radixes - csel x5,x5,x13,eq - csel x6,x6,x14,eq - - adds x12,x4,#5 // compare to modulus - adcs x13,x5,xzr - adc x14,x6,xzr - - tst x14,#-4 // see if it's carried/borrowed - - csel x4,x4,x12,eq - csel x5,x5,x13,eq - -#ifdef __AARCH64EB__ - ror x10,x10,#32 // flip nonce words - ror x11,x11,#32 -#endif - adds x4,x4,x10 // accumulate nonce - adc x5,x5,x11 -#ifdef __AARCH64EB__ - rev x4,x4 // flip output bytes - rev x5,x5 -#endif - stp x4,x5,[x1] // write result - - ret -.size poly1305_emit,.-poly1305_emit -.type poly1305_mult,%function -.align 5 -poly1305_mult: - mul x12,x4,x7 // h0*r0 - umulh x13,x4,x7 - - mul x10,x5,x9 // h1*5*r1 - umulh x11,x5,x9 - - adds x12,x12,x10 - mul x10,x4,x8 // h0*r1 - adc x13,x13,x11 - umulh x14,x4,x8 - - adds x13,x13,x10 - mul x10,x5,x7 // h1*r0 - adc x14,x14,xzr - umulh x11,x5,x7 - - adds x13,x13,x10 - mul x10,x6,x9 // h2*5*r1 - adc x14,x14,x11 - mul x11,x6,x7 // h2*r0 - - adds x13,x13,x10 - adc x14,x14,x11 - - and x10,x14,#-4 // final reduction - and x6,x14,#3 - add x10,x10,x14,lsr#2 - adds x4,x12,x10 - adcs x5,x13,xzr - adc x6,x6,xzr - - ret -.size poly1305_mult,.-poly1305_mult - -.type poly1305_splat,%function -.align 4 -poly1305_splat: - and x12,x4,#0x03ffffff // base 2^64 -> base 2^26 - ubfx x13,x4,#26,#26 - extr x14,x5,x4,#52 - and x14,x14,#0x03ffffff - ubfx x15,x5,#14,#26 - extr x16,x6,x5,#40 - - str w12,[x0,#16*0] // r0 - add w12,w13,w13,lsl#2 // r1*5 - str w13,[x0,#16*1] // r1 - add w13,w14,w14,lsl#2 // r2*5 - str w12,[x0,#16*2] // s1 - str w14,[x0,#16*3] // r2 - add w14,w15,w15,lsl#2 // r3*5 - str w13,[x0,#16*4] // s2 - str w15,[x0,#16*5] // r3 - add w15,w16,w16,lsl#2 // r4*5 - str w14,[x0,#16*6] // s3 - str w16,[x0,#16*7] // r4 - str w15,[x0,#16*8] // s4 - - ret -.size poly1305_splat,.-poly1305_splat - -#ifdef __KERNEL__ -.globl poly1305_blocks_neon -#endif -.type poly1305_blocks_neon,%function -.align 5 -poly1305_blocks_neon: -.Lpoly1305_blocks_neon: - ldr x17,[x0,#24] - cmp x2,#128 - b.lo .Lpoly1305_blocks - - .inst 0xd503233f // paciasp - stp x29,x30,[sp,#-80]! - add x29,sp,#0 - - stp d8,d9,[sp,#16] // meet ABI requirements - stp d10,d11,[sp,#32] - stp d12,d13,[sp,#48] - stp d14,d15,[sp,#64] - - cbz x17,.Lbase2_64_neon - - ldp w10,w11,[x0] // load hash value base 2^26 - ldp w12,w13,[x0,#8] - ldr w14,[x0,#16] - - tst x2,#31 - b.eq .Leven_neon - - ldp x7,x8,[x0,#32] // load key value - - add x4,x10,x11,lsl#26 // base 2^26 -> base 2^64 - lsr x5,x12,#12 - adds x4,x4,x12,lsl#52 - add x5,x5,x13,lsl#14 - adc x5,x5,xzr - lsr x6,x14,#24 - adds x5,x5,x14,lsl#40 - adc x14,x6,xzr // can be partially reduced... 
- - ldp x12,x13,[x1],#16 // load input - sub x2,x2,#16 - add x9,x8,x8,lsr#2 // s1 = r1 + (r1 >> 2) - -#ifdef __AARCH64EB__ - rev x12,x12 - rev x13,x13 -#endif - adds x4,x4,x12 // accumulate input - adcs x5,x5,x13 - adc x6,x6,x3 - - bl poly1305_mult - - and x10,x4,#0x03ffffff // base 2^64 -> base 2^26 - ubfx x11,x4,#26,#26 - extr x12,x5,x4,#52 - and x12,x12,#0x03ffffff - ubfx x13,x5,#14,#26 - extr x14,x6,x5,#40 - - b .Leven_neon - -.align 4 -.Lbase2_64_neon: - ldp x7,x8,[x0,#32] // load key value - - ldp x4,x5,[x0] // load hash value base 2^64 - ldr x6,[x0,#16] - - tst x2,#31 - b.eq .Linit_neon - - ldp x12,x13,[x1],#16 // load input - sub x2,x2,#16 - add x9,x8,x8,lsr#2 // s1 = r1 + (r1 >> 2) -#ifdef __AARCH64EB__ - rev x12,x12 - rev x13,x13 -#endif - adds x4,x4,x12 // accumulate input - adcs x5,x5,x13 - adc x6,x6,x3 - - bl poly1305_mult - -.Linit_neon: - ldr w17,[x0,#48] // first table element - and x10,x4,#0x03ffffff // base 2^64 -> base 2^26 - ubfx x11,x4,#26,#26 - extr x12,x5,x4,#52 - and x12,x12,#0x03ffffff - ubfx x13,x5,#14,#26 - extr x14,x6,x5,#40 - - cmp w17,#-1 // is value impossible? - b.ne .Leven_neon - - fmov d24,x10 - fmov d25,x11 - fmov d26,x12 - fmov d27,x13 - fmov d28,x14 - - ////////////////////////////////// initialize r^n table - mov x4,x7 // r^1 - add x9,x8,x8,lsr#2 // s1 = r1 + (r1 >> 2) - mov x5,x8 - mov x6,xzr - add x0,x0,#48+12 - bl poly1305_splat - - bl poly1305_mult // r^2 - sub x0,x0,#4 - bl poly1305_splat - - bl poly1305_mult // r^3 - sub x0,x0,#4 - bl poly1305_splat - - bl poly1305_mult // r^4 - sub x0,x0,#4 - bl poly1305_splat - sub x0,x0,#48 // restore original x0 - b .Ldo_neon - -.align 4 -.Leven_neon: - fmov d24,x10 - fmov d25,x11 - fmov d26,x12 - fmov d27,x13 - fmov d28,x14 - -.Ldo_neon: - ldp x8,x12,[x1,#32] // inp[2:3] - subs x2,x2,#64 - ldp x9,x13,[x1,#48] - add x16,x1,#96 - adr x17,.Lzeros - - lsl x3,x3,#24 - add x15,x0,#48 - -#ifdef __AARCH64EB__ - rev x8,x8 - rev x12,x12 - rev x9,x9 - rev x13,x13 -#endif - and x4,x8,#0x03ffffff // base 2^64 -> base 2^26 - and x5,x9,#0x03ffffff - ubfx x6,x8,#26,#26 - ubfx x7,x9,#26,#26 - add x4,x4,x5,lsl#32 // bfi x4,x5,#32,#32 - extr x8,x12,x8,#52 - extr x9,x13,x9,#52 - add x6,x6,x7,lsl#32 // bfi x6,x7,#32,#32 - fmov d14,x4 - and x8,x8,#0x03ffffff - and x9,x9,#0x03ffffff - ubfx x10,x12,#14,#26 - ubfx x11,x13,#14,#26 - add x12,x3,x12,lsr#40 - add x13,x3,x13,lsr#40 - add x8,x8,x9,lsl#32 // bfi x8,x9,#32,#32 - fmov d15,x6 - add x10,x10,x11,lsl#32 // bfi x10,x11,#32,#32 - add x12,x12,x13,lsl#32 // bfi x12,x13,#32,#32 - fmov d16,x8 - fmov d17,x10 - fmov d18,x12 - - ldp x8,x12,[x1],#16 // inp[0:1] - ldp x9,x13,[x1],#48 - - ld1 {v0.4s,v1.4s,v2.4s,v3.4s},[x15],#64 - ld1 {v4.4s,v5.4s,v6.4s,v7.4s},[x15],#64 - ld1 {v8.4s},[x15] - -#ifdef __AARCH64EB__ - rev x8,x8 - rev x12,x12 - rev x9,x9 - rev x13,x13 -#endif - and x4,x8,#0x03ffffff // base 2^64 -> base 2^26 - and x5,x9,#0x03ffffff - ubfx x6,x8,#26,#26 - ubfx x7,x9,#26,#26 - add x4,x4,x5,lsl#32 // bfi x4,x5,#32,#32 - extr x8,x12,x8,#52 - extr x9,x13,x9,#52 - add x6,x6,x7,lsl#32 // bfi x6,x7,#32,#32 - fmov d9,x4 - and x8,x8,#0x03ffffff - and x9,x9,#0x03ffffff - ubfx x10,x12,#14,#26 - ubfx x11,x13,#14,#26 - add x12,x3,x12,lsr#40 - add x13,x3,x13,lsr#40 - add x8,x8,x9,lsl#32 // bfi x8,x9,#32,#32 - fmov d10,x6 - add x10,x10,x11,lsl#32 // bfi x10,x11,#32,#32 - add x12,x12,x13,lsl#32 // bfi x12,x13,#32,#32 - movi v31.2d,#-1 - fmov d11,x8 - fmov d12,x10 - fmov d13,x12 - ushr v31.2d,v31.2d,#38 - - b.ls .Lskip_loop - -.align 4 -.Loop_neon: - 
//////////////////////////////////////////////////////////////// - // ((inp[0]*r^4+inp[2]*r^2+inp[4])*r^4+inp[6]*r^2 - // ((inp[1]*r^4+inp[3]*r^2+inp[5])*r^3+inp[7]*r - // ___________________/ - // ((inp[0]*r^4+inp[2]*r^2+inp[4])*r^4+inp[6]*r^2+inp[8])*r^2 - // ((inp[1]*r^4+inp[3]*r^2+inp[5])*r^4+inp[7]*r^2+inp[9])*r - // ___________________/ ____________________/ - // - // Note that we start with inp[2:3]*r^2. This is because it - // doesn't depend on reduction in previous iteration. - //////////////////////////////////////////////////////////////// - // d4 = h0*r4 + h1*r3 + h2*r2 + h3*r1 + h4*r0 - // d3 = h0*r3 + h1*r2 + h2*r1 + h3*r0 + h4*5*r4 - // d2 = h0*r2 + h1*r1 + h2*r0 + h3*5*r4 + h4*5*r3 - // d1 = h0*r1 + h1*r0 + h2*5*r4 + h3*5*r3 + h4*5*r2 - // d0 = h0*r0 + h1*5*r4 + h2*5*r3 + h3*5*r2 + h4*5*r1 - - subs x2,x2,#64 - umull v23.2d,v14.2s,v7.s[2] - csel x16,x17,x16,lo - umull v22.2d,v14.2s,v5.s[2] - umull v21.2d,v14.2s,v3.s[2] - ldp x8,x12,[x16],#16 // inp[2:3] (or zero) - umull v20.2d,v14.2s,v1.s[2] - ldp x9,x13,[x16],#48 - umull v19.2d,v14.2s,v0.s[2] -#ifdef __AARCH64EB__ - rev x8,x8 - rev x12,x12 - rev x9,x9 - rev x13,x13 -#endif - - umlal v23.2d,v15.2s,v5.s[2] - and x4,x8,#0x03ffffff // base 2^64 -> base 2^26 - umlal v22.2d,v15.2s,v3.s[2] - and x5,x9,#0x03ffffff - umlal v21.2d,v15.2s,v1.s[2] - ubfx x6,x8,#26,#26 - umlal v20.2d,v15.2s,v0.s[2] - ubfx x7,x9,#26,#26 - umlal v19.2d,v15.2s,v8.s[2] - add x4,x4,x5,lsl#32 // bfi x4,x5,#32,#32 - - umlal v23.2d,v16.2s,v3.s[2] - extr x8,x12,x8,#52 - umlal v22.2d,v16.2s,v1.s[2] - extr x9,x13,x9,#52 - umlal v21.2d,v16.2s,v0.s[2] - add x6,x6,x7,lsl#32 // bfi x6,x7,#32,#32 - umlal v20.2d,v16.2s,v8.s[2] - fmov d14,x4 - umlal v19.2d,v16.2s,v6.s[2] - and x8,x8,#0x03ffffff - - umlal v23.2d,v17.2s,v1.s[2] - and x9,x9,#0x03ffffff - umlal v22.2d,v17.2s,v0.s[2] - ubfx x10,x12,#14,#26 - umlal v21.2d,v17.2s,v8.s[2] - ubfx x11,x13,#14,#26 - umlal v20.2d,v17.2s,v6.s[2] - add x8,x8,x9,lsl#32 // bfi x8,x9,#32,#32 - umlal v19.2d,v17.2s,v4.s[2] - fmov d15,x6 - - add v11.2s,v11.2s,v26.2s - add x12,x3,x12,lsr#40 - umlal v23.2d,v18.2s,v0.s[2] - add x13,x3,x13,lsr#40 - umlal v22.2d,v18.2s,v8.s[2] - add x10,x10,x11,lsl#32 // bfi x10,x11,#32,#32 - umlal v21.2d,v18.2s,v6.s[2] - add x12,x12,x13,lsl#32 // bfi x12,x13,#32,#32 - umlal v20.2d,v18.2s,v4.s[2] - fmov d16,x8 - umlal v19.2d,v18.2s,v2.s[2] - fmov d17,x10 - - //////////////////////////////////////////////////////////////// - // (hash+inp[0:1])*r^4 and accumulate - - add v9.2s,v9.2s,v24.2s - fmov d18,x12 - umlal v22.2d,v11.2s,v1.s[0] - ldp x8,x12,[x1],#16 // inp[0:1] - umlal v19.2d,v11.2s,v6.s[0] - ldp x9,x13,[x1],#48 - umlal v23.2d,v11.2s,v3.s[0] - umlal v20.2d,v11.2s,v8.s[0] - umlal v21.2d,v11.2s,v0.s[0] -#ifdef __AARCH64EB__ - rev x8,x8 - rev x12,x12 - rev x9,x9 - rev x13,x13 -#endif - - add v10.2s,v10.2s,v25.2s - umlal v22.2d,v9.2s,v5.s[0] - umlal v23.2d,v9.2s,v7.s[0] - and x4,x8,#0x03ffffff // base 2^64 -> base 2^26 - umlal v21.2d,v9.2s,v3.s[0] - and x5,x9,#0x03ffffff - umlal v19.2d,v9.2s,v0.s[0] - ubfx x6,x8,#26,#26 - umlal v20.2d,v9.2s,v1.s[0] - ubfx x7,x9,#26,#26 - - add v12.2s,v12.2s,v27.2s - add x4,x4,x5,lsl#32 // bfi x4,x5,#32,#32 - umlal v22.2d,v10.2s,v3.s[0] - extr x8,x12,x8,#52 - umlal v23.2d,v10.2s,v5.s[0] - extr x9,x13,x9,#52 - umlal v19.2d,v10.2s,v8.s[0] - add x6,x6,x7,lsl#32 // bfi x6,x7,#32,#32 - umlal v21.2d,v10.2s,v1.s[0] - fmov d9,x4 - umlal v20.2d,v10.2s,v0.s[0] - and x8,x8,#0x03ffffff - - add v13.2s,v13.2s,v28.2s - and x9,x9,#0x03ffffff - umlal v22.2d,v12.2s,v0.s[0] - ubfx x10,x12,#14,#26 
- umlal v19.2d,v12.2s,v4.s[0] - ubfx x11,x13,#14,#26 - umlal v23.2d,v12.2s,v1.s[0] - add x8,x8,x9,lsl#32 // bfi x8,x9,#32,#32 - umlal v20.2d,v12.2s,v6.s[0] - fmov d10,x6 - umlal v21.2d,v12.2s,v8.s[0] - add x12,x3,x12,lsr#40 - - umlal v22.2d,v13.2s,v8.s[0] - add x13,x3,x13,lsr#40 - umlal v19.2d,v13.2s,v2.s[0] - add x10,x10,x11,lsl#32 // bfi x10,x11,#32,#32 - umlal v23.2d,v13.2s,v0.s[0] - add x12,x12,x13,lsl#32 // bfi x12,x13,#32,#32 - umlal v20.2d,v13.2s,v4.s[0] - fmov d11,x8 - umlal v21.2d,v13.2s,v6.s[0] - fmov d12,x10 - fmov d13,x12 - - ///////////////////////////////////////////////////////////////// - // lazy reduction as discussed in "NEON crypto" by D.J. Bernstein - // and P. Schwabe - // - // [see discussion in poly1305-armv4 module] - - ushr v29.2d,v22.2d,#26 - xtn v27.2s,v22.2d - ushr v30.2d,v19.2d,#26 - and v19.16b,v19.16b,v31.16b - add v23.2d,v23.2d,v29.2d // h3 -> h4 - bic v27.2s,#0xfc,lsl#24 // &=0x03ffffff - add v20.2d,v20.2d,v30.2d // h0 -> h1 - - ushr v29.2d,v23.2d,#26 - xtn v28.2s,v23.2d - ushr v30.2d,v20.2d,#26 - xtn v25.2s,v20.2d - bic v28.2s,#0xfc,lsl#24 - add v21.2d,v21.2d,v30.2d // h1 -> h2 - - add v19.2d,v19.2d,v29.2d - shl v29.2d,v29.2d,#2 - shrn v30.2s,v21.2d,#26 - xtn v26.2s,v21.2d - add v19.2d,v19.2d,v29.2d // h4 -> h0 - bic v25.2s,#0xfc,lsl#24 - add v27.2s,v27.2s,v30.2s // h2 -> h3 - bic v26.2s,#0xfc,lsl#24 - - shrn v29.2s,v19.2d,#26 - xtn v24.2s,v19.2d - ushr v30.2s,v27.2s,#26 - bic v27.2s,#0xfc,lsl#24 - bic v24.2s,#0xfc,lsl#24 - add v25.2s,v25.2s,v29.2s // h0 -> h1 - add v28.2s,v28.2s,v30.2s // h3 -> h4 - - b.hi .Loop_neon - -.Lskip_loop: - dup v16.2d,v16.d[0] - add v11.2s,v11.2s,v26.2s - - //////////////////////////////////////////////////////////////// - // multiply (inp[0:1]+hash) or inp[2:3] by r^2:r^1 - - adds x2,x2,#32 - b.ne .Long_tail - - dup v16.2d,v11.d[0] - add v14.2s,v9.2s,v24.2s - add v17.2s,v12.2s,v27.2s - add v15.2s,v10.2s,v25.2s - add v18.2s,v13.2s,v28.2s - -.Long_tail: - dup v14.2d,v14.d[0] - umull2 v19.2d,v16.4s,v6.4s - umull2 v22.2d,v16.4s,v1.4s - umull2 v23.2d,v16.4s,v3.4s - umull2 v21.2d,v16.4s,v0.4s - umull2 v20.2d,v16.4s,v8.4s - - dup v15.2d,v15.d[0] - umlal2 v19.2d,v14.4s,v0.4s - umlal2 v21.2d,v14.4s,v3.4s - umlal2 v22.2d,v14.4s,v5.4s - umlal2 v23.2d,v14.4s,v7.4s - umlal2 v20.2d,v14.4s,v1.4s - - dup v17.2d,v17.d[0] - umlal2 v19.2d,v15.4s,v8.4s - umlal2 v22.2d,v15.4s,v3.4s - umlal2 v21.2d,v15.4s,v1.4s - umlal2 v23.2d,v15.4s,v5.4s - umlal2 v20.2d,v15.4s,v0.4s - - dup v18.2d,v18.d[0] - umlal2 v22.2d,v17.4s,v0.4s - umlal2 v23.2d,v17.4s,v1.4s - umlal2 v19.2d,v17.4s,v4.4s - umlal2 v20.2d,v17.4s,v6.4s - umlal2 v21.2d,v17.4s,v8.4s - - umlal2 v22.2d,v18.4s,v8.4s - umlal2 v19.2d,v18.4s,v2.4s - umlal2 v23.2d,v18.4s,v0.4s - umlal2 v20.2d,v18.4s,v4.4s - umlal2 v21.2d,v18.4s,v6.4s - - b.eq .Lshort_tail - - //////////////////////////////////////////////////////////////// - // (hash+inp[0:1])*r^4:r^3 and accumulate - - add v9.2s,v9.2s,v24.2s - umlal v22.2d,v11.2s,v1.2s - umlal v19.2d,v11.2s,v6.2s - umlal v23.2d,v11.2s,v3.2s - umlal v20.2d,v11.2s,v8.2s - umlal v21.2d,v11.2s,v0.2s - - add v10.2s,v10.2s,v25.2s - umlal v22.2d,v9.2s,v5.2s - umlal v19.2d,v9.2s,v0.2s - umlal v23.2d,v9.2s,v7.2s - umlal v20.2d,v9.2s,v1.2s - umlal v21.2d,v9.2s,v3.2s - - add v12.2s,v12.2s,v27.2s - umlal v22.2d,v10.2s,v3.2s - umlal v19.2d,v10.2s,v8.2s - umlal v23.2d,v10.2s,v5.2s - umlal v20.2d,v10.2s,v0.2s - umlal v21.2d,v10.2s,v1.2s - - add v13.2s,v13.2s,v28.2s - umlal v22.2d,v12.2s,v0.2s - umlal v19.2d,v12.2s,v4.2s - umlal v23.2d,v12.2s,v1.2s - umlal v20.2d,v12.2s,v6.2s - umlal 
v21.2d,v12.2s,v8.2s - - umlal v22.2d,v13.2s,v8.2s - umlal v19.2d,v13.2s,v2.2s - umlal v23.2d,v13.2s,v0.2s - umlal v20.2d,v13.2s,v4.2s - umlal v21.2d,v13.2s,v6.2s - -.Lshort_tail: - //////////////////////////////////////////////////////////////// - // horizontal add - - addp v22.2d,v22.2d,v22.2d - ldp d8,d9,[sp,#16] // meet ABI requirements - addp v19.2d,v19.2d,v19.2d - ldp d10,d11,[sp,#32] - addp v23.2d,v23.2d,v23.2d - ldp d12,d13,[sp,#48] - addp v20.2d,v20.2d,v20.2d - ldp d14,d15,[sp,#64] - addp v21.2d,v21.2d,v21.2d - ldr x30,[sp,#8] - - //////////////////////////////////////////////////////////////// - // lazy reduction, but without narrowing - - ushr v29.2d,v22.2d,#26 - and v22.16b,v22.16b,v31.16b - ushr v30.2d,v19.2d,#26 - and v19.16b,v19.16b,v31.16b - - add v23.2d,v23.2d,v29.2d // h3 -> h4 - add v20.2d,v20.2d,v30.2d // h0 -> h1 - - ushr v29.2d,v23.2d,#26 - and v23.16b,v23.16b,v31.16b - ushr v30.2d,v20.2d,#26 - and v20.16b,v20.16b,v31.16b - add v21.2d,v21.2d,v30.2d // h1 -> h2 - - add v19.2d,v19.2d,v29.2d - shl v29.2d,v29.2d,#2 - ushr v30.2d,v21.2d,#26 - and v21.16b,v21.16b,v31.16b - add v19.2d,v19.2d,v29.2d // h4 -> h0 - add v22.2d,v22.2d,v30.2d // h2 -> h3 - - ushr v29.2d,v19.2d,#26 - and v19.16b,v19.16b,v31.16b - ushr v30.2d,v22.2d,#26 - and v22.16b,v22.16b,v31.16b - add v20.2d,v20.2d,v29.2d // h0 -> h1 - add v23.2d,v23.2d,v30.2d // h3 -> h4 - - //////////////////////////////////////////////////////////////// - // write the result, can be partially reduced - - st4 {v19.s,v20.s,v21.s,v22.s}[0],[x0],#16 - mov x4,#1 - st1 {v23.s}[0],[x0] - str x4,[x0,#8] // set is_base2_26 - - ldr x29,[sp],#80 - .inst 0xd50323bf // autiasp - ret -.size poly1305_blocks_neon,.-poly1305_blocks_neon - -.align 5 -.Lzeros: -.long 0,0,0,0,0,0,0,0 -.asciz "Poly1305 for ARMv8, CRYPTOGAMS by @dot-asm" -.align 2 -#if !defined(__KERNEL__) && !defined(_WIN64) -.comm OPENSSL_armcap_P,4,4 -.hidden OPENSSL_armcap_P -#endif diff --git a/arch/arm64/crypto/sha256-core.S_shipped b/arch/arm64/crypto/sha256-core.S_shipped deleted file mode 100644 index 7c7ce2e3bad6b..0000000000000 --- a/arch/arm64/crypto/sha256-core.S_shipped +++ /dev/null @@ -1,2069 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 - -// This code is taken from the OpenSSL project but the author (Andy Polyakov) -// has relicensed it under the GPLv2. Therefore this program is free software; -// you can redistribute it and/or modify it under the terms of the GNU General -// Public License version 2 as published by the Free Software Foundation. -// -// The original headers, including the original license headers, are -// included below for completeness. - -// Copyright 2014-2016 The OpenSSL Project Authors. All Rights Reserved. -// -// Licensed under the OpenSSL license (the "License"). You may not use -// this file except in compliance with the License. You can obtain a copy -// in the file LICENSE in the source distribution or at -// https://www.openssl.org/source/license.html - -// ==================================================================== -// Written by Andy Polyakov for the OpenSSL -// project. The module is, however, dual licensed under OpenSSL and -// CRYPTOGAMS licenses depending on where you obtain it. For further -// details see http://www.openssl.org/~appro/cryptogams/. -// ==================================================================== -// -// SHA256/512 for ARMv8. 
-//
-// Performance in cycles per processed byte and improvement coefficient
-// over code generated with "default" compiler:
-//
-//		SHA256-hw	SHA256(*)	SHA512
-// Apple A7	1.97		10.5 (+33%)	6.73 (-1%(**))
-// Cortex-A53	2.38		15.5 (+115%)	10.0 (+150%(***))
-// Cortex-A57	2.31		11.6 (+86%)	7.51 (+260%(***))
-// Denver	2.01		10.5 (+26%)	6.70 (+8%)
-// X-Gene			20.0 (+100%)	12.8 (+300%(***))
-// Mongoose	2.36		13.0 (+50%)	8.36 (+33%)
-//
-// (*)	Software SHA256 results are of lesser relevance, presented
-//	mostly for informational purposes.
-// (**)	The result is a trade-off: it's possible to improve it by
-//	10% (or by 1 cycle per round), but at the cost of 20% loss
-//	on Cortex-A53 (or by 4 cycles per round).
-// (***)	Super-impressive coefficients over gcc-generated code are
-//	indication of some compiler "pathology", most notably code
-//	generated with -mgeneral-regs-only is significanty faster
-//	and the gap is only 40-90%.
-//
-// October 2016.
-//
-// Originally it was reckoned that it makes no sense to implement NEON
-// version of SHA256 for 64-bit processors. This is because performance
-// improvement on most wide-spread Cortex-A5x processors was observed
-// to be marginal, same on Cortex-A53 and ~10% on A57. But then it was
-// observed that 32-bit NEON SHA256 performs significantly better than
-// 64-bit scalar version on *some* of the more recent processors. As
-// result 64-bit NEON version of SHA256 was added to provide best
-// all-round performance. For example it executes ~30% faster on X-Gene
-// and Mongoose. [For reference, NEON version of SHA512 is bound to
-// deliver much less improvement, likely *negative* on Cortex-A5x.
-// Which is why NEON support is limited to SHA256.]
-
-#ifndef	__KERNEL__
-# include "arm_arch.h"
-#endif
-
-.text
-
-.extern	OPENSSL_armcap_P
-.globl	sha256_block_data_order
-.type	sha256_block_data_order,%function
-.align	6
-sha256_block_data_order:
-#ifndef	__KERNEL__
-# ifdef	__ILP32__
-	ldrsw	x16,.LOPENSSL_armcap_P
-# else
-	ldr	x16,.LOPENSSL_armcap_P
-# endif
-	adr	x17,.LOPENSSL_armcap_P
-	add	x16,x16,x17
-	ldr	w16,[x16]
-	tst	w16,#ARMV8_SHA256
-	b.ne	.Lv8_entry
-	tst	w16,#ARMV7_NEON
-	b.ne	.Lneon_entry
-#endif
-	stp	x29,x30,[sp,#-128]!
- add x29,sp,#0 - - stp x19,x20,[sp,#16] - stp x21,x22,[sp,#32] - stp x23,x24,[sp,#48] - stp x25,x26,[sp,#64] - stp x27,x28,[sp,#80] - sub sp,sp,#4*4 - - ldp w20,w21,[x0] // load context - ldp w22,w23,[x0,#2*4] - ldp w24,w25,[x0,#4*4] - add x2,x1,x2,lsl#6 // end of input - ldp w26,w27,[x0,#6*4] - adr x30,.LK256 - stp x0,x2,[x29,#96] - -.Loop: - ldp w3,w4,[x1],#2*4 - ldr w19,[x30],#4 // *K++ - eor w28,w21,w22 // magic seed - str x1,[x29,#112] -#ifndef __AARCH64EB__ - rev w3,w3 // 0 -#endif - ror w16,w24,#6 - add w27,w27,w19 // h+=K[i] - eor w6,w24,w24,ror#14 - and w17,w25,w24 - bic w19,w26,w24 - add w27,w27,w3 // h+=X[i] - orr w17,w17,w19 // Ch(e,f,g) - eor w19,w20,w21 // a^b, b^c in next round - eor w16,w16,w6,ror#11 // Sigma1(e) - ror w6,w20,#2 - add w27,w27,w17 // h+=Ch(e,f,g) - eor w17,w20,w20,ror#9 - add w27,w27,w16 // h+=Sigma1(e) - and w28,w28,w19 // (b^c)&=(a^b) - add w23,w23,w27 // d+=h - eor w28,w28,w21 // Maj(a,b,c) - eor w17,w6,w17,ror#13 // Sigma0(a) - add w27,w27,w28 // h+=Maj(a,b,c) - ldr w28,[x30],#4 // *K++, w19 in next round - //add w27,w27,w17 // h+=Sigma0(a) -#ifndef __AARCH64EB__ - rev w4,w4 // 1 -#endif - ldp w5,w6,[x1],#2*4 - add w27,w27,w17 // h+=Sigma0(a) - ror w16,w23,#6 - add w26,w26,w28 // h+=K[i] - eor w7,w23,w23,ror#14 - and w17,w24,w23 - bic w28,w25,w23 - add w26,w26,w4 // h+=X[i] - orr w17,w17,w28 // Ch(e,f,g) - eor w28,w27,w20 // a^b, b^c in next round - eor w16,w16,w7,ror#11 // Sigma1(e) - ror w7,w27,#2 - add w26,w26,w17 // h+=Ch(e,f,g) - eor w17,w27,w27,ror#9 - add w26,w26,w16 // h+=Sigma1(e) - and w19,w19,w28 // (b^c)&=(a^b) - add w22,w22,w26 // d+=h - eor w19,w19,w20 // Maj(a,b,c) - eor w17,w7,w17,ror#13 // Sigma0(a) - add w26,w26,w19 // h+=Maj(a,b,c) - ldr w19,[x30],#4 // *K++, w28 in next round - //add w26,w26,w17 // h+=Sigma0(a) -#ifndef __AARCH64EB__ - rev w5,w5 // 2 -#endif - add w26,w26,w17 // h+=Sigma0(a) - ror w16,w22,#6 - add w25,w25,w19 // h+=K[i] - eor w8,w22,w22,ror#14 - and w17,w23,w22 - bic w19,w24,w22 - add w25,w25,w5 // h+=X[i] - orr w17,w17,w19 // Ch(e,f,g) - eor w19,w26,w27 // a^b, b^c in next round - eor w16,w16,w8,ror#11 // Sigma1(e) - ror w8,w26,#2 - add w25,w25,w17 // h+=Ch(e,f,g) - eor w17,w26,w26,ror#9 - add w25,w25,w16 // h+=Sigma1(e) - and w28,w28,w19 // (b^c)&=(a^b) - add w21,w21,w25 // d+=h - eor w28,w28,w27 // Maj(a,b,c) - eor w17,w8,w17,ror#13 // Sigma0(a) - add w25,w25,w28 // h+=Maj(a,b,c) - ldr w28,[x30],#4 // *K++, w19 in next round - //add w25,w25,w17 // h+=Sigma0(a) -#ifndef __AARCH64EB__ - rev w6,w6 // 3 -#endif - ldp w7,w8,[x1],#2*4 - add w25,w25,w17 // h+=Sigma0(a) - ror w16,w21,#6 - add w24,w24,w28 // h+=K[i] - eor w9,w21,w21,ror#14 - and w17,w22,w21 - bic w28,w23,w21 - add w24,w24,w6 // h+=X[i] - orr w17,w17,w28 // Ch(e,f,g) - eor w28,w25,w26 // a^b, b^c in next round - eor w16,w16,w9,ror#11 // Sigma1(e) - ror w9,w25,#2 - add w24,w24,w17 // h+=Ch(e,f,g) - eor w17,w25,w25,ror#9 - add w24,w24,w16 // h+=Sigma1(e) - and w19,w19,w28 // (b^c)&=(a^b) - add w20,w20,w24 // d+=h - eor w19,w19,w26 // Maj(a,b,c) - eor w17,w9,w17,ror#13 // Sigma0(a) - add w24,w24,w19 // h+=Maj(a,b,c) - ldr w19,[x30],#4 // *K++, w28 in next round - //add w24,w24,w17 // h+=Sigma0(a) -#ifndef __AARCH64EB__ - rev w7,w7 // 4 -#endif - add w24,w24,w17 // h+=Sigma0(a) - ror w16,w20,#6 - add w23,w23,w19 // h+=K[i] - eor w10,w20,w20,ror#14 - and w17,w21,w20 - bic w19,w22,w20 - add w23,w23,w7 // h+=X[i] - orr w17,w17,w19 // Ch(e,f,g) - eor w19,w24,w25 // a^b, b^c in next round - eor w16,w16,w10,ror#11 // Sigma1(e) - ror w10,w24,#2 - add w23,w23,w17 // 
h+=Ch(e,f,g) - eor w17,w24,w24,ror#9 - add w23,w23,w16 // h+=Sigma1(e) - and w28,w28,w19 // (b^c)&=(a^b) - add w27,w27,w23 // d+=h - eor w28,w28,w25 // Maj(a,b,c) - eor w17,w10,w17,ror#13 // Sigma0(a) - add w23,w23,w28 // h+=Maj(a,b,c) - ldr w28,[x30],#4 // *K++, w19 in next round - //add w23,w23,w17 // h+=Sigma0(a) -#ifndef __AARCH64EB__ - rev w8,w8 // 5 -#endif - ldp w9,w10,[x1],#2*4 - add w23,w23,w17 // h+=Sigma0(a) - ror w16,w27,#6 - add w22,w22,w28 // h+=K[i] - eor w11,w27,w27,ror#14 - and w17,w20,w27 - bic w28,w21,w27 - add w22,w22,w8 // h+=X[i] - orr w17,w17,w28 // Ch(e,f,g) - eor w28,w23,w24 // a^b, b^c in next round - eor w16,w16,w11,ror#11 // Sigma1(e) - ror w11,w23,#2 - add w22,w22,w17 // h+=Ch(e,f,g) - eor w17,w23,w23,ror#9 - add w22,w22,w16 // h+=Sigma1(e) - and w19,w19,w28 // (b^c)&=(a^b) - add w26,w26,w22 // d+=h - eor w19,w19,w24 // Maj(a,b,c) - eor w17,w11,w17,ror#13 // Sigma0(a) - add w22,w22,w19 // h+=Maj(a,b,c) - ldr w19,[x30],#4 // *K++, w28 in next round - //add w22,w22,w17 // h+=Sigma0(a) -#ifndef __AARCH64EB__ - rev w9,w9 // 6 -#endif - add w22,w22,w17 // h+=Sigma0(a) - ror w16,w26,#6 - add w21,w21,w19 // h+=K[i] - eor w12,w26,w26,ror#14 - and w17,w27,w26 - bic w19,w20,w26 - add w21,w21,w9 // h+=X[i] - orr w17,w17,w19 // Ch(e,f,g) - eor w19,w22,w23 // a^b, b^c in next round - eor w16,w16,w12,ror#11 // Sigma1(e) - ror w12,w22,#2 - add w21,w21,w17 // h+=Ch(e,f,g) - eor w17,w22,w22,ror#9 - add w21,w21,w16 // h+=Sigma1(e) - and w28,w28,w19 // (b^c)&=(a^b) - add w25,w25,w21 // d+=h - eor w28,w28,w23 // Maj(a,b,c) - eor w17,w12,w17,ror#13 // Sigma0(a) - add w21,w21,w28 // h+=Maj(a,b,c) - ldr w28,[x30],#4 // *K++, w19 in next round - //add w21,w21,w17 // h+=Sigma0(a) -#ifndef __AARCH64EB__ - rev w10,w10 // 7 -#endif - ldp w11,w12,[x1],#2*4 - add w21,w21,w17 // h+=Sigma0(a) - ror w16,w25,#6 - add w20,w20,w28 // h+=K[i] - eor w13,w25,w25,ror#14 - and w17,w26,w25 - bic w28,w27,w25 - add w20,w20,w10 // h+=X[i] - orr w17,w17,w28 // Ch(e,f,g) - eor w28,w21,w22 // a^b, b^c in next round - eor w16,w16,w13,ror#11 // Sigma1(e) - ror w13,w21,#2 - add w20,w20,w17 // h+=Ch(e,f,g) - eor w17,w21,w21,ror#9 - add w20,w20,w16 // h+=Sigma1(e) - and w19,w19,w28 // (b^c)&=(a^b) - add w24,w24,w20 // d+=h - eor w19,w19,w22 // Maj(a,b,c) - eor w17,w13,w17,ror#13 // Sigma0(a) - add w20,w20,w19 // h+=Maj(a,b,c) - ldr w19,[x30],#4 // *K++, w28 in next round - //add w20,w20,w17 // h+=Sigma0(a) -#ifndef __AARCH64EB__ - rev w11,w11 // 8 -#endif - add w20,w20,w17 // h+=Sigma0(a) - ror w16,w24,#6 - add w27,w27,w19 // h+=K[i] - eor w14,w24,w24,ror#14 - and w17,w25,w24 - bic w19,w26,w24 - add w27,w27,w11 // h+=X[i] - orr w17,w17,w19 // Ch(e,f,g) - eor w19,w20,w21 // a^b, b^c in next round - eor w16,w16,w14,ror#11 // Sigma1(e) - ror w14,w20,#2 - add w27,w27,w17 // h+=Ch(e,f,g) - eor w17,w20,w20,ror#9 - add w27,w27,w16 // h+=Sigma1(e) - and w28,w28,w19 // (b^c)&=(a^b) - add w23,w23,w27 // d+=h - eor w28,w28,w21 // Maj(a,b,c) - eor w17,w14,w17,ror#13 // Sigma0(a) - add w27,w27,w28 // h+=Maj(a,b,c) - ldr w28,[x30],#4 // *K++, w19 in next round - //add w27,w27,w17 // h+=Sigma0(a) -#ifndef __AARCH64EB__ - rev w12,w12 // 9 -#endif - ldp w13,w14,[x1],#2*4 - add w27,w27,w17 // h+=Sigma0(a) - ror w16,w23,#6 - add w26,w26,w28 // h+=K[i] - eor w15,w23,w23,ror#14 - and w17,w24,w23 - bic w28,w25,w23 - add w26,w26,w12 // h+=X[i] - orr w17,w17,w28 // Ch(e,f,g) - eor w28,w27,w20 // a^b, b^c in next round - eor w16,w16,w15,ror#11 // Sigma1(e) - ror w15,w27,#2 - add w26,w26,w17 // h+=Ch(e,f,g) - eor w17,w27,w27,ror#9 - add 
w26,w26,w16 // h+=Sigma1(e) - and w19,w19,w28 // (b^c)&=(a^b) - add w22,w22,w26 // d+=h - eor w19,w19,w20 // Maj(a,b,c) - eor w17,w15,w17,ror#13 // Sigma0(a) - add w26,w26,w19 // h+=Maj(a,b,c) - ldr w19,[x30],#4 // *K++, w28 in next round - //add w26,w26,w17 // h+=Sigma0(a) -#ifndef __AARCH64EB__ - rev w13,w13 // 10 -#endif - add w26,w26,w17 // h+=Sigma0(a) - ror w16,w22,#6 - add w25,w25,w19 // h+=K[i] - eor w0,w22,w22,ror#14 - and w17,w23,w22 - bic w19,w24,w22 - add w25,w25,w13 // h+=X[i] - orr w17,w17,w19 // Ch(e,f,g) - eor w19,w26,w27 // a^b, b^c in next round - eor w16,w16,w0,ror#11 // Sigma1(e) - ror w0,w26,#2 - add w25,w25,w17 // h+=Ch(e,f,g) - eor w17,w26,w26,ror#9 - add w25,w25,w16 // h+=Sigma1(e) - and w28,w28,w19 // (b^c)&=(a^b) - add w21,w21,w25 // d+=h - eor w28,w28,w27 // Maj(a,b,c) - eor w17,w0,w17,ror#13 // Sigma0(a) - add w25,w25,w28 // h+=Maj(a,b,c) - ldr w28,[x30],#4 // *K++, w19 in next round - //add w25,w25,w17 // h+=Sigma0(a) -#ifndef __AARCH64EB__ - rev w14,w14 // 11 -#endif - ldp w15,w0,[x1],#2*4 - add w25,w25,w17 // h+=Sigma0(a) - str w6,[sp,#12] - ror w16,w21,#6 - add w24,w24,w28 // h+=K[i] - eor w6,w21,w21,ror#14 - and w17,w22,w21 - bic w28,w23,w21 - add w24,w24,w14 // h+=X[i] - orr w17,w17,w28 // Ch(e,f,g) - eor w28,w25,w26 // a^b, b^c in next round - eor w16,w16,w6,ror#11 // Sigma1(e) - ror w6,w25,#2 - add w24,w24,w17 // h+=Ch(e,f,g) - eor w17,w25,w25,ror#9 - add w24,w24,w16 // h+=Sigma1(e) - and w19,w19,w28 // (b^c)&=(a^b) - add w20,w20,w24 // d+=h - eor w19,w19,w26 // Maj(a,b,c) - eor w17,w6,w17,ror#13 // Sigma0(a) - add w24,w24,w19 // h+=Maj(a,b,c) - ldr w19,[x30],#4 // *K++, w28 in next round - //add w24,w24,w17 // h+=Sigma0(a) -#ifndef __AARCH64EB__ - rev w15,w15 // 12 -#endif - add w24,w24,w17 // h+=Sigma0(a) - str w7,[sp,#0] - ror w16,w20,#6 - add w23,w23,w19 // h+=K[i] - eor w7,w20,w20,ror#14 - and w17,w21,w20 - bic w19,w22,w20 - add w23,w23,w15 // h+=X[i] - orr w17,w17,w19 // Ch(e,f,g) - eor w19,w24,w25 // a^b, b^c in next round - eor w16,w16,w7,ror#11 // Sigma1(e) - ror w7,w24,#2 - add w23,w23,w17 // h+=Ch(e,f,g) - eor w17,w24,w24,ror#9 - add w23,w23,w16 // h+=Sigma1(e) - and w28,w28,w19 // (b^c)&=(a^b) - add w27,w27,w23 // d+=h - eor w28,w28,w25 // Maj(a,b,c) - eor w17,w7,w17,ror#13 // Sigma0(a) - add w23,w23,w28 // h+=Maj(a,b,c) - ldr w28,[x30],#4 // *K++, w19 in next round - //add w23,w23,w17 // h+=Sigma0(a) -#ifndef __AARCH64EB__ - rev w0,w0 // 13 -#endif - ldp w1,w2,[x1] - add w23,w23,w17 // h+=Sigma0(a) - str w8,[sp,#4] - ror w16,w27,#6 - add w22,w22,w28 // h+=K[i] - eor w8,w27,w27,ror#14 - and w17,w20,w27 - bic w28,w21,w27 - add w22,w22,w0 // h+=X[i] - orr w17,w17,w28 // Ch(e,f,g) - eor w28,w23,w24 // a^b, b^c in next round - eor w16,w16,w8,ror#11 // Sigma1(e) - ror w8,w23,#2 - add w22,w22,w17 // h+=Ch(e,f,g) - eor w17,w23,w23,ror#9 - add w22,w22,w16 // h+=Sigma1(e) - and w19,w19,w28 // (b^c)&=(a^b) - add w26,w26,w22 // d+=h - eor w19,w19,w24 // Maj(a,b,c) - eor w17,w8,w17,ror#13 // Sigma0(a) - add w22,w22,w19 // h+=Maj(a,b,c) - ldr w19,[x30],#4 // *K++, w28 in next round - //add w22,w22,w17 // h+=Sigma0(a) -#ifndef __AARCH64EB__ - rev w1,w1 // 14 -#endif - ldr w6,[sp,#12] - add w22,w22,w17 // h+=Sigma0(a) - str w9,[sp,#8] - ror w16,w26,#6 - add w21,w21,w19 // h+=K[i] - eor w9,w26,w26,ror#14 - and w17,w27,w26 - bic w19,w20,w26 - add w21,w21,w1 // h+=X[i] - orr w17,w17,w19 // Ch(e,f,g) - eor w19,w22,w23 // a^b, b^c in next round - eor w16,w16,w9,ror#11 // Sigma1(e) - ror w9,w22,#2 - add w21,w21,w17 // h+=Ch(e,f,g) - eor w17,w22,w22,ror#9 - add 
w21,w21,w16 // h+=Sigma1(e) - and w28,w28,w19 // (b^c)&=(a^b) - add w25,w25,w21 // d+=h - eor w28,w28,w23 // Maj(a,b,c) - eor w17,w9,w17,ror#13 // Sigma0(a) - add w21,w21,w28 // h+=Maj(a,b,c) - ldr w28,[x30],#4 // *K++, w19 in next round - //add w21,w21,w17 // h+=Sigma0(a) -#ifndef __AARCH64EB__ - rev w2,w2 // 15 -#endif - ldr w7,[sp,#0] - add w21,w21,w17 // h+=Sigma0(a) - str w10,[sp,#12] - ror w16,w25,#6 - add w20,w20,w28 // h+=K[i] - ror w9,w4,#7 - and w17,w26,w25 - ror w8,w1,#17 - bic w28,w27,w25 - ror w10,w21,#2 - add w20,w20,w2 // h+=X[i] - eor w16,w16,w25,ror#11 - eor w9,w9,w4,ror#18 - orr w17,w17,w28 // Ch(e,f,g) - eor w28,w21,w22 // a^b, b^c in next round - eor w16,w16,w25,ror#25 // Sigma1(e) - eor w10,w10,w21,ror#13 - add w20,w20,w17 // h+=Ch(e,f,g) - and w19,w19,w28 // (b^c)&=(a^b) - eor w8,w8,w1,ror#19 - eor w9,w9,w4,lsr#3 // sigma0(X[i+1]) - add w20,w20,w16 // h+=Sigma1(e) - eor w19,w19,w22 // Maj(a,b,c) - eor w17,w10,w21,ror#22 // Sigma0(a) - eor w8,w8,w1,lsr#10 // sigma1(X[i+14]) - add w3,w3,w12 - add w24,w24,w20 // d+=h - add w20,w20,w19 // h+=Maj(a,b,c) - ldr w19,[x30],#4 // *K++, w28 in next round - add w3,w3,w9 - add w20,w20,w17 // h+=Sigma0(a) - add w3,w3,w8 -.Loop_16_xx: - ldr w8,[sp,#4] - str w11,[sp,#0] - ror w16,w24,#6 - add w27,w27,w19 // h+=K[i] - ror w10,w5,#7 - and w17,w25,w24 - ror w9,w2,#17 - bic w19,w26,w24 - ror w11,w20,#2 - add w27,w27,w3 // h+=X[i] - eor w16,w16,w24,ror#11 - eor w10,w10,w5,ror#18 - orr w17,w17,w19 // Ch(e,f,g) - eor w19,w20,w21 // a^b, b^c in next round - eor w16,w16,w24,ror#25 // Sigma1(e) - eor w11,w11,w20,ror#13 - add w27,w27,w17 // h+=Ch(e,f,g) - and w28,w28,w19 // (b^c)&=(a^b) - eor w9,w9,w2,ror#19 - eor w10,w10,w5,lsr#3 // sigma0(X[i+1]) - add w27,w27,w16 // h+=Sigma1(e) - eor w28,w28,w21 // Maj(a,b,c) - eor w17,w11,w20,ror#22 // Sigma0(a) - eor w9,w9,w2,lsr#10 // sigma1(X[i+14]) - add w4,w4,w13 - add w23,w23,w27 // d+=h - add w27,w27,w28 // h+=Maj(a,b,c) - ldr w28,[x30],#4 // *K++, w19 in next round - add w4,w4,w10 - add w27,w27,w17 // h+=Sigma0(a) - add w4,w4,w9 - ldr w9,[sp,#8] - str w12,[sp,#4] - ror w16,w23,#6 - add w26,w26,w28 // h+=K[i] - ror w11,w6,#7 - and w17,w24,w23 - ror w10,w3,#17 - bic w28,w25,w23 - ror w12,w27,#2 - add w26,w26,w4 // h+=X[i] - eor w16,w16,w23,ror#11 - eor w11,w11,w6,ror#18 - orr w17,w17,w28 // Ch(e,f,g) - eor w28,w27,w20 // a^b, b^c in next round - eor w16,w16,w23,ror#25 // Sigma1(e) - eor w12,w12,w27,ror#13 - add w26,w26,w17 // h+=Ch(e,f,g) - and w19,w19,w28 // (b^c)&=(a^b) - eor w10,w10,w3,ror#19 - eor w11,w11,w6,lsr#3 // sigma0(X[i+1]) - add w26,w26,w16 // h+=Sigma1(e) - eor w19,w19,w20 // Maj(a,b,c) - eor w17,w12,w27,ror#22 // Sigma0(a) - eor w10,w10,w3,lsr#10 // sigma1(X[i+14]) - add w5,w5,w14 - add w22,w22,w26 // d+=h - add w26,w26,w19 // h+=Maj(a,b,c) - ldr w19,[x30],#4 // *K++, w28 in next round - add w5,w5,w11 - add w26,w26,w17 // h+=Sigma0(a) - add w5,w5,w10 - ldr w10,[sp,#12] - str w13,[sp,#8] - ror w16,w22,#6 - add w25,w25,w19 // h+=K[i] - ror w12,w7,#7 - and w17,w23,w22 - ror w11,w4,#17 - bic w19,w24,w22 - ror w13,w26,#2 - add w25,w25,w5 // h+=X[i] - eor w16,w16,w22,ror#11 - eor w12,w12,w7,ror#18 - orr w17,w17,w19 // Ch(e,f,g) - eor w19,w26,w27 // a^b, b^c in next round - eor w16,w16,w22,ror#25 // Sigma1(e) - eor w13,w13,w26,ror#13 - add w25,w25,w17 // h+=Ch(e,f,g) - and w28,w28,w19 // (b^c)&=(a^b) - eor w11,w11,w4,ror#19 - eor w12,w12,w7,lsr#3 // sigma0(X[i+1]) - add w25,w25,w16 // h+=Sigma1(e) - eor w28,w28,w27 // Maj(a,b,c) - eor w17,w13,w26,ror#22 // Sigma0(a) - eor w11,w11,w4,lsr#10 // 
sigma1(X[i+14]) - add w6,w6,w15 - add w21,w21,w25 // d+=h - add w25,w25,w28 // h+=Maj(a,b,c) - ldr w28,[x30],#4 // *K++, w19 in next round - add w6,w6,w12 - add w25,w25,w17 // h+=Sigma0(a) - add w6,w6,w11 - ldr w11,[sp,#0] - str w14,[sp,#12] - ror w16,w21,#6 - add w24,w24,w28 // h+=K[i] - ror w13,w8,#7 - and w17,w22,w21 - ror w12,w5,#17 - bic w28,w23,w21 - ror w14,w25,#2 - add w24,w24,w6 // h+=X[i] - eor w16,w16,w21,ror#11 - eor w13,w13,w8,ror#18 - orr w17,w17,w28 // Ch(e,f,g) - eor w28,w25,w26 // a^b, b^c in next round - eor w16,w16,w21,ror#25 // Sigma1(e) - eor w14,w14,w25,ror#13 - add w24,w24,w17 // h+=Ch(e,f,g) - and w19,w19,w28 // (b^c)&=(a^b) - eor w12,w12,w5,ror#19 - eor w13,w13,w8,lsr#3 // sigma0(X[i+1]) - add w24,w24,w16 // h+=Sigma1(e) - eor w19,w19,w26 // Maj(a,b,c) - eor w17,w14,w25,ror#22 // Sigma0(a) - eor w12,w12,w5,lsr#10 // sigma1(X[i+14]) - add w7,w7,w0 - add w20,w20,w24 // d+=h - add w24,w24,w19 // h+=Maj(a,b,c) - ldr w19,[x30],#4 // *K++, w28 in next round - add w7,w7,w13 - add w24,w24,w17 // h+=Sigma0(a) - add w7,w7,w12 - ldr w12,[sp,#4] - str w15,[sp,#0] - ror w16,w20,#6 - add w23,w23,w19 // h+=K[i] - ror w14,w9,#7 - and w17,w21,w20 - ror w13,w6,#17 - bic w19,w22,w20 - ror w15,w24,#2 - add w23,w23,w7 // h+=X[i] - eor w16,w16,w20,ror#11 - eor w14,w14,w9,ror#18 - orr w17,w17,w19 // Ch(e,f,g) - eor w19,w24,w25 // a^b, b^c in next round - eor w16,w16,w20,ror#25 // Sigma1(e) - eor w15,w15,w24,ror#13 - add w23,w23,w17 // h+=Ch(e,f,g) - and w28,w28,w19 // (b^c)&=(a^b) - eor w13,w13,w6,ror#19 - eor w14,w14,w9,lsr#3 // sigma0(X[i+1]) - add w23,w23,w16 // h+=Sigma1(e) - eor w28,w28,w25 // Maj(a,b,c) - eor w17,w15,w24,ror#22 // Sigma0(a) - eor w13,w13,w6,lsr#10 // sigma1(X[i+14]) - add w8,w8,w1 - add w27,w27,w23 // d+=h - add w23,w23,w28 // h+=Maj(a,b,c) - ldr w28,[x30],#4 // *K++, w19 in next round - add w8,w8,w14 - add w23,w23,w17 // h+=Sigma0(a) - add w8,w8,w13 - ldr w13,[sp,#8] - str w0,[sp,#4] - ror w16,w27,#6 - add w22,w22,w28 // h+=K[i] - ror w15,w10,#7 - and w17,w20,w27 - ror w14,w7,#17 - bic w28,w21,w27 - ror w0,w23,#2 - add w22,w22,w8 // h+=X[i] - eor w16,w16,w27,ror#11 - eor w15,w15,w10,ror#18 - orr w17,w17,w28 // Ch(e,f,g) - eor w28,w23,w24 // a^b, b^c in next round - eor w16,w16,w27,ror#25 // Sigma1(e) - eor w0,w0,w23,ror#13 - add w22,w22,w17 // h+=Ch(e,f,g) - and w19,w19,w28 // (b^c)&=(a^b) - eor w14,w14,w7,ror#19 - eor w15,w15,w10,lsr#3 // sigma0(X[i+1]) - add w22,w22,w16 // h+=Sigma1(e) - eor w19,w19,w24 // Maj(a,b,c) - eor w17,w0,w23,ror#22 // Sigma0(a) - eor w14,w14,w7,lsr#10 // sigma1(X[i+14]) - add w9,w9,w2 - add w26,w26,w22 // d+=h - add w22,w22,w19 // h+=Maj(a,b,c) - ldr w19,[x30],#4 // *K++, w28 in next round - add w9,w9,w15 - add w22,w22,w17 // h+=Sigma0(a) - add w9,w9,w14 - ldr w14,[sp,#12] - str w1,[sp,#8] - ror w16,w26,#6 - add w21,w21,w19 // h+=K[i] - ror w0,w11,#7 - and w17,w27,w26 - ror w15,w8,#17 - bic w19,w20,w26 - ror w1,w22,#2 - add w21,w21,w9 // h+=X[i] - eor w16,w16,w26,ror#11 - eor w0,w0,w11,ror#18 - orr w17,w17,w19 // Ch(e,f,g) - eor w19,w22,w23 // a^b, b^c in next round - eor w16,w16,w26,ror#25 // Sigma1(e) - eor w1,w1,w22,ror#13 - add w21,w21,w17 // h+=Ch(e,f,g) - and w28,w28,w19 // (b^c)&=(a^b) - eor w15,w15,w8,ror#19 - eor w0,w0,w11,lsr#3 // sigma0(X[i+1]) - add w21,w21,w16 // h+=Sigma1(e) - eor w28,w28,w23 // Maj(a,b,c) - eor w17,w1,w22,ror#22 // Sigma0(a) - eor w15,w15,w8,lsr#10 // sigma1(X[i+14]) - add w10,w10,w3 - add w25,w25,w21 // d+=h - add w21,w21,w28 // h+=Maj(a,b,c) - ldr w28,[x30],#4 // *K++, w19 in next round - add w10,w10,w0 
- add w21,w21,w17 // h+=Sigma0(a) - add w10,w10,w15 - ldr w15,[sp,#0] - str w2,[sp,#12] - ror w16,w25,#6 - add w20,w20,w28 // h+=K[i] - ror w1,w12,#7 - and w17,w26,w25 - ror w0,w9,#17 - bic w28,w27,w25 - ror w2,w21,#2 - add w20,w20,w10 // h+=X[i] - eor w16,w16,w25,ror#11 - eor w1,w1,w12,ror#18 - orr w17,w17,w28 // Ch(e,f,g) - eor w28,w21,w22 // a^b, b^c in next round - eor w16,w16,w25,ror#25 // Sigma1(e) - eor w2,w2,w21,ror#13 - add w20,w20,w17 // h+=Ch(e,f,g) - and w19,w19,w28 // (b^c)&=(a^b) - eor w0,w0,w9,ror#19 - eor w1,w1,w12,lsr#3 // sigma0(X[i+1]) - add w20,w20,w16 // h+=Sigma1(e) - eor w19,w19,w22 // Maj(a,b,c) - eor w17,w2,w21,ror#22 // Sigma0(a) - eor w0,w0,w9,lsr#10 // sigma1(X[i+14]) - add w11,w11,w4 - add w24,w24,w20 // d+=h - add w20,w20,w19 // h+=Maj(a,b,c) - ldr w19,[x30],#4 // *K++, w28 in next round - add w11,w11,w1 - add w20,w20,w17 // h+=Sigma0(a) - add w11,w11,w0 - ldr w0,[sp,#4] - str w3,[sp,#0] - ror w16,w24,#6 - add w27,w27,w19 // h+=K[i] - ror w2,w13,#7 - and w17,w25,w24 - ror w1,w10,#17 - bic w19,w26,w24 - ror w3,w20,#2 - add w27,w27,w11 // h+=X[i] - eor w16,w16,w24,ror#11 - eor w2,w2,w13,ror#18 - orr w17,w17,w19 // Ch(e,f,g) - eor w19,w20,w21 // a^b, b^c in next round - eor w16,w16,w24,ror#25 // Sigma1(e) - eor w3,w3,w20,ror#13 - add w27,w27,w17 // h+=Ch(e,f,g) - and w28,w28,w19 // (b^c)&=(a^b) - eor w1,w1,w10,ror#19 - eor w2,w2,w13,lsr#3 // sigma0(X[i+1]) - add w27,w27,w16 // h+=Sigma1(e) - eor w28,w28,w21 // Maj(a,b,c) - eor w17,w3,w20,ror#22 // Sigma0(a) - eor w1,w1,w10,lsr#10 // sigma1(X[i+14]) - add w12,w12,w5 - add w23,w23,w27 // d+=h - add w27,w27,w28 // h+=Maj(a,b,c) - ldr w28,[x30],#4 // *K++, w19 in next round - add w12,w12,w2 - add w27,w27,w17 // h+=Sigma0(a) - add w12,w12,w1 - ldr w1,[sp,#8] - str w4,[sp,#4] - ror w16,w23,#6 - add w26,w26,w28 // h+=K[i] - ror w3,w14,#7 - and w17,w24,w23 - ror w2,w11,#17 - bic w28,w25,w23 - ror w4,w27,#2 - add w26,w26,w12 // h+=X[i] - eor w16,w16,w23,ror#11 - eor w3,w3,w14,ror#18 - orr w17,w17,w28 // Ch(e,f,g) - eor w28,w27,w20 // a^b, b^c in next round - eor w16,w16,w23,ror#25 // Sigma1(e) - eor w4,w4,w27,ror#13 - add w26,w26,w17 // h+=Ch(e,f,g) - and w19,w19,w28 // (b^c)&=(a^b) - eor w2,w2,w11,ror#19 - eor w3,w3,w14,lsr#3 // sigma0(X[i+1]) - add w26,w26,w16 // h+=Sigma1(e) - eor w19,w19,w20 // Maj(a,b,c) - eor w17,w4,w27,ror#22 // Sigma0(a) - eor w2,w2,w11,lsr#10 // sigma1(X[i+14]) - add w13,w13,w6 - add w22,w22,w26 // d+=h - add w26,w26,w19 // h+=Maj(a,b,c) - ldr w19,[x30],#4 // *K++, w28 in next round - add w13,w13,w3 - add w26,w26,w17 // h+=Sigma0(a) - add w13,w13,w2 - ldr w2,[sp,#12] - str w5,[sp,#8] - ror w16,w22,#6 - add w25,w25,w19 // h+=K[i] - ror w4,w15,#7 - and w17,w23,w22 - ror w3,w12,#17 - bic w19,w24,w22 - ror w5,w26,#2 - add w25,w25,w13 // h+=X[i] - eor w16,w16,w22,ror#11 - eor w4,w4,w15,ror#18 - orr w17,w17,w19 // Ch(e,f,g) - eor w19,w26,w27 // a^b, b^c in next round - eor w16,w16,w22,ror#25 // Sigma1(e) - eor w5,w5,w26,ror#13 - add w25,w25,w17 // h+=Ch(e,f,g) - and w28,w28,w19 // (b^c)&=(a^b) - eor w3,w3,w12,ror#19 - eor w4,w4,w15,lsr#3 // sigma0(X[i+1]) - add w25,w25,w16 // h+=Sigma1(e) - eor w28,w28,w27 // Maj(a,b,c) - eor w17,w5,w26,ror#22 // Sigma0(a) - eor w3,w3,w12,lsr#10 // sigma1(X[i+14]) - add w14,w14,w7 - add w21,w21,w25 // d+=h - add w25,w25,w28 // h+=Maj(a,b,c) - ldr w28,[x30],#4 // *K++, w19 in next round - add w14,w14,w4 - add w25,w25,w17 // h+=Sigma0(a) - add w14,w14,w3 - ldr w3,[sp,#0] - str w6,[sp,#12] - ror w16,w21,#6 - add w24,w24,w28 // h+=K[i] - ror w5,w0,#7 - and w17,w22,w21 - 
ror w4,w13,#17 - bic w28,w23,w21 - ror w6,w25,#2 - add w24,w24,w14 // h+=X[i] - eor w16,w16,w21,ror#11 - eor w5,w5,w0,ror#18 - orr w17,w17,w28 // Ch(e,f,g) - eor w28,w25,w26 // a^b, b^c in next round - eor w16,w16,w21,ror#25 // Sigma1(e) - eor w6,w6,w25,ror#13 - add w24,w24,w17 // h+=Ch(e,f,g) - and w19,w19,w28 // (b^c)&=(a^b) - eor w4,w4,w13,ror#19 - eor w5,w5,w0,lsr#3 // sigma0(X[i+1]) - add w24,w24,w16 // h+=Sigma1(e) - eor w19,w19,w26 // Maj(a,b,c) - eor w17,w6,w25,ror#22 // Sigma0(a) - eor w4,w4,w13,lsr#10 // sigma1(X[i+14]) - add w15,w15,w8 - add w20,w20,w24 // d+=h - add w24,w24,w19 // h+=Maj(a,b,c) - ldr w19,[x30],#4 // *K++, w28 in next round - add w15,w15,w5 - add w24,w24,w17 // h+=Sigma0(a) - add w15,w15,w4 - ldr w4,[sp,#4] - str w7,[sp,#0] - ror w16,w20,#6 - add w23,w23,w19 // h+=K[i] - ror w6,w1,#7 - and w17,w21,w20 - ror w5,w14,#17 - bic w19,w22,w20 - ror w7,w24,#2 - add w23,w23,w15 // h+=X[i] - eor w16,w16,w20,ror#11 - eor w6,w6,w1,ror#18 - orr w17,w17,w19 // Ch(e,f,g) - eor w19,w24,w25 // a^b, b^c in next round - eor w16,w16,w20,ror#25 // Sigma1(e) - eor w7,w7,w24,ror#13 - add w23,w23,w17 // h+=Ch(e,f,g) - and w28,w28,w19 // (b^c)&=(a^b) - eor w5,w5,w14,ror#19 - eor w6,w6,w1,lsr#3 // sigma0(X[i+1]) - add w23,w23,w16 // h+=Sigma1(e) - eor w28,w28,w25 // Maj(a,b,c) - eor w17,w7,w24,ror#22 // Sigma0(a) - eor w5,w5,w14,lsr#10 // sigma1(X[i+14]) - add w0,w0,w9 - add w27,w27,w23 // d+=h - add w23,w23,w28 // h+=Maj(a,b,c) - ldr w28,[x30],#4 // *K++, w19 in next round - add w0,w0,w6 - add w23,w23,w17 // h+=Sigma0(a) - add w0,w0,w5 - ldr w5,[sp,#8] - str w8,[sp,#4] - ror w16,w27,#6 - add w22,w22,w28 // h+=K[i] - ror w7,w2,#7 - and w17,w20,w27 - ror w6,w15,#17 - bic w28,w21,w27 - ror w8,w23,#2 - add w22,w22,w0 // h+=X[i] - eor w16,w16,w27,ror#11 - eor w7,w7,w2,ror#18 - orr w17,w17,w28 // Ch(e,f,g) - eor w28,w23,w24 // a^b, b^c in next round - eor w16,w16,w27,ror#25 // Sigma1(e) - eor w8,w8,w23,ror#13 - add w22,w22,w17 // h+=Ch(e,f,g) - and w19,w19,w28 // (b^c)&=(a^b) - eor w6,w6,w15,ror#19 - eor w7,w7,w2,lsr#3 // sigma0(X[i+1]) - add w22,w22,w16 // h+=Sigma1(e) - eor w19,w19,w24 // Maj(a,b,c) - eor w17,w8,w23,ror#22 // Sigma0(a) - eor w6,w6,w15,lsr#10 // sigma1(X[i+14]) - add w1,w1,w10 - add w26,w26,w22 // d+=h - add w22,w22,w19 // h+=Maj(a,b,c) - ldr w19,[x30],#4 // *K++, w28 in next round - add w1,w1,w7 - add w22,w22,w17 // h+=Sigma0(a) - add w1,w1,w6 - ldr w6,[sp,#12] - str w9,[sp,#8] - ror w16,w26,#6 - add w21,w21,w19 // h+=K[i] - ror w8,w3,#7 - and w17,w27,w26 - ror w7,w0,#17 - bic w19,w20,w26 - ror w9,w22,#2 - add w21,w21,w1 // h+=X[i] - eor w16,w16,w26,ror#11 - eor w8,w8,w3,ror#18 - orr w17,w17,w19 // Ch(e,f,g) - eor w19,w22,w23 // a^b, b^c in next round - eor w16,w16,w26,ror#25 // Sigma1(e) - eor w9,w9,w22,ror#13 - add w21,w21,w17 // h+=Ch(e,f,g) - and w28,w28,w19 // (b^c)&=(a^b) - eor w7,w7,w0,ror#19 - eor w8,w8,w3,lsr#3 // sigma0(X[i+1]) - add w21,w21,w16 // h+=Sigma1(e) - eor w28,w28,w23 // Maj(a,b,c) - eor w17,w9,w22,ror#22 // Sigma0(a) - eor w7,w7,w0,lsr#10 // sigma1(X[i+14]) - add w2,w2,w11 - add w25,w25,w21 // d+=h - add w21,w21,w28 // h+=Maj(a,b,c) - ldr w28,[x30],#4 // *K++, w19 in next round - add w2,w2,w8 - add w21,w21,w17 // h+=Sigma0(a) - add w2,w2,w7 - ldr w7,[sp,#0] - str w10,[sp,#12] - ror w16,w25,#6 - add w20,w20,w28 // h+=K[i] - ror w9,w4,#7 - and w17,w26,w25 - ror w8,w1,#17 - bic w28,w27,w25 - ror w10,w21,#2 - add w20,w20,w2 // h+=X[i] - eor w16,w16,w25,ror#11 - eor w9,w9,w4,ror#18 - orr w17,w17,w28 // Ch(e,f,g) - eor w28,w21,w22 // a^b, b^c in next round - 
eor w16,w16,w25,ror#25 // Sigma1(e) - eor w10,w10,w21,ror#13 - add w20,w20,w17 // h+=Ch(e,f,g) - and w19,w19,w28 // (b^c)&=(a^b) - eor w8,w8,w1,ror#19 - eor w9,w9,w4,lsr#3 // sigma0(X[i+1]) - add w20,w20,w16 // h+=Sigma1(e) - eor w19,w19,w22 // Maj(a,b,c) - eor w17,w10,w21,ror#22 // Sigma0(a) - eor w8,w8,w1,lsr#10 // sigma1(X[i+14]) - add w3,w3,w12 - add w24,w24,w20 // d+=h - add w20,w20,w19 // h+=Maj(a,b,c) - ldr w19,[x30],#4 // *K++, w28 in next round - add w3,w3,w9 - add w20,w20,w17 // h+=Sigma0(a) - add w3,w3,w8 - cbnz w19,.Loop_16_xx - - ldp x0,x2,[x29,#96] - ldr x1,[x29,#112] - sub x30,x30,#260 // rewind - - ldp w3,w4,[x0] - ldp w5,w6,[x0,#2*4] - add x1,x1,#14*4 // advance input pointer - ldp w7,w8,[x0,#4*4] - add w20,w20,w3 - ldp w9,w10,[x0,#6*4] - add w21,w21,w4 - add w22,w22,w5 - add w23,w23,w6 - stp w20,w21,[x0] - add w24,w24,w7 - add w25,w25,w8 - stp w22,w23,[x0,#2*4] - add w26,w26,w9 - add w27,w27,w10 - cmp x1,x2 - stp w24,w25,[x0,#4*4] - stp w26,w27,[x0,#6*4] - b.ne .Loop - - ldp x19,x20,[x29,#16] - add sp,sp,#4*4 - ldp x21,x22,[x29,#32] - ldp x23,x24,[x29,#48] - ldp x25,x26,[x29,#64] - ldp x27,x28,[x29,#80] - ldp x29,x30,[sp],#128 - ret -.size sha256_block_data_order,.-sha256_block_data_order - -.align 6 -.type .LK256,%object -.LK256: - .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 - .long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 - .long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 - .long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 - .long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc - .long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da - .long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 - .long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 - .long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 - .long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 - .long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 - .long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 - .long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 - .long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 - .long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 - .long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 - .long 0 //terminator -.size .LK256,.-.LK256 -#ifndef __KERNEL__ -.align 3 -.LOPENSSL_armcap_P: -# ifdef __ILP32__ - .long OPENSSL_armcap_P-. -# else - .quad OPENSSL_armcap_P-. -# endif -#endif -.asciz "SHA256 block transform for ARMv8, CRYPTOGAMS by " -.align 2 -#ifndef __KERNEL__ -.type sha256_block_armv8,%function -.align 6 -sha256_block_armv8: -.Lv8_entry: - stp x29,x30,[sp,#-16]! 
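For orientation while reading the scalar rounds being removed above: each round applies the FIPS-180 primitives named in the trailing comments, with Maj(a,b,c) computed incrementally via the "(b^c)&=(a^b)" trick (the a^b value is carried into the next round). A minimal C sketch of one round, with the rotate counts taken from the code (illustrative only; function and variable names are ad hoc, not part of this patch):

#include <stdint.h>

static inline uint32_t ror32(uint32_t x, unsigned int n)
{
	return (x >> n) | (x << (32 - n));
}

/* One SHA-256 round; rotate counts match the assembly:
 * Sigma1 = ror 6/11/25 on e, Sigma0 = ror 2/13/22 on a. */
static void sha256_round(uint32_t s[8], uint32_t Ki, uint32_t Xi)
{
	uint32_t a = s[0], b = s[1], c = s[2], d = s[3];
	uint32_t e = s[4], f = s[5], g = s[6], h = s[7];
	uint32_t ch  = (e & f) | (~e & g);	/* the and/bic/orr triple */
	uint32_t S1  = ror32(e, 6) ^ ror32(e, 11) ^ ror32(e, 25);
	uint32_t S0  = ror32(a, 2) ^ ror32(a, 13) ^ ror32(a, 22);
	uint32_t maj = ((a ^ b) & (b ^ c)) ^ b;	/* == Maj(a,b,c) */
	uint32_t t1  = h + S1 + ch + Ki + Xi;	/* h += K[i] + X[i] + Sigma1 + Ch */

	s[7] = g; s[6] = f; s[5] = e;
	s[4] = d + t1;				/* d += h */
	s[3] = c; s[2] = b; s[1] = a;
	s[0] = t1 + S0 + maj;			/* h += Sigma0(a) + Maj(a,b,c) */
}

The identity ((a^b) & (b^c)) ^ b == Maj(a,b,c) is what lets the assembly reuse the previous round's a^b instead of recomputing three ANDs.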
- add x29,sp,#0 - - ld1 {v0.4s,v1.4s},[x0] - adr x3,.LK256 - -.Loop_hw: - ld1 {v4.16b-v7.16b},[x1],#64 - sub x2,x2,#1 - ld1 {v16.4s},[x3],#16 - rev32 v4.16b,v4.16b - rev32 v5.16b,v5.16b - rev32 v6.16b,v6.16b - rev32 v7.16b,v7.16b - orr v18.16b,v0.16b,v0.16b // offload - orr v19.16b,v1.16b,v1.16b - ld1 {v17.4s},[x3],#16 - add v16.4s,v16.4s,v4.4s - .inst 0x5e2828a4 //sha256su0 v4.16b,v5.16b - orr v2.16b,v0.16b,v0.16b - .inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s - .inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s - .inst 0x5e0760c4 //sha256su1 v4.16b,v6.16b,v7.16b - ld1 {v16.4s},[x3],#16 - add v17.4s,v17.4s,v5.4s - .inst 0x5e2828c5 //sha256su0 v5.16b,v6.16b - orr v2.16b,v0.16b,v0.16b - .inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s - .inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s - .inst 0x5e0460e5 //sha256su1 v5.16b,v7.16b,v4.16b - ld1 {v17.4s},[x3],#16 - add v16.4s,v16.4s,v6.4s - .inst 0x5e2828e6 //sha256su0 v6.16b,v7.16b - orr v2.16b,v0.16b,v0.16b - .inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s - .inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s - .inst 0x5e056086 //sha256su1 v6.16b,v4.16b,v5.16b - ld1 {v16.4s},[x3],#16 - add v17.4s,v17.4s,v7.4s - .inst 0x5e282887 //sha256su0 v7.16b,v4.16b - orr v2.16b,v0.16b,v0.16b - .inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s - .inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s - .inst 0x5e0660a7 //sha256su1 v7.16b,v5.16b,v6.16b - ld1 {v17.4s},[x3],#16 - add v16.4s,v16.4s,v4.4s - .inst 0x5e2828a4 //sha256su0 v4.16b,v5.16b - orr v2.16b,v0.16b,v0.16b - .inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s - .inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s - .inst 0x5e0760c4 //sha256su1 v4.16b,v6.16b,v7.16b - ld1 {v16.4s},[x3],#16 - add v17.4s,v17.4s,v5.4s - .inst 0x5e2828c5 //sha256su0 v5.16b,v6.16b - orr v2.16b,v0.16b,v0.16b - .inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s - .inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s - .inst 0x5e0460e5 //sha256su1 v5.16b,v7.16b,v4.16b - ld1 {v17.4s},[x3],#16 - add v16.4s,v16.4s,v6.4s - .inst 0x5e2828e6 //sha256su0 v6.16b,v7.16b - orr v2.16b,v0.16b,v0.16b - .inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s - .inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s - .inst 0x5e056086 //sha256su1 v6.16b,v4.16b,v5.16b - ld1 {v16.4s},[x3],#16 - add v17.4s,v17.4s,v7.4s - .inst 0x5e282887 //sha256su0 v7.16b,v4.16b - orr v2.16b,v0.16b,v0.16b - .inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s - .inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s - .inst 0x5e0660a7 //sha256su1 v7.16b,v5.16b,v6.16b - ld1 {v17.4s},[x3],#16 - add v16.4s,v16.4s,v4.4s - .inst 0x5e2828a4 //sha256su0 v4.16b,v5.16b - orr v2.16b,v0.16b,v0.16b - .inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s - .inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s - .inst 0x5e0760c4 //sha256su1 v4.16b,v6.16b,v7.16b - ld1 {v16.4s},[x3],#16 - add v17.4s,v17.4s,v5.4s - .inst 0x5e2828c5 //sha256su0 v5.16b,v6.16b - orr v2.16b,v0.16b,v0.16b - .inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s - .inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s - .inst 0x5e0460e5 //sha256su1 v5.16b,v7.16b,v4.16b - ld1 {v17.4s},[x3],#16 - add v16.4s,v16.4s,v6.4s - .inst 0x5e2828e6 //sha256su0 v6.16b,v7.16b - orr v2.16b,v0.16b,v0.16b - .inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s - .inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s - .inst 0x5e056086 //sha256su1 v6.16b,v4.16b,v5.16b - ld1 {v16.4s},[x3],#16 - add v17.4s,v17.4s,v7.4s - .inst 0x5e282887 //sha256su0 v7.16b,v4.16b - orr v2.16b,v0.16b,v0.16b - .inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s - .inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s - .inst 
0x5e0660a7 //sha256su1 v7.16b,v5.16b,v6.16b - ld1 {v17.4s},[x3],#16 - add v16.4s,v16.4s,v4.4s - orr v2.16b,v0.16b,v0.16b - .inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s - .inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s - - ld1 {v16.4s},[x3],#16 - add v17.4s,v17.4s,v5.4s - orr v2.16b,v0.16b,v0.16b - .inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s - .inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s - - ld1 {v17.4s},[x3] - add v16.4s,v16.4s,v6.4s - sub x3,x3,#64*4-16 // rewind - orr v2.16b,v0.16b,v0.16b - .inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s - .inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s - - add v17.4s,v17.4s,v7.4s - orr v2.16b,v0.16b,v0.16b - .inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s - .inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s - - add v0.4s,v0.4s,v18.4s - add v1.4s,v1.4s,v19.4s - - cbnz x2,.Loop_hw - - st1 {v0.4s,v1.4s},[x0] - - ldr x29,[sp],#16 - ret -.size sha256_block_armv8,.-sha256_block_armv8 -#endif -#ifdef __KERNEL__ -.globl sha256_block_neon -#endif -.type sha256_block_neon,%function -.align 4 -sha256_block_neon: -.Lneon_entry: - stp x29, x30, [sp, #-16]! - mov x29, sp - sub sp,sp,#16*4 - - adr x16,.LK256 - add x2,x1,x2,lsl#6 // len to point at the end of inp - - ld1 {v0.16b},[x1], #16 - ld1 {v1.16b},[x1], #16 - ld1 {v2.16b},[x1], #16 - ld1 {v3.16b},[x1], #16 - ld1 {v4.4s},[x16], #16 - ld1 {v5.4s},[x16], #16 - ld1 {v6.4s},[x16], #16 - ld1 {v7.4s},[x16], #16 - rev32 v0.16b,v0.16b // yes, even on - rev32 v1.16b,v1.16b // big-endian - rev32 v2.16b,v2.16b - rev32 v3.16b,v3.16b - mov x17,sp - add v4.4s,v4.4s,v0.4s - add v5.4s,v5.4s,v1.4s - add v6.4s,v6.4s,v2.4s - st1 {v4.4s-v5.4s},[x17], #32 - add v7.4s,v7.4s,v3.4s - st1 {v6.4s-v7.4s},[x17] - sub x17,x17,#32 - - ldp w3,w4,[x0] - ldp w5,w6,[x0,#8] - ldp w7,w8,[x0,#16] - ldp w9,w10,[x0,#24] - ldr w12,[sp,#0] - mov w13,wzr - eor w14,w4,w5 - mov w15,wzr - b .L_00_48 - -.align 4 -.L_00_48: - ext v4.16b,v0.16b,v1.16b,#4 - add w10,w10,w12 - add w3,w3,w15 - and w12,w8,w7 - bic w15,w9,w7 - ext v7.16b,v2.16b,v3.16b,#4 - eor w11,w7,w7,ror#5 - add w3,w3,w13 - mov d19,v3.d[1] - orr w12,w12,w15 - eor w11,w11,w7,ror#19 - ushr v6.4s,v4.4s,#7 - eor w15,w3,w3,ror#11 - ushr v5.4s,v4.4s,#3 - add w10,w10,w12 - add v0.4s,v0.4s,v7.4s - ror w11,w11,#6 - sli v6.4s,v4.4s,#25 - eor w13,w3,w4 - eor w15,w15,w3,ror#20 - ushr v7.4s,v4.4s,#18 - add w10,w10,w11 - ldr w12,[sp,#4] - and w14,w14,w13 - eor v5.16b,v5.16b,v6.16b - ror w15,w15,#2 - add w6,w6,w10 - sli v7.4s,v4.4s,#14 - eor w14,w14,w4 - ushr v16.4s,v19.4s,#17 - add w9,w9,w12 - add w10,w10,w15 - and w12,w7,w6 - eor v5.16b,v5.16b,v7.16b - bic w15,w8,w6 - eor w11,w6,w6,ror#5 - sli v16.4s,v19.4s,#15 - add w10,w10,w14 - orr w12,w12,w15 - ushr v17.4s,v19.4s,#10 - eor w11,w11,w6,ror#19 - eor w15,w10,w10,ror#11 - ushr v7.4s,v19.4s,#19 - add w9,w9,w12 - ror w11,w11,#6 - add v0.4s,v0.4s,v5.4s - eor w14,w10,w3 - eor w15,w15,w10,ror#20 - sli v7.4s,v19.4s,#13 - add w9,w9,w11 - ldr w12,[sp,#8] - and w13,w13,w14 - eor v17.16b,v17.16b,v16.16b - ror w15,w15,#2 - add w5,w5,w9 - eor w13,w13,w3 - eor v17.16b,v17.16b,v7.16b - add w8,w8,w12 - add w9,w9,w15 - and w12,w6,w5 - add v0.4s,v0.4s,v17.4s - bic w15,w7,w5 - eor w11,w5,w5,ror#5 - add w9,w9,w13 - ushr v18.4s,v0.4s,#17 - orr w12,w12,w15 - ushr v19.4s,v0.4s,#10 - eor w11,w11,w5,ror#19 - eor w15,w9,w9,ror#11 - sli v18.4s,v0.4s,#15 - add w8,w8,w12 - ushr v17.4s,v0.4s,#19 - ror w11,w11,#6 - eor w13,w9,w10 - eor v19.16b,v19.16b,v18.16b - eor w15,w15,w9,ror#20 - add w8,w8,w11 - sli v17.4s,v0.4s,#13 - ldr w12,[sp,#12] - and w14,w14,w13 - ror w15,w15,#2 - 
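The .inst words in the sha256_block_armv8 path above are raw encodings of the ARMv8 Crypto Extensions instructions sha256h/sha256h2/sha256su0/sha256su1, spelled numerically so that assemblers without crypto support can still build the file. The core quad-round step corresponds to the ACLE intrinsics below (a sketch, assuming a compiler invoked with -march=armv8-a+crypto; not part of this patch):

#include <arm_neon.h>	/* assumes -march=armv8-a+crypto */

/* Four rounds of the hardware path: wk holds W[i..i+3] + K[i..i+3].
 * The tmp copy is the "orr v2.16b,v0.16b,v0.16b" offload above. */
static inline void sha256_hw_quad_round(uint32x4_t *abcd, uint32x4_t *efgh,
					uint32x4_t wk)
{
	uint32x4_t tmp = *abcd;

	*abcd = vsha256hq_u32(*abcd, *efgh, wk);	/* sha256h  */
	*efgh = vsha256h2q_u32(*efgh, tmp, wk);		/* sha256h2 */
}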
ld1 {v4.4s},[x16], #16 - add w4,w4,w8 - eor v19.16b,v19.16b,v17.16b - eor w14,w14,w10 - eor v17.16b,v17.16b,v17.16b - add w7,w7,w12 - add w8,w8,w15 - and w12,w5,w4 - mov v17.d[1],v19.d[0] - bic w15,w6,w4 - eor w11,w4,w4,ror#5 - add w8,w8,w14 - add v0.4s,v0.4s,v17.4s - orr w12,w12,w15 - eor w11,w11,w4,ror#19 - eor w15,w8,w8,ror#11 - add v4.4s,v4.4s,v0.4s - add w7,w7,w12 - ror w11,w11,#6 - eor w14,w8,w9 - eor w15,w15,w8,ror#20 - add w7,w7,w11 - ldr w12,[sp,#16] - and w13,w13,w14 - ror w15,w15,#2 - add w3,w3,w7 - eor w13,w13,w9 - st1 {v4.4s},[x17], #16 - ext v4.16b,v1.16b,v2.16b,#4 - add w6,w6,w12 - add w7,w7,w15 - and w12,w4,w3 - bic w15,w5,w3 - ext v7.16b,v3.16b,v0.16b,#4 - eor w11,w3,w3,ror#5 - add w7,w7,w13 - mov d19,v0.d[1] - orr w12,w12,w15 - eor w11,w11,w3,ror#19 - ushr v6.4s,v4.4s,#7 - eor w15,w7,w7,ror#11 - ushr v5.4s,v4.4s,#3 - add w6,w6,w12 - add v1.4s,v1.4s,v7.4s - ror w11,w11,#6 - sli v6.4s,v4.4s,#25 - eor w13,w7,w8 - eor w15,w15,w7,ror#20 - ushr v7.4s,v4.4s,#18 - add w6,w6,w11 - ldr w12,[sp,#20] - and w14,w14,w13 - eor v5.16b,v5.16b,v6.16b - ror w15,w15,#2 - add w10,w10,w6 - sli v7.4s,v4.4s,#14 - eor w14,w14,w8 - ushr v16.4s,v19.4s,#17 - add w5,w5,w12 - add w6,w6,w15 - and w12,w3,w10 - eor v5.16b,v5.16b,v7.16b - bic w15,w4,w10 - eor w11,w10,w10,ror#5 - sli v16.4s,v19.4s,#15 - add w6,w6,w14 - orr w12,w12,w15 - ushr v17.4s,v19.4s,#10 - eor w11,w11,w10,ror#19 - eor w15,w6,w6,ror#11 - ushr v7.4s,v19.4s,#19 - add w5,w5,w12 - ror w11,w11,#6 - add v1.4s,v1.4s,v5.4s - eor w14,w6,w7 - eor w15,w15,w6,ror#20 - sli v7.4s,v19.4s,#13 - add w5,w5,w11 - ldr w12,[sp,#24] - and w13,w13,w14 - eor v17.16b,v17.16b,v16.16b - ror w15,w15,#2 - add w9,w9,w5 - eor w13,w13,w7 - eor v17.16b,v17.16b,v7.16b - add w4,w4,w12 - add w5,w5,w15 - and w12,w10,w9 - add v1.4s,v1.4s,v17.4s - bic w15,w3,w9 - eor w11,w9,w9,ror#5 - add w5,w5,w13 - ushr v18.4s,v1.4s,#17 - orr w12,w12,w15 - ushr v19.4s,v1.4s,#10 - eor w11,w11,w9,ror#19 - eor w15,w5,w5,ror#11 - sli v18.4s,v1.4s,#15 - add w4,w4,w12 - ushr v17.4s,v1.4s,#19 - ror w11,w11,#6 - eor w13,w5,w6 - eor v19.16b,v19.16b,v18.16b - eor w15,w15,w5,ror#20 - add w4,w4,w11 - sli v17.4s,v1.4s,#13 - ldr w12,[sp,#28] - and w14,w14,w13 - ror w15,w15,#2 - ld1 {v4.4s},[x16], #16 - add w8,w8,w4 - eor v19.16b,v19.16b,v17.16b - eor w14,w14,w6 - eor v17.16b,v17.16b,v17.16b - add w3,w3,w12 - add w4,w4,w15 - and w12,w9,w8 - mov v17.d[1],v19.d[0] - bic w15,w10,w8 - eor w11,w8,w8,ror#5 - add w4,w4,w14 - add v1.4s,v1.4s,v17.4s - orr w12,w12,w15 - eor w11,w11,w8,ror#19 - eor w15,w4,w4,ror#11 - add v4.4s,v4.4s,v1.4s - add w3,w3,w12 - ror w11,w11,#6 - eor w14,w4,w5 - eor w15,w15,w4,ror#20 - add w3,w3,w11 - ldr w12,[sp,#32] - and w13,w13,w14 - ror w15,w15,#2 - add w7,w7,w3 - eor w13,w13,w5 - st1 {v4.4s},[x17], #16 - ext v4.16b,v2.16b,v3.16b,#4 - add w10,w10,w12 - add w3,w3,w15 - and w12,w8,w7 - bic w15,w9,w7 - ext v7.16b,v0.16b,v1.16b,#4 - eor w11,w7,w7,ror#5 - add w3,w3,w13 - mov d19,v1.d[1] - orr w12,w12,w15 - eor w11,w11,w7,ror#19 - ushr v6.4s,v4.4s,#7 - eor w15,w3,w3,ror#11 - ushr v5.4s,v4.4s,#3 - add w10,w10,w12 - add v2.4s,v2.4s,v7.4s - ror w11,w11,#6 - sli v6.4s,v4.4s,#25 - eor w13,w3,w4 - eor w15,w15,w3,ror#20 - ushr v7.4s,v4.4s,#18 - add w10,w10,w11 - ldr w12,[sp,#36] - and w14,w14,w13 - eor v5.16b,v5.16b,v6.16b - ror w15,w15,#2 - add w6,w6,w10 - sli v7.4s,v4.4s,#14 - eor w14,w14,w4 - ushr v16.4s,v19.4s,#17 - add w9,w9,w12 - add w10,w10,w15 - and w12,w7,w6 - eor v5.16b,v5.16b,v7.16b - bic w15,w8,w6 - eor w11,w6,w6,ror#5 - sli v16.4s,v19.4s,#15 - add w10,w10,w14 - orr w12,w12,w15 - 
ushr v17.4s,v19.4s,#10 - eor w11,w11,w6,ror#19 - eor w15,w10,w10,ror#11 - ushr v7.4s,v19.4s,#19 - add w9,w9,w12 - ror w11,w11,#6 - add v2.4s,v2.4s,v5.4s - eor w14,w10,w3 - eor w15,w15,w10,ror#20 - sli v7.4s,v19.4s,#13 - add w9,w9,w11 - ldr w12,[sp,#40] - and w13,w13,w14 - eor v17.16b,v17.16b,v16.16b - ror w15,w15,#2 - add w5,w5,w9 - eor w13,w13,w3 - eor v17.16b,v17.16b,v7.16b - add w8,w8,w12 - add w9,w9,w15 - and w12,w6,w5 - add v2.4s,v2.4s,v17.4s - bic w15,w7,w5 - eor w11,w5,w5,ror#5 - add w9,w9,w13 - ushr v18.4s,v2.4s,#17 - orr w12,w12,w15 - ushr v19.4s,v2.4s,#10 - eor w11,w11,w5,ror#19 - eor w15,w9,w9,ror#11 - sli v18.4s,v2.4s,#15 - add w8,w8,w12 - ushr v17.4s,v2.4s,#19 - ror w11,w11,#6 - eor w13,w9,w10 - eor v19.16b,v19.16b,v18.16b - eor w15,w15,w9,ror#20 - add w8,w8,w11 - sli v17.4s,v2.4s,#13 - ldr w12,[sp,#44] - and w14,w14,w13 - ror w15,w15,#2 - ld1 {v4.4s},[x16], #16 - add w4,w4,w8 - eor v19.16b,v19.16b,v17.16b - eor w14,w14,w10 - eor v17.16b,v17.16b,v17.16b - add w7,w7,w12 - add w8,w8,w15 - and w12,w5,w4 - mov v17.d[1],v19.d[0] - bic w15,w6,w4 - eor w11,w4,w4,ror#5 - add w8,w8,w14 - add v2.4s,v2.4s,v17.4s - orr w12,w12,w15 - eor w11,w11,w4,ror#19 - eor w15,w8,w8,ror#11 - add v4.4s,v4.4s,v2.4s - add w7,w7,w12 - ror w11,w11,#6 - eor w14,w8,w9 - eor w15,w15,w8,ror#20 - add w7,w7,w11 - ldr w12,[sp,#48] - and w13,w13,w14 - ror w15,w15,#2 - add w3,w3,w7 - eor w13,w13,w9 - st1 {v4.4s},[x17], #16 - ext v4.16b,v3.16b,v0.16b,#4 - add w6,w6,w12 - add w7,w7,w15 - and w12,w4,w3 - bic w15,w5,w3 - ext v7.16b,v1.16b,v2.16b,#4 - eor w11,w3,w3,ror#5 - add w7,w7,w13 - mov d19,v2.d[1] - orr w12,w12,w15 - eor w11,w11,w3,ror#19 - ushr v6.4s,v4.4s,#7 - eor w15,w7,w7,ror#11 - ushr v5.4s,v4.4s,#3 - add w6,w6,w12 - add v3.4s,v3.4s,v7.4s - ror w11,w11,#6 - sli v6.4s,v4.4s,#25 - eor w13,w7,w8 - eor w15,w15,w7,ror#20 - ushr v7.4s,v4.4s,#18 - add w6,w6,w11 - ldr w12,[sp,#52] - and w14,w14,w13 - eor v5.16b,v5.16b,v6.16b - ror w15,w15,#2 - add w10,w10,w6 - sli v7.4s,v4.4s,#14 - eor w14,w14,w8 - ushr v16.4s,v19.4s,#17 - add w5,w5,w12 - add w6,w6,w15 - and w12,w3,w10 - eor v5.16b,v5.16b,v7.16b - bic w15,w4,w10 - eor w11,w10,w10,ror#5 - sli v16.4s,v19.4s,#15 - add w6,w6,w14 - orr w12,w12,w15 - ushr v17.4s,v19.4s,#10 - eor w11,w11,w10,ror#19 - eor w15,w6,w6,ror#11 - ushr v7.4s,v19.4s,#19 - add w5,w5,w12 - ror w11,w11,#6 - add v3.4s,v3.4s,v5.4s - eor w14,w6,w7 - eor w15,w15,w6,ror#20 - sli v7.4s,v19.4s,#13 - add w5,w5,w11 - ldr w12,[sp,#56] - and w13,w13,w14 - eor v17.16b,v17.16b,v16.16b - ror w15,w15,#2 - add w9,w9,w5 - eor w13,w13,w7 - eor v17.16b,v17.16b,v7.16b - add w4,w4,w12 - add w5,w5,w15 - and w12,w10,w9 - add v3.4s,v3.4s,v17.4s - bic w15,w3,w9 - eor w11,w9,w9,ror#5 - add w5,w5,w13 - ushr v18.4s,v3.4s,#17 - orr w12,w12,w15 - ushr v19.4s,v3.4s,#10 - eor w11,w11,w9,ror#19 - eor w15,w5,w5,ror#11 - sli v18.4s,v3.4s,#15 - add w4,w4,w12 - ushr v17.4s,v3.4s,#19 - ror w11,w11,#6 - eor w13,w5,w6 - eor v19.16b,v19.16b,v18.16b - eor w15,w15,w5,ror#20 - add w4,w4,w11 - sli v17.4s,v3.4s,#13 - ldr w12,[sp,#60] - and w14,w14,w13 - ror w15,w15,#2 - ld1 {v4.4s},[x16], #16 - add w8,w8,w4 - eor v19.16b,v19.16b,v17.16b - eor w14,w14,w6 - eor v17.16b,v17.16b,v17.16b - add w3,w3,w12 - add w4,w4,w15 - and w12,w9,w8 - mov v17.d[1],v19.d[0] - bic w15,w10,w8 - eor w11,w8,w8,ror#5 - add w4,w4,w14 - add v3.4s,v3.4s,v17.4s - orr w12,w12,w15 - eor w11,w11,w8,ror#19 - eor w15,w4,w4,ror#11 - add v4.4s,v4.4s,v3.4s - add w3,w3,w12 - ror w11,w11,#6 - eor w14,w4,w5 - eor w15,w15,w4,ror#20 - add w3,w3,w11 - ldr w12,[x16] - and w13,w13,w14 - ror 
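In the NEON schedule expansion here, note that NEON has no vector rotate-immediate: each sigma rotation is assembled from a ushr/sli pair, i.e. shift right, then shift-left-insert the wrapped-around bits. A C sketch of the idiom with NEON intrinsics (names are ad hoc, not part of this patch; a macro keeps the shift immediates compile-time constants, as the intrinsics require):

#include <arm_neon.h>	/* plain NEON; no crypto extension required */

/* rotr(v,n) per 32-bit lane -- exactly the ushr+sli idiom above */
#define ROR32X4(v, n)	vsliq_n_u32(vshrq_n_u32((v), (n)), (v), 32 - (n))

/* sigma0(x) = rotr(x,7) ^ rotr(x,18) ^ (x >> 3), four lanes at once */
static inline uint32x4_t sigma0_x4(uint32x4_t x)
{
	return veorq_u32(veorq_u32(ROR32X4(x, 7), ROR32X4(x, 18)),
			 vshrq_n_u32(x, 3));
}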
w15,w15,#2 - add w7,w7,w3 - eor w13,w13,w5 - st1 {v4.4s},[x17], #16 - cmp w12,#0 // check for K256 terminator - ldr w12,[sp,#0] - sub x17,x17,#64 - bne .L_00_48 - - sub x16,x16,#256 // rewind x16 - cmp x1,x2 - mov x17, #64 - csel x17, x17, xzr, eq - sub x1,x1,x17 // avoid SEGV - mov x17,sp - add w10,w10,w12 - add w3,w3,w15 - and w12,w8,w7 - ld1 {v0.16b},[x1],#16 - bic w15,w9,w7 - eor w11,w7,w7,ror#5 - ld1 {v4.4s},[x16],#16 - add w3,w3,w13 - orr w12,w12,w15 - eor w11,w11,w7,ror#19 - eor w15,w3,w3,ror#11 - rev32 v0.16b,v0.16b - add w10,w10,w12 - ror w11,w11,#6 - eor w13,w3,w4 - eor w15,w15,w3,ror#20 - add v4.4s,v4.4s,v0.4s - add w10,w10,w11 - ldr w12,[sp,#4] - and w14,w14,w13 - ror w15,w15,#2 - add w6,w6,w10 - eor w14,w14,w4 - add w9,w9,w12 - add w10,w10,w15 - and w12,w7,w6 - bic w15,w8,w6 - eor w11,w6,w6,ror#5 - add w10,w10,w14 - orr w12,w12,w15 - eor w11,w11,w6,ror#19 - eor w15,w10,w10,ror#11 - add w9,w9,w12 - ror w11,w11,#6 - eor w14,w10,w3 - eor w15,w15,w10,ror#20 - add w9,w9,w11 - ldr w12,[sp,#8] - and w13,w13,w14 - ror w15,w15,#2 - add w5,w5,w9 - eor w13,w13,w3 - add w8,w8,w12 - add w9,w9,w15 - and w12,w6,w5 - bic w15,w7,w5 - eor w11,w5,w5,ror#5 - add w9,w9,w13 - orr w12,w12,w15 - eor w11,w11,w5,ror#19 - eor w15,w9,w9,ror#11 - add w8,w8,w12 - ror w11,w11,#6 - eor w13,w9,w10 - eor w15,w15,w9,ror#20 - add w8,w8,w11 - ldr w12,[sp,#12] - and w14,w14,w13 - ror w15,w15,#2 - add w4,w4,w8 - eor w14,w14,w10 - add w7,w7,w12 - add w8,w8,w15 - and w12,w5,w4 - bic w15,w6,w4 - eor w11,w4,w4,ror#5 - add w8,w8,w14 - orr w12,w12,w15 - eor w11,w11,w4,ror#19 - eor w15,w8,w8,ror#11 - add w7,w7,w12 - ror w11,w11,#6 - eor w14,w8,w9 - eor w15,w15,w8,ror#20 - add w7,w7,w11 - ldr w12,[sp,#16] - and w13,w13,w14 - ror w15,w15,#2 - add w3,w3,w7 - eor w13,w13,w9 - st1 {v4.4s},[x17], #16 - add w6,w6,w12 - add w7,w7,w15 - and w12,w4,w3 - ld1 {v1.16b},[x1],#16 - bic w15,w5,w3 - eor w11,w3,w3,ror#5 - ld1 {v4.4s},[x16],#16 - add w7,w7,w13 - orr w12,w12,w15 - eor w11,w11,w3,ror#19 - eor w15,w7,w7,ror#11 - rev32 v1.16b,v1.16b - add w6,w6,w12 - ror w11,w11,#6 - eor w13,w7,w8 - eor w15,w15,w7,ror#20 - add v4.4s,v4.4s,v1.4s - add w6,w6,w11 - ldr w12,[sp,#20] - and w14,w14,w13 - ror w15,w15,#2 - add w10,w10,w6 - eor w14,w14,w8 - add w5,w5,w12 - add w6,w6,w15 - and w12,w3,w10 - bic w15,w4,w10 - eor w11,w10,w10,ror#5 - add w6,w6,w14 - orr w12,w12,w15 - eor w11,w11,w10,ror#19 - eor w15,w6,w6,ror#11 - add w5,w5,w12 - ror w11,w11,#6 - eor w14,w6,w7 - eor w15,w15,w6,ror#20 - add w5,w5,w11 - ldr w12,[sp,#24] - and w13,w13,w14 - ror w15,w15,#2 - add w9,w9,w5 - eor w13,w13,w7 - add w4,w4,w12 - add w5,w5,w15 - and w12,w10,w9 - bic w15,w3,w9 - eor w11,w9,w9,ror#5 - add w5,w5,w13 - orr w12,w12,w15 - eor w11,w11,w9,ror#19 - eor w15,w5,w5,ror#11 - add w4,w4,w12 - ror w11,w11,#6 - eor w13,w5,w6 - eor w15,w15,w5,ror#20 - add w4,w4,w11 - ldr w12,[sp,#28] - and w14,w14,w13 - ror w15,w15,#2 - add w8,w8,w4 - eor w14,w14,w6 - add w3,w3,w12 - add w4,w4,w15 - and w12,w9,w8 - bic w15,w10,w8 - eor w11,w8,w8,ror#5 - add w4,w4,w14 - orr w12,w12,w15 - eor w11,w11,w8,ror#19 - eor w15,w4,w4,ror#11 - add w3,w3,w12 - ror w11,w11,#6 - eor w14,w4,w5 - eor w15,w15,w4,ror#20 - add w3,w3,w11 - ldr w12,[sp,#32] - and w13,w13,w14 - ror w15,w15,#2 - add w7,w7,w3 - eor w13,w13,w5 - st1 {v4.4s},[x17], #16 - add w10,w10,w12 - add w3,w3,w15 - and w12,w8,w7 - ld1 {v2.16b},[x1],#16 - bic w15,w9,w7 - eor w11,w7,w7,ror#5 - ld1 {v4.4s},[x16],#16 - add w3,w3,w13 - orr w12,w12,w15 - eor w11,w11,w7,ror#19 - eor w15,w3,w3,ror#11 - rev32 v2.16b,v2.16b - add w10,w10,w12 - ror 
w11,w11,#6 - eor w13,w3,w4 - eor w15,w15,w3,ror#20 - add v4.4s,v4.4s,v2.4s - add w10,w10,w11 - ldr w12,[sp,#36] - and w14,w14,w13 - ror w15,w15,#2 - add w6,w6,w10 - eor w14,w14,w4 - add w9,w9,w12 - add w10,w10,w15 - and w12,w7,w6 - bic w15,w8,w6 - eor w11,w6,w6,ror#5 - add w10,w10,w14 - orr w12,w12,w15 - eor w11,w11,w6,ror#19 - eor w15,w10,w10,ror#11 - add w9,w9,w12 - ror w11,w11,#6 - eor w14,w10,w3 - eor w15,w15,w10,ror#20 - add w9,w9,w11 - ldr w12,[sp,#40] - and w13,w13,w14 - ror w15,w15,#2 - add w5,w5,w9 - eor w13,w13,w3 - add w8,w8,w12 - add w9,w9,w15 - and w12,w6,w5 - bic w15,w7,w5 - eor w11,w5,w5,ror#5 - add w9,w9,w13 - orr w12,w12,w15 - eor w11,w11,w5,ror#19 - eor w15,w9,w9,ror#11 - add w8,w8,w12 - ror w11,w11,#6 - eor w13,w9,w10 - eor w15,w15,w9,ror#20 - add w8,w8,w11 - ldr w12,[sp,#44] - and w14,w14,w13 - ror w15,w15,#2 - add w4,w4,w8 - eor w14,w14,w10 - add w7,w7,w12 - add w8,w8,w15 - and w12,w5,w4 - bic w15,w6,w4 - eor w11,w4,w4,ror#5 - add w8,w8,w14 - orr w12,w12,w15 - eor w11,w11,w4,ror#19 - eor w15,w8,w8,ror#11 - add w7,w7,w12 - ror w11,w11,#6 - eor w14,w8,w9 - eor w15,w15,w8,ror#20 - add w7,w7,w11 - ldr w12,[sp,#48] - and w13,w13,w14 - ror w15,w15,#2 - add w3,w3,w7 - eor w13,w13,w9 - st1 {v4.4s},[x17], #16 - add w6,w6,w12 - add w7,w7,w15 - and w12,w4,w3 - ld1 {v3.16b},[x1],#16 - bic w15,w5,w3 - eor w11,w3,w3,ror#5 - ld1 {v4.4s},[x16],#16 - add w7,w7,w13 - orr w12,w12,w15 - eor w11,w11,w3,ror#19 - eor w15,w7,w7,ror#11 - rev32 v3.16b,v3.16b - add w6,w6,w12 - ror w11,w11,#6 - eor w13,w7,w8 - eor w15,w15,w7,ror#20 - add v4.4s,v4.4s,v3.4s - add w6,w6,w11 - ldr w12,[sp,#52] - and w14,w14,w13 - ror w15,w15,#2 - add w10,w10,w6 - eor w14,w14,w8 - add w5,w5,w12 - add w6,w6,w15 - and w12,w3,w10 - bic w15,w4,w10 - eor w11,w10,w10,ror#5 - add w6,w6,w14 - orr w12,w12,w15 - eor w11,w11,w10,ror#19 - eor w15,w6,w6,ror#11 - add w5,w5,w12 - ror w11,w11,#6 - eor w14,w6,w7 - eor w15,w15,w6,ror#20 - add w5,w5,w11 - ldr w12,[sp,#56] - and w13,w13,w14 - ror w15,w15,#2 - add w9,w9,w5 - eor w13,w13,w7 - add w4,w4,w12 - add w5,w5,w15 - and w12,w10,w9 - bic w15,w3,w9 - eor w11,w9,w9,ror#5 - add w5,w5,w13 - orr w12,w12,w15 - eor w11,w11,w9,ror#19 - eor w15,w5,w5,ror#11 - add w4,w4,w12 - ror w11,w11,#6 - eor w13,w5,w6 - eor w15,w15,w5,ror#20 - add w4,w4,w11 - ldr w12,[sp,#60] - and w14,w14,w13 - ror w15,w15,#2 - add w8,w8,w4 - eor w14,w14,w6 - add w3,w3,w12 - add w4,w4,w15 - and w12,w9,w8 - bic w15,w10,w8 - eor w11,w8,w8,ror#5 - add w4,w4,w14 - orr w12,w12,w15 - eor w11,w11,w8,ror#19 - eor w15,w4,w4,ror#11 - add w3,w3,w12 - ror w11,w11,#6 - eor w14,w4,w5 - eor w15,w15,w4,ror#20 - add w3,w3,w11 - and w13,w13,w14 - ror w15,w15,#2 - add w7,w7,w3 - eor w13,w13,w5 - st1 {v4.4s},[x17], #16 - add w3,w3,w15 // h+=Sigma0(a) from the past - ldp w11,w12,[x0,#0] - add w3,w3,w13 // h+=Maj(a,b,c) from the past - ldp w13,w14,[x0,#8] - add w3,w3,w11 // accumulate - add w4,w4,w12 - ldp w11,w12,[x0,#16] - add w5,w5,w13 - add w6,w6,w14 - ldp w13,w14,[x0,#24] - add w7,w7,w11 - add w8,w8,w12 - ldr w12,[sp,#0] - stp w3,w4,[x0,#0] - add w9,w9,w13 - mov w13,wzr - stp w5,w6,[x0,#8] - add w10,w10,w14 - stp w7,w8,[x0,#16] - eor w14,w4,w5 - stp w9,w10,[x0,#24] - mov w15,wzr - mov x17,sp - b.ne .L_00_48 - - ldr x29,[x29] - add sp,sp,#16*4+16 - ret -.size sha256_block_neon,.-sha256_block_neon -#ifndef __KERNEL__ -.comm OPENSSL_armcap_P,4,4 -#endif diff --git a/arch/arm64/crypto/sha512-core.S_shipped b/arch/arm64/crypto/sha512-core.S_shipped deleted file mode 100644 index e063a61067201..0000000000000 --- 
a/arch/arm64/crypto/sha512-core.S_shipped
+++ /dev/null
@@ -1,1093 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-
-// This code is taken from the OpenSSL project but the author (Andy Polyakov)
-// has relicensed it under the GPLv2. Therefore this program is free software;
-// you can redistribute it and/or modify it under the terms of the GNU General
-// Public License version 2 as published by the Free Software Foundation.
-//
-// The original headers, including the original license headers, are
-// included below for completeness.
-
-// Copyright 2014-2016 The OpenSSL Project Authors. All Rights Reserved.
-//
-// Licensed under the OpenSSL license (the "License"). You may not use
-// this file except in compliance with the License. You can obtain a copy
-// in the file LICENSE in the source distribution or at
-// https://www.openssl.org/source/license.html
-
-// ====================================================================
-// Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
-// project. The module is, however, dual licensed under OpenSSL and
-// CRYPTOGAMS licenses depending on where you obtain it. For further
-// details see http://www.openssl.org/~appro/cryptogams/.
-// ====================================================================
-//
-// SHA256/512 for ARMv8.
-//
-// Performance in cycles per processed byte and improvement coefficient
-// over code generated with "default" compiler:
-//
-//		SHA256-hw	SHA256(*)	SHA512
-// Apple A7	1.97		10.5 (+33%)	6.73 (-1%(**))
-// Cortex-A53	2.38		15.5 (+115%)	10.0 (+150%(***))
-// Cortex-A57	2.31		11.6 (+86%)	7.51 (+260%(***))
-// Denver	2.01		10.5 (+26%)	6.70 (+8%)
-// X-Gene			20.0 (+100%)	12.8 (+300%(***))
-// Mongoose	2.36		13.0 (+50%)	8.36 (+33%)
-//
-// (*)	Software SHA256 results are of lesser relevance, presented
-//	mostly for informational purposes.
-// (**)	The result is a trade-off: it's possible to improve it by
-//	10% (or by 1 cycle per round), but at the cost of 20% loss
-//	on Cortex-A53 (or by 4 cycles per round).
-// (***)	Super-impressive coefficients over gcc-generated code are
-//	indication of some compiler "pathology", most notably code
-//	generated with -mgeneral-regs-only is significantly faster
-//	and the gap is only 40-90%.
-//
-// October 2016.
-//
-// Originally it was reckoned that it makes no sense to implement NEON
-// version of SHA256 for 64-bit processors. This is because performance
-// improvement on most wide-spread Cortex-A5x processors was observed
-// to be marginal, same on Cortex-A53 and ~10% on A57. But then it was
-// observed that 32-bit NEON SHA256 performs significantly better than
-// 64-bit scalar version on *some* of the more recent processors. As
-// result 64-bit NEON version of SHA256 was added to provide best
-// all-round performance. For example it executes ~30% faster on X-Gene
-// and Mongoose. [For reference, NEON version of SHA512 is bound to
-// deliver much less improvement, likely *negative* on Cortex-A5x.
-// Which is why NEON support is limited to SHA256.]
-
-#ifndef __KERNEL__
-# include "arm_arch.h"
-#endif
-
-.text
-
-.extern	OPENSSL_armcap_P
-.globl	sha512_block_data_order
-.type	sha512_block_data_order,%function
-.align	6
-sha512_block_data_order:
-	stp	x29,x30,[sp,#-128]!
- add x29,sp,#0 - - stp x19,x20,[sp,#16] - stp x21,x22,[sp,#32] - stp x23,x24,[sp,#48] - stp x25,x26,[sp,#64] - stp x27,x28,[sp,#80] - sub sp,sp,#4*8 - - ldp x20,x21,[x0] // load context - ldp x22,x23,[x0,#2*8] - ldp x24,x25,[x0,#4*8] - add x2,x1,x2,lsl#7 // end of input - ldp x26,x27,[x0,#6*8] - adr x30,.LK512 - stp x0,x2,[x29,#96] - -.Loop: - ldp x3,x4,[x1],#2*8 - ldr x19,[x30],#8 // *K++ - eor x28,x21,x22 // magic seed - str x1,[x29,#112] -#ifndef __AARCH64EB__ - rev x3,x3 // 0 -#endif - ror x16,x24,#14 - add x27,x27,x19 // h+=K[i] - eor x6,x24,x24,ror#23 - and x17,x25,x24 - bic x19,x26,x24 - add x27,x27,x3 // h+=X[i] - orr x17,x17,x19 // Ch(e,f,g) - eor x19,x20,x21 // a^b, b^c in next round - eor x16,x16,x6,ror#18 // Sigma1(e) - ror x6,x20,#28 - add x27,x27,x17 // h+=Ch(e,f,g) - eor x17,x20,x20,ror#5 - add x27,x27,x16 // h+=Sigma1(e) - and x28,x28,x19 // (b^c)&=(a^b) - add x23,x23,x27 // d+=h - eor x28,x28,x21 // Maj(a,b,c) - eor x17,x6,x17,ror#34 // Sigma0(a) - add x27,x27,x28 // h+=Maj(a,b,c) - ldr x28,[x30],#8 // *K++, x19 in next round - //add x27,x27,x17 // h+=Sigma0(a) -#ifndef __AARCH64EB__ - rev x4,x4 // 1 -#endif - ldp x5,x6,[x1],#2*8 - add x27,x27,x17 // h+=Sigma0(a) - ror x16,x23,#14 - add x26,x26,x28 // h+=K[i] - eor x7,x23,x23,ror#23 - and x17,x24,x23 - bic x28,x25,x23 - add x26,x26,x4 // h+=X[i] - orr x17,x17,x28 // Ch(e,f,g) - eor x28,x27,x20 // a^b, b^c in next round - eor x16,x16,x7,ror#18 // Sigma1(e) - ror x7,x27,#28 - add x26,x26,x17 // h+=Ch(e,f,g) - eor x17,x27,x27,ror#5 - add x26,x26,x16 // h+=Sigma1(e) - and x19,x19,x28 // (b^c)&=(a^b) - add x22,x22,x26 // d+=h - eor x19,x19,x20 // Maj(a,b,c) - eor x17,x7,x17,ror#34 // Sigma0(a) - add x26,x26,x19 // h+=Maj(a,b,c) - ldr x19,[x30],#8 // *K++, x28 in next round - //add x26,x26,x17 // h+=Sigma0(a) -#ifndef __AARCH64EB__ - rev x5,x5 // 2 -#endif - add x26,x26,x17 // h+=Sigma0(a) - ror x16,x22,#14 - add x25,x25,x19 // h+=K[i] - eor x8,x22,x22,ror#23 - and x17,x23,x22 - bic x19,x24,x22 - add x25,x25,x5 // h+=X[i] - orr x17,x17,x19 // Ch(e,f,g) - eor x19,x26,x27 // a^b, b^c in next round - eor x16,x16,x8,ror#18 // Sigma1(e) - ror x8,x26,#28 - add x25,x25,x17 // h+=Ch(e,f,g) - eor x17,x26,x26,ror#5 - add x25,x25,x16 // h+=Sigma1(e) - and x28,x28,x19 // (b^c)&=(a^b) - add x21,x21,x25 // d+=h - eor x28,x28,x27 // Maj(a,b,c) - eor x17,x8,x17,ror#34 // Sigma0(a) - add x25,x25,x28 // h+=Maj(a,b,c) - ldr x28,[x30],#8 // *K++, x19 in next round - //add x25,x25,x17 // h+=Sigma0(a) -#ifndef __AARCH64EB__ - rev x6,x6 // 3 -#endif - ldp x7,x8,[x1],#2*8 - add x25,x25,x17 // h+=Sigma0(a) - ror x16,x21,#14 - add x24,x24,x28 // h+=K[i] - eor x9,x21,x21,ror#23 - and x17,x22,x21 - bic x28,x23,x21 - add x24,x24,x6 // h+=X[i] - orr x17,x17,x28 // Ch(e,f,g) - eor x28,x25,x26 // a^b, b^c in next round - eor x16,x16,x9,ror#18 // Sigma1(e) - ror x9,x25,#28 - add x24,x24,x17 // h+=Ch(e,f,g) - eor x17,x25,x25,ror#5 - add x24,x24,x16 // h+=Sigma1(e) - and x19,x19,x28 // (b^c)&=(a^b) - add x20,x20,x24 // d+=h - eor x19,x19,x26 // Maj(a,b,c) - eor x17,x9,x17,ror#34 // Sigma0(a) - add x24,x24,x19 // h+=Maj(a,b,c) - ldr x19,[x30],#8 // *K++, x28 in next round - //add x24,x24,x17 // h+=Sigma0(a) -#ifndef __AARCH64EB__ - rev x7,x7 // 4 -#endif - add x24,x24,x17 // h+=Sigma0(a) - ror x16,x20,#14 - add x23,x23,x19 // h+=K[i] - eor x10,x20,x20,ror#23 - and x17,x21,x20 - bic x19,x22,x20 - add x23,x23,x7 // h+=X[i] - orr x17,x17,x19 // Ch(e,f,g) - eor x19,x24,x25 // a^b, b^c in next round - eor x16,x16,x10,ror#18 // Sigma1(e) - ror x10,x24,#28 - add 
x23,x23,x17 // h+=Ch(e,f,g) - eor x17,x24,x24,ror#5 - add x23,x23,x16 // h+=Sigma1(e) - and x28,x28,x19 // (b^c)&=(a^b) - add x27,x27,x23 // d+=h - eor x28,x28,x25 // Maj(a,b,c) - eor x17,x10,x17,ror#34 // Sigma0(a) - add x23,x23,x28 // h+=Maj(a,b,c) - ldr x28,[x30],#8 // *K++, x19 in next round - //add x23,x23,x17 // h+=Sigma0(a) -#ifndef __AARCH64EB__ - rev x8,x8 // 5 -#endif - ldp x9,x10,[x1],#2*8 - add x23,x23,x17 // h+=Sigma0(a) - ror x16,x27,#14 - add x22,x22,x28 // h+=K[i] - eor x11,x27,x27,ror#23 - and x17,x20,x27 - bic x28,x21,x27 - add x22,x22,x8 // h+=X[i] - orr x17,x17,x28 // Ch(e,f,g) - eor x28,x23,x24 // a^b, b^c in next round - eor x16,x16,x11,ror#18 // Sigma1(e) - ror x11,x23,#28 - add x22,x22,x17 // h+=Ch(e,f,g) - eor x17,x23,x23,ror#5 - add x22,x22,x16 // h+=Sigma1(e) - and x19,x19,x28 // (b^c)&=(a^b) - add x26,x26,x22 // d+=h - eor x19,x19,x24 // Maj(a,b,c) - eor x17,x11,x17,ror#34 // Sigma0(a) - add x22,x22,x19 // h+=Maj(a,b,c) - ldr x19,[x30],#8 // *K++, x28 in next round - //add x22,x22,x17 // h+=Sigma0(a) -#ifndef __AARCH64EB__ - rev x9,x9 // 6 -#endif - add x22,x22,x17 // h+=Sigma0(a) - ror x16,x26,#14 - add x21,x21,x19 // h+=K[i] - eor x12,x26,x26,ror#23 - and x17,x27,x26 - bic x19,x20,x26 - add x21,x21,x9 // h+=X[i] - orr x17,x17,x19 // Ch(e,f,g) - eor x19,x22,x23 // a^b, b^c in next round - eor x16,x16,x12,ror#18 // Sigma1(e) - ror x12,x22,#28 - add x21,x21,x17 // h+=Ch(e,f,g) - eor x17,x22,x22,ror#5 - add x21,x21,x16 // h+=Sigma1(e) - and x28,x28,x19 // (b^c)&=(a^b) - add x25,x25,x21 // d+=h - eor x28,x28,x23 // Maj(a,b,c) - eor x17,x12,x17,ror#34 // Sigma0(a) - add x21,x21,x28 // h+=Maj(a,b,c) - ldr x28,[x30],#8 // *K++, x19 in next round - //add x21,x21,x17 // h+=Sigma0(a) -#ifndef __AARCH64EB__ - rev x10,x10 // 7 -#endif - ldp x11,x12,[x1],#2*8 - add x21,x21,x17 // h+=Sigma0(a) - ror x16,x25,#14 - add x20,x20,x28 // h+=K[i] - eor x13,x25,x25,ror#23 - and x17,x26,x25 - bic x28,x27,x25 - add x20,x20,x10 // h+=X[i] - orr x17,x17,x28 // Ch(e,f,g) - eor x28,x21,x22 // a^b, b^c in next round - eor x16,x16,x13,ror#18 // Sigma1(e) - ror x13,x21,#28 - add x20,x20,x17 // h+=Ch(e,f,g) - eor x17,x21,x21,ror#5 - add x20,x20,x16 // h+=Sigma1(e) - and x19,x19,x28 // (b^c)&=(a^b) - add x24,x24,x20 // d+=h - eor x19,x19,x22 // Maj(a,b,c) - eor x17,x13,x17,ror#34 // Sigma0(a) - add x20,x20,x19 // h+=Maj(a,b,c) - ldr x19,[x30],#8 // *K++, x28 in next round - //add x20,x20,x17 // h+=Sigma0(a) -#ifndef __AARCH64EB__ - rev x11,x11 // 8 -#endif - add x20,x20,x17 // h+=Sigma0(a) - ror x16,x24,#14 - add x27,x27,x19 // h+=K[i] - eor x14,x24,x24,ror#23 - and x17,x25,x24 - bic x19,x26,x24 - add x27,x27,x11 // h+=X[i] - orr x17,x17,x19 // Ch(e,f,g) - eor x19,x20,x21 // a^b, b^c in next round - eor x16,x16,x14,ror#18 // Sigma1(e) - ror x14,x20,#28 - add x27,x27,x17 // h+=Ch(e,f,g) - eor x17,x20,x20,ror#5 - add x27,x27,x16 // h+=Sigma1(e) - and x28,x28,x19 // (b^c)&=(a^b) - add x23,x23,x27 // d+=h - eor x28,x28,x21 // Maj(a,b,c) - eor x17,x14,x17,ror#34 // Sigma0(a) - add x27,x27,x28 // h+=Maj(a,b,c) - ldr x28,[x30],#8 // *K++, x19 in next round - //add x27,x27,x17 // h+=Sigma0(a) -#ifndef __AARCH64EB__ - rev x12,x12 // 9 -#endif - ldp x13,x14,[x1],#2*8 - add x27,x27,x17 // h+=Sigma0(a) - ror x16,x23,#14 - add x26,x26,x28 // h+=K[i] - eor x15,x23,x23,ror#23 - and x17,x24,x23 - bic x28,x25,x23 - add x26,x26,x12 // h+=X[i] - orr x17,x17,x28 // Ch(e,f,g) - eor x28,x27,x20 // a^b, b^c in next round - eor x16,x16,x15,ror#18 // Sigma1(e) - ror x15,x27,#28 - add x26,x26,x17 // h+=Ch(e,f,g) - eor 
x17,x27,x27,ror#5 - add x26,x26,x16 // h+=Sigma1(e) - and x19,x19,x28 // (b^c)&=(a^b) - add x22,x22,x26 // d+=h - eor x19,x19,x20 // Maj(a,b,c) - eor x17,x15,x17,ror#34 // Sigma0(a) - add x26,x26,x19 // h+=Maj(a,b,c) - ldr x19,[x30],#8 // *K++, x28 in next round - //add x26,x26,x17 // h+=Sigma0(a) -#ifndef __AARCH64EB__ - rev x13,x13 // 10 -#endif - add x26,x26,x17 // h+=Sigma0(a) - ror x16,x22,#14 - add x25,x25,x19 // h+=K[i] - eor x0,x22,x22,ror#23 - and x17,x23,x22 - bic x19,x24,x22 - add x25,x25,x13 // h+=X[i] - orr x17,x17,x19 // Ch(e,f,g) - eor x19,x26,x27 // a^b, b^c in next round - eor x16,x16,x0,ror#18 // Sigma1(e) - ror x0,x26,#28 - add x25,x25,x17 // h+=Ch(e,f,g) - eor x17,x26,x26,ror#5 - add x25,x25,x16 // h+=Sigma1(e) - and x28,x28,x19 // (b^c)&=(a^b) - add x21,x21,x25 // d+=h - eor x28,x28,x27 // Maj(a,b,c) - eor x17,x0,x17,ror#34 // Sigma0(a) - add x25,x25,x28 // h+=Maj(a,b,c) - ldr x28,[x30],#8 // *K++, x19 in next round - //add x25,x25,x17 // h+=Sigma0(a) -#ifndef __AARCH64EB__ - rev x14,x14 // 11 -#endif - ldp x15,x0,[x1],#2*8 - add x25,x25,x17 // h+=Sigma0(a) - str x6,[sp,#24] - ror x16,x21,#14 - add x24,x24,x28 // h+=K[i] - eor x6,x21,x21,ror#23 - and x17,x22,x21 - bic x28,x23,x21 - add x24,x24,x14 // h+=X[i] - orr x17,x17,x28 // Ch(e,f,g) - eor x28,x25,x26 // a^b, b^c in next round - eor x16,x16,x6,ror#18 // Sigma1(e) - ror x6,x25,#28 - add x24,x24,x17 // h+=Ch(e,f,g) - eor x17,x25,x25,ror#5 - add x24,x24,x16 // h+=Sigma1(e) - and x19,x19,x28 // (b^c)&=(a^b) - add x20,x20,x24 // d+=h - eor x19,x19,x26 // Maj(a,b,c) - eor x17,x6,x17,ror#34 // Sigma0(a) - add x24,x24,x19 // h+=Maj(a,b,c) - ldr x19,[x30],#8 // *K++, x28 in next round - //add x24,x24,x17 // h+=Sigma0(a) -#ifndef __AARCH64EB__ - rev x15,x15 // 12 -#endif - add x24,x24,x17 // h+=Sigma0(a) - str x7,[sp,#0] - ror x16,x20,#14 - add x23,x23,x19 // h+=K[i] - eor x7,x20,x20,ror#23 - and x17,x21,x20 - bic x19,x22,x20 - add x23,x23,x15 // h+=X[i] - orr x17,x17,x19 // Ch(e,f,g) - eor x19,x24,x25 // a^b, b^c in next round - eor x16,x16,x7,ror#18 // Sigma1(e) - ror x7,x24,#28 - add x23,x23,x17 // h+=Ch(e,f,g) - eor x17,x24,x24,ror#5 - add x23,x23,x16 // h+=Sigma1(e) - and x28,x28,x19 // (b^c)&=(a^b) - add x27,x27,x23 // d+=h - eor x28,x28,x25 // Maj(a,b,c) - eor x17,x7,x17,ror#34 // Sigma0(a) - add x23,x23,x28 // h+=Maj(a,b,c) - ldr x28,[x30],#8 // *K++, x19 in next round - //add x23,x23,x17 // h+=Sigma0(a) -#ifndef __AARCH64EB__ - rev x0,x0 // 13 -#endif - ldp x1,x2,[x1] - add x23,x23,x17 // h+=Sigma0(a) - str x8,[sp,#8] - ror x16,x27,#14 - add x22,x22,x28 // h+=K[i] - eor x8,x27,x27,ror#23 - and x17,x20,x27 - bic x28,x21,x27 - add x22,x22,x0 // h+=X[i] - orr x17,x17,x28 // Ch(e,f,g) - eor x28,x23,x24 // a^b, b^c in next round - eor x16,x16,x8,ror#18 // Sigma1(e) - ror x8,x23,#28 - add x22,x22,x17 // h+=Ch(e,f,g) - eor x17,x23,x23,ror#5 - add x22,x22,x16 // h+=Sigma1(e) - and x19,x19,x28 // (b^c)&=(a^b) - add x26,x26,x22 // d+=h - eor x19,x19,x24 // Maj(a,b,c) - eor x17,x8,x17,ror#34 // Sigma0(a) - add x22,x22,x19 // h+=Maj(a,b,c) - ldr x19,[x30],#8 // *K++, x28 in next round - //add x22,x22,x17 // h+=Sigma0(a) -#ifndef __AARCH64EB__ - rev x1,x1 // 14 -#endif - ldr x6,[sp,#24] - add x22,x22,x17 // h+=Sigma0(a) - str x9,[sp,#16] - ror x16,x26,#14 - add x21,x21,x19 // h+=K[i] - eor x9,x26,x26,ror#23 - and x17,x27,x26 - bic x19,x20,x26 - add x21,x21,x1 // h+=X[i] - orr x17,x17,x19 // Ch(e,f,g) - eor x19,x22,x23 // a^b, b^c in next round - eor x16,x16,x9,ror#18 // Sigma1(e) - ror x9,x22,#28 - add x21,x21,x17 // 
h+=Ch(e,f,g) - eor x17,x22,x22,ror#5 - add x21,x21,x16 // h+=Sigma1(e) - and x28,x28,x19 // (b^c)&=(a^b) - add x25,x25,x21 // d+=h - eor x28,x28,x23 // Maj(a,b,c) - eor x17,x9,x17,ror#34 // Sigma0(a) - add x21,x21,x28 // h+=Maj(a,b,c) - ldr x28,[x30],#8 // *K++, x19 in next round - //add x21,x21,x17 // h+=Sigma0(a) -#ifndef __AARCH64EB__ - rev x2,x2 // 15 -#endif - ldr x7,[sp,#0] - add x21,x21,x17 // h+=Sigma0(a) - str x10,[sp,#24] - ror x16,x25,#14 - add x20,x20,x28 // h+=K[i] - ror x9,x4,#1 - and x17,x26,x25 - ror x8,x1,#19 - bic x28,x27,x25 - ror x10,x21,#28 - add x20,x20,x2 // h+=X[i] - eor x16,x16,x25,ror#18 - eor x9,x9,x4,ror#8 - orr x17,x17,x28 // Ch(e,f,g) - eor x28,x21,x22 // a^b, b^c in next round - eor x16,x16,x25,ror#41 // Sigma1(e) - eor x10,x10,x21,ror#34 - add x20,x20,x17 // h+=Ch(e,f,g) - and x19,x19,x28 // (b^c)&=(a^b) - eor x8,x8,x1,ror#61 - eor x9,x9,x4,lsr#7 // sigma0(X[i+1]) - add x20,x20,x16 // h+=Sigma1(e) - eor x19,x19,x22 // Maj(a,b,c) - eor x17,x10,x21,ror#39 // Sigma0(a) - eor x8,x8,x1,lsr#6 // sigma1(X[i+14]) - add x3,x3,x12 - add x24,x24,x20 // d+=h - add x20,x20,x19 // h+=Maj(a,b,c) - ldr x19,[x30],#8 // *K++, x28 in next round - add x3,x3,x9 - add x20,x20,x17 // h+=Sigma0(a) - add x3,x3,x8 -.Loop_16_xx: - ldr x8,[sp,#8] - str x11,[sp,#0] - ror x16,x24,#14 - add x27,x27,x19 // h+=K[i] - ror x10,x5,#1 - and x17,x25,x24 - ror x9,x2,#19 - bic x19,x26,x24 - ror x11,x20,#28 - add x27,x27,x3 // h+=X[i] - eor x16,x16,x24,ror#18 - eor x10,x10,x5,ror#8 - orr x17,x17,x19 // Ch(e,f,g) - eor x19,x20,x21 // a^b, b^c in next round - eor x16,x16,x24,ror#41 // Sigma1(e) - eor x11,x11,x20,ror#34 - add x27,x27,x17 // h+=Ch(e,f,g) - and x28,x28,x19 // (b^c)&=(a^b) - eor x9,x9,x2,ror#61 - eor x10,x10,x5,lsr#7 // sigma0(X[i+1]) - add x27,x27,x16 // h+=Sigma1(e) - eor x28,x28,x21 // Maj(a,b,c) - eor x17,x11,x20,ror#39 // Sigma0(a) - eor x9,x9,x2,lsr#6 // sigma1(X[i+14]) - add x4,x4,x13 - add x23,x23,x27 // d+=h - add x27,x27,x28 // h+=Maj(a,b,c) - ldr x28,[x30],#8 // *K++, x19 in next round - add x4,x4,x10 - add x27,x27,x17 // h+=Sigma0(a) - add x4,x4,x9 - ldr x9,[sp,#16] - str x12,[sp,#8] - ror x16,x23,#14 - add x26,x26,x28 // h+=K[i] - ror x11,x6,#1 - and x17,x24,x23 - ror x10,x3,#19 - bic x28,x25,x23 - ror x12,x27,#28 - add x26,x26,x4 // h+=X[i] - eor x16,x16,x23,ror#18 - eor x11,x11,x6,ror#8 - orr x17,x17,x28 // Ch(e,f,g) - eor x28,x27,x20 // a^b, b^c in next round - eor x16,x16,x23,ror#41 // Sigma1(e) - eor x12,x12,x27,ror#34 - add x26,x26,x17 // h+=Ch(e,f,g) - and x19,x19,x28 // (b^c)&=(a^b) - eor x10,x10,x3,ror#61 - eor x11,x11,x6,lsr#7 // sigma0(X[i+1]) - add x26,x26,x16 // h+=Sigma1(e) - eor x19,x19,x20 // Maj(a,b,c) - eor x17,x12,x27,ror#39 // Sigma0(a) - eor x10,x10,x3,lsr#6 // sigma1(X[i+14]) - add x5,x5,x14 - add x22,x22,x26 // d+=h - add x26,x26,x19 // h+=Maj(a,b,c) - ldr x19,[x30],#8 // *K++, x28 in next round - add x5,x5,x11 - add x26,x26,x17 // h+=Sigma0(a) - add x5,x5,x10 - ldr x10,[sp,#24] - str x13,[sp,#16] - ror x16,x22,#14 - add x25,x25,x19 // h+=K[i] - ror x12,x7,#1 - and x17,x23,x22 - ror x11,x4,#19 - bic x19,x24,x22 - ror x13,x26,#28 - add x25,x25,x5 // h+=X[i] - eor x16,x16,x22,ror#18 - eor x12,x12,x7,ror#8 - orr x17,x17,x19 // Ch(e,f,g) - eor x19,x26,x27 // a^b, b^c in next round - eor x16,x16,x22,ror#41 // Sigma1(e) - eor x13,x13,x26,ror#34 - add x25,x25,x17 // h+=Ch(e,f,g) - and x28,x28,x19 // (b^c)&=(a^b) - eor x11,x11,x4,ror#61 - eor x12,x12,x7,lsr#7 // sigma0(X[i+1]) - add x25,x25,x16 // h+=Sigma1(e) - eor x28,x28,x27 // Maj(a,b,c) - eor 
x17,x13,x26,ror#39 // Sigma0(a) - eor x11,x11,x4,lsr#6 // sigma1(X[i+14]) - add x6,x6,x15 - add x21,x21,x25 // d+=h - add x25,x25,x28 // h+=Maj(a,b,c) - ldr x28,[x30],#8 // *K++, x19 in next round - add x6,x6,x12 - add x25,x25,x17 // h+=Sigma0(a) - add x6,x6,x11 - ldr x11,[sp,#0] - str x14,[sp,#24] - ror x16,x21,#14 - add x24,x24,x28 // h+=K[i] - ror x13,x8,#1 - and x17,x22,x21 - ror x12,x5,#19 - bic x28,x23,x21 - ror x14,x25,#28 - add x24,x24,x6 // h+=X[i] - eor x16,x16,x21,ror#18 - eor x13,x13,x8,ror#8 - orr x17,x17,x28 // Ch(e,f,g) - eor x28,x25,x26 // a^b, b^c in next round - eor x16,x16,x21,ror#41 // Sigma1(e) - eor x14,x14,x25,ror#34 - add x24,x24,x17 // h+=Ch(e,f,g) - and x19,x19,x28 // (b^c)&=(a^b) - eor x12,x12,x5,ror#61 - eor x13,x13,x8,lsr#7 // sigma0(X[i+1]) - add x24,x24,x16 // h+=Sigma1(e) - eor x19,x19,x26 // Maj(a,b,c) - eor x17,x14,x25,ror#39 // Sigma0(a) - eor x12,x12,x5,lsr#6 // sigma1(X[i+14]) - add x7,x7,x0 - add x20,x20,x24 // d+=h - add x24,x24,x19 // h+=Maj(a,b,c) - ldr x19,[x30],#8 // *K++, x28 in next round - add x7,x7,x13 - add x24,x24,x17 // h+=Sigma0(a) - add x7,x7,x12 - ldr x12,[sp,#8] - str x15,[sp,#0] - ror x16,x20,#14 - add x23,x23,x19 // h+=K[i] - ror x14,x9,#1 - and x17,x21,x20 - ror x13,x6,#19 - bic x19,x22,x20 - ror x15,x24,#28 - add x23,x23,x7 // h+=X[i] - eor x16,x16,x20,ror#18 - eor x14,x14,x9,ror#8 - orr x17,x17,x19 // Ch(e,f,g) - eor x19,x24,x25 // a^b, b^c in next round - eor x16,x16,x20,ror#41 // Sigma1(e) - eor x15,x15,x24,ror#34 - add x23,x23,x17 // h+=Ch(e,f,g) - and x28,x28,x19 // (b^c)&=(a^b) - eor x13,x13,x6,ror#61 - eor x14,x14,x9,lsr#7 // sigma0(X[i+1]) - add x23,x23,x16 // h+=Sigma1(e) - eor x28,x28,x25 // Maj(a,b,c) - eor x17,x15,x24,ror#39 // Sigma0(a) - eor x13,x13,x6,lsr#6 // sigma1(X[i+14]) - add x8,x8,x1 - add x27,x27,x23 // d+=h - add x23,x23,x28 // h+=Maj(a,b,c) - ldr x28,[x30],#8 // *K++, x19 in next round - add x8,x8,x14 - add x23,x23,x17 // h+=Sigma0(a) - add x8,x8,x13 - ldr x13,[sp,#16] - str x0,[sp,#8] - ror x16,x27,#14 - add x22,x22,x28 // h+=K[i] - ror x15,x10,#1 - and x17,x20,x27 - ror x14,x7,#19 - bic x28,x21,x27 - ror x0,x23,#28 - add x22,x22,x8 // h+=X[i] - eor x16,x16,x27,ror#18 - eor x15,x15,x10,ror#8 - orr x17,x17,x28 // Ch(e,f,g) - eor x28,x23,x24 // a^b, b^c in next round - eor x16,x16,x27,ror#41 // Sigma1(e) - eor x0,x0,x23,ror#34 - add x22,x22,x17 // h+=Ch(e,f,g) - and x19,x19,x28 // (b^c)&=(a^b) - eor x14,x14,x7,ror#61 - eor x15,x15,x10,lsr#7 // sigma0(X[i+1]) - add x22,x22,x16 // h+=Sigma1(e) - eor x19,x19,x24 // Maj(a,b,c) - eor x17,x0,x23,ror#39 // Sigma0(a) - eor x14,x14,x7,lsr#6 // sigma1(X[i+14]) - add x9,x9,x2 - add x26,x26,x22 // d+=h - add x22,x22,x19 // h+=Maj(a,b,c) - ldr x19,[x30],#8 // *K++, x28 in next round - add x9,x9,x15 - add x22,x22,x17 // h+=Sigma0(a) - add x9,x9,x14 - ldr x14,[sp,#24] - str x1,[sp,#16] - ror x16,x26,#14 - add x21,x21,x19 // h+=K[i] - ror x0,x11,#1 - and x17,x27,x26 - ror x15,x8,#19 - bic x19,x20,x26 - ror x1,x22,#28 - add x21,x21,x9 // h+=X[i] - eor x16,x16,x26,ror#18 - eor x0,x0,x11,ror#8 - orr x17,x17,x19 // Ch(e,f,g) - eor x19,x22,x23 // a^b, b^c in next round - eor x16,x16,x26,ror#41 // Sigma1(e) - eor x1,x1,x22,ror#34 - add x21,x21,x17 // h+=Ch(e,f,g) - and x28,x28,x19 // (b^c)&=(a^b) - eor x15,x15,x8,ror#61 - eor x0,x0,x11,lsr#7 // sigma0(X[i+1]) - add x21,x21,x16 // h+=Sigma1(e) - eor x28,x28,x23 // Maj(a,b,c) - eor x17,x1,x22,ror#39 // Sigma0(a) - eor x15,x15,x8,lsr#6 // sigma1(X[i+14]) - add x10,x10,x3 - add x25,x25,x21 // d+=h - add x21,x21,x28 // h+=Maj(a,b,c) - 
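The SHA-512 rounds above follow the same structure with 64-bit rotate counts; the assembly folds two of the three rotations into a single eor x,x,x,ror#k whose result is rotated once more. For reference, the primitives and the schedule update written out in C (a sketch; macro names are ad hoc, not part of this patch):

#include <stdint.h>

static inline uint64_t ror64(uint64_t x, unsigned int n)
{
	return (x >> n) | (x << (64 - n));
}

/* 64-bit primitives named in the comments above */
#define Sigma1_512(e)	(ror64((e), 14) ^ ror64((e), 18) ^ ror64((e), 41))
#define Sigma0_512(a)	(ror64((a), 28) ^ ror64((a), 34) ^ ror64((a), 39))
#define sigma0_512(x)	(ror64((x), 1) ^ ror64((x), 8) ^ ((x) >> 7))
#define sigma1_512(x)	(ror64((x), 19) ^ ror64((x), 61) ^ ((x) >> 6))

/* schedule update, as in the rounds above:
 * X[i] += sigma0(X[i+1]) + sigma1(X[i+14]) + X[i+9] */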
ldr x28,[x30],#8 // *K++, x19 in next round - add x10,x10,x0 - add x21,x21,x17 // h+=Sigma0(a) - add x10,x10,x15 - ldr x15,[sp,#0] - str x2,[sp,#24] - ror x16,x25,#14 - add x20,x20,x28 // h+=K[i] - ror x1,x12,#1 - and x17,x26,x25 - ror x0,x9,#19 - bic x28,x27,x25 - ror x2,x21,#28 - add x20,x20,x10 // h+=X[i] - eor x16,x16,x25,ror#18 - eor x1,x1,x12,ror#8 - orr x17,x17,x28 // Ch(e,f,g) - eor x28,x21,x22 // a^b, b^c in next round - eor x16,x16,x25,ror#41 // Sigma1(e) - eor x2,x2,x21,ror#34 - add x20,x20,x17 // h+=Ch(e,f,g) - and x19,x19,x28 // (b^c)&=(a^b) - eor x0,x0,x9,ror#61 - eor x1,x1,x12,lsr#7 // sigma0(X[i+1]) - add x20,x20,x16 // h+=Sigma1(e) - eor x19,x19,x22 // Maj(a,b,c) - eor x17,x2,x21,ror#39 // Sigma0(a) - eor x0,x0,x9,lsr#6 // sigma1(X[i+14]) - add x11,x11,x4 - add x24,x24,x20 // d+=h - add x20,x20,x19 // h+=Maj(a,b,c) - ldr x19,[x30],#8 // *K++, x28 in next round - add x11,x11,x1 - add x20,x20,x17 // h+=Sigma0(a) - add x11,x11,x0 - ldr x0,[sp,#8] - str x3,[sp,#0] - ror x16,x24,#14 - add x27,x27,x19 // h+=K[i] - ror x2,x13,#1 - and x17,x25,x24 - ror x1,x10,#19 - bic x19,x26,x24 - ror x3,x20,#28 - add x27,x27,x11 // h+=X[i] - eor x16,x16,x24,ror#18 - eor x2,x2,x13,ror#8 - orr x17,x17,x19 // Ch(e,f,g) - eor x19,x20,x21 // a^b, b^c in next round - eor x16,x16,x24,ror#41 // Sigma1(e) - eor x3,x3,x20,ror#34 - add x27,x27,x17 // h+=Ch(e,f,g) - and x28,x28,x19 // (b^c)&=(a^b) - eor x1,x1,x10,ror#61 - eor x2,x2,x13,lsr#7 // sigma0(X[i+1]) - add x27,x27,x16 // h+=Sigma1(e) - eor x28,x28,x21 // Maj(a,b,c) - eor x17,x3,x20,ror#39 // Sigma0(a) - eor x1,x1,x10,lsr#6 // sigma1(X[i+14]) - add x12,x12,x5 - add x23,x23,x27 // d+=h - add x27,x27,x28 // h+=Maj(a,b,c) - ldr x28,[x30],#8 // *K++, x19 in next round - add x12,x12,x2 - add x27,x27,x17 // h+=Sigma0(a) - add x12,x12,x1 - ldr x1,[sp,#16] - str x4,[sp,#8] - ror x16,x23,#14 - add x26,x26,x28 // h+=K[i] - ror x3,x14,#1 - and x17,x24,x23 - ror x2,x11,#19 - bic x28,x25,x23 - ror x4,x27,#28 - add x26,x26,x12 // h+=X[i] - eor x16,x16,x23,ror#18 - eor x3,x3,x14,ror#8 - orr x17,x17,x28 // Ch(e,f,g) - eor x28,x27,x20 // a^b, b^c in next round - eor x16,x16,x23,ror#41 // Sigma1(e) - eor x4,x4,x27,ror#34 - add x26,x26,x17 // h+=Ch(e,f,g) - and x19,x19,x28 // (b^c)&=(a^b) - eor x2,x2,x11,ror#61 - eor x3,x3,x14,lsr#7 // sigma0(X[i+1]) - add x26,x26,x16 // h+=Sigma1(e) - eor x19,x19,x20 // Maj(a,b,c) - eor x17,x4,x27,ror#39 // Sigma0(a) - eor x2,x2,x11,lsr#6 // sigma1(X[i+14]) - add x13,x13,x6 - add x22,x22,x26 // d+=h - add x26,x26,x19 // h+=Maj(a,b,c) - ldr x19,[x30],#8 // *K++, x28 in next round - add x13,x13,x3 - add x26,x26,x17 // h+=Sigma0(a) - add x13,x13,x2 - ldr x2,[sp,#24] - str x5,[sp,#16] - ror x16,x22,#14 - add x25,x25,x19 // h+=K[i] - ror x4,x15,#1 - and x17,x23,x22 - ror x3,x12,#19 - bic x19,x24,x22 - ror x5,x26,#28 - add x25,x25,x13 // h+=X[i] - eor x16,x16,x22,ror#18 - eor x4,x4,x15,ror#8 - orr x17,x17,x19 // Ch(e,f,g) - eor x19,x26,x27 // a^b, b^c in next round - eor x16,x16,x22,ror#41 // Sigma1(e) - eor x5,x5,x26,ror#34 - add x25,x25,x17 // h+=Ch(e,f,g) - and x28,x28,x19 // (b^c)&=(a^b) - eor x3,x3,x12,ror#61 - eor x4,x4,x15,lsr#7 // sigma0(X[i+1]) - add x25,x25,x16 // h+=Sigma1(e) - eor x28,x28,x27 // Maj(a,b,c) - eor x17,x5,x26,ror#39 // Sigma0(a) - eor x3,x3,x12,lsr#6 // sigma1(X[i+14]) - add x14,x14,x7 - add x21,x21,x25 // d+=h - add x25,x25,x28 // h+=Maj(a,b,c) - ldr x28,[x30],#8 // *K++, x19 in next round - add x14,x14,x4 - add x25,x25,x17 // h+=Sigma0(a) - add x14,x14,x3 - ldr x3,[sp,#0] - str x6,[sp,#24] - ror x16,x21,#14 - 
add x24,x24,x28 // h+=K[i] - ror x5,x0,#1 - and x17,x22,x21 - ror x4,x13,#19 - bic x28,x23,x21 - ror x6,x25,#28 - add x24,x24,x14 // h+=X[i] - eor x16,x16,x21,ror#18 - eor x5,x5,x0,ror#8 - orr x17,x17,x28 // Ch(e,f,g) - eor x28,x25,x26 // a^b, b^c in next round - eor x16,x16,x21,ror#41 // Sigma1(e) - eor x6,x6,x25,ror#34 - add x24,x24,x17 // h+=Ch(e,f,g) - and x19,x19,x28 // (b^c)&=(a^b) - eor x4,x4,x13,ror#61 - eor x5,x5,x0,lsr#7 // sigma0(X[i+1]) - add x24,x24,x16 // h+=Sigma1(e) - eor x19,x19,x26 // Maj(a,b,c) - eor x17,x6,x25,ror#39 // Sigma0(a) - eor x4,x4,x13,lsr#6 // sigma1(X[i+14]) - add x15,x15,x8 - add x20,x20,x24 // d+=h - add x24,x24,x19 // h+=Maj(a,b,c) - ldr x19,[x30],#8 // *K++, x28 in next round - add x15,x15,x5 - add x24,x24,x17 // h+=Sigma0(a) - add x15,x15,x4 - ldr x4,[sp,#8] - str x7,[sp,#0] - ror x16,x20,#14 - add x23,x23,x19 // h+=K[i] - ror x6,x1,#1 - and x17,x21,x20 - ror x5,x14,#19 - bic x19,x22,x20 - ror x7,x24,#28 - add x23,x23,x15 // h+=X[i] - eor x16,x16,x20,ror#18 - eor x6,x6,x1,ror#8 - orr x17,x17,x19 // Ch(e,f,g) - eor x19,x24,x25 // a^b, b^c in next round - eor x16,x16,x20,ror#41 // Sigma1(e) - eor x7,x7,x24,ror#34 - add x23,x23,x17 // h+=Ch(e,f,g) - and x28,x28,x19 // (b^c)&=(a^b) - eor x5,x5,x14,ror#61 - eor x6,x6,x1,lsr#7 // sigma0(X[i+1]) - add x23,x23,x16 // h+=Sigma1(e) - eor x28,x28,x25 // Maj(a,b,c) - eor x17,x7,x24,ror#39 // Sigma0(a) - eor x5,x5,x14,lsr#6 // sigma1(X[i+14]) - add x0,x0,x9 - add x27,x27,x23 // d+=h - add x23,x23,x28 // h+=Maj(a,b,c) - ldr x28,[x30],#8 // *K++, x19 in next round - add x0,x0,x6 - add x23,x23,x17 // h+=Sigma0(a) - add x0,x0,x5 - ldr x5,[sp,#16] - str x8,[sp,#8] - ror x16,x27,#14 - add x22,x22,x28 // h+=K[i] - ror x7,x2,#1 - and x17,x20,x27 - ror x6,x15,#19 - bic x28,x21,x27 - ror x8,x23,#28 - add x22,x22,x0 // h+=X[i] - eor x16,x16,x27,ror#18 - eor x7,x7,x2,ror#8 - orr x17,x17,x28 // Ch(e,f,g) - eor x28,x23,x24 // a^b, b^c in next round - eor x16,x16,x27,ror#41 // Sigma1(e) - eor x8,x8,x23,ror#34 - add x22,x22,x17 // h+=Ch(e,f,g) - and x19,x19,x28 // (b^c)&=(a^b) - eor x6,x6,x15,ror#61 - eor x7,x7,x2,lsr#7 // sigma0(X[i+1]) - add x22,x22,x16 // h+=Sigma1(e) - eor x19,x19,x24 // Maj(a,b,c) - eor x17,x8,x23,ror#39 // Sigma0(a) - eor x6,x6,x15,lsr#6 // sigma1(X[i+14]) - add x1,x1,x10 - add x26,x26,x22 // d+=h - add x22,x22,x19 // h+=Maj(a,b,c) - ldr x19,[x30],#8 // *K++, x28 in next round - add x1,x1,x7 - add x22,x22,x17 // h+=Sigma0(a) - add x1,x1,x6 - ldr x6,[sp,#24] - str x9,[sp,#16] - ror x16,x26,#14 - add x21,x21,x19 // h+=K[i] - ror x8,x3,#1 - and x17,x27,x26 - ror x7,x0,#19 - bic x19,x20,x26 - ror x9,x22,#28 - add x21,x21,x1 // h+=X[i] - eor x16,x16,x26,ror#18 - eor x8,x8,x3,ror#8 - orr x17,x17,x19 // Ch(e,f,g) - eor x19,x22,x23 // a^b, b^c in next round - eor x16,x16,x26,ror#41 // Sigma1(e) - eor x9,x9,x22,ror#34 - add x21,x21,x17 // h+=Ch(e,f,g) - and x28,x28,x19 // (b^c)&=(a^b) - eor x7,x7,x0,ror#61 - eor x8,x8,x3,lsr#7 // sigma0(X[i+1]) - add x21,x21,x16 // h+=Sigma1(e) - eor x28,x28,x23 // Maj(a,b,c) - eor x17,x9,x22,ror#39 // Sigma0(a) - eor x7,x7,x0,lsr#6 // sigma1(X[i+14]) - add x2,x2,x11 - add x25,x25,x21 // d+=h - add x21,x21,x28 // h+=Maj(a,b,c) - ldr x28,[x30],#8 // *K++, x19 in next round - add x2,x2,x8 - add x21,x21,x17 // h+=Sigma0(a) - add x2,x2,x7 - ldr x7,[sp,#0] - str x10,[sp,#24] - ror x16,x25,#14 - add x20,x20,x28 // h+=K[i] - ror x9,x4,#1 - and x17,x26,x25 - ror x8,x1,#19 - bic x28,x27,x25 - ror x10,x21,#28 - add x20,x20,x2 // h+=X[i] - eor x16,x16,x25,ror#18 - eor x9,x9,x4,ror#8 - orr 
x17,x17,x28 // Ch(e,f,g) - eor x28,x21,x22 // a^b, b^c in next round - eor x16,x16,x25,ror#41 // Sigma1(e) - eor x10,x10,x21,ror#34 - add x20,x20,x17 // h+=Ch(e,f,g) - and x19,x19,x28 // (b^c)&=(a^b) - eor x8,x8,x1,ror#61 - eor x9,x9,x4,lsr#7 // sigma0(X[i+1]) - add x20,x20,x16 // h+=Sigma1(e) - eor x19,x19,x22 // Maj(a,b,c) - eor x17,x10,x21,ror#39 // Sigma0(a) - eor x8,x8,x1,lsr#6 // sigma1(X[i+14]) - add x3,x3,x12 - add x24,x24,x20 // d+=h - add x20,x20,x19 // h+=Maj(a,b,c) - ldr x19,[x30],#8 // *K++, x28 in next round - add x3,x3,x9 - add x20,x20,x17 // h+=Sigma0(a) - add x3,x3,x8 - cbnz x19,.Loop_16_xx - - ldp x0,x2,[x29,#96] - ldr x1,[x29,#112] - sub x30,x30,#648 // rewind - - ldp x3,x4,[x0] - ldp x5,x6,[x0,#2*8] - add x1,x1,#14*8 // advance input pointer - ldp x7,x8,[x0,#4*8] - add x20,x20,x3 - ldp x9,x10,[x0,#6*8] - add x21,x21,x4 - add x22,x22,x5 - add x23,x23,x6 - stp x20,x21,[x0] - add x24,x24,x7 - add x25,x25,x8 - stp x22,x23,[x0,#2*8] - add x26,x26,x9 - add x27,x27,x10 - cmp x1,x2 - stp x24,x25,[x0,#4*8] - stp x26,x27,[x0,#6*8] - b.ne .Loop - - ldp x19,x20,[x29,#16] - add sp,sp,#4*8 - ldp x21,x22,[x29,#32] - ldp x23,x24,[x29,#48] - ldp x25,x26,[x29,#64] - ldp x27,x28,[x29,#80] - ldp x29,x30,[sp],#128 - ret -.size sha512_block_data_order,.-sha512_block_data_order - -.align 6 -.type .LK512,%object -.LK512: - .quad 0x428a2f98d728ae22,0x7137449123ef65cd - .quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc - .quad 0x3956c25bf348b538,0x59f111f1b605d019 - .quad 0x923f82a4af194f9b,0xab1c5ed5da6d8118 - .quad 0xd807aa98a3030242,0x12835b0145706fbe - .quad 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2 - .quad 0x72be5d74f27b896f,0x80deb1fe3b1696b1 - .quad 0x9bdc06a725c71235,0xc19bf174cf692694 - .quad 0xe49b69c19ef14ad2,0xefbe4786384f25e3 - .quad 0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65 - .quad 0x2de92c6f592b0275,0x4a7484aa6ea6e483 - .quad 0x5cb0a9dcbd41fbd4,0x76f988da831153b5 - .quad 0x983e5152ee66dfab,0xa831c66d2db43210 - .quad 0xb00327c898fb213f,0xbf597fc7beef0ee4 - .quad 0xc6e00bf33da88fc2,0xd5a79147930aa725 - .quad 0x06ca6351e003826f,0x142929670a0e6e70 - .quad 0x27b70a8546d22ffc,0x2e1b21385c26c926 - .quad 0x4d2c6dfc5ac42aed,0x53380d139d95b3df - .quad 0x650a73548baf63de,0x766a0abb3c77b2a8 - .quad 0x81c2c92e47edaee6,0x92722c851482353b - .quad 0xa2bfe8a14cf10364,0xa81a664bbc423001 - .quad 0xc24b8b70d0f89791,0xc76c51a30654be30 - .quad 0xd192e819d6ef5218,0xd69906245565a910 - .quad 0xf40e35855771202a,0x106aa07032bbd1b8 - .quad 0x19a4c116b8d2d0c8,0x1e376c085141ab53 - .quad 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8 - .quad 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb - .quad 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3 - .quad 0x748f82ee5defb2fc,0x78a5636f43172f60 - .quad 0x84c87814a1f0ab72,0x8cc702081a6439ec - .quad 0x90befffa23631e28,0xa4506cebde82bde9 - .quad 0xbef9a3f7b2c67915,0xc67178f2e372532b - .quad 0xca273eceea26619c,0xd186b8c721c0c207 - .quad 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178 - .quad 0x06f067aa72176fba,0x0a637dc5a2c898a6 - .quad 0x113f9804bef90dae,0x1b710b35131c471b - .quad 0x28db77f523047d84,0x32caab7b40c72493 - .quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c - .quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a - .quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817 - .quad 0 // terminator -.size .LK512,.-.LK512 -#ifndef __KERNEL__ -.align 3 -.LOPENSSL_armcap_P: -# ifdef __ILP32__ - .long OPENSSL_armcap_P-. -# else - .quad OPENSSL_armcap_P-. 
-# endif -#endif -.asciz "SHA512 block transform for ARMv8, CRYPTOGAMS by " -.align 2 -#ifndef __KERNEL__ -.comm OPENSSL_armcap_P,4,4 -#endif From 2063257d4b2472e8f113527c642e467914bb82d3 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Mon, 26 Apr 2021 02:57:34 +0900 Subject: [PATCH 004/142] crypto: arm64 - use a pattern rule for generating *.S files Unify similar build rules. sha256-core.S is opted out because it is generated from sha512-armv8.pl. Signed-off-by: Masahiro Yamada Signed-off-by: Herbert Xu --- arch/arm64/crypto/Makefile | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/arch/arm64/crypto/Makefile b/arch/arm64/crypto/Makefile index 592e52a08c623..09a805cc32d7c 100644 --- a/arch/arm64/crypto/Makefile +++ b/arch/arm64/crypto/Makefile @@ -71,13 +71,10 @@ $(obj)/aes-glue-%.o: $(src)/aes-glue.c FORCE quiet_cmd_perlasm = PERLASM $@ cmd_perlasm = $(PERL) $(<) void $(@) -$(obj)/poly1305-core.S: $(src)/poly1305-armv8.pl +$(obj)/%-core.S: $(src)/%-armv8.pl $(call cmd,perlasm) $(obj)/sha256-core.S: $(src)/sha512-armv8.pl $(call cmd,perlasm) -$(obj)/sha512-core.S: $(src)/sha512-armv8.pl - $(call cmd,perlasm) - clean-files += poly1305-core.S sha256-core.S sha512-core.S From c8671c7dc7d51125ab9f651697866bf4a9132277 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 26 Apr 2021 10:17:48 +0200 Subject: [PATCH 005/142] crypto: ccp - Annotate SEV Firmware file names Annotate the firmware files CCP might need using MODULE_FIRMWARE(). This will get them included into an initrd when CCP is also included there. Otherwise the CCP module will not find its firmware when loaded before the root-fs is mounted. This can cause problems when the pre-loaded SEV firmware is too old to support current SEV and SEV-ES virtualization features. Fixes: e93720606efd ("crypto: ccp - Allow SEV firmware to be chosen based on Family and Model") Cc: stable@vger.kernel.org # v4.20+ Acked-by: Tom Lendacky Signed-off-by: Joerg Roedel Signed-off-by: Herbert Xu --- drivers/crypto/ccp/sev-dev.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/crypto/ccp/sev-dev.c b/drivers/crypto/ccp/sev-dev.c index 3506b2050fb86..91808402e0bf2 100644 --- a/drivers/crypto/ccp/sev-dev.c +++ b/drivers/crypto/ccp/sev-dev.c @@ -43,6 +43,10 @@ static int psp_probe_timeout = 5; module_param(psp_probe_timeout, int, 0644); MODULE_PARM_DESC(psp_probe_timeout, " default timeout value, in seconds, during PSP device probe"); +MODULE_FIRMWARE("amd/amd_sev_fam17h_model0xh.sbin"); /* 1st gen EPYC */ +MODULE_FIRMWARE("amd/amd_sev_fam17h_model3xh.sbin"); /* 2nd gen EPYC */ +MODULE_FIRMWARE("amd/amd_sev_fam19h_model0xh.sbin"); /* 3rd gen EPYC */ + static bool psp_dead; static int psp_timeout; From fa8edbb630ae9ef99d4ab570a16f01c3c39d9a86 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Wed, 28 Apr 2021 09:33:37 +0200 Subject: [PATCH 006/142] crypto: cpt - Use 'hlist_for_each_entry' to simplify code Use 'hlist_for_each_entry' instead of hand-writing the iteration. This saves a few lines of code.
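For illustration, the conversion looks roughly like this (a simplified sketch using the driver's names, not the verbatim hunks below):

	/* Before: walk the hlist by hand and recover each entry. */
	struct hlist_node *node;
	struct command_chunk *chunk;

	hlist_for_each(node, &queue->chead) {
		chunk = hlist_entry(node, struct command_chunk, nextchunk);
		/* use chunk */
	}

	/* After: the iterator derives the containing struct itself. */
	hlist_for_each_entry(chunk, &queue->chead, nextchunk) {
		/* use chunk */
	}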
Signed-off-by: Christophe JAILLET Signed-off-by: Herbert Xu --- drivers/crypto/cavium/cpt/cptvf_reqmanager.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/crypto/cavium/cpt/cptvf_reqmanager.c b/drivers/crypto/cavium/cpt/cptvf_reqmanager.c index 4fe7898c85615..feb0f76783dda 100644 --- a/drivers/crypto/cavium/cpt/cptvf_reqmanager.c +++ b/drivers/crypto/cavium/cpt/cptvf_reqmanager.c @@ -244,11 +244,7 @@ static int send_cpt_command(struct cpt_vf *cptvf, union cpt_inst_s *cmd, memcpy(ent, (void *)cmd, qinfo->cmd_size); if (++queue->idx >= queue->qhead->size / 64) { - struct hlist_node *node; - - hlist_for_each(node, &queue->chead) { - chunk = hlist_entry(node, struct command_chunk, - nextchunk); + hlist_for_each_entry(chunk, &queue->chead, nextchunk) { if (chunk == queue->qhead) { continue; } else { From b7c3635e56d6561436af59b9876faa7cc8389644 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 29 Apr 2021 12:32:53 +0100 Subject: [PATCH 007/142] hwrng: amd - remove redundant initialization of variable err The variable err is being initialized with a value that is never read and it is being updated later with a new value. The initialization is redundant and can be removed. Addresses-Coverity: ("Unused value") Signed-off-by: Colin Ian King Signed-off-by: Herbert Xu --- drivers/char/hw_random/amd-rng.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/char/hw_random/amd-rng.c b/drivers/char/hw_random/amd-rng.c index 9959c762da2f8..d8d4ef5214a19 100644 --- a/drivers/char/hw_random/amd-rng.c +++ b/drivers/char/hw_random/amd-rng.c @@ -126,7 +126,7 @@ static struct hwrng amd_rng = { static int __init mod_init(void) { - int err = -ENODEV; + int err; struct pci_dev *pdev = NULL; const struct pci_device_id *ent; u32 pmbase; From a9ca8eacb3204208863b9175baae8ac7ee6b2a64 Mon Sep 17 00:00:00 2001 From: Thara Gopinath Date: Thu, 29 Apr 2021 11:07:01 -0400 Subject: [PATCH 008/142] crypto: qce - Add MAC failed error checking MAC_FAILED gets set in the status register if authentication fails for ccm algorithms (during decryption). Add support to catch and flag this error. Reviewed-by: Bjorn Andersson Signed-off-by: Thara Gopinath Signed-off-by: Herbert Xu --- drivers/crypto/qce/common.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/crypto/qce/common.c b/drivers/crypto/qce/common.c index dceb9579d87a2..dd76175d5c628 100644 --- a/drivers/crypto/qce/common.c +++ b/drivers/crypto/qce/common.c @@ -419,6 +419,8 @@ int qce_check_status(struct qce_device *qce, u32 *status) */ if (*status & STATUS_ERRORS || !(*status & BIT(OPERATION_DONE_SHIFT))) ret = -ENXIO; + else if (*status & BIT(MAC_FAILED_SHIFT)) + ret = -EBADMSG; return ret; } From 6c34e446b2e2b7d26e83c4c391e89d7cf6824093 Mon Sep 17 00:00:00 2001 From: Thara Gopinath Date: Thu, 29 Apr 2021 11:07:02 -0400 Subject: [PATCH 009/142] crypto: qce - Make result dump optional The Qualcomm crypto engine allows the IV registers and the status register to be concatenated to the output. This option is enabled by setting the RESULTS_DUMP field in the GOPROC register. This is useful for most algorithms, either to retrieve the status of the operation or, in the case of authentication algorithms, to retrieve the MAC. But for ccm algorithms, the MAC is part of the output stream and is not retrieved from the IV registers, so a separate buffer is needed to retrieve it. Make enabling the RESULTS_DUMP field optional so that algorithms can choose whether or not to enable the option.
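As an illustration of the intended use: a transformation that wants the dump keeps passing true, while one that must not enable it passes false. The AEAD support added later in this series does exactly that:

	/* CCM returns the MAC in the output stream, so skip the result dump */
	qce_crypto_go(qce, !IS_CCM(flags));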
Note that in this patch, the enabled algorithms always choose RESULTS_DUMP to be enabled. But later, with the introduction of ccm algorithms, this changes. Reviewed-by: Bjorn Andersson Signed-off-by: Thara Gopinath Signed-off-by: Herbert Xu --- drivers/crypto/qce/common.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/crypto/qce/common.c b/drivers/crypto/qce/common.c index dd76175d5c628..7b5bc5a6ae81c 100644 --- a/drivers/crypto/qce/common.c +++ b/drivers/crypto/qce/common.c @@ -88,9 +88,12 @@ static void qce_setup_config(struct qce_device *qce) qce_write(qce, REG_CONFIG, config); } -static inline void qce_crypto_go(struct qce_device *qce) +static inline void qce_crypto_go(struct qce_device *qce, bool result_dump) { - qce_write(qce, REG_GOPROC, BIT(GO_SHIFT) | BIT(RESULTS_DUMP_SHIFT)); + if (result_dump) + qce_write(qce, REG_GOPROC, BIT(GO_SHIFT) | BIT(RESULTS_DUMP_SHIFT)); + else + qce_write(qce, REG_GOPROC, BIT(GO_SHIFT)); } #ifdef CONFIG_CRYPTO_DEV_QCE_SHA @@ -219,7 +222,7 @@ static int qce_setup_regs_ahash(struct crypto_async_request *async_req) config = qce_config_reg(qce, 1); qce_write(qce, REG_CONFIG, config); - qce_crypto_go(qce); + qce_crypto_go(qce, true); return 0; } @@ -380,7 +383,7 @@ static int qce_setup_regs_skcipher(struct crypto_async_request *async_req) config = qce_config_reg(qce, 1); qce_write(qce, REG_CONFIG, config); - qce_crypto_go(qce); + qce_crypto_go(qce, true); return 0; } From 7ba9cd4e22a0c177a222669fc58ab300903b63e8 Mon Sep 17 00:00:00 2001 From: Thara Gopinath Date: Thu, 29 Apr 2021 11:07:03 -0400 Subject: [PATCH 010/142] crypto: qce - Add mode for rfc4309 RFC 4309 is the specification that covers the use of aes ccm algorithms with IPsec security packets. Add a submode to identify the rfc4309 ccm(aes) algorithm in the crypto driver. Reviewed-by: Bjorn Andersson Signed-off-by: Thara Gopinath Signed-off-by: Herbert Xu --- drivers/crypto/qce/common.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/qce/common.h b/drivers/crypto/qce/common.h index 3bc244bcca2d9..b135440bf72b5 100644 --- a/drivers/crypto/qce/common.h +++ b/drivers/crypto/qce/common.h @@ -51,9 +51,11 @@ #define QCE_MODE_CCM BIT(12) #define QCE_MODE_MASK GENMASK(12, 8) +#define QCE_MODE_CCM_RFC4309 BIT(13) + /* cipher encryption/decryption operations */ -#define QCE_ENCRYPT BIT(13) -#define QCE_DECRYPT BIT(14) +#define QCE_ENCRYPT BIT(30) +#define QCE_DECRYPT BIT(31) #define IS_DES(flags) (flags & QCE_ALG_DES) #define IS_3DES(flags) (flags & QCE_ALG_3DES) @@ -73,6 +75,7 @@ #define IS_CTR(mode) (mode & QCE_MODE_CTR) #define IS_XTS(mode) (mode & QCE_MODE_XTS) #define IS_CCM(mode) (mode & QCE_MODE_CCM) +#define IS_CCM_RFC4309(mode) ((mode) & QCE_MODE_CCM_RFC4309) #define IS_ENCRYPT(dir) (dir & QCE_ENCRYPT) #define IS_DECRYPT(dir) (dir & QCE_DECRYPT) From 9363efb4181c5e0fbf86bdfa759262aa29f0eb50 Mon Sep 17 00:00:00 2001 From: Thara Gopinath Date: Thu, 29 Apr 2021 11:07:04 -0400 Subject: [PATCH 011/142] crypto: qce - Add support for AEAD algorithms Introduce support to enable the following algorithms in the Qualcomm Crypto Engine.
- authenc(hmac(sha1),cbc(des)) - authenc(hmac(sha1),cbc(des3_ede)) - authenc(hmac(sha256),cbc(des)) - authenc(hmac(sha256),cbc(des3_ede)) - authenc(hmac(sha256),cbc(aes)) - ccm(aes) - rfc4309(ccm(aes)) Signed-off-by: Thara Gopinath Signed-off-by: Herbert Xu --- drivers/crypto/Kconfig | 15 + drivers/crypto/qce/Makefile | 1 + drivers/crypto/qce/aead.c | 799 ++++++++++++++++++++++++++++++++++++ drivers/crypto/qce/aead.h | 53 +++ drivers/crypto/qce/common.h | 2 + drivers/crypto/qce/core.c | 4 + 6 files changed, 874 insertions(+) create mode 100644 drivers/crypto/qce/aead.c create mode 100644 drivers/crypto/qce/aead.h diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig index 9a4c275a13350..1fe5b7eafc02c 100644 --- a/drivers/crypto/Kconfig +++ b/drivers/crypto/Kconfig @@ -627,6 +627,12 @@ config CRYPTO_DEV_QCE_SHA select CRYPTO_SHA1 select CRYPTO_SHA256 +config CRYPTO_DEV_QCE_AEAD + bool + depends on CRYPTO_DEV_QCE + select CRYPTO_AUTHENC + select CRYPTO_LIB_DES + choice prompt "Algorithms enabled for QCE acceleration" default CRYPTO_DEV_QCE_ENABLE_ALL @@ -647,6 +653,7 @@ choice bool "All supported algorithms" select CRYPTO_DEV_QCE_SKCIPHER select CRYPTO_DEV_QCE_SHA + select CRYPTO_DEV_QCE_AEAD help Enable all supported algorithms: - AES (CBC, CTR, ECB, XTS) @@ -672,6 +679,14 @@ choice - SHA1, HMAC-SHA1 - SHA256, HMAC-SHA256 + config CRYPTO_DEV_QCE_ENABLE_AEAD + bool "AEAD algorithms only" + select CRYPTO_DEV_QCE_AEAD + help + Enable AEAD algorithms only: + - authenc() + - ccm(aes) + - rfc4309(ccm(aes)) endchoice config CRYPTO_DEV_QCE_SW_MAX_LEN diff --git a/drivers/crypto/qce/Makefile b/drivers/crypto/qce/Makefile index 14ade8a7d6644..2cf8984e1b851 100644 --- a/drivers/crypto/qce/Makefile +++ b/drivers/crypto/qce/Makefile @@ -6,3 +6,4 @@ qcrypto-objs := core.o \ qcrypto-$(CONFIG_CRYPTO_DEV_QCE_SHA) += sha.o qcrypto-$(CONFIG_CRYPTO_DEV_QCE_SKCIPHER) += skcipher.o +qcrypto-$(CONFIG_CRYPTO_DEV_QCE_AEAD) += aead.o diff --git a/drivers/crypto/qce/aead.c b/drivers/crypto/qce/aead.c new file mode 100644 index 0000000000000..ef66ae21eae36 --- /dev/null +++ b/drivers/crypto/qce/aead.c @@ -0,0 +1,799 @@ +// SPDX-License-Identifier: GPL-2.0-only + +/* + * Copyright (C) 2021, Linaro Limited. All rights reserved. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "aead.h" + +#define CCM_NONCE_ADATA_SHIFT 6 +#define CCM_NONCE_AUTHSIZE_SHIFT 3 +#define MAX_CCM_ADATA_HEADER_LEN 6 + +static LIST_HEAD(aead_algs); + +static void qce_aead_done(void *data) +{ + struct crypto_async_request *async_req = data; + struct aead_request *req = aead_request_cast(async_req); + struct qce_aead_reqctx *rctx = aead_request_ctx(req); + struct qce_aead_ctx *ctx = crypto_tfm_ctx(async_req->tfm); + struct qce_alg_template *tmpl = to_aead_tmpl(crypto_aead_reqtfm(req)); + struct qce_device *qce = tmpl->qce; + struct qce_result_dump *result_buf = qce->dma.result_buf; + enum dma_data_direction dir_src, dir_dst; + bool diff_dst; + int error; + u32 status; + unsigned int totallen; + unsigned char tag[SHA256_DIGEST_SIZE] = {0}; + int ret = 0; + + diff_dst = (req->src != req->dst) ? true : false; + dir_src = diff_dst ? DMA_TO_DEVICE : DMA_BIDIRECTIONAL; + dir_dst = diff_dst ? 
DMA_FROM_DEVICE : DMA_BIDIRECTIONAL; + + error = qce_dma_terminate_all(&qce->dma); + if (error) + dev_dbg(qce->dev, "aead dma termination error (%d)\n", + error); + if (diff_dst) + dma_unmap_sg(qce->dev, rctx->src_sg, rctx->src_nents, dir_src); + + dma_unmap_sg(qce->dev, rctx->dst_sg, rctx->dst_nents, dir_dst); + + if (IS_CCM(rctx->flags)) { + if (req->assoclen) { + sg_free_table(&rctx->src_tbl); + if (diff_dst) + sg_free_table(&rctx->dst_tbl); + } else { + if (!(IS_DECRYPT(rctx->flags) && !diff_dst)) + sg_free_table(&rctx->dst_tbl); + } + } else { + sg_free_table(&rctx->dst_tbl); + } + + error = qce_check_status(qce, &status); + if (error < 0 && (error != -EBADMSG)) + dev_err(qce->dev, "aead operation error (%x)\n", status); + + if (IS_ENCRYPT(rctx->flags)) { + totallen = req->cryptlen + req->assoclen; + if (IS_CCM(rctx->flags)) + scatterwalk_map_and_copy(rctx->ccmresult_buf, req->dst, + totallen, ctx->authsize, 1); + else + scatterwalk_map_and_copy(result_buf->auth_iv, req->dst, + totallen, ctx->authsize, 1); + + } else if (!IS_CCM(rctx->flags)) { + totallen = req->cryptlen + req->assoclen - ctx->authsize; + scatterwalk_map_and_copy(tag, req->src, totallen, ctx->authsize, 0); + ret = memcmp(result_buf->auth_iv, tag, ctx->authsize); + if (ret) { + pr_err("Bad message error\n"); + error = -EBADMSG; + } + } + + qce->async_req_done(qce, error); +} + +static struct scatterlist * +qce_aead_prepare_result_buf(struct sg_table *tbl, struct aead_request *req) +{ + struct qce_aead_reqctx *rctx = aead_request_ctx(req); + struct qce_alg_template *tmpl = to_aead_tmpl(crypto_aead_reqtfm(req)); + struct qce_device *qce = tmpl->qce; + + sg_init_one(&rctx->result_sg, qce->dma.result_buf, QCE_RESULT_BUF_SZ); + return qce_sgtable_add(tbl, &rctx->result_sg, QCE_RESULT_BUF_SZ); +} + +static struct scatterlist * +qce_aead_prepare_ccm_result_buf(struct sg_table *tbl, struct aead_request *req) +{ + struct qce_aead_reqctx *rctx = aead_request_ctx(req); + + sg_init_one(&rctx->result_sg, rctx->ccmresult_buf, QCE_BAM_BURST_SIZE); + return qce_sgtable_add(tbl, &rctx->result_sg, QCE_BAM_BURST_SIZE); +} + +static struct scatterlist * +qce_aead_prepare_dst_buf(struct aead_request *req) +{ + struct qce_aead_reqctx *rctx = aead_request_ctx(req); + struct qce_alg_template *tmpl = to_aead_tmpl(crypto_aead_reqtfm(req)); + struct qce_device *qce = tmpl->qce; + struct scatterlist *sg, *msg_sg, __sg[2]; + gfp_t gfp; + unsigned int assoclen = req->assoclen; + unsigned int totallen; + int ret; + + totallen = rctx->cryptlen + assoclen; + rctx->dst_nents = sg_nents_for_len(req->dst, totallen); + if (rctx->dst_nents < 0) { + dev_err(qce->dev, "Invalid numbers of dst SG.\n"); + return ERR_PTR(-EINVAL); + } + if (IS_CCM(rctx->flags)) + rctx->dst_nents += 2; + else + rctx->dst_nents += 1; + + gfp = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ? 
+ GFP_KERNEL : GFP_ATOMIC; + ret = sg_alloc_table(&rctx->dst_tbl, rctx->dst_nents, gfp); + if (ret) + return ERR_PTR(ret); + + if (IS_CCM(rctx->flags) && assoclen) { + /* Get the dst buffer */ + msg_sg = scatterwalk_ffwd(__sg, req->dst, assoclen); + + sg = qce_sgtable_add(&rctx->dst_tbl, &rctx->adata_sg, + rctx->assoclen); + if (IS_ERR(sg)) { + ret = PTR_ERR(sg); + goto dst_tbl_free; + } + /* dst buffer */ + sg = qce_sgtable_add(&rctx->dst_tbl, msg_sg, rctx->cryptlen); + if (IS_ERR(sg)) { + ret = PTR_ERR(sg); + goto dst_tbl_free; + } + totallen = rctx->cryptlen + rctx->assoclen; + } else { + if (totallen) { + sg = qce_sgtable_add(&rctx->dst_tbl, req->dst, totallen); + if (IS_ERR(sg)) + goto dst_tbl_free; + } + } + if (IS_CCM(rctx->flags)) + sg = qce_aead_prepare_ccm_result_buf(&rctx->dst_tbl, req); + else + sg = qce_aead_prepare_result_buf(&rctx->dst_tbl, req); + + if (IS_ERR(sg)) + goto dst_tbl_free; + + sg_mark_end(sg); + rctx->dst_sg = rctx->dst_tbl.sgl; + rctx->dst_nents = sg_nents_for_len(rctx->dst_sg, totallen) + 1; + + return sg; + +dst_tbl_free: + sg_free_table(&rctx->dst_tbl); + return sg; +} + +static int +qce_aead_ccm_prepare_buf_assoclen(struct aead_request *req) +{ + struct scatterlist *sg, *msg_sg, __sg[2]; + struct crypto_aead *tfm = crypto_aead_reqtfm(req); + struct qce_aead_reqctx *rctx = aead_request_ctx(req); + struct qce_aead_ctx *ctx = crypto_aead_ctx(tfm); + unsigned int assoclen = rctx->assoclen; + unsigned int adata_header_len, cryptlen, totallen; + gfp_t gfp; + bool diff_dst; + int ret; + + if (IS_DECRYPT(rctx->flags)) + cryptlen = rctx->cryptlen + ctx->authsize; + else + cryptlen = rctx->cryptlen; + totallen = cryptlen + req->assoclen; + + /* Get the msg */ + msg_sg = scatterwalk_ffwd(__sg, req->src, req->assoclen); + + rctx->adata = kzalloc((ALIGN(assoclen, 16) + MAX_CCM_ADATA_HEADER_LEN) * + sizeof(unsigned char), GFP_ATOMIC); + if (!rctx->adata) + return -ENOMEM; + + /* + * Format associated data (RFC3610 and NIST 800-38C) + * Even though specification allows for AAD to be up to 2^64 - 1 bytes, + * the assoclen field in aead_request is unsigned int and thus limits + * the AAD to be up to 2^32 - 1 bytes. So we handle only two scenarios + * while forming the header for AAD. + */ + if (assoclen < 0xff00) { + adata_header_len = 2; + *(__be16 *)rctx->adata = cpu_to_be16(assoclen); + } else { + adata_header_len = 6; + *(__be16 *)rctx->adata = cpu_to_be16(0xfffe); + *(__be32 *)(rctx->adata + 2) = cpu_to_be32(assoclen); + } + + /* Copy the associated data */ + if (sg_copy_to_buffer(req->src, sg_nents_for_len(req->src, assoclen), + rctx->adata + adata_header_len, + assoclen) != assoclen) + return -EINVAL; + + /* Pad associated data to block size */ + rctx->assoclen = ALIGN(assoclen + adata_header_len, 16); + + diff_dst = (req->src != req->dst) ? true : false; + + if (diff_dst) + rctx->src_nents = sg_nents_for_len(req->src, totallen) + 1; + else + rctx->src_nents = sg_nents_for_len(req->src, totallen) + 2; + + gfp = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ? 
GFP_KERNEL : GFP_ATOMIC; + ret = sg_alloc_table(&rctx->src_tbl, rctx->src_nents, gfp); + if (ret) + return ret; + + /* Associated Data */ + sg_init_one(&rctx->adata_sg, rctx->adata, rctx->assoclen); + sg = qce_sgtable_add(&rctx->src_tbl, &rctx->adata_sg, + rctx->assoclen); + if (IS_ERR(sg)) { + ret = PTR_ERR(sg); + goto err_free; + } + /* src msg */ + sg = qce_sgtable_add(&rctx->src_tbl, msg_sg, cryptlen); + if (IS_ERR(sg)) { + ret = PTR_ERR(sg); + goto err_free; + } + if (!diff_dst) { + /* + * For decrypt, when src and dst buffers are same, there is already space + * in the buffer for padded 0's which is output in lieu of + * the MAC that is input. So skip the below. + */ + if (!IS_DECRYPT(rctx->flags)) { + sg = qce_aead_prepare_ccm_result_buf(&rctx->src_tbl, req); + if (IS_ERR(sg)) { + ret = PTR_ERR(sg); + goto err_free; + } + } + } + sg_mark_end(sg); + rctx->src_sg = rctx->src_tbl.sgl; + totallen = cryptlen + rctx->assoclen; + rctx->src_nents = sg_nents_for_len(rctx->src_sg, totallen); + + if (diff_dst) { + sg = qce_aead_prepare_dst_buf(req); + if (IS_ERR(sg)) + goto err_free; + } else { + if (IS_ENCRYPT(rctx->flags)) + rctx->dst_nents = rctx->src_nents + 1; + else + rctx->dst_nents = rctx->src_nents; + rctx->dst_sg = rctx->src_sg; + } + + return 0; +err_free: + sg_free_table(&rctx->src_tbl); + return ret; +} + +static int qce_aead_prepare_buf(struct aead_request *req) +{ + struct qce_aead_reqctx *rctx = aead_request_ctx(req); + struct qce_alg_template *tmpl = to_aead_tmpl(crypto_aead_reqtfm(req)); + struct qce_device *qce = tmpl->qce; + struct scatterlist *sg; + bool diff_dst = (req->src != req->dst) ? true : false; + unsigned int totallen; + + totallen = rctx->cryptlen + rctx->assoclen; + + sg = qce_aead_prepare_dst_buf(req); + if (IS_ERR(sg)) + return PTR_ERR(sg); + if (diff_dst) { + rctx->src_nents = sg_nents_for_len(req->src, totallen); + if (rctx->src_nents < 0) { + dev_err(qce->dev, "Invalid numbers of src SG.\n"); + return -EINVAL; + } + rctx->src_sg = req->src; + } else { + rctx->src_nents = rctx->dst_nents - 1; + rctx->src_sg = rctx->dst_sg; + } + return 0; +} + +static int qce_aead_ccm_prepare_buf(struct aead_request *req) +{ + struct qce_aead_reqctx *rctx = aead_request_ctx(req); + struct crypto_aead *tfm = crypto_aead_reqtfm(req); + struct qce_aead_ctx *ctx = crypto_aead_ctx(tfm); + struct scatterlist *sg; + bool diff_dst = (req->src != req->dst) ? true : false; + unsigned int cryptlen; + + if (rctx->assoclen) + return qce_aead_ccm_prepare_buf_assoclen(req); + + if (IS_ENCRYPT(rctx->flags)) + return qce_aead_prepare_buf(req); + + cryptlen = rctx->cryptlen + ctx->authsize; + if (diff_dst) { + rctx->src_nents = sg_nents_for_len(req->src, cryptlen); + rctx->src_sg = req->src; + sg = qce_aead_prepare_dst_buf(req); + if (IS_ERR(sg)) + return PTR_ERR(sg); + } else { + rctx->src_nents = sg_nents_for_len(req->src, cryptlen); + rctx->src_sg = req->src; + rctx->dst_nents = rctx->src_nents; + rctx->dst_sg = rctx->src_sg; + } + + return 0; +} + +static int qce_aead_create_ccm_nonce(struct qce_aead_reqctx *rctx, struct qce_aead_ctx *ctx) +{ + unsigned int msglen_size, ivsize; + u8 msg_len[4]; + int i; + + if (!rctx || !rctx->iv) + return -EINVAL; + + msglen_size = rctx->iv[0] + 1; + + /* Verify that msg len size is valid */ + if (msglen_size < 2 || msglen_size > 8) + return -EINVAL; + + ivsize = rctx->ivsize; + + /* + * Clear the msglen bytes in IV. + * Else the h/w engine and nonce will use any stray value pending there. 
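+ * (msglen_size above is iv[0] + 1 octets: the first IV byte encodes the
+ * CCM length-field size L minus one, hence the 2..8 validity check.)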
+ */ + if (!IS_CCM_RFC4309(rctx->flags)) { + for (i = 0; i < msglen_size; i++) + rctx->iv[ivsize - i - 1] = 0; + } + + /* + * The crypto framework encodes cryptlen as unsigned int. Thus, even though + * spec allows for upto 8 bytes to encode msg_len only 4 bytes are needed. + */ + if (msglen_size > 4) + msglen_size = 4; + + memcpy(&msg_len[0], &rctx->cryptlen, 4); + + memcpy(&rctx->ccm_nonce[0], rctx->iv, rctx->ivsize); + if (rctx->assoclen) + rctx->ccm_nonce[0] |= 1 << CCM_NONCE_ADATA_SHIFT; + rctx->ccm_nonce[0] |= ((ctx->authsize - 2) / 2) << + CCM_NONCE_AUTHSIZE_SHIFT; + for (i = 0; i < msglen_size; i++) + rctx->ccm_nonce[QCE_MAX_NONCE - i - 1] = msg_len[i]; + + return 0; +} + +static int +qce_aead_async_req_handle(struct crypto_async_request *async_req) +{ + struct aead_request *req = aead_request_cast(async_req); + struct qce_aead_reqctx *rctx = aead_request_ctx(req); + struct crypto_aead *tfm = crypto_aead_reqtfm(req); + struct qce_aead_ctx *ctx = crypto_tfm_ctx(async_req->tfm); + struct qce_alg_template *tmpl = to_aead_tmpl(crypto_aead_reqtfm(req)); + struct qce_device *qce = tmpl->qce; + enum dma_data_direction dir_src, dir_dst; + bool diff_dst; + int dst_nents, src_nents, ret; + + if (IS_CCM_RFC4309(rctx->flags)) { + memset(rctx->ccm_rfc4309_iv, 0, QCE_MAX_IV_SIZE); + rctx->ccm_rfc4309_iv[0] = 3; + memcpy(&rctx->ccm_rfc4309_iv[1], ctx->ccm4309_salt, QCE_CCM4309_SALT_SIZE); + memcpy(&rctx->ccm_rfc4309_iv[4], req->iv, 8); + rctx->iv = rctx->ccm_rfc4309_iv; + rctx->ivsize = AES_BLOCK_SIZE; + } else { + rctx->iv = req->iv; + rctx->ivsize = crypto_aead_ivsize(tfm); + } + if (IS_CCM_RFC4309(rctx->flags)) + rctx->assoclen = req->assoclen - 8; + else + rctx->assoclen = req->assoclen; + + diff_dst = (req->src != req->dst) ? true : false; + dir_src = diff_dst ? DMA_TO_DEVICE : DMA_BIDIRECTIONAL; + dir_dst = diff_dst ? 
DMA_FROM_DEVICE : DMA_BIDIRECTIONAL; + + if (IS_CCM(rctx->flags)) { + ret = qce_aead_create_ccm_nonce(rctx, ctx); + if (ret) + return ret; + } + if (IS_CCM(rctx->flags)) + ret = qce_aead_ccm_prepare_buf(req); + else + ret = qce_aead_prepare_buf(req); + + if (ret) + return ret; + dst_nents = dma_map_sg(qce->dev, rctx->dst_sg, rctx->dst_nents, dir_dst); + if (dst_nents < 0) + goto error_free; + + if (diff_dst) { + src_nents = dma_map_sg(qce->dev, rctx->src_sg, rctx->src_nents, dir_src); + if (src_nents < 0) + goto error_unmap_dst; + } else { + if (IS_CCM(rctx->flags) && IS_DECRYPT(rctx->flags)) + src_nents = dst_nents; + else + src_nents = dst_nents - 1; + } + + ret = qce_dma_prep_sgs(&qce->dma, rctx->src_sg, src_nents, rctx->dst_sg, dst_nents, + qce_aead_done, async_req); + if (ret) + goto error_unmap_src; + + qce_dma_issue_pending(&qce->dma); + + ret = qce_start(async_req, tmpl->crypto_alg_type); + if (ret) + goto error_terminate; + + return 0; + +error_terminate: + qce_dma_terminate_all(&qce->dma); +error_unmap_src: + if (diff_dst) + dma_unmap_sg(qce->dev, req->src, rctx->src_nents, dir_src); +error_unmap_dst: + dma_unmap_sg(qce->dev, rctx->dst_sg, rctx->dst_nents, dir_dst); +error_free: + if (IS_CCM(rctx->flags) && rctx->assoclen) { + sg_free_table(&rctx->src_tbl); + if (diff_dst) + sg_free_table(&rctx->dst_tbl); + } else { + sg_free_table(&rctx->dst_tbl); + } + return ret; +} + +static int qce_aead_crypt(struct aead_request *req, int encrypt) +{ + struct crypto_aead *tfm = crypto_aead_reqtfm(req); + struct qce_aead_reqctx *rctx = aead_request_ctx(req); + struct qce_aead_ctx *ctx = crypto_aead_ctx(tfm); + struct qce_alg_template *tmpl = to_aead_tmpl(tfm); + unsigned int blocksize = crypto_aead_blocksize(tfm); + + rctx->flags = tmpl->alg_flags; + rctx->flags |= encrypt ? QCE_ENCRYPT : QCE_DECRYPT; + + if (encrypt) + rctx->cryptlen = req->cryptlen; + else + rctx->cryptlen = req->cryptlen - ctx->authsize; + + /* CE does not handle 0 length messages */ + if (!rctx->cryptlen) { + if (!(IS_CCM(rctx->flags) && IS_DECRYPT(rctx->flags))) + return -EINVAL; + } + + /* + * CBC algorithms require message lengths to be + * multiples of block size. 
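+ * (For example, a 100-byte request on aes-cbc with its 16-byte block
+ * size is rejected here, since 100 is not a multiple of 16.)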
+ */ + if (IS_CBC(rctx->flags) && !IS_ALIGNED(rctx->cryptlen, blocksize)) + return -EINVAL; + + /* RFC4309 supported AAD size 16 bytes/20 bytes */ + if (IS_CCM_RFC4309(rctx->flags)) + if (crypto_ipsec_check_assoclen(req->assoclen)) + return -EINVAL; + + return tmpl->qce->async_req_enqueue(tmpl->qce, &req->base); +} + +static int qce_aead_encrypt(struct aead_request *req) +{ + return qce_aead_crypt(req, 1); +} + +static int qce_aead_decrypt(struct aead_request *req) +{ + return qce_aead_crypt(req, 0); +} + +static int qce_aead_ccm_setkey(struct crypto_aead *tfm, const u8 *key, + unsigned int keylen) +{ + struct qce_aead_ctx *ctx = crypto_aead_ctx(tfm); + unsigned long flags = to_aead_tmpl(tfm)->alg_flags; + + if (IS_CCM_RFC4309(flags)) { + if (keylen < QCE_CCM4309_SALT_SIZE) + return -EINVAL; + keylen -= QCE_CCM4309_SALT_SIZE; + memcpy(ctx->ccm4309_salt, key + keylen, QCE_CCM4309_SALT_SIZE); + } + + if (keylen != AES_KEYSIZE_128 && keylen != AES_KEYSIZE_256) + return -EINVAL; + + ctx->enc_keylen = keylen; + ctx->auth_keylen = keylen; + + memcpy(ctx->enc_key, key, keylen); + memcpy(ctx->auth_key, key, keylen); + + return 0; +} + +static int qce_aead_setkey(struct crypto_aead *tfm, const u8 *key, unsigned int keylen) +{ + struct qce_aead_ctx *ctx = crypto_aead_ctx(tfm); + struct crypto_authenc_keys authenc_keys; + unsigned long flags = to_aead_tmpl(tfm)->alg_flags; + u32 _key[6]; + int err; + + err = crypto_authenc_extractkeys(&authenc_keys, key, keylen); + if (err) + return err; + + if (authenc_keys.enckeylen > QCE_MAX_KEY_SIZE || + authenc_keys.authkeylen > QCE_MAX_KEY_SIZE) + return -EINVAL; + + if (IS_DES(flags)) { + err = verify_aead_des_key(tfm, authenc_keys.enckey, authenc_keys.enckeylen); + if (err) + return err; + } else if (IS_3DES(flags)) { + err = verify_aead_des3_key(tfm, authenc_keys.enckey, authenc_keys.enckeylen); + if (err) + return err; + /* + * The crypto engine does not support any two keys + * being the same for triple des algorithms. The + * verify_skcipher_des3_key does not check for all the + * below conditions. Return -EINVAL in case any two keys + * are the same. Revisit to see if a fallback cipher + * is needed to handle this condition. 
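+ * (Each 64-bit DES key occupies two u32 words of _key[]; a zero result
+ * for one of the XOR pairs below means two of the three keys are equal.)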
+ */ + memcpy(_key, authenc_keys.enckey, DES3_EDE_KEY_SIZE); + if (!((_key[0] ^ _key[2]) | (_key[1] ^ _key[3])) || + !((_key[2] ^ _key[4]) | (_key[3] ^ _key[5])) || + !((_key[0] ^ _key[4]) | (_key[1] ^ _key[5]))) + return -EINVAL; + } else if (IS_AES(flags)) { + /* No random key sizes */ + if (authenc_keys.enckeylen != AES_KEYSIZE_128 && + authenc_keys.enckeylen != AES_KEYSIZE_256) + return -EINVAL; + } + + ctx->enc_keylen = authenc_keys.enckeylen; + ctx->auth_keylen = authenc_keys.authkeylen; + + memcpy(ctx->enc_key, authenc_keys.enckey, authenc_keys.enckeylen); + + memset(ctx->auth_key, 0, sizeof(ctx->auth_key)); + memcpy(ctx->auth_key, authenc_keys.authkey, authenc_keys.authkeylen); + + return 0; +} + +static int qce_aead_setauthsize(struct crypto_aead *tfm, unsigned int authsize) +{ + struct qce_aead_ctx *ctx = crypto_aead_ctx(tfm); + unsigned long flags = to_aead_tmpl(tfm)->alg_flags; + + if (IS_CCM(flags)) { + if (authsize < 4 || authsize > 16 || authsize % 2) + return -EINVAL; + if (IS_CCM_RFC4309(flags) && (authsize < 8 || authsize % 4)) + return -EINVAL; + } + ctx->authsize = authsize; + return 0; +} + +static int qce_aead_init(struct crypto_aead *tfm) +{ + crypto_aead_set_reqsize(tfm, sizeof(struct qce_aead_reqctx)); + return 0; +} + +struct qce_aead_def { + unsigned long flags; + const char *name; + const char *drv_name; + unsigned int blocksize; + unsigned int chunksize; + unsigned int ivsize; + unsigned int maxauthsize; +}; + +static const struct qce_aead_def aead_def[] = { + { + .flags = QCE_ALG_DES | QCE_MODE_CBC | QCE_HASH_SHA1_HMAC, + .name = "authenc(hmac(sha1),cbc(des))", + .drv_name = "authenc-hmac-sha1-cbc-des-qce", + .blocksize = DES_BLOCK_SIZE, + .ivsize = DES_BLOCK_SIZE, + .maxauthsize = SHA1_DIGEST_SIZE, + }, + { + .flags = QCE_ALG_3DES | QCE_MODE_CBC | QCE_HASH_SHA1_HMAC, + .name = "authenc(hmac(sha1),cbc(des3_ede))", + .drv_name = "authenc-hmac-sha1-cbc-3des-qce", + .blocksize = DES3_EDE_BLOCK_SIZE, + .ivsize = DES3_EDE_BLOCK_SIZE, + .maxauthsize = SHA1_DIGEST_SIZE, + }, + { + .flags = QCE_ALG_DES | QCE_MODE_CBC | QCE_HASH_SHA256_HMAC, + .name = "authenc(hmac(sha256),cbc(des))", + .drv_name = "authenc-hmac-sha256-cbc-des-qce", + .blocksize = DES_BLOCK_SIZE, + .ivsize = DES_BLOCK_SIZE, + .maxauthsize = SHA256_DIGEST_SIZE, + }, + { + .flags = QCE_ALG_3DES | QCE_MODE_CBC | QCE_HASH_SHA256_HMAC, + .name = "authenc(hmac(sha256),cbc(des3_ede))", + .drv_name = "authenc-hmac-sha256-cbc-3des-qce", + .blocksize = DES3_EDE_BLOCK_SIZE, + .ivsize = DES3_EDE_BLOCK_SIZE, + .maxauthsize = SHA256_DIGEST_SIZE, + }, + { + .flags = QCE_ALG_AES | QCE_MODE_CBC | QCE_HASH_SHA256_HMAC, + .name = "authenc(hmac(sha256),cbc(aes))", + .drv_name = "authenc-hmac-sha256-cbc-aes-qce", + .blocksize = AES_BLOCK_SIZE, + .ivsize = AES_BLOCK_SIZE, + .maxauthsize = SHA256_DIGEST_SIZE, + }, + { + .flags = QCE_ALG_AES | QCE_MODE_CCM, + .name = "ccm(aes)", + .drv_name = "ccm-aes-qce", + .blocksize = 1, + .ivsize = AES_BLOCK_SIZE, + .maxauthsize = AES_BLOCK_SIZE, + }, + { + .flags = QCE_ALG_AES | QCE_MODE_CCM | QCE_MODE_CCM_RFC4309, + .name = "rfc4309(ccm(aes))", + .drv_name = "rfc4309-ccm-aes-qce", + .blocksize = 1, + .ivsize = 8, + .maxauthsize = AES_BLOCK_SIZE, + }, +}; + +static int qce_aead_register_one(const struct qce_aead_def *def, struct qce_device *qce) +{ + struct qce_alg_template *tmpl; + struct aead_alg *alg; + int ret; + + tmpl = kzalloc(sizeof(*tmpl), GFP_KERNEL); + if (!tmpl) + return -ENOMEM; + + alg = &tmpl->alg.aead; + + snprintf(alg->base.cra_name, CRYPTO_MAX_ALG_NAME, "%s", 
def->name); + snprintf(alg->base.cra_driver_name, CRYPTO_MAX_ALG_NAME, "%s", + def->drv_name); + + alg->base.cra_blocksize = def->blocksize; + alg->chunksize = def->chunksize; + alg->ivsize = def->ivsize; + alg->maxauthsize = def->maxauthsize; + if (IS_CCM(def->flags)) + alg->setkey = qce_aead_ccm_setkey; + else + alg->setkey = qce_aead_setkey; + alg->setauthsize = qce_aead_setauthsize; + alg->encrypt = qce_aead_encrypt; + alg->decrypt = qce_aead_decrypt; + alg->init = qce_aead_init; + + alg->base.cra_priority = 300; + alg->base.cra_flags = CRYPTO_ALG_ASYNC | + CRYPTO_ALG_ALLOCATES_MEMORY | + CRYPTO_ALG_KERN_DRIVER_ONLY; + alg->base.cra_ctxsize = sizeof(struct qce_aead_ctx); + alg->base.cra_alignmask = 0; + alg->base.cra_module = THIS_MODULE; + + INIT_LIST_HEAD(&tmpl->entry); + tmpl->crypto_alg_type = CRYPTO_ALG_TYPE_AEAD; + tmpl->alg_flags = def->flags; + tmpl->qce = qce; + + ret = crypto_register_aead(alg); + if (ret) { + kfree(tmpl); + dev_err(qce->dev, "%s registration failed\n", alg->base.cra_name); + return ret; + } + + list_add_tail(&tmpl->entry, &aead_algs); + dev_dbg(qce->dev, "%s is registered\n", alg->base.cra_name); + return 0; +} + +static void qce_aead_unregister(struct qce_device *qce) +{ + struct qce_alg_template *tmpl, *n; + + list_for_each_entry_safe(tmpl, n, &aead_algs, entry) { + crypto_unregister_aead(&tmpl->alg.aead); + list_del(&tmpl->entry); + kfree(tmpl); + } +} + +static int qce_aead_register(struct qce_device *qce) +{ + int ret, i; + + for (i = 0; i < ARRAY_SIZE(aead_def); i++) { + ret = qce_aead_register_one(&aead_def[i], qce); + if (ret) + goto err; + } + + return 0; +err: + qce_aead_unregister(qce); + return ret; +} + +const struct qce_algo_ops aead_ops = { + .type = CRYPTO_ALG_TYPE_AEAD, + .register_algs = qce_aead_register, + .unregister_algs = qce_aead_unregister, + .async_req_handle = qce_aead_async_req_handle, +}; diff --git a/drivers/crypto/qce/aead.h b/drivers/crypto/qce/aead.h new file mode 100644 index 0000000000000..3d1f2039930b6 --- /dev/null +++ b/drivers/crypto/qce/aead.h @@ -0,0 +1,53 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2021, Linaro Limited. All rights reserved. 
+ */ + +#ifndef _AEAD_H_ +#define _AEAD_H_ + +#include "common.h" +#include "core.h" + +#define QCE_MAX_KEY_SIZE 64 +#define QCE_CCM4309_SALT_SIZE 3 + +struct qce_aead_ctx { + u8 enc_key[QCE_MAX_KEY_SIZE]; + u8 auth_key[QCE_MAX_KEY_SIZE]; + u8 ccm4309_salt[QCE_CCM4309_SALT_SIZE]; + unsigned int enc_keylen; + unsigned int auth_keylen; + unsigned int authsize; +}; + +struct qce_aead_reqctx { + unsigned long flags; + u8 *iv; + unsigned int ivsize; + int src_nents; + int dst_nents; + struct scatterlist result_sg; + struct scatterlist adata_sg; + struct sg_table dst_tbl; + struct sg_table src_tbl; + struct scatterlist *dst_sg; + struct scatterlist *src_sg; + unsigned int cryptlen; + unsigned int assoclen; + unsigned char *adata; + u8 ccm_nonce[QCE_MAX_NONCE]; + u8 ccmresult_buf[QCE_BAM_BURST_SIZE]; + u8 ccm_rfc4309_iv[QCE_MAX_IV_SIZE]; +}; + +static inline struct qce_alg_template *to_aead_tmpl(struct crypto_aead *tfm) +{ + struct aead_alg *alg = crypto_aead_alg(tfm); + + return container_of(alg, struct qce_alg_template, alg.aead); +} + +extern const struct qce_algo_ops aead_ops; + +#endif /* _AEAD_H_ */ diff --git a/drivers/crypto/qce/common.h b/drivers/crypto/qce/common.h index b135440bf72b5..02e63ad9f2455 100644 --- a/drivers/crypto/qce/common.h +++ b/drivers/crypto/qce/common.h @@ -11,6 +11,7 @@ #include #include #include +#include /* xts du size */ #define QCE_SECTOR_SIZE 512 @@ -88,6 +89,7 @@ struct qce_alg_template { union { struct skcipher_alg skcipher; struct ahash_alg ahash; + struct aead_alg aead; } alg; struct qce_device *qce; const u8 *hash_zero; diff --git a/drivers/crypto/qce/core.c b/drivers/crypto/qce/core.c index 80b75085c2659..d3780be44a763 100644 --- a/drivers/crypto/qce/core.c +++ b/drivers/crypto/qce/core.c @@ -17,6 +17,7 @@ #include "core.h" #include "cipher.h" #include "sha.h" +#include "aead.h" #define QCE_MAJOR_VERSION5 0x05 #define QCE_QUEUE_LENGTH 1 @@ -28,6 +29,9 @@ static const struct qce_algo_ops *qce_ops[] = { #ifdef CONFIG_CRYPTO_DEV_QCE_SHA &ahash_ops, #endif +#ifdef CONFIG_CRYPTO_DEV_QCE_AEAD + &aead_ops, +#endif }; static void qce_unregister_algs(struct qce_device *qce) From e5d6181d35b257c13841f774f5ad36b0cb2d82aa Mon Sep 17 00:00:00 2001 From: Thara Gopinath Date: Thu, 29 Apr 2021 11:07:05 -0400 Subject: [PATCH 012/142] crypto: qce - Clean up qce_auth_cfg Remove various redundant checks in qce_auth_cfg. 
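For example (mirroring the hunks below), a check such as

	if (IS_AES(flags) && (IS_CCM(flags) || IS_CMAC(flags)))

reduces to

	if (IS_CCM(flags) || IS_CMAC(flags))

because the driver sets the CCM and CMAC flags only for AES transformations, making the IS_AES() test redundant.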
Also allow qce_auth_cfg to take auth_size as a parameter, which is a required setting for ccm(aes) algorithms. Signed-off-by: Thara Gopinath Signed-off-by: Herbert Xu --- drivers/crypto/qce/common.c | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/drivers/crypto/qce/common.c b/drivers/crypto/qce/common.c index 7b5bc5a6ae81c..7b3d6caec1b21 100644 --- a/drivers/crypto/qce/common.c +++ b/drivers/crypto/qce/common.c @@ -97,11 +97,11 @@ static inline void qce_crypto_go(struct qce_device *qce, bool result_dump) } #ifdef CONFIG_CRYPTO_DEV_QCE_SHA -static u32 qce_auth_cfg(unsigned long flags, u32 key_size) +static u32 qce_auth_cfg(unsigned long flags, u32 key_size, u32 auth_size) { u32 cfg = 0; - if (IS_AES(flags) && (IS_CCM(flags) || IS_CMAC(flags))) + if (IS_CCM(flags) || IS_CMAC(flags)) cfg |= AUTH_ALG_AES << AUTH_ALG_SHIFT; else cfg |= AUTH_ALG_SHA << AUTH_ALG_SHIFT; @@ -119,15 +119,16 @@ static u32 qce_auth_cfg(unsigned long flags, u32 key_size) cfg |= AUTH_SIZE_SHA256 << AUTH_SIZE_SHIFT; else if (IS_CMAC(flags)) cfg |= AUTH_SIZE_ENUM_16_BYTES << AUTH_SIZE_SHIFT; + else if (IS_CCM(flags)) + cfg |= (auth_size - 1) << AUTH_SIZE_SHIFT; if (IS_SHA1(flags) || IS_SHA256(flags)) cfg |= AUTH_MODE_HASH << AUTH_MODE_SHIFT; - else if (IS_SHA1_HMAC(flags) || IS_SHA256_HMAC(flags) || - IS_CBC(flags) || IS_CTR(flags)) + else if (IS_SHA1_HMAC(flags) || IS_SHA256_HMAC(flags)) cfg |= AUTH_MODE_HMAC << AUTH_MODE_SHIFT; - else if (IS_AES(flags) && IS_CCM(flags)) + else if (IS_CCM(flags)) cfg |= AUTH_MODE_CCM << AUTH_MODE_SHIFT; - else if (IS_AES(flags) && IS_CMAC(flags)) + else if (IS_CMAC(flags)) cfg |= AUTH_MODE_CMAC << AUTH_MODE_SHIFT; if (IS_SHA(flags) || IS_SHA_HMAC(flags)) @@ -136,10 +137,6 @@ static u32 qce_auth_cfg(unsigned long flags, u32 key_size) if (IS_CCM(flags)) cfg |= QCE_MAX_NONCE_WORDS << AUTH_NONCE_NUM_WORDS_SHIFT; - if (IS_CBC(flags) || IS_CTR(flags) || IS_CCM(flags) || - IS_CMAC(flags)) - cfg |= BIT(AUTH_LAST_SHIFT) | BIT(AUTH_FIRST_SHIFT); - return cfg; } @@ -171,7 +168,7 @@ static int qce_setup_regs_ahash(struct crypto_async_request *async_req) qce_clear_array(qce, REG_AUTH_KEY0, 16); qce_clear_array(qce, REG_AUTH_BYTECNT0, 4); - auth_cfg = qce_auth_cfg(rctx->flags, rctx->authklen); + auth_cfg = qce_auth_cfg(rctx->flags, rctx->authklen, digestsize); } if (IS_SHA_HMAC(rctx->flags) || IS_CMAC(rctx->flags)) { @@ -199,7 +196,7 @@ static int qce_setup_regs_ahash(struct crypto_async_request *async_req) qce_write_array(qce, REG_AUTH_BYTECNT0, (u32 *)rctx->byte_count, 2); - auth_cfg = qce_auth_cfg(rctx->flags, 0); + auth_cfg = qce_auth_cfg(rctx->flags, 0, digestsize); if (rctx->last_blk) auth_cfg |= BIT(AUTH_LAST_SHIFT); From db0018a8b615e256c90a63d2d5698f2144dde222 Mon Sep 17 00:00:00 2001 From: Thara Gopinath Date: Thu, 29 Apr 2021 11:07:06 -0400 Subject: [PATCH 013/142] crypto: qce - Add support for AEAD algorithms Add the register programming sequence for enabling AEAD algorithms on the Qualcomm crypto engine.
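As a reader's summary of the sequence below (condensed from the new qce_setup_regs_aead(), not additional driver logic):

	/*
	 * 1. qce_setup_config()            - reset the CONFIG register
	 * 2. REG_ENCR_KEY0 / REG_CNTR0_IV0 - program cipher key and IV; CCM
	 *    also sets up the CCM counter and the counter masks
	 * 3. REG_AUTH_KEY0                 - clear, then program the auth key
	 *    and either the standard HMAC IV or the CCM nonce
	 * 4. REG_ENCR_SEG_CFG              - cipher configuration plus the
	 *    ENCODE bit for encryption
	 * 5. REG_AUTH_SEG_CFG              - AUTH_FIRST/AUTH_LAST and the
	 *    AUTH_POS before/after choice
	 * 6. segment sizes and offsets, then the total REG_SEG_SIZE
	 * 7. qce_crypto_go(qce, !IS_CCM(flags)) - start; result dump only
	 *    for non-CCM modes
	 */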
Signed-off-by: Thara Gopinath Signed-off-by: Herbert Xu --- drivers/crypto/qce/common.c | 162 +++++++++++++++++++++++++++++++++++- 1 file changed, 160 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/qce/common.c b/drivers/crypto/qce/common.c index 7b3d6caec1b21..7c612ba5068f7 100644 --- a/drivers/crypto/qce/common.c +++ b/drivers/crypto/qce/common.c @@ -15,6 +15,7 @@ #include "core.h" #include "regs-v5.h" #include "sha.h" +#include "aead.h" static inline u32 qce_read(struct qce_device *qce, u32 offset) { @@ -96,7 +97,7 @@ static inline void qce_crypto_go(struct qce_device *qce, bool result_dump) qce_write(qce, REG_GOPROC, BIT(GO_SHIFT)); } -#ifdef CONFIG_CRYPTO_DEV_QCE_SHA +#if defined(CONFIG_CRYPTO_DEV_QCE_SHA) || defined(CONFIG_CRYPTO_DEV_QCE_AEAD) static u32 qce_auth_cfg(unsigned long flags, u32 key_size, u32 auth_size) { u32 cfg = 0; @@ -139,7 +140,9 @@ static u32 qce_auth_cfg(unsigned long flags, u32 key_size, u32 auth_size) return cfg; } +#endif +#ifdef CONFIG_CRYPTO_DEV_QCE_SHA static int qce_setup_regs_ahash(struct crypto_async_request *async_req) { struct ahash_request *req = ahash_request_cast(async_req); @@ -225,7 +228,7 @@ static int qce_setup_regs_ahash(struct crypto_async_request *async_req) } #endif -#ifdef CONFIG_CRYPTO_DEV_QCE_SKCIPHER +#if defined(CONFIG_CRYPTO_DEV_QCE_SKCIPHER) || defined(CONFIG_CRYPTO_DEV_QCE_AEAD) static u32 qce_encr_cfg(unsigned long flags, u32 aes_key_size) { u32 cfg = 0; @@ -271,7 +274,9 @@ static u32 qce_encr_cfg(unsigned long flags, u32 aes_key_size) return cfg; } +#endif +#ifdef CONFIG_CRYPTO_DEV_QCE_SKCIPHER static void qce_xts_swapiv(__be32 *dst, const u8 *src, unsigned int ivsize) { u8 swap[QCE_AES_IV_LENGTH]; @@ -386,6 +391,155 @@ static int qce_setup_regs_skcipher(struct crypto_async_request *async_req) } #endif +#ifdef CONFIG_CRYPTO_DEV_QCE_AEAD +static const u32 std_iv_sha1[SHA256_DIGEST_SIZE / sizeof(u32)] = { + SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4, 0, 0, 0 +}; + +static const u32 std_iv_sha256[SHA256_DIGEST_SIZE / sizeof(u32)] = { + SHA256_H0, SHA256_H1, SHA256_H2, SHA256_H3, + SHA256_H4, SHA256_H5, SHA256_H6, SHA256_H7 +}; + +static unsigned int qce_be32_to_cpu_array(u32 *dst, const u8 *src, unsigned int len) +{ + u32 *d = dst; + const u8 *s = src; + unsigned int n; + + n = len / sizeof(u32); + for (; n > 0; n--) { + *d = be32_to_cpup((const __be32 *)s); + s += sizeof(u32); + d++; + } + return DIV_ROUND_UP(len, sizeof(u32)); +} + +static int qce_setup_regs_aead(struct crypto_async_request *async_req) +{ + struct aead_request *req = aead_request_cast(async_req); + struct qce_aead_reqctx *rctx = aead_request_ctx(req); + struct qce_aead_ctx *ctx = crypto_tfm_ctx(async_req->tfm); + struct qce_alg_template *tmpl = to_aead_tmpl(crypto_aead_reqtfm(req)); + struct qce_device *qce = tmpl->qce; + u32 enckey[QCE_MAX_CIPHER_KEY_SIZE / sizeof(u32)] = {0}; + u32 enciv[QCE_MAX_IV_SIZE / sizeof(u32)] = {0}; + u32 authkey[QCE_SHA_HMAC_KEY_SIZE / sizeof(u32)] = {0}; + u32 authiv[SHA256_DIGEST_SIZE / sizeof(u32)] = {0}; + u32 authnonce[QCE_MAX_NONCE / sizeof(u32)] = {0}; + unsigned int enc_keylen = ctx->enc_keylen; + unsigned int auth_keylen = ctx->auth_keylen; + unsigned int enc_ivsize = rctx->ivsize; + unsigned int auth_ivsize = 0; + unsigned int enckey_words, enciv_words; + unsigned int authkey_words, authiv_words, authnonce_words; + unsigned long flags = rctx->flags; + u32 encr_cfg, auth_cfg, config, totallen; + u32 iv_last_word; + + qce_setup_config(qce); + + /* Write encryption key */ + enckey_words = qce_be32_to_cpu_array(enckey, 
ctx->enc_key, enc_keylen); + qce_write_array(qce, REG_ENCR_KEY0, enckey, enckey_words); + + /* Write encryption iv */ + enciv_words = qce_be32_to_cpu_array(enciv, rctx->iv, enc_ivsize); + qce_write_array(qce, REG_CNTR0_IV0, enciv, enciv_words); + + if (IS_CCM(rctx->flags)) { + iv_last_word = enciv[enciv_words - 1]; + qce_write(qce, REG_CNTR3_IV3, iv_last_word + 1); + qce_write_array(qce, REG_ENCR_CCM_INT_CNTR0, (u32 *)enciv, enciv_words); + qce_write(qce, REG_CNTR_MASK, ~0); + qce_write(qce, REG_CNTR_MASK0, ~0); + qce_write(qce, REG_CNTR_MASK1, ~0); + qce_write(qce, REG_CNTR_MASK2, ~0); + } + + /* Clear authentication IV and KEY registers of previous values */ + qce_clear_array(qce, REG_AUTH_IV0, 16); + qce_clear_array(qce, REG_AUTH_KEY0, 16); + + /* Clear byte count */ + qce_clear_array(qce, REG_AUTH_BYTECNT0, 4); + + /* Write authentication key */ + authkey_words = qce_be32_to_cpu_array(authkey, ctx->auth_key, auth_keylen); + qce_write_array(qce, REG_AUTH_KEY0, (u32 *)authkey, authkey_words); + + /* Write initial authentication IV only for HMAC algorithms */ + if (IS_SHA_HMAC(rctx->flags)) { + /* Write default authentication iv */ + if (IS_SHA1_HMAC(rctx->flags)) { + auth_ivsize = SHA1_DIGEST_SIZE; + memcpy(authiv, std_iv_sha1, auth_ivsize); + } else if (IS_SHA256_HMAC(rctx->flags)) { + auth_ivsize = SHA256_DIGEST_SIZE; + memcpy(authiv, std_iv_sha256, auth_ivsize); + } + authiv_words = auth_ivsize / sizeof(u32); + qce_write_array(qce, REG_AUTH_IV0, (u32 *)authiv, authiv_words); + } else if (IS_CCM(rctx->flags)) { + /* Write nonce for CCM algorithms */ + authnonce_words = qce_be32_to_cpu_array(authnonce, rctx->ccm_nonce, QCE_MAX_NONCE); + qce_write_array(qce, REG_AUTH_INFO_NONCE0, authnonce, authnonce_words); + } + + /* Set up ENCR_SEG_CFG */ + encr_cfg = qce_encr_cfg(flags, enc_keylen); + if (IS_ENCRYPT(flags)) + encr_cfg |= BIT(ENCODE_SHIFT); + qce_write(qce, REG_ENCR_SEG_CFG, encr_cfg); + + /* Set up AUTH_SEG_CFG */ + auth_cfg = qce_auth_cfg(rctx->flags, auth_keylen, ctx->authsize); + auth_cfg |= BIT(AUTH_LAST_SHIFT); + auth_cfg |= BIT(AUTH_FIRST_SHIFT); + if (IS_ENCRYPT(flags)) { + if (IS_CCM(rctx->flags)) + auth_cfg |= AUTH_POS_BEFORE << AUTH_POS_SHIFT; + else + auth_cfg |= AUTH_POS_AFTER << AUTH_POS_SHIFT; + } else { + if (IS_CCM(rctx->flags)) + auth_cfg |= AUTH_POS_AFTER << AUTH_POS_SHIFT; + else + auth_cfg |= AUTH_POS_BEFORE << AUTH_POS_SHIFT; + } + qce_write(qce, REG_AUTH_SEG_CFG, auth_cfg); + + totallen = rctx->cryptlen + rctx->assoclen; + + /* Set the encryption size and start offset */ + if (IS_CCM(rctx->flags) && IS_DECRYPT(rctx->flags)) + qce_write(qce, REG_ENCR_SEG_SIZE, rctx->cryptlen + ctx->authsize); + else + qce_write(qce, REG_ENCR_SEG_SIZE, rctx->cryptlen); + qce_write(qce, REG_ENCR_SEG_START, rctx->assoclen & 0xffff); + + /* Set the authentication size and start offset */ + qce_write(qce, REG_AUTH_SEG_SIZE, totallen); + qce_write(qce, REG_AUTH_SEG_START, 0); + + /* Write total length */ + if (IS_CCM(rctx->flags) && IS_DECRYPT(rctx->flags)) + qce_write(qce, REG_SEG_SIZE, totallen + ctx->authsize); + else + qce_write(qce, REG_SEG_SIZE, totallen); + + /* get little endianness */ + config = qce_config_reg(qce, 1); + qce_write(qce, REG_CONFIG, config); + + /* Start the process */ + qce_crypto_go(qce, !IS_CCM(flags)); + + return 0; +} +#endif + int qce_start(struct crypto_async_request *async_req, u32 type) { switch (type) { @@ -396,6 +550,10 @@ int qce_start(struct crypto_async_request *async_req, u32 type) #ifdef CONFIG_CRYPTO_DEV_QCE_SHA case CRYPTO_ALG_TYPE_AHASH: 
return qce_setup_regs_ahash(async_req); +#endif +#ifdef CONFIG_CRYPTO_DEV_QCE_AEAD + case CRYPTO_ALG_TYPE_AEAD: + return qce_setup_regs_aead(async_req); #endif default: return -EINVAL; From b51dcf05c1e96caccda769f3a60042d77f1a3a7d Mon Sep 17 00:00:00 2001 From: Thara Gopinath Date: Thu, 29 Apr 2021 11:07:07 -0400 Subject: [PATCH 014/142] crypto: qce - Schedule fallback aead algorithm The Qualcomm crypto engine does not handle the following scenarios and will issue an abort. In such cases, pass on the transformation to a fallback algorithm. - DES3 algorithms with all three keys the same. - AES192 algorithms. - 0-length messages. Signed-off-by: Thara Gopinath Signed-off-by: Herbert Xu --- drivers/crypto/qce/aead.c | 64 ++++++++++++++++++++++++++++++++------- drivers/crypto/qce/aead.h | 3 ++ 2 files changed, 56 insertions(+), 11 deletions(-) diff --git a/drivers/crypto/qce/aead.c b/drivers/crypto/qce/aead.c index ef66ae21eae36..6d06a19b48e49 100644 --- a/drivers/crypto/qce/aead.c +++ b/drivers/crypto/qce/aead.c @@ -512,7 +512,23 @@ static int qce_aead_crypt(struct aead_request *req, int encrypt) /* CE does not handle 0 length messages */ if (!rctx->cryptlen) { if (!(IS_CCM(rctx->flags) && IS_DECRYPT(rctx->flags))) - return -EINVAL; + ctx->need_fallback = true; + } + + /* If fallback is needed, schedule and exit */ + if (ctx->need_fallback) { + /* Reset need_fallback in case the same ctx is used for another transaction */ + ctx->need_fallback = false; + + aead_request_set_tfm(&rctx->fallback_req, ctx->fallback); + aead_request_set_callback(&rctx->fallback_req, req->base.flags, + req->base.complete, req->base.data); + aead_request_set_crypt(&rctx->fallback_req, req->src, + req->dst, req->cryptlen, req->iv); + aead_request_set_ad(&rctx->fallback_req, req->assoclen); + + return encrypt ? crypto_aead_encrypt(&rctx->fallback_req) : + crypto_aead_decrypt(&rctx->fallback_req); } /* @@ -553,7 +569,7 @@ static int qce_aead_ccm_setkey(struct crypto_aead *tfm, const u8 *key, memcpy(ctx->ccm4309_salt, key + keylen, QCE_CCM4309_SALT_SIZE); } - if (keylen != AES_KEYSIZE_128 && keylen != AES_KEYSIZE_256) + if (keylen != AES_KEYSIZE_128 && keylen != AES_KEYSIZE_256 && keylen != AES_KEYSIZE_192) return -EINVAL; ctx->enc_keylen = keylen; @@ -562,7 +578,12 @@ static int qce_aead_ccm_setkey(struct crypto_aead *tfm, const u8 *key, memcpy(ctx->enc_key, key, keylen); memcpy(ctx->auth_key, key, keylen); - return 0; + if (keylen == AES_KEYSIZE_192) + ctx->need_fallback = true; + + return IS_CCM_RFC4309(flags) ? + crypto_aead_setkey(ctx->fallback, key, keylen + QCE_CCM4309_SALT_SIZE) : + crypto_aead_setkey(ctx->fallback, key, keylen); } static int qce_aead_setkey(struct crypto_aead *tfm, const u8 *key, unsigned int keylen) @@ -593,20 +614,21 @@ static int qce_aead_setkey(struct crypto_aead *tfm, const u8 *key, unsigned int * The crypto engine does not support any two keys * being the same for triple des algorithms. The * verify_skcipher_des3_key does not check for all the - * below conditions. Return -EINVAL in case any two keys - * are the same. Revisit to see if a fallback cipher - * is needed to handle this condition. + * below conditions. Schedule fallback in this case.
*/ memcpy(_key, authenc_keys.enckey, DES3_EDE_KEY_SIZE); if (!((_key[0] ^ _key[2]) | (_key[1] ^ _key[3])) || !((_key[2] ^ _key[4]) | (_key[3] ^ _key[5])) || !((_key[0] ^ _key[4]) | (_key[1] ^ _key[5]))) - return -EINVAL; + ctx->need_fallback = true; } else if (IS_AES(flags)) { /* No random key sizes */ if (authenc_keys.enckeylen != AES_KEYSIZE_128 && + authenc_keys.enckeylen != AES_KEYSIZE_192 && authenc_keys.enckeylen != AES_KEYSIZE_256) return -EINVAL; + if (authenc_keys.enckeylen == AES_KEYSIZE_192) + ctx->need_fallback = true; } ctx->enc_keylen = authenc_keys.enckeylen; @@ -617,7 +639,7 @@ static int qce_aead_setkey(struct crypto_aead *tfm, const u8 *key, unsigned int memset(ctx->auth_key, 0, sizeof(ctx->auth_key)); memcpy(ctx->auth_key, authenc_keys.authkey, authenc_keys.authkeylen); - return 0; + return crypto_aead_setkey(ctx->fallback, key, keylen); } static int qce_aead_setauthsize(struct crypto_aead *tfm, unsigned int authsize) @@ -632,15 +654,33 @@ static int qce_aead_setauthsize(struct crypto_aead *tfm, unsigned int authsize) return -EINVAL; } ctx->authsize = authsize; - return 0; + + return crypto_aead_setauthsize(ctx->fallback, authsize); } static int qce_aead_init(struct crypto_aead *tfm) { - crypto_aead_set_reqsize(tfm, sizeof(struct qce_aead_reqctx)); + struct qce_aead_ctx *ctx = crypto_aead_ctx(tfm); + + ctx->need_fallback = false; + ctx->fallback = crypto_alloc_aead(crypto_tfm_alg_name(&tfm->base), + 0, CRYPTO_ALG_NEED_FALLBACK); + + if (IS_ERR(ctx->fallback)) + return PTR_ERR(ctx->fallback); + + crypto_aead_set_reqsize(tfm, sizeof(struct qce_aead_reqctx) + + crypto_aead_reqsize(ctx->fallback)); return 0; } +static void qce_aead_exit(struct crypto_aead *tfm) +{ + struct qce_aead_ctx *ctx = crypto_aead_ctx(tfm); + + crypto_free_aead(ctx->fallback); +} + struct qce_aead_def { unsigned long flags; const char *name; @@ -738,11 +778,13 @@ static int qce_aead_register_one(const struct qce_aead_def *def, struct qce_devi alg->encrypt = qce_aead_encrypt; alg->decrypt = qce_aead_decrypt; alg->init = qce_aead_init; + alg->exit = qce_aead_exit; alg->base.cra_priority = 300; alg->base.cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY | - CRYPTO_ALG_KERN_DRIVER_ONLY; + CRYPTO_ALG_KERN_DRIVER_ONLY | + CRYPTO_ALG_NEED_FALLBACK; alg->base.cra_ctxsize = sizeof(struct qce_aead_ctx); alg->base.cra_alignmask = 0; alg->base.cra_module = THIS_MODULE; diff --git a/drivers/crypto/qce/aead.h b/drivers/crypto/qce/aead.h index 3d1f2039930b6..efb8477cc0887 100644 --- a/drivers/crypto/qce/aead.h +++ b/drivers/crypto/qce/aead.h @@ -19,6 +19,8 @@ struct qce_aead_ctx { unsigned int enc_keylen; unsigned int auth_keylen; unsigned int authsize; + bool need_fallback; + struct crypto_aead *fallback; }; struct qce_aead_reqctx { @@ -39,6 +41,7 @@ struct qce_aead_reqctx { u8 ccm_nonce[QCE_MAX_NONCE]; u8 ccmresult_buf[QCE_BAM_BURST_SIZE]; u8 ccm_rfc4309_iv[QCE_MAX_IV_SIZE]; + struct aead_request fallback_req; }; static inline struct qce_alg_template *to_aead_tmpl(struct crypto_aead *tfm) From 0cdbabf8bb7a6147f5adf37dbc251e92a1bbc2c7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Stelmach?= Date: Wed, 5 May 2021 20:29:14 +0200 Subject: [PATCH 015/142] hwrng: exynos - Fix runtime PM imbalance on error MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit pm_runtime_resume_and_get() wraps around pm_runtime_get_sync() and decrements the runtime PM usage counter in case the latter function fails and keeps the counter balanced. 
Signed-off-by: Łukasz Stelmach Reviewed-by: Krzysztof Kozlowski Signed-off-by: Herbert Xu --- drivers/char/hw_random/exynos-trng.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/char/hw_random/exynos-trng.c b/drivers/char/hw_random/exynos-trng.c index 8e1fe3f8dd2df..c8db62bc5ff72 100644 --- a/drivers/char/hw_random/exynos-trng.c +++ b/drivers/char/hw_random/exynos-trng.c @@ -132,7 +132,7 @@ static int exynos_trng_probe(struct platform_device *pdev) return PTR_ERR(trng->mem); pm_runtime_enable(&pdev->dev); - ret = pm_runtime_get_sync(&pdev->dev); + ret = pm_runtime_resume_and_get(&pdev->dev); if (ret < 0) { dev_err(&pdev->dev, "Could not get runtime PM.\n"); goto err_pm_get; @@ -165,7 +165,7 @@ static int exynos_trng_probe(struct platform_device *pdev) clk_disable_unprepare(trng->clk); err_clock: - pm_runtime_put_sync(&pdev->dev); + pm_runtime_put_noidle(&pdev->dev); err_pm_get: pm_runtime_disable(&pdev->dev); From 9395c58fdddd79cdd3882132cdd04e8ac7ad525f Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Wed, 5 May 2021 20:26:08 +0000 Subject: [PATCH 016/142] crypto: ixp4xx - dma_unmap the correct address Testing ixp4xx_crypto with CONFIG_DMA_API_DEBUG led to the following error: DMA-API: platform ixp4xx_crypto.0: device driver tries to free DMA memory it has not allocated [device address=0x0000000000000000] [size=24 bytes] This is due to dma_unmap using the wrong address. Fixes: 0d44dc59b2b4 ("crypto: ixp4xx - Fix handling of chained sg buffers") Signed-off-by: Corentin Labbe Signed-off-by: Herbert Xu --- drivers/crypto/ixp4xx_crypto.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/crypto/ixp4xx_crypto.c b/drivers/crypto/ixp4xx_crypto.c index 0616e369522e9..ed3deaa5ed2b8 100644 --- a/drivers/crypto/ixp4xx_crypto.c +++ b/drivers/crypto/ixp4xx_crypto.c @@ -330,7 +330,7 @@ static void free_buf_chain(struct device *dev, struct buffer_desc *buf, buf1 = buf->next; phys1 = buf->phys_next; - dma_unmap_single(dev, buf->phys_next, buf->buf_len, buf->dir); + dma_unmap_single(dev, buf->phys_addr, buf->buf_len, buf->dir); dma_pool_free(buffer_pool, buf, phys); buf = buf1; phys = phys1; From e8acf011f2e7e21a7e2fae47cbaa06598e533d40 Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Wed, 5 May 2021 20:26:09 +0000 Subject: [PATCH 017/142] crypto: ixp4xx - update IV after requests Crypto selftests fail on ixp4xx since it does not update the IV after skcipher requests.
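The skcipher API requires req->iv to hold the output IV for chaining once a request completes: for CBC-like modes that is the last ciphertext block, which can be read from the destination after encryption but must be saved from the source before an in-place decryption destroys it. A condensed sketch of that rule, mirroring the approach the diff below takes (the helper name is illustrative):

#include <crypto/scatterwalk.h>
#include <crypto/skcipher.h>
#include <linux/string.h>

static void demo_update_output_iv(struct skcipher_request *req, bool encrypt,
				  const u8 *iv_saved_from_src)
{
	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
	unsigned int ivsize = crypto_skcipher_ivsize(tfm);

	if (!ivsize)
		return;

	if (encrypt)
		/* The last ciphertext block sits at the tail of dst. */
		scatterwalk_map_and_copy(req->iv, req->dst,
					 req->cryptlen - ivsize, ivsize, 0);
	else
		/* The ciphertext may already be overwritten in place;
		 * use the copy taken from src before the operation.
		 */
		memcpy(req->iv, iv_saved_from_src, ivsize);
}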
Fixes: 81bef0150074 ("crypto: ixp4xx - Hardware crypto support for IXP4xx CPUs") Signed-off-by: Corentin Labbe Signed-off-by: Herbert Xu --- drivers/crypto/ixp4xx_crypto.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/drivers/crypto/ixp4xx_crypto.c b/drivers/crypto/ixp4xx_crypto.c index ed3deaa5ed2b8..f577ee4afd06f 100644 --- a/drivers/crypto/ixp4xx_crypto.c +++ b/drivers/crypto/ixp4xx_crypto.c @@ -149,6 +149,8 @@ struct crypt_ctl { struct ablk_ctx { struct buffer_desc *src; struct buffer_desc *dst; + u8 iv[MAX_IVLEN]; + bool encrypt; }; struct aead_ctx { @@ -381,6 +383,20 @@ static void one_packet(dma_addr_t phys) case CTL_FLAG_PERFORM_ABLK: { struct skcipher_request *req = crypt->data.ablk_req; struct ablk_ctx *req_ctx = skcipher_request_ctx(req); + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + unsigned int ivsize = crypto_skcipher_ivsize(tfm); + unsigned int offset; + + if (ivsize > 0) { + offset = req->cryptlen - ivsize; + if (req_ctx->encrypt) { + scatterwalk_map_and_copy(req->iv, req->dst, + offset, ivsize, 0); + } else { + memcpy(req->iv, req_ctx->iv, ivsize); + memzero_explicit(req_ctx->iv, ivsize); + } + } if (req_ctx->dst) { free_buf_chain(dev, req_ctx->dst, crypt->dst_buf); @@ -876,6 +892,7 @@ static int ablk_perform(struct skcipher_request *req, int encrypt) struct ablk_ctx *req_ctx = skcipher_request_ctx(req); struct buffer_desc src_hook; struct device *dev = &pdev->dev; + unsigned int offset; gfp_t flags = req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ? GFP_KERNEL : GFP_ATOMIC; @@ -885,6 +902,7 @@ static int ablk_perform(struct skcipher_request *req, int encrypt) return -EAGAIN; dir = encrypt ? &ctx->encrypt : &ctx->decrypt; + req_ctx->encrypt = encrypt; crypt = get_crypt_desc(); if (!crypt) @@ -900,6 +918,10 @@ static int ablk_perform(struct skcipher_request *req, int encrypt) BUG_ON(ivsize && !req->iv); memcpy(crypt->iv, req->iv, ivsize); + if (ivsize > 0 && !encrypt) { + offset = req->cryptlen - ivsize; + scatterwalk_map_and_copy(req_ctx->iv, req->src, offset, ivsize, 0); + } if (req->src != req->dst) { struct buffer_desc dst_hook; crypt->mode |= NPE_OP_NOT_IN_PLACE; From dfb098d692eac2a11a7051dfe87be98cd90da67d Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Wed, 5 May 2021 20:26:10 +0000 Subject: [PATCH 018/142] crypto: ixp4xx - fallback when having more than one SG Testing ixp4xx_crypto led to: alg: skcipher: ecb(des)-ixp4xx encryption overran dst buffer on test vector 0, cfg="two even aligned splits" The HW always overwrites the destination when sg_nents() > 1. The problem seems to be that the HW always writes areq->cryptlen bytes to the last SG. A comment in the driver's code hints that multiple SGs were never planned for: "This was never tested by Intel for more than one dst buffer, I think". So let's add a fallback for this situation.
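For context, sg_nents() just walks the scatterlist chain, so the fix boils down to counting entries up front and punting anything non-trivial to software. Roughly (a simplified restatement, not the driver code; the real helper lives in lib/scatterlist.c):

#include <linux/scatterlist.h>

/* What sg_nents() amounts to: count entries by walking the chain. */
static int demo_sg_count(struct scatterlist *sg)
{
	int nents = 0;

	for (; sg; sg = sg_next(sg))
		nents++;

	return nents;
}

A request is then eligible for the hardware only when both the source and the destination count as a single entry.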
Signed-off-by: Corentin Labbe Signed-off-by: Herbert Xu --- drivers/crypto/Kconfig | 5 +++ drivers/crypto/ixp4xx_crypto.c | 56 ++++++++++++++++++++++++++++++++-- 2 files changed, 58 insertions(+), 3 deletions(-) diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig index 1fe5b7eafc02c..1d5b342e6b424 100644 --- a/drivers/crypto/Kconfig +++ b/drivers/crypto/Kconfig @@ -325,6 +325,11 @@ config CRYPTO_DEV_TALITOS2 config CRYPTO_DEV_IXP4XX tristate "Driver for IXP4xx crypto hardware acceleration" depends on ARCH_IXP4XX && IXP4XX_QMGR && IXP4XX_NPE + select CRYPTO_AES + select CRYPTO_DES + select CRYPTO_ECB + select CRYPTO_CBC + select CRYPTO_CTR select CRYPTO_LIB_DES select CRYPTO_AEAD select CRYPTO_AUTHENC diff --git a/drivers/crypto/ixp4xx_crypto.c b/drivers/crypto/ixp4xx_crypto.c index f577ee4afd06f..8bbf2ead6e791 100644 --- a/drivers/crypto/ixp4xx_crypto.c +++ b/drivers/crypto/ixp4xx_crypto.c @@ -151,6 +151,7 @@ struct ablk_ctx { struct buffer_desc *dst; u8 iv[MAX_IVLEN]; bool encrypt; + struct skcipher_request fallback_req; // keep at the end }; struct aead_ctx { @@ -186,6 +187,7 @@ struct ixp_ctx { unsigned salted; atomic_t configuring; struct completion completion; + struct crypto_skcipher *fallback_tfm; }; struct ixp_alg { @@ -590,7 +592,23 @@ static int init_tfm(struct crypto_tfm *tfm) static int init_tfm_ablk(struct crypto_skcipher *tfm) { - crypto_skcipher_set_reqsize(tfm, sizeof(struct ablk_ctx)); + struct crypto_tfm *ctfm = crypto_skcipher_tfm(tfm); + struct ixp_ctx *ctx = crypto_tfm_ctx(ctfm); + const char *name = crypto_tfm_alg_name(ctfm); + + ctx->fallback_tfm = crypto_alloc_skcipher(name, 0, CRYPTO_ALG_NEED_FALLBACK); + if (IS_ERR(ctx->fallback_tfm)) { + pr_err("ERROR: Cannot allocate fallback for %s %ld\n", + name, PTR_ERR(ctx->fallback_tfm)); + return PTR_ERR(ctx->fallback_tfm); + } + + pr_info("Fallback for %s is %s\n", + crypto_tfm_alg_driver_name(&tfm->base), + crypto_tfm_alg_driver_name(crypto_skcipher_tfm(ctx->fallback_tfm)) + ); + + crypto_skcipher_set_reqsize(tfm, sizeof(struct ablk_ctx) + crypto_skcipher_reqsize(ctx->fallback_tfm)); return init_tfm(crypto_skcipher_tfm(tfm)); } @@ -609,6 +627,10 @@ static void exit_tfm(struct crypto_tfm *tfm) static void exit_tfm_ablk(struct crypto_skcipher *tfm) { + struct crypto_tfm *ctfm = crypto_skcipher_tfm(tfm); + struct ixp_ctx *ctx = crypto_tfm_ctx(ctfm); + + crypto_free_skcipher(ctx->fallback_tfm); exit_tfm(crypto_skcipher_tfm(tfm)); } @@ -854,7 +876,12 @@ static int ablk_setkey(struct crypto_skcipher *tfm, const u8 *key, out: if (!atomic_dec_and_test(&ctx->configuring)) wait_for_completion(&ctx->completion); - return ret; + if (ret) + return ret; + crypto_skcipher_clear_flags(ctx->fallback_tfm, CRYPTO_TFM_REQ_MASK); + crypto_skcipher_set_flags(ctx->fallback_tfm, tfm->base.crt_flags & CRYPTO_TFM_REQ_MASK); + + return crypto_skcipher_setkey(ctx->fallback_tfm, key, key_len); } static int ablk_des3_setkey(struct crypto_skcipher *tfm, const u8 *key, @@ -880,6 +907,25 @@ static int ablk_rfc3686_setkey(struct crypto_skcipher *tfm, const u8 *key, return ablk_setkey(tfm, key, key_len); } +static int ixp4xx_cipher_fallback(struct skcipher_request *areq, int encrypt) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(areq); + struct ixp_ctx *op = crypto_skcipher_ctx(tfm); + struct ablk_ctx *rctx = skcipher_request_ctx(areq); + int err; + + skcipher_request_set_tfm(&rctx->fallback_req, op->fallback_tfm); + skcipher_request_set_callback(&rctx->fallback_req, areq->base.flags, + areq->base.complete, areq->base.data); + 
skcipher_request_set_crypt(&rctx->fallback_req, areq->src, areq->dst, + areq->cryptlen, areq->iv); + if (encrypt) + err = crypto_skcipher_encrypt(&rctx->fallback_req); + else + err = crypto_skcipher_decrypt(&rctx->fallback_req); + return err; +} + static int ablk_perform(struct skcipher_request *req, int encrypt) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); @@ -896,6 +942,9 @@ static int ablk_perform(struct skcipher_request *req, int encrypt) gfp_t flags = req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ? GFP_KERNEL : GFP_ATOMIC; + if (sg_nents(req->src) > 1 || sg_nents(req->dst) > 1) + return ixp4xx_cipher_fallback(req, encrypt); + if (qmgr_stat_full(SEND_QID)) return -EAGAIN; if (atomic_read(&ctx->configuring)) @@ -1422,7 +1471,8 @@ static int __init ixp_module_init(void) /* block ciphers */ cra->base.cra_flags = CRYPTO_ALG_KERN_DRIVER_ONLY | CRYPTO_ALG_ASYNC | - CRYPTO_ALG_ALLOCATES_MEMORY; + CRYPTO_ALG_ALLOCATES_MEMORY | + CRYPTO_ALG_NEED_FALLBACK; if (!cra->setkey) cra->setkey = ablk_setkey; if (!cra->encrypt) From 3557084ef47ba79f84325c575cb9a4887c484d36 Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Wed, 5 May 2021 20:26:11 +0000 Subject: [PATCH 019/142] crypto: ixp4xx - convert unsigned to unsigned int Fixes all issues reported by checkpatch about "unsigned", lets convert them to unsigned int. Signed-off-by: Corentin Labbe Signed-off-by: Herbert Xu --- drivers/crypto/ixp4xx_crypto.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/crypto/ixp4xx_crypto.c b/drivers/crypto/ixp4xx_crypto.c index 8bbf2ead6e791..17de9e60adadb 100644 --- a/drivers/crypto/ixp4xx_crypto.c +++ b/drivers/crypto/ixp4xx_crypto.c @@ -136,7 +136,7 @@ struct crypt_ctl { u32 crypto_ctx; /* NPE Crypto Param structure address */ /* Used by Host: 4*4 bytes*/ - unsigned ctl_flags; + unsigned int ctl_flags; union { struct skcipher_request *ablk_req; struct aead_request *aead_req; @@ -184,7 +184,7 @@ struct ixp_ctx { u8 enckey[MAX_KEYLEN]; u8 salt[MAX_IVLEN]; u8 nonce[CTR_RFC3686_NONCE_SIZE]; - unsigned salted; + unsigned int salted; atomic_t configuring; struct completion completion; struct crypto_skcipher *fallback_tfm; @@ -695,8 +695,8 @@ static int register_chain_var(struct crypto_tfm *tfm, u8 xpad, u32 target, return 0; } -static int setup_auth(struct crypto_tfm *tfm, int encrypt, unsigned authsize, - const u8 *key, int key_len, unsigned digest_len) +static int setup_auth(struct crypto_tfm *tfm, int encrypt, unsigned int authsize, + const u8 *key, int key_len, unsigned int digest_len) { u32 itarget, otarget, npe_ctx_addr; unsigned char *cinfo; @@ -823,12 +823,12 @@ static int setup_cipher(struct crypto_tfm *tfm, int encrypt, } static struct buffer_desc *chainup_buffers(struct device *dev, - struct scatterlist *sg, unsigned nbytes, + struct scatterlist *sg, unsigned int nbytes, struct buffer_desc *buf, gfp_t flags, enum dma_data_direction dir) { for (; nbytes > 0; sg = sg_next(sg)) { - unsigned len = min(nbytes, sg->length); + unsigned int len = min(nbytes, sg->length); struct buffer_desc *next_buf; dma_addr_t next_buf_phys; void *ptr; @@ -930,7 +930,7 @@ static int ablk_perform(struct skcipher_request *req, int encrypt) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct ixp_ctx *ctx = crypto_skcipher_ctx(tfm); - unsigned ivsize = crypto_skcipher_ivsize(tfm); + unsigned int ivsize = crypto_skcipher_ivsize(tfm); struct ix_sa_dir *dir; struct crypt_ctl *crypt; unsigned int nbytes = req->cryptlen; @@ -1045,8 +1045,8 @@ static int 
aead_perform(struct aead_request *req, int encrypt, { struct crypto_aead *tfm = crypto_aead_reqtfm(req); struct ixp_ctx *ctx = crypto_aead_ctx(tfm); - unsigned ivsize = crypto_aead_ivsize(tfm); - unsigned authsize = crypto_aead_authsize(tfm); + unsigned int ivsize = crypto_aead_ivsize(tfm); + unsigned int authsize = crypto_aead_authsize(tfm); struct ix_sa_dir *dir; struct crypt_ctl *crypt; unsigned int cryptlen; @@ -1157,7 +1157,7 @@ static int aead_perform(struct aead_request *req, int encrypt, static int aead_setup(struct crypto_aead *tfm, unsigned int authsize) { struct ixp_ctx *ctx = crypto_aead_ctx(tfm); - unsigned digest_len = crypto_aead_maxauthsize(tfm); + unsigned int digest_len = crypto_aead_maxauthsize(tfm); int ret; if (!ctx->enckey_len && !ctx->authkey_len) From f5b82be62ddd7d9be7dbb624b47aec6240c62a38 Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Wed, 5 May 2021 20:26:12 +0000 Subject: [PATCH 020/142] crypto: ixp4xx - convert all printk to dev_xxx Convert all old printk to dev_xxx. Signed-off-by: Corentin Labbe Signed-off-by: Herbert Xu --- drivers/crypto/ixp4xx_crypto.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/drivers/crypto/ixp4xx_crypto.c b/drivers/crypto/ixp4xx_crypto.c index 17de9e60adadb..486a388c909f1 100644 --- a/drivers/crypto/ixp4xx_crypto.c +++ b/drivers/crypto/ixp4xx_crypto.c @@ -452,7 +452,7 @@ static int init_ixp_crypto(struct device *dev) if (! ( ~(*IXP4XX_EXP_CFG2) & (IXP4XX_FEATURE_HASH | IXP4XX_FEATURE_AES | IXP4XX_FEATURE_DES))) { - printk(KERN_ERR "ixp_crypto: No HW crypto available\n"); + dev_err(dev, "ixp_crypto: No HW crypto available\n"); return ret; } npe_c = npe_request(NPE_ID); @@ -475,8 +475,7 @@ static int init_ixp_crypto(struct device *dev) switch ((msg[1]>>16) & 0xff) { case 3: - printk(KERN_WARNING "Firmware of %s lacks AES support\n", - npe_name(npe_c)); + dev_warn(dev, "Firmware of %s lacks AES support\n", npe_name(npe_c)); support_aes = 0; break; case 4: @@ -484,8 +483,7 @@ static int init_ixp_crypto(struct device *dev) support_aes = 1; break; default: - printk(KERN_ERR "Firmware of %s lacks crypto support\n", - npe_name(npe_c)); + dev_err(dev, "Firmware of %s lacks crypto support\n", npe_name(npe_c)); ret = -ENODEV; goto npe_release; } @@ -521,7 +519,7 @@ static int init_ixp_crypto(struct device *dev) return 0; npe_error: - printk(KERN_ERR "%s not responding\n", npe_name(npe_c)); + dev_err(dev, "%s not responding\n", npe_name(npe_c)); ret = -EIO; err: dma_pool_destroy(ctx_pool); @@ -1487,7 +1485,7 @@ static int __init ixp_module_init(void) cra->base.cra_alignmask = 3; cra->base.cra_priority = 300; if (crypto_register_skcipher(cra)) - printk(KERN_ERR "Failed to register '%s'\n", + dev_err(&pdev->dev, "Failed to register '%s'\n", cra->base.cra_name); else ixp4xx_algos[i].registered = 1; @@ -1520,7 +1518,7 @@ static int __init ixp_module_init(void) cra->base.cra_priority = 300; if (crypto_register_aead(cra)) - printk(KERN_ERR "Failed to register '%s'\n", + dev_err(&pdev->dev, "Failed to register '%s'\n", cra->base.cra_driver_name); else ixp4xx_aeads[i].registered = 1; From 39e39cfb2dc7325714e8f93b77c4acacd5c1ac2e Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Wed, 5 May 2021 20:26:13 +0000 Subject: [PATCH 021/142] crypto: ixp4xx - whitespace fixes Fixes all whitespace issues reported by checkpatch Signed-off-by: Corentin Labbe Signed-off-by: Herbert Xu --- drivers/crypto/ixp4xx_crypto.c | 43 +++++++++++++++++++--------------- 1 file changed, 24 insertions(+), 19 deletions(-) diff --git 
a/drivers/crypto/ixp4xx_crypto.c b/drivers/crypto/ixp4xx_crypto.c index 486a388c909f1..5b8ffa4db45d7 100644 --- a/drivers/crypto/ixp4xx_crypto.c +++ b/drivers/crypto/ixp4xx_crypto.c @@ -213,6 +213,7 @@ static const struct ix_hash_algo hash_alg_md5 = { .icv = "\x01\x23\x45\x67\x89\xAB\xCD\xEF" "\xFE\xDC\xBA\x98\x76\x54\x32\x10", }; + static const struct ix_hash_algo hash_alg_sha1 = { .cfgword = 0x00000005, .icv = "\x67\x45\x23\x01\xEF\xCD\xAB\x89\x98\xBA" @@ -244,12 +245,12 @@ static inline struct crypt_ctl *crypt_phys2virt(dma_addr_t phys) static inline u32 cipher_cfg_enc(struct crypto_tfm *tfm) { - return container_of(tfm->__crt_alg, struct ixp_alg,crypto.base)->cfg_enc; + return container_of(tfm->__crt_alg, struct ixp_alg, crypto.base)->cfg_enc; } static inline u32 cipher_cfg_dec(struct crypto_tfm *tfm) { - return container_of(tfm->__crt_alg, struct ixp_alg,crypto.base)->cfg_dec; + return container_of(tfm->__crt_alg, struct ixp_alg, crypto.base)->cfg_dec; } static inline const struct ix_hash_algo *ix_hash(struct crypto_tfm *tfm) @@ -260,6 +261,7 @@ static inline const struct ix_hash_algo *ix_hash(struct crypto_tfm *tfm) static int setup_crypt_desc(void) { struct device *dev = &pdev->dev; + BUILD_BUG_ON(sizeof(struct crypt_ctl) != 64); crypt_virt = dma_alloc_coherent(dev, NPE_QLEN * sizeof(struct crypt_ctl), @@ -290,7 +292,7 @@ static struct crypt_ctl *get_crypt_desc(void) idx = 0; crypt_virt[i].ctl_flags = CTL_FLAG_USED; spin_unlock_irqrestore(&desc_lock, flags); - return crypt_virt +i; + return crypt_virt + i; } else { spin_unlock_irqrestore(&desc_lock, flags); return NULL; @@ -318,7 +320,7 @@ static struct crypt_ctl *get_crypt_desc_emerg(void) idx = NPE_QLEN; crypt_virt[i].ctl_flags = CTL_FLAG_USED; spin_unlock_irqrestore(&emerg_lock, flags); - return crypt_virt +i; + return crypt_virt + i; } else { spin_unlock_irqrestore(&emerg_lock, flags); return NULL; @@ -417,7 +419,7 @@ static void one_packet(dma_addr_t phys) break; case CTL_FLAG_GEN_REVAES: ctx = crypto_tfm_ctx(crypt->data.tfm); - *(u32*)ctx->decrypt.npe_ctx &= cpu_to_be32(~CIPH_ENCR); + *(u32 *)ctx->decrypt.npe_ctx &= cpu_to_be32(~CIPH_ENCR); if (atomic_dec_and_test(&ctx->configuring)) complete(&ctx->completion); break; @@ -436,8 +438,9 @@ static void crypto_done_action(unsigned long arg) { int i; - for(i=0; i<4; i++) { + for (i = 0; i < 4; i++) { dma_addr_t phys = qmgr_get_entry(RECV_QID); + if (!phys) return; one_packet(phys); @@ -473,7 +476,7 @@ static int init_ixp_crypto(struct device *dev) goto npe_error; } - switch ((msg[1]>>16) & 0xff) { + switch ((msg[1] >> 16) & 0xff) { case 3: dev_warn(dev, "Firmware of %s lacks AES support\n", npe_name(npe_c)); support_aes = 0; @@ -619,6 +622,7 @@ static int init_tfm_aead(struct crypto_aead *tfm) static void exit_tfm(struct crypto_tfm *tfm) { struct ixp_ctx *ctx = crypto_tfm_ctx(tfm); + free_sa_dir(&ctx->encrypt); free_sa_dir(&ctx->decrypt); } @@ -709,11 +713,11 @@ static int setup_auth(struct crypto_tfm *tfm, int encrypt, unsigned int authsize algo = ix_hash(tfm); /* write cfg word to cryptinfo */ - cfgword = algo->cfgword | ( authsize << 6); /* (authsize/4) << 8 */ + cfgword = algo->cfgword | (authsize << 6); /* (authsize/4) << 8 */ #ifndef __ARMEB__ cfgword ^= 0xAA000000; /* change the "byte swap" flags */ #endif - *(u32*)cinfo = cpu_to_be32(cfgword); + *(u32 *)cinfo = cpu_to_be32(cfgword); cinfo += sizeof(cfgword); /* write ICV to cryptinfo */ @@ -750,7 +754,7 @@ static int gen_rev_aes_key(struct crypto_tfm *tfm) if (!crypt) { return -EAGAIN; } - *(u32*)dir->npe_ctx |= 
cpu_to_be32(CIPH_ENCR); + *(u32 *)dir->npe_ctx |= cpu_to_be32(CIPH_ENCR); crypt->data.tfm = tfm; crypt->crypt_offs = 0; @@ -802,21 +806,21 @@ static int setup_cipher(struct crypto_tfm *tfm, int encrypt, return err; } /* write cfg word to cryptinfo */ - *(u32*)cinfo = cpu_to_be32(cipher_cfg); + *(u32 *)cinfo = cpu_to_be32(cipher_cfg); cinfo += sizeof(cipher_cfg); /* write cipher key to cryptinfo */ memcpy(cinfo, key, key_len); /* NPE wants keylen set to DES3_EDE_KEY_SIZE even for single DES */ if (key_len < DES3_EDE_KEY_SIZE && !(cipher_cfg & MOD_AES)) { - memset(cinfo + key_len, 0, DES3_EDE_KEY_SIZE -key_len); + memset(cinfo + key_len, 0, DES3_EDE_KEY_SIZE - key_len); key_len = DES3_EDE_KEY_SIZE; } dir->npe_ctx_idx = sizeof(cipher_cfg) + key_len; dir->npe_mode |= NPE_OP_CRYPT_ENABLE; - if ((cipher_cfg & MOD_AES) && !encrypt) { + if ((cipher_cfg & MOD_AES) && !encrypt) return gen_rev_aes_key(tfm); - } + return 0; } @@ -971,6 +975,7 @@ static int ablk_perform(struct skcipher_request *req, int encrypt) } if (req->src != req->dst) { struct buffer_desc dst_hook; + crypt->mode |= NPE_OP_NOT_IN_PLACE; /* This was never tested by Intel * for more than one dst buffer, I think. */ @@ -1025,7 +1030,7 @@ static int ablk_rfc3686_crypt(struct skcipher_request *req) int ret; /* set up counter block */ - memcpy(iv, ctx->nonce, CTR_RFC3686_NONCE_SIZE); + memcpy(iv, ctx->nonce, CTR_RFC3686_NONCE_SIZE); memcpy(iv + CTR_RFC3686_NONCE_SIZE, info, CTR_RFC3686_IV_SIZE); /* initialize counter portion of counter block */ @@ -1067,7 +1072,7 @@ static int aead_perform(struct aead_request *req, int encrypt, } else { dir = &ctx->decrypt; /* req->cryptlen includes the authsize when decrypting */ - cryptlen = req->cryptlen -authsize; + cryptlen = req->cryptlen - authsize; eff_cryptlen -= authsize; } crypt = get_crypt_desc(); @@ -1188,7 +1193,7 @@ static int aead_setauthsize(struct crypto_aead *tfm, unsigned int authsize) { int max = crypto_aead_maxauthsize(tfm) >> 2; - if ((authsize>>2) < 1 || (authsize>>2) > max || (authsize & 3)) + if ((authsize >> 2) < 1 || (authsize >> 2) > max || (authsize & 3)) return -EINVAL; return aead_setup(tfm, authsize); } @@ -1453,7 +1458,7 @@ static int __init ixp_module_init(void) platform_device_unregister(pdev); return err; } - for (i=0; i< num; i++) { + for (i = 0; i < num; i++) { struct skcipher_alg *cra = &ixp4xx_algos[i].crypto; if (snprintf(cra->base.cra_driver_name, CRYPTO_MAX_ALG_NAME, @@ -1536,7 +1541,7 @@ static void __exit ixp_module_exit(void) crypto_unregister_aead(&ixp4xx_aeads[i].crypto); } - for (i=0; i< num; i++) { + for (i = 0; i < num; i++) { if (ixp4xx_algos[i].registered) crypto_unregister_skcipher(&ixp4xx_algos[i].crypto); } From 87d11a5e9621d2dd9edaee007b339e3afbfcf2ee Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Wed, 5 May 2021 20:26:14 +0000 Subject: [PATCH 022/142] crypto: ixp4xx - Do not initialize static to NULL This patch fixes all checkpatch report about static init. 
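The rationale is the C guarantee that objects with static storage duration are zero-initialized before any code runs, so a spelled-out initializer adds nothing. A two-line illustration (not from the driver):

/* Both pointers start out NULL; the explicit "= NULL" is redundant
 * and is what checkpatch flags (older toolchains could even move
 * such objects from .bss into .data).
 */
static struct dma_pool *demo_pool_a;		/* preferred */
static struct dma_pool *demo_pool_b = NULL;	/* flagged */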
Signed-off-by: Corentin Labbe Signed-off-by: Herbert Xu --- drivers/crypto/ixp4xx_crypto.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/crypto/ixp4xx_crypto.c b/drivers/crypto/ixp4xx_crypto.c index 5b8ffa4db45d7..954696a398750 100644 --- a/drivers/crypto/ixp4xx_crypto.c +++ b/drivers/crypto/ixp4xx_crypto.c @@ -221,10 +221,10 @@ static const struct ix_hash_algo hash_alg_sha1 = { }; static struct npe *npe_c; -static struct dma_pool *buffer_pool = NULL; -static struct dma_pool *ctx_pool = NULL; +static struct dma_pool *buffer_pool; +static struct dma_pool *ctx_pool; -static struct crypt_ctl *crypt_virt = NULL; +static struct crypt_ctl *crypt_virt; static dma_addr_t crypt_phys; static int support_aes = 1; @@ -275,7 +275,7 @@ static DEFINE_SPINLOCK(desc_lock); static struct crypt_ctl *get_crypt_desc(void) { int i; - static int idx = 0; + static int idx; unsigned long flags; spin_lock_irqsave(&desc_lock, flags); From ffb017e9ac66d3e4f368f556d13da79f80611997 Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Wed, 5 May 2021 20:26:15 +0000 Subject: [PATCH 023/142] crypto: ixp4xx - remove brackets from single statement fixes all single statement issues reported by checkpatch Signed-off-by: Corentin Labbe Signed-off-by: Herbert Xu --- drivers/crypto/ixp4xx_crypto.c | 47 +++++++++++++++------------------- 1 file changed, 21 insertions(+), 26 deletions(-) diff --git a/drivers/crypto/ixp4xx_crypto.c b/drivers/crypto/ixp4xx_crypto.c index 954696a398750..03ae9c3a8d97f 100644 --- a/drivers/crypto/ixp4xx_crypto.c +++ b/drivers/crypto/ixp4xx_crypto.c @@ -378,9 +378,9 @@ static void one_packet(dma_addr_t phys) free_buf_chain(dev, req_ctx->src, crypt->src_buf); free_buf_chain(dev, req_ctx->dst, crypt->dst_buf); - if (req_ctx->hmac_virt) { + if (req_ctx->hmac_virt) finish_scattered_hmac(crypt); - } + req->base.complete(&req->base, failed); break; } @@ -402,9 +402,9 @@ static void one_packet(dma_addr_t phys) } } - if (req_ctx->dst) { + if (req_ctx->dst) free_buf_chain(dev, req_ctx->dst, crypt->dst_buf); - } + free_buf_chain(dev, req_ctx->src, crypt->src_buf); req->base.complete(&req->base, failed); break; @@ -497,14 +497,14 @@ static int init_ixp_crypto(struct device *dev) buffer_pool = dma_pool_create("buffer", dev, sizeof(struct buffer_desc), 32, 0); ret = -ENOMEM; - if (!buffer_pool) { + if (!buffer_pool) goto err; - } + ctx_pool = dma_pool_create("context", dev, NPE_CTX_LEN, 16, 0); - if (!ctx_pool) { + if (!ctx_pool) goto err; - } + ret = qmgr_request_queue(SEND_QID, NPE_QLEN_TOTAL, 0, 0, "ixp_crypto:out", NULL); if (ret) @@ -545,11 +545,10 @@ static void release_ixp_crypto(struct device *dev) npe_release(npe_c); - if (crypt_virt) { + if (crypt_virt) dma_free_coherent(dev, NPE_QLEN * sizeof(struct crypt_ctl), crypt_virt, crypt_phys); - } } static void reset_sa_dir(struct ix_sa_dir *dir) @@ -562,9 +561,9 @@ static void reset_sa_dir(struct ix_sa_dir *dir) static int init_sa_dir(struct ix_sa_dir *dir) { dir->npe_ctx = dma_pool_alloc(ctx_pool, GFP_KERNEL, &dir->npe_ctx_phys); - if (!dir->npe_ctx) { + if (!dir->npe_ctx) return -ENOMEM; - } + reset_sa_dir(dir); return 0; } @@ -585,9 +584,9 @@ static int init_tfm(struct crypto_tfm *tfm) if (ret) return ret; ret = init_sa_dir(&ctx->decrypt); - if (ret) { + if (ret) free_sa_dir(&ctx->encrypt); - } + return ret; } @@ -669,9 +668,8 @@ static int register_chain_var(struct crypto_tfm *tfm, u8 xpad, u32 target, memcpy(pad, key, key_len); memset(pad + key_len, 0, HMAC_PAD_BLOCKLEN - key_len); - for (i = 0; i < HMAC_PAD_BLOCKLEN; i++) 
{ + for (i = 0; i < HMAC_PAD_BLOCKLEN; i++) pad[i] ^= xpad; - } crypt->data.tfm = tfm; crypt->regist_ptr = pad; @@ -751,9 +749,9 @@ static int gen_rev_aes_key(struct crypto_tfm *tfm) struct ix_sa_dir *dir = &ctx->decrypt; crypt = get_crypt_desc_emerg(); - if (!crypt) { + if (!crypt) return -EAGAIN; - } + *(u32 *)dir->npe_ctx |= cpu_to_be32(CIPH_ENCR); crypt->data.tfm = tfm; @@ -1004,9 +1002,9 @@ static int ablk_perform(struct skcipher_request *req, int encrypt) free_buf_src: free_buf_chain(dev, req_ctx->src, crypt->src_buf); free_buf_dest: - if (req->src != req->dst) { + if (req->src != req->dst) free_buf_chain(dev, req_ctx->dst, crypt->dst_buf); - } + crypt->ctl_flags = CTL_FLAG_UNUSED; return -ENOMEM; } @@ -1462,14 +1460,11 @@ static int __init ixp_module_init(void) struct skcipher_alg *cra = &ixp4xx_algos[i].crypto; if (snprintf(cra->base.cra_driver_name, CRYPTO_MAX_ALG_NAME, - "%s"IXP_POSTFIX, cra->base.cra_name) >= - CRYPTO_MAX_ALG_NAME) - { + "%s"IXP_POSTFIX, cra->base.cra_name) >= + CRYPTO_MAX_ALG_NAME) continue; - } - if (!support_aes && (ixp4xx_algos[i].cfg_enc & MOD_AES)) { + if (!support_aes && (ixp4xx_algos[i].cfg_enc & MOD_AES)) continue; - } /* block ciphers */ cra->base.cra_flags = CRYPTO_ALG_KERN_DRIVER_ONLY | From c5e070311fab7aa8398f67b97d2a452d9eb1a112 Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Wed, 5 May 2021 20:26:16 +0000 Subject: [PATCH 024/142] crypto: ixp4xx - Correct functions alignment This patch fixes all alignment issues reported by checkpatch. Signed-off-by: Corentin Labbe Signed-off-by: Herbert Xu --- drivers/crypto/ixp4xx_crypto.c | 65 ++++++++++++++++++---------------- 1 file changed, 35 insertions(+), 30 deletions(-) diff --git a/drivers/crypto/ixp4xx_crypto.c b/drivers/crypto/ixp4xx_crypto.c index 03ae9c3a8d97f..b38650b0fea10 100644 --- a/drivers/crypto/ixp4xx_crypto.c +++ b/drivers/crypto/ixp4xx_crypto.c @@ -354,8 +354,8 @@ static void finish_scattered_hmac(struct crypt_ctl *crypt) int decryptlen = req->assoclen + req->cryptlen - authsize; if (req_ctx->encrypt) { - scatterwalk_map_and_copy(req_ctx->hmac_virt, - req->dst, decryptlen, authsize, 1); + scatterwalk_map_and_copy(req_ctx->hmac_virt, req->dst, + decryptlen, authsize, 1); } dma_pool_free(buffer_pool, req_ctx->hmac_virt, crypt->icv_rev_aes); } @@ -412,7 +412,7 @@ static void one_packet(dma_addr_t phys) case CTL_FLAG_GEN_ICV: ctx = crypto_tfm_ctx(crypt->data.tfm); dma_pool_free(ctx_pool, crypt->regist_ptr, - crypt->regist_buf->phys_addr); + crypt->regist_buf->phys_addr); dma_pool_free(buffer_pool, crypt->regist_buf, crypt->src_buf); if (atomic_dec_and_test(&ctx->configuring)) complete(&ctx->completion); @@ -494,14 +494,13 @@ static int init_ixp_crypto(struct device *dev) * so assure it is large enough */ BUILD_BUG_ON(SHA1_DIGEST_SIZE > sizeof(struct buffer_desc)); - buffer_pool = dma_pool_create("buffer", dev, - sizeof(struct buffer_desc), 32, 0); + buffer_pool = dma_pool_create("buffer", dev, sizeof(struct buffer_desc), + 32, 0); ret = -ENOMEM; if (!buffer_pool) goto err; - ctx_pool = dma_pool_create("context", dev, - NPE_CTX_LEN, 16, 0); + ctx_pool = dma_pool_create("context", dev, NPE_CTX_LEN, 16, 0); if (!ctx_pool) goto err; @@ -546,9 +545,8 @@ static void release_ixp_crypto(struct device *dev) npe_release(npe_c); if (crypt_virt) - dma_free_coherent(dev, - NPE_QLEN * sizeof(struct crypt_ctl), - crypt_virt, crypt_phys); + dma_free_coherent(dev, NPE_QLEN * sizeof(struct crypt_ctl), + crypt_virt, crypt_phys); } static void reset_sa_dir(struct ix_sa_dir *dir) @@ -641,7 +639,8 @@ static 
void exit_tfm_aead(struct crypto_aead *tfm) } static int register_chain_var(struct crypto_tfm *tfm, u8 xpad, u32 target, - int init_len, u32 ctx_addr, const u8 *key, int key_len) + int init_len, u32 ctx_addr, const u8 *key, + int key_len) { struct ixp_ctx *ctx = crypto_tfm_ctx(tfm); struct crypt_ctl *crypt; @@ -735,11 +734,11 @@ static int setup_auth(struct crypto_tfm *tfm, int encrypt, unsigned int authsize dir->npe_mode |= NPE_OP_HASH_VERIFY; ret = register_chain_var(tfm, HMAC_OPAD_VALUE, otarget, - init_len, npe_ctx_addr, key, key_len); + init_len, npe_ctx_addr, key, key_len); if (ret) return ret; return register_chain_var(tfm, HMAC_IPAD_VALUE, itarget, - init_len, npe_ctx_addr, key, key_len); + init_len, npe_ctx_addr, key, key_len); } static int gen_rev_aes_key(struct crypto_tfm *tfm) @@ -770,8 +769,8 @@ static int gen_rev_aes_key(struct crypto_tfm *tfm) return 0; } -static int setup_cipher(struct crypto_tfm *tfm, int encrypt, - const u8 *key, int key_len) +static int setup_cipher(struct crypto_tfm *tfm, int encrypt, const u8 *key, + int key_len) { u8 *cinfo; u32 cipher_cfg; @@ -791,9 +790,15 @@ static int setup_cipher(struct crypto_tfm *tfm, int encrypt, } if (cipher_cfg & MOD_AES) { switch (key_len) { - case 16: keylen_cfg = MOD_AES128; break; - case 24: keylen_cfg = MOD_AES192; break; - case 32: keylen_cfg = MOD_AES256; break; + case 16: + keylen_cfg = MOD_AES128; + break; + case 24: + keylen_cfg = MOD_AES192; + break; + case 32: + keylen_cfg = MOD_AES256; + break; default: return -EINVAL; } @@ -855,7 +860,7 @@ static struct buffer_desc *chainup_buffers(struct device *dev, } static int ablk_setkey(struct crypto_skcipher *tfm, const u8 *key, - unsigned int key_len) + unsigned int key_len) { struct ixp_ctx *ctx = crypto_skcipher_ctx(tfm); int ret; @@ -892,7 +897,7 @@ static int ablk_des3_setkey(struct crypto_skcipher *tfm, const u8 *key, } static int ablk_rfc3686_setkey(struct crypto_skcipher *tfm, const u8 *key, - unsigned int key_len) + unsigned int key_len) { struct ixp_ctx *ctx = crypto_skcipher_ctx(tfm); @@ -901,7 +906,7 @@ static int ablk_rfc3686_setkey(struct crypto_skcipher *tfm, const u8 *key, return -EINVAL; memcpy(ctx->nonce, key + (key_len - CTR_RFC3686_NONCE_SIZE), - CTR_RFC3686_NONCE_SIZE); + CTR_RFC3686_NONCE_SIZE); key_len -= CTR_RFC3686_NONCE_SIZE; return ablk_setkey(tfm, key, key_len); @@ -979,7 +984,7 @@ static int ablk_perform(struct skcipher_request *req, int encrypt) * for more than one dst buffer, I think. 
*/ req_ctx->dst = NULL; if (!chainup_buffers(dev, req->dst, nbytes, &dst_hook, - flags, DMA_FROM_DEVICE)) + flags, DMA_FROM_DEVICE)) goto free_buf_dest; src_direction = DMA_TO_DEVICE; req_ctx->dst = dst_hook.next; @@ -988,8 +993,8 @@ static int ablk_perform(struct skcipher_request *req, int encrypt) req_ctx->dst = NULL; } req_ctx->src = NULL; - if (!chainup_buffers(dev, req->src, nbytes, &src_hook, - flags, src_direction)) + if (!chainup_buffers(dev, req->src, nbytes, &src_hook, flags, + src_direction)) goto free_buf_src; req_ctx->src = src_hook.next; @@ -1042,7 +1047,7 @@ static int ablk_rfc3686_crypt(struct skcipher_request *req) } static int aead_perform(struct aead_request *req, int encrypt, - int cryptoffset, int eff_cryptlen, u8 *iv) + int cryptoffset, int eff_cryptlen, u8 *iv) { struct crypto_aead *tfm = crypto_aead_reqtfm(req); struct ixp_ctx *ctx = crypto_aead_ctx(tfm); @@ -1130,12 +1135,12 @@ static int aead_perform(struct aead_request *req, int encrypt, /* The 12 hmac bytes are scattered, * we need to copy them into a safe buffer */ req_ctx->hmac_virt = dma_pool_alloc(buffer_pool, flags, - &crypt->icv_rev_aes); + &crypt->icv_rev_aes); if (unlikely(!req_ctx->hmac_virt)) goto free_buf_dst; if (!encrypt) { scatterwalk_map_and_copy(req_ctx->hmac_virt, - req->src, cryptlen, authsize, 0); + req->src, cryptlen, authsize, 0); } req_ctx->encrypt = encrypt; } else { @@ -1176,11 +1181,11 @@ static int aead_setup(struct crypto_aead *tfm, unsigned int authsize) if (ret) goto out; ret = setup_auth(&tfm->base, 0, authsize, ctx->authkey, - ctx->authkey_len, digest_len); + ctx->authkey_len, digest_len); if (ret) goto out; ret = setup_auth(&tfm->base, 1, authsize, ctx->authkey, - ctx->authkey_len, digest_len); + ctx->authkey_len, digest_len); out: if (!atomic_dec_and_test(&ctx->configuring)) wait_for_completion(&ctx->completion); @@ -1197,7 +1202,7 @@ static int aead_setauthsize(struct crypto_aead *tfm, unsigned int authsize) } static int aead_setkey(struct crypto_aead *tfm, const u8 *key, - unsigned int keylen) + unsigned int keylen) { struct ixp_ctx *ctx = crypto_aead_ctx(tfm); struct crypto_authenc_keys keys; From 9ca04a51a7e0b08b0e402ddc65acba00678a91d7 Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Wed, 5 May 2021 20:26:17 +0000 Subject: [PATCH 025/142] MAINTAINERS: add ixp4xx_crypto to the right arch list drivers/crypto/ixp4xx_crypto.c is missing in the IXP4XX arch file list. Signed-off-by: Corentin Labbe Signed-off-by: Herbert Xu --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index bd7aff0c120f2..75885258fae39 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1972,6 +1972,7 @@ F: Documentation/devicetree/bindings/interrupt-controller/intel,ixp4xx-interrupt F: Documentation/devicetree/bindings/timer/intel,ixp4xx-timer.yaml F: arch/arm/mach-ixp4xx/ F: drivers/clocksource/timer-ixp4xx.c +F: drivers/crypto/ixp4xx_crypto.c F: drivers/gpio/gpio-ixp4xx.c F: drivers/irqchip/irq-ixp4xx.c F: include/linux/irqchip/irq-ixp4xx.h From 653fdbbf2d2006322b73dfa50add020625947a60 Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Wed, 5 May 2021 20:26:18 +0000 Subject: [PATCH 026/142] MAINTAINERS: add myself as maintainer of ixp4xx_crypto No maintainer exists for ixp4xx_crypto, since I have access to a board with it, I propose to maintain it. 
Signed-off-by: Corentin Labbe Signed-off-by: Herbert Xu --- MAINTAINERS | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 75885258fae39..6df5a401ff92f 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9236,6 +9236,12 @@ F: Documentation/admin-guide/media/ipu3_rcb.svg F: Documentation/userspace-api/media/v4l/pixfmt-meta-intel-ipu3.rst F: drivers/staging/media/ipu3/ +INTEL IXP4XX CRYPTO SUPPORT +M: Corentin Labbe +L: linux-crypto@vger.kernel.org +S: Maintained +F: drivers/crypto/ixp4xx_crypto.c + INTEL IXP4XX QMGR, NPE, ETHERNET and HSS SUPPORT M: Krzysztof Halasa S: Maintained From 3c995c4c7575b7b248d16e765fe05c01795fcd14 Mon Sep 17 00:00:00 2001 From: Yang Li Date: Fri, 7 May 2021 17:56:57 +0800 Subject: [PATCH 027/142] crypto: cavium/nitrox - Remove redundant initialization of 'sg' Pointer 'sg' is being initialized, but its value is never read, as 'sg' is assigned the same value in for_each_sg(). Remove the redundant assignment. This cleans up the following clang warning: drivers/crypto/cavium/nitrox/nitrox_reqmgr.c:161:22: warning: Value stored to 'sg' during its initialization is never read [clang-analyzer-deadcode.DeadStores] Reported-by: Abaci Robot Signed-off-by: Yang Li Signed-off-by: Herbert Xu --- drivers/crypto/cavium/nitrox/nitrox_reqmgr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/crypto/cavium/nitrox/nitrox_reqmgr.c b/drivers/crypto/cavium/nitrox/nitrox_reqmgr.c index df95ba26b4141..bc35d4cc41b68 100644 --- a/drivers/crypto/cavium/nitrox/nitrox_reqmgr.c +++ b/drivers/crypto/cavium/nitrox/nitrox_reqmgr.c @@ -159,7 +159,7 @@ static int dma_map_inbufs(struct nitrox_softreq *sr, struct se_crypto_request *req) { struct device *dev = DEV(sr->ndev); - struct scatterlist *sg = req->src; + struct scatterlist *sg; int i, nents, ret = 0; nents = dma_map_sg(dev, req->src, sg_nents(req->src), From eb9e492f5c06fe197550e68973f88cba6e14274a Mon Sep 17 00:00:00 2001 From: Yang Li Date: Fri, 7 May 2021 17:58:07 +0800 Subject: [PATCH 028/142] crypto: cavium/nitrox - Fix kernel-doc Fix the function name in the nitrox_reqmgr.c kernel-doc comment to remove a warning: drivers/crypto/cavium/nitrox/nitrox_reqmgr.c:382: warning: expecting prototype for nitrox_se_request(). Prototype was for nitrox_process_se_request() instead Reported-by: Abaci Robot Signed-off-by: Yang Li Signed-off-by: Herbert Xu --- drivers/crypto/cavium/nitrox/nitrox_reqmgr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/crypto/cavium/nitrox/nitrox_reqmgr.c b/drivers/crypto/cavium/nitrox/nitrox_reqmgr.c index bc35d4cc41b68..4434c92d6229f 100644 --- a/drivers/crypto/cavium/nitrox/nitrox_reqmgr.c +++ b/drivers/crypto/cavium/nitrox/nitrox_reqmgr.c @@ -369,7 +369,7 @@ static int nitrox_enqueue_request(struct nitrox_softreq *sr) } /** - * nitrox_se_request - Send request to SE core + * nitrox_process_se_request - Send request to SE core * @ndev: NITROX device * @req: Crypto request * From 06676aa1f455c74e3ad1624cea3acb9ed2ef71ae Mon Sep 17 00:00:00 2001 From: Bixuan Cui Date: Sat, 8 May 2021 11:14:55 +0800 Subject: [PATCH 029/142] crypto: nx - add missing MODULE_DEVICE_TABLE This patch adds the missing MODULE_DEVICE_TABLE definition, which generates the correct modalias for automatic loading of this driver when it is built as an external module.
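For reference, a sketch of the mechanism on a hypothetical OF driver (the table contents are invented; the macro is the real one): MODULE_DEVICE_TABLE() exports the ID table into the module image, from which the build generates the modalias strings that udev and modprobe match at device discovery time.

#include <linux/mod_devicetable.h>
#include <linux/module.h>

static const struct of_device_id demo_of_ids[] = {
	{ .compatible = "vendor,demo-accel" },	/* hypothetical */
	{ /* sentinel */ }
};
MODULE_DEVICE_TABLE(of, demo_of_ids);

Without the macro, the module still binds when loaded by hand, but automatic loading on hotplug cannot find it.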
Reported-by: Hulk Robot Signed-off-by: Bixuan Cui Signed-off-by: Herbert Xu --- drivers/crypto/nx/nx-842-pseries.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/crypto/nx/nx-842-pseries.c b/drivers/crypto/nx/nx-842-pseries.c index cc8dd3072b8b7..8ee547ee378ec 100644 --- a/drivers/crypto/nx/nx-842-pseries.c +++ b/drivers/crypto/nx/nx-842-pseries.c @@ -1069,6 +1069,7 @@ static const struct vio_device_id nx842_vio_driver_ids[] = { {"ibm,compression-v1", "ibm,compression"}, {"", ""}, }; +MODULE_DEVICE_TABLE(vio, nx842_vio_driver_ids); static struct vio_driver nx842_vio_driver = { .name = KBUILD_MODNAME, From b01360384009ab066940b45f34880991ea7ccbfb Mon Sep 17 00:00:00 2001 From: Zhen Lei Date: Sat, 8 May 2021 15:00:49 +0800 Subject: [PATCH 030/142] crypto: ux500 - Fix error return code in hash_hw_final() Fix to return a negative error code from the error handling case instead of 0, as done elsewhere in this function. Fixes: 8a63b1994c50 ("crypto: ux500 - Add driver for HASH hardware") Reported-by: Hulk Robot Signed-off-by: Zhen Lei Reviewed-by: Linus Walleij Signed-off-by: Herbert Xu --- drivers/crypto/ux500/hash/hash_core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/crypto/ux500/hash/hash_core.c b/drivers/crypto/ux500/hash/hash_core.c index ecb7412e84e3e..51a6e1a424349 100644 --- a/drivers/crypto/ux500/hash/hash_core.c +++ b/drivers/crypto/ux500/hash/hash_core.c @@ -1011,6 +1011,7 @@ static int hash_hw_final(struct ahash_request *req) goto out; } } else if (req->nbytes == 0 && ctx->keylen > 0) { + ret = -EPERM; dev_err(device_data->dev, "%s: Empty message with keylength > 0, NOT supported\n", __func__); goto out; From 2d016672528a592ada5188e53ac746e1b8b7a978 Mon Sep 17 00:00:00 2001 From: Hui Tang Date: Mon, 10 May 2021 16:54:08 +0800 Subject: [PATCH 031/142] crypto: testmgr - fix initialization of 'secret_size' The actual data length of the 'secret' is not equal to 'secret_size'. Since the 'curve_id' has been removed from the 'secret', the 'secret_size' should be reduced by the length of the 'curve_id'.
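The arithmetic behind the corrected numbers, assuming the packed-secret layout ecdh has used since the curve_id removal (the header field sizes stated here should be read as assumptions about that layout):

/*
 *   secret_size = sizeof(struct kpp_secret)   4 bytes (u16 type + u16 len)
 *               + sizeof(u16)                 2 bytes for key_size
 *               + key_size                    the raw private key
 *
 * NIST P-192: 4 + 2 + 24 = 30   (was 32 with the 2-byte curve_id)
 * NIST P-256: 4 + 2 + 32 = 38   (was 40)
 * zero-length key: 4 + 2 + 0 = 6   (was 8)
 */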
Fixes: 6763f5ea2d9a ("crypto: ecdh - move curve_id of ECDH from ...") Signed-off-by: Hui Tang Signed-off-by: Herbert Xu --- crypto/testmgr.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/crypto/testmgr.h b/crypto/testmgr.h index 34e4a3db39917..aead75d904933 100644 --- a/crypto/testmgr.h +++ b/crypto/testmgr.h @@ -2719,7 +2719,7 @@ static const struct kpp_testvec ecdh_p192_tv_template[] = { "\xf4\x57\xcc\x4f\x1f\x4e\x31\xcc" "\xe3\x40\x60\xc8\x06\x93\xc6\x2e" "\x99\x80\x81\x28\xaf\xc5\x51\x74", - .secret_size = 32, + .secret_size = 30, .b_public_size = 48, .expected_a_public_size = 48, .expected_ss_size = 24 @@ -2766,7 +2766,7 @@ static const struct kpp_testvec ecdh_p256_tv_template[] = { "\x9f\x4a\x38\xcc\xc0\x2c\x49\x2f" "\xb1\x32\xbb\xaf\x22\x61\xda\xcb" "\x6f\xdb\xa9\xaa\xfc\x77\x81\xf3", - .secret_size = 40, + .secret_size = 38, .b_public_size = 64, .expected_a_public_size = 64, .expected_ss_size = 32 @@ -2804,8 +2804,8 @@ static const struct kpp_testvec ecdh_p256_tv_template[] = { "\x37\x08\xcc\x40\x5e\x7a\xfd\x6a" "\x6a\x02\x6e\x41\x87\x68\x38\x77" "\xfa\xa9\x44\x43\x2d\xef\x09\xdf", - .secret_size = 8, - .b_secret_size = 40, + .secret_size = 6, + .b_secret_size = 38, .b_public_size = 64, .expected_a_public_size = 64, .expected_ss_size = 32, From c5ae16f5c6b91dc78a08885a753489d608de4abd Mon Sep 17 00:00:00 2001 From: Hui Tang Date: Mon, 10 May 2021 16:59:47 +0800 Subject: [PATCH 032/142] crypto: ecdh - extend 'cra_driver_name' with curve name Currently, 'cra_driver_name' cannot be used to specify an ecdh algorithm with a specific curve, so extend it with the curve name. Although 'cra_name' can also specify a specific curve, the generic ecdh driver cannot be selected once a vendor hardware accelerator has registered. Fixes: 6763f5ea2d9a ("crypto: ecdh - move curve_id of ECDH from ...") Signed-off-by: Hui Tang Signed-off-by: Herbert Xu --- crypto/ecdh.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/crypto/ecdh.c b/crypto/ecdh.c index 04a427b8c9564..07eb34fef25b7 100644 --- a/crypto/ecdh.c +++ b/crypto/ecdh.c @@ -141,7 +141,7 @@ static struct kpp_alg ecdh_nist_p192 = { .init = ecdh_nist_p192_init_tfm, .base = { .cra_name = "ecdh-nist-p192", - .cra_driver_name = "ecdh-generic", + .cra_driver_name = "ecdh-nist-p192-generic", .cra_priority = 100, .cra_module = THIS_MODULE, .cra_ctxsize = sizeof(struct ecdh_ctx), @@ -166,7 +166,7 @@ static struct kpp_alg ecdh_nist_p256 = { .init = ecdh_nist_p256_init_tfm, .base = { .cra_name = "ecdh-nist-p256", - .cra_driver_name = "ecdh-generic", + .cra_driver_name = "ecdh-nist-p256-generic", .cra_priority = 100, .cra_module = THIS_MODULE, .cra_ctxsize = sizeof(struct ecdh_ctx), From a225762057d6818e4a75ad5c2c16495662d71495 Mon Sep 17 00:00:00 2001 From: Hui Tang Date: Mon, 10 May 2021 16:59:48 +0800 Subject: [PATCH 033/142] crypto: hisilicon/hpre - extend 'cra_driver_name' with curve name Currently, 'cra_driver_name' cannot be used to specify an ecdh algorithm with a specific curve, so extend it with the curve name.
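Why the unique name matters to API users, in a short sketch (the algorithm names match the patches above; error handling trimmed to the essentials): crypto_alloc_kpp() resolves either a cra_name or a cra_driver_name, so a caller can now pin the generic implementation even while a higher-priority hardware provider is registered under the same cra_name.

#include <crypto/kpp.h>
#include <linux/err.h>

static int demo_pick_impls(void)
{
	struct crypto_kpp *best, *sw;

	/* Highest-priority provider of the algorithm (may be hpre). */
	best = crypto_alloc_kpp("ecdh-nist-p192", 0, 0);
	if (IS_ERR(best))
		return PTR_ERR(best);

	/* Explicitly the generic software implementation. */
	sw = crypto_alloc_kpp("ecdh-nist-p192-generic", 0, 0);
	if (IS_ERR(sw)) {
		crypto_free_kpp(best);
		return PTR_ERR(sw);
	}

	crypto_free_kpp(sw);
	crypto_free_kpp(best);
	return 0;
}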
Fixes: 6763f5ea2d9a ("crypto: ecdh - move curve_id of ECDH from ...") Signed-off-by: Hui Tang Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/hpre/hpre_crypto.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/hisilicon/hpre/hpre_crypto.c b/drivers/crypto/hisilicon/hpre/hpre_crypto.c index a380087c83f77..c07a7f52d857e 100644 --- a/drivers/crypto/hisilicon/hpre/hpre_crypto.c +++ b/drivers/crypto/hisilicon/hpre/hpre_crypto.c @@ -1940,7 +1940,7 @@ static struct kpp_alg ecdh_nist_p192 = { .cra_ctxsize = sizeof(struct hpre_ctx), .cra_priority = HPRE_CRYPTO_ALG_PRI, .cra_name = "ecdh-nist-p192", - .cra_driver_name = "hpre-ecdh", + .cra_driver_name = "hpre-ecdh-nist-p192", .cra_module = THIS_MODULE, }, }; @@ -1957,7 +1957,7 @@ static struct kpp_alg ecdh_nist_p256 = { .cra_ctxsize = sizeof(struct hpre_ctx), .cra_priority = HPRE_CRYPTO_ALG_PRI, .cra_name = "ecdh-nist-p256", - .cra_driver_name = "hpre-ecdh", + .cra_driver_name = "hpre-ecdh-nist-p256", .cra_module = THIS_MODULE, }, }; From 0b0553b701f830d820ba9026e5799c24e400a4b5 Mon Sep 17 00:00:00 2001 From: Hui Tang Date: Mon, 10 May 2021 17:02:55 +0800 Subject: [PATCH 034/142] crypto: hisilicon/hpre - fix unmapping invalid dma address Currently, an invalid dma address may be unmapped when calling 'xx_data_clr_all' in error path, so check dma address of sqe in/out if initialized before calling 'dma_free_coherent' or 'dma_unmap_single'. Fixes: a9214b0b6ed2 ("crypto: hisilicon - fix the check on dma address") Signed-off-by: Hui Tang Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/hpre/hpre_crypto.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/drivers/crypto/hisilicon/hpre/hpre_crypto.c b/drivers/crypto/hisilicon/hpre/hpre_crypto.c index c07a7f52d857e..db00e9f763da0 100644 --- a/drivers/crypto/hisilicon/hpre/hpre_crypto.c +++ b/drivers/crypto/hisilicon/hpre/hpre_crypto.c @@ -298,6 +298,8 @@ static void hpre_hw_data_clr_all(struct hpre_ctx *ctx, dma_addr_t tmp; tmp = le64_to_cpu(sqe->in); + if (unlikely(dma_mapping_error(dev, tmp))) + return; if (src) { if (req->src) @@ -307,6 +309,8 @@ static void hpre_hw_data_clr_all(struct hpre_ctx *ctx, } tmp = le64_to_cpu(sqe->out); + if (unlikely(dma_mapping_error(dev, tmp))) + return; if (req->dst) { if (dst) @@ -524,6 +528,8 @@ static int hpre_msg_request_set(struct hpre_ctx *ctx, void *req, bool is_rsa) msg->key = cpu_to_le64(ctx->dh.dma_xa_p); } + msg->in = cpu_to_le64(DMA_MAPPING_ERROR); + msg->out = cpu_to_le64(DMA_MAPPING_ERROR); msg->dw0 |= cpu_to_le32(0x1 << HPRE_SQE_DONE_SHIFT); msg->task_len1 = (ctx->key_sz >> HPRE_BITS_2_BYTES_SHIFT) - 1; h_req->ctx = ctx; @@ -1372,11 +1378,15 @@ static void hpre_ecdh_hw_data_clr_all(struct hpre_ctx *ctx, dma_addr_t dma; dma = le64_to_cpu(sqe->in); + if (unlikely(dma_mapping_error(dev, dma))) + return; if (src && req->src) dma_free_coherent(dev, ctx->key_sz << 2, req->src, dma); dma = le64_to_cpu(sqe->out); + if (unlikely(dma_mapping_error(dev, dma))) + return; if (req->dst) dma_free_coherent(dev, ctx->key_sz << 1, req->dst, dma); @@ -1431,6 +1441,8 @@ static int hpre_ecdh_msg_request_set(struct hpre_ctx *ctx, h_req->areq.ecdh = req; msg = &h_req->req; memset(msg, 0, sizeof(*msg)); + msg->in = cpu_to_le64(DMA_MAPPING_ERROR); + msg->out = cpu_to_le64(DMA_MAPPING_ERROR); msg->key = cpu_to_le64(ctx->ecdh.dma_p); msg->dw0 |= cpu_to_le32(0x1U << HPRE_SQE_DONE_SHIFT); @@ -1667,11 +1679,15 @@ static void hpre_curve25519_hw_data_clr_all(struct hpre_ctx *ctx, dma_addr_t dma; dma = le64_to_cpu(sqe->in); 
+ if (unlikely(dma_mapping_error(dev, dma))) + return; if (src && req->src) dma_free_coherent(dev, ctx->key_sz, req->src, dma); dma = le64_to_cpu(sqe->out); + if (unlikely(dma_mapping_error(dev, dma))) + return; if (req->dst) dma_free_coherent(dev, ctx->key_sz, req->dst, dma); @@ -1722,6 +1738,8 @@ static int hpre_curve25519_msg_request_set(struct hpre_ctx *ctx, h_req->areq.curve25519 = req; msg = &h_req->req; memset(msg, 0, sizeof(*msg)); + msg->in = cpu_to_le64(DMA_MAPPING_ERROR); + msg->out = cpu_to_le64(DMA_MAPPING_ERROR); msg->key = cpu_to_le64(ctx->curve25519.dma_p); msg->dw0 |= cpu_to_le32(0x1U << HPRE_SQE_DONE_SHIFT); From e0a6f390d44b7d4d04fb3f2dbba46824bdbd1b4f Mon Sep 17 00:00:00 2001 From: Hui Tang Date: Wed, 12 May 2021 14:27:04 +0800 Subject: [PATCH 035/142] crypto: hisilicon/hpre - the macro 'HPRE_ADDR' expands The macro 'HPRE_ADDR' is unnecessary, so expanding it. Signed-off-by: Hui Tang Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/hpre/hpre_main.c | 59 +++++++++++------------ 1 file changed, 29 insertions(+), 30 deletions(-) diff --git a/drivers/crypto/hisilicon/hpre/hpre_main.c b/drivers/crypto/hisilicon/hpre/hpre_main.c index 046bc962c8b2d..c914e0005859a 100644 --- a/drivers/crypto/hisilicon/hpre/hpre_main.c +++ b/drivers/crypto/hisilicon/hpre/hpre_main.c @@ -69,7 +69,6 @@ #define HPRE_DBGFS_VAL_MAX_LEN 20 #define HPRE_PCI_DEVICE_ID 0xa258 #define HPRE_PCI_VF_DEVICE_ID 0xa259 -#define HPRE_ADDR(qm, offset) ((qm)->io_base + (offset)) #define HPRE_QM_USR_CFG_MASK 0xfffffffe #define HPRE_QM_AXI_CFG_MASK 0xffff #define HPRE_QM_VFG_AX_MASK 0xff @@ -302,10 +301,10 @@ static int hpre_set_cluster(struct hisi_qm *qm) /* clusters initiating */ writel(cluster_core_mask, - HPRE_ADDR(qm, offset + HPRE_CORE_ENB)); - writel(0x1, HPRE_ADDR(qm, offset + HPRE_CORE_INI_CFG)); - ret = readl_relaxed_poll_timeout(HPRE_ADDR(qm, offset + - HPRE_CORE_INI_STATUS), val, + qm->io_base + offset + HPRE_CORE_ENB); + writel(0x1, qm->io_base + offset + HPRE_CORE_INI_CFG); + ret = readl_relaxed_poll_timeout(qm->io_base + offset + + HPRE_CORE_INI_STATUS, val, ((val & cluster_core_mask) == cluster_core_mask), HPRE_REG_RD_INTVRL_US, @@ -329,11 +328,11 @@ static void disable_flr_of_bme(struct hisi_qm *qm) { u32 val; - val = readl(HPRE_ADDR(qm, QM_PEH_AXUSER_CFG)); + val = readl(qm->io_base + QM_PEH_AXUSER_CFG); val &= ~(HPRE_QM_BME_FLR | HPRE_QM_SRIOV_FLR); val |= HPRE_QM_PM_FLR; - writel(val, HPRE_ADDR(qm, QM_PEH_AXUSER_CFG)); - writel(PEH_AXUSER_CFG_ENABLE, HPRE_ADDR(qm, QM_PEH_AXUSER_CFG_ENABLE)); + writel(val, qm->io_base + QM_PEH_AXUSER_CFG); + writel(PEH_AXUSER_CFG_ENABLE, qm->io_base + QM_PEH_AXUSER_CFG_ENABLE); } static int hpre_set_user_domain_and_cache(struct hisi_qm *qm) @@ -342,33 +341,33 @@ static int hpre_set_user_domain_and_cache(struct hisi_qm *qm) u32 val; int ret; - writel(HPRE_QM_USR_CFG_MASK, HPRE_ADDR(qm, QM_ARUSER_M_CFG_ENABLE)); - writel(HPRE_QM_USR_CFG_MASK, HPRE_ADDR(qm, QM_AWUSER_M_CFG_ENABLE)); - writel_relaxed(HPRE_QM_AXI_CFG_MASK, HPRE_ADDR(qm, QM_AXI_M_CFG)); + writel(HPRE_QM_USR_CFG_MASK, qm->io_base + QM_ARUSER_M_CFG_ENABLE); + writel(HPRE_QM_USR_CFG_MASK, qm->io_base + QM_AWUSER_M_CFG_ENABLE); + writel_relaxed(HPRE_QM_AXI_CFG_MASK, qm->io_base + QM_AXI_M_CFG); /* HPRE need more time, we close this interrupt */ - val = readl_relaxed(HPRE_ADDR(qm, HPRE_QM_ABNML_INT_MASK)); + val = readl_relaxed(qm->io_base + HPRE_QM_ABNML_INT_MASK); val |= BIT(HPRE_TIMEOUT_ABNML_BIT); - writel_relaxed(val, HPRE_ADDR(qm, HPRE_QM_ABNML_INT_MASK)); + writel_relaxed(val, qm->io_base + 
HPRE_QM_ABNML_INT_MASK); if (qm->ver >= QM_HW_V3) writel(HPRE_RSA_ENB | HPRE_ECC_ENB, - HPRE_ADDR(qm, HPRE_TYPES_ENB)); + qm->io_base + HPRE_TYPES_ENB); else - writel(HPRE_RSA_ENB, HPRE_ADDR(qm, HPRE_TYPES_ENB)); - - writel(HPRE_QM_VFG_AX_MASK, HPRE_ADDR(qm, HPRE_VFG_AXCACHE)); - writel(0x0, HPRE_ADDR(qm, HPRE_BD_ENDIAN)); - writel(0x0, HPRE_ADDR(qm, HPRE_INT_MASK)); - writel(0x0, HPRE_ADDR(qm, HPRE_POISON_BYPASS)); - writel(0x0, HPRE_ADDR(qm, HPRE_COMM_CNT_CLR_CE)); - writel(0x0, HPRE_ADDR(qm, HPRE_ECC_BYPASS)); - - writel(HPRE_BD_USR_MASK, HPRE_ADDR(qm, HPRE_BD_ARUSR_CFG)); - writel(HPRE_BD_USR_MASK, HPRE_ADDR(qm, HPRE_BD_AWUSR_CFG)); - writel(0x1, HPRE_ADDR(qm, HPRE_RDCHN_INI_CFG)); - ret = readl_relaxed_poll_timeout(HPRE_ADDR(qm, HPRE_RDCHN_INI_ST), val, - val & BIT(0), + writel(HPRE_RSA_ENB, qm->io_base + HPRE_TYPES_ENB); + + writel(HPRE_QM_VFG_AX_MASK, qm->io_base + HPRE_VFG_AXCACHE); + writel(0x0, qm->io_base + HPRE_BD_ENDIAN); + writel(0x0, qm->io_base + HPRE_INT_MASK); + writel(0x0, qm->io_base + HPRE_POISON_BYPASS); + writel(0x0, qm->io_base + HPRE_COMM_CNT_CLR_CE); + writel(0x0, qm->io_base + HPRE_ECC_BYPASS); + + writel(HPRE_BD_USR_MASK, qm->io_base + HPRE_BD_ARUSR_CFG); + writel(HPRE_BD_USR_MASK, qm->io_base + HPRE_BD_AWUSR_CFG); + writel(0x1, qm->io_base + HPRE_RDCHN_INI_CFG); + ret = readl_relaxed_poll_timeout(qm->io_base + HPRE_RDCHN_INI_ST, val, + val & BIT(0), HPRE_REG_RD_INTVRL_US, HPRE_REG_RD_TMOUT_US); if (ret) { @@ -802,9 +801,9 @@ static void hpre_open_axi_master_ooo(struct hisi_qm *qm) value = readl(qm->io_base + HPRE_AM_OOO_SHUTDOWN_ENB); writel(value & ~HPRE_AM_OOO_SHUTDOWN_ENABLE, - HPRE_ADDR(qm, HPRE_AM_OOO_SHUTDOWN_ENB)); + qm->io_base + HPRE_AM_OOO_SHUTDOWN_ENB); writel(value | HPRE_AM_OOO_SHUTDOWN_ENABLE, - HPRE_ADDR(qm, HPRE_AM_OOO_SHUTDOWN_ENB)); + qm->io_base + HPRE_AM_OOO_SHUTDOWN_ENB); } static void hpre_err_info_init(struct hisi_qm *qm) From c9a753b9733dd229ea736b27bdc55ef04cdc9f01 Mon Sep 17 00:00:00 2001 From: Hui Tang Date: Wed, 12 May 2021 14:27:05 +0800 Subject: [PATCH 036/142] crypto: hisilicon/hpre - init a structure member each line Initialize only one structure member per line, just to keep the code neat.
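The style in question, shown on an invented table: one designated initializer per line keeps future diffs local to the field that changed and makes the sentinel entry stand out.

#include <linux/bits.h>
#include <linux/types.h>

struct demo_hw_error {
	u32 int_msk;
	const char *msg;
};

static const struct demo_hw_error demo_hw_errors[] = {
	{
		.int_msk = BIT(0),
		.msg = "first_err_int_set"
	}, {
		.int_msk = BIT(1),
		.msg = "second_err_int_set"
	}, {
		/* sentinel */
	}
};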
Signed-off-by: Hui Tang Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/hpre/hpre_main.c | 56 +++++++++++++++++------ 1 file changed, 42 insertions(+), 14 deletions(-) diff --git a/drivers/crypto/hisilicon/hpre/hpre_main.c b/drivers/crypto/hisilicon/hpre/hpre_main.c index c914e0005859a..47a169ce2833f 100644 --- a/drivers/crypto/hisilicon/hpre/hpre_main.c +++ b/drivers/crypto/hisilicon/hpre/hpre_main.c @@ -122,21 +122,49 @@ static const char * const hpre_debug_file_name[] = { }; static const struct hpre_hw_error hpre_hw_errors[] = { - { .int_msk = BIT(0), .msg = "core_ecc_1bit_err_int_set" }, - { .int_msk = BIT(1), .msg = "core_ecc_2bit_err_int_set" }, - { .int_msk = BIT(2), .msg = "dat_wb_poison_int_set" }, - { .int_msk = BIT(3), .msg = "dat_rd_poison_int_set" }, - { .int_msk = BIT(4), .msg = "bd_rd_poison_int_set" }, - { .int_msk = BIT(5), .msg = "ooo_ecc_2bit_err_int_set" }, - { .int_msk = BIT(6), .msg = "cluster1_shb_timeout_int_set" }, - { .int_msk = BIT(7), .msg = "cluster2_shb_timeout_int_set" }, - { .int_msk = BIT(8), .msg = "cluster3_shb_timeout_int_set" }, - { .int_msk = BIT(9), .msg = "cluster4_shb_timeout_int_set" }, - { .int_msk = GENMASK(15, 10), .msg = "ooo_rdrsp_err_int_set" }, - { .int_msk = GENMASK(21, 16), .msg = "ooo_wrrsp_err_int_set" }, - { .int_msk = BIT(22), .msg = "pt_rng_timeout_int_set"}, - { .int_msk = BIT(23), .msg = "sva_fsm_timeout_int_set"}, { + .int_msk = BIT(0), + .msg = "core_ecc_1bit_err_int_set" + }, { + .int_msk = BIT(1), + .msg = "core_ecc_2bit_err_int_set" + }, { + .int_msk = BIT(2), + .msg = "dat_wb_poison_int_set" + }, { + .int_msk = BIT(3), + .msg = "dat_rd_poison_int_set" + }, { + .int_msk = BIT(4), + .msg = "bd_rd_poison_int_set" + }, { + .int_msk = BIT(5), + .msg = "ooo_ecc_2bit_err_int_set" + }, { + .int_msk = BIT(6), + .msg = "cluster1_shb_timeout_int_set" + }, { + .int_msk = BIT(7), + .msg = "cluster2_shb_timeout_int_set" + }, { + .int_msk = BIT(8), + .msg = "cluster3_shb_timeout_int_set" + }, { + .int_msk = BIT(9), + .msg = "cluster4_shb_timeout_int_set" + }, { + .int_msk = GENMASK(15, 10), + .msg = "ooo_rdrsp_err_int_set" + }, { + .int_msk = GENMASK(21, 16), + .msg = "ooo_wrrsp_err_int_set" + }, { + .int_msk = BIT(22), + .msg = "pt_rng_timeout_int_set" + }, { + .int_msk = BIT(23), + .msg = "sva_fsm_timeout_int_set" + }, { /* sentinel */ } }; From 9201c0774c2203d5620eeb4f7cb872d7e33cbe75 Mon Sep 17 00:00:00 2001 From: Hui Tang Date: Wed, 12 May 2021 14:27:06 +0800 Subject: [PATCH 037/142] crypto: hisilicon/hpre - replace macro with inline function Functional macro lacks type checking, which is not as strict as function call checking. Signed-off-by: Hui Tang Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/hpre/hpre_main.c | 25 +++++++++++++++-------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/drivers/crypto/hisilicon/hpre/hpre_main.c b/drivers/crypto/hisilicon/hpre/hpre_main.c index 47a169ce2833f..1e7d1fb382ed0 100644 --- a/drivers/crypto/hisilicon/hpre/hpre_main.c +++ b/drivers/crypto/hisilicon/hpre/hpre_main.c @@ -87,11 +87,6 @@ #define HPRE_QM_PM_FLR BIT(11) #define HPRE_QM_SRIOV_FLR BIT(12) -#define HPRE_CLUSTERS_NUM(qm) \ - (((qm)->ver >= QM_HW_V3) ? HPRE_CLUSTERS_NUM_V3 : HPRE_CLUSTERS_NUM_V2) -#define HPRE_CLUSTER_CORE_MASK(qm) \ - (((qm)->ver >= QM_HW_V3) ? 
HPRE_CLUSTER_CORE_MASK_V3 :\ - HPRE_CLUSTER_CORE_MASK_V2) #define HPRE_VIA_MSI_DSM 1 #define HPRE_SQE_MASK_OFFSET 8 #define HPRE_SQE_MASK_LEN 24 @@ -251,6 +246,18 @@ static u32 vfs_num; module_param_cb(vfs_num, &vfs_num_ops, &vfs_num, 0444); MODULE_PARM_DESC(vfs_num, "Number of VFs to enable(1-63), 0(default)"); +static inline int hpre_cluster_num(struct hisi_qm *qm) +{ + return (qm->ver >= QM_HW_V3) ? HPRE_CLUSTERS_NUM_V3 : + HPRE_CLUSTERS_NUM_V2; +} + +static inline int hpre_cluster_core_mask(struct hisi_qm *qm) +{ + return (qm->ver >= QM_HW_V3) ? + HPRE_CLUSTER_CORE_MASK_V3 : HPRE_CLUSTER_CORE_MASK_V2; +} + struct hisi_qp *hpre_create_qp(u8 type) { int node = cpu_to_node(smp_processor_id()); @@ -317,8 +324,8 @@ static int hpre_cfg_by_dsm(struct hisi_qm *qm) static int hpre_set_cluster(struct hisi_qm *qm) { - u32 cluster_core_mask = HPRE_CLUSTER_CORE_MASK(qm); - u8 clusters_num = HPRE_CLUSTERS_NUM(qm); + u32 cluster_core_mask = hpre_cluster_core_mask(qm); + u8 clusters_num = hpre_cluster_num(qm); struct device *dev = &qm->pdev->dev; unsigned long offset; u32 val = 0; @@ -424,7 +431,7 @@ static int hpre_set_user_domain_and_cache(struct hisi_qm *qm) static void hpre_cnt_regs_clear(struct hisi_qm *qm) { - u8 clusters_num = HPRE_CLUSTERS_NUM(qm); + u8 clusters_num = hpre_cluster_num(qm); unsigned long offset; int i; @@ -677,7 +684,7 @@ static int hpre_pf_comm_regs_debugfs_init(struct hisi_qm *qm) static int hpre_cluster_debugfs_init(struct hisi_qm *qm) { - u8 clusters_num = HPRE_CLUSTERS_NUM(qm); + u8 clusters_num = hpre_cluster_num(qm); struct device *dev = &qm->pdev->dev; char buf[HPRE_DBGFS_VAL_MAX_LEN]; struct debugfs_regset32 *regset; From b94c910afda050a9e95465ff0c4fe2548ea5ac0a Mon Sep 17 00:00:00 2001 From: Hui Tang Date: Wed, 12 May 2021 14:27:07 +0800 Subject: [PATCH 038/142] crypto: hisilicon/hpre - remove the macro of 'HPRE_DEV' Remove the complex 'HPRE_DEV' macro and replace it with the device pointer initialized in the context.
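Reduced to its essentials, the change trades pointer-chasing at every call site for one assignment at context setup (the struct names follow the patch; the two demo functions are illustrative):

/* Before: a function-like macro dereferenced three pointers per use.
 *   #define HPRE_DEV(ctx) (&((ctx)->qp->qm->pdev->dev))
 *
 * After: resolve the device once when the qp is bound to the ctx ...
 */
static void demo_ctx_bind(struct hpre_ctx *ctx, struct hisi_qp *qp)
{
	ctx->qp = qp;
	ctx->dev = &qp->qm->pdev->dev;
}

/* ... and every user just reads the cached pointer. */
static void demo_report(struct hpre_ctx *ctx)
{
	dev_err_ratelimited(ctx->dev, "example diagnostic\n");
}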
Signed-off-by: Hui Tang Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/hpre/hpre_crypto.c | 53 ++++++++++----------- 1 file changed, 26 insertions(+), 27 deletions(-) diff --git a/drivers/crypto/hisilicon/hpre/hpre_crypto.c b/drivers/crypto/hisilicon/hpre/hpre_crypto.c index db00e9f763da0..3d0832b9c6134 100644 --- a/drivers/crypto/hisilicon/hpre/hpre_crypto.c +++ b/drivers/crypto/hisilicon/hpre/hpre_crypto.c @@ -30,7 +30,6 @@ struct hpre_ctx; #define HPRE_DH_G_FLAG 0x02 #define HPRE_TRY_SEND_TIMES 100 #define HPRE_INVLD_REQ_ID (-1) -#define HPRE_DEV(ctx) (&((ctx)->qp->qm->pdev->dev)) #define HPRE_SQE_ALG_BITS 5 #define HPRE_SQE_DONE_SHIFT 30 @@ -102,6 +101,7 @@ struct hpre_curve25519_ctx { struct hpre_ctx { struct hisi_qp *qp; + struct device *dev; struct hpre_asym_request **req_list; struct hpre *hpre; spinlock_t req_lock; @@ -214,8 +214,7 @@ static int hpre_get_data_dma_addr(struct hpre_asym_request *hpre_req, struct scatterlist *data, unsigned int len, int is_src, dma_addr_t *tmp) { - struct hpre_ctx *ctx = hpre_req->ctx; - struct device *dev = HPRE_DEV(ctx); + struct device *dev = hpre_req->ctx->dev; enum dma_data_direction dma_dir; if (is_src) { @@ -239,7 +238,7 @@ static int hpre_prepare_dma_buf(struct hpre_asym_request *hpre_req, int is_src, dma_addr_t *tmp) { struct hpre_ctx *ctx = hpre_req->ctx; - struct device *dev = HPRE_DEV(ctx); + struct device *dev = ctx->dev; void *ptr; int shift; @@ -293,7 +292,7 @@ static void hpre_hw_data_clr_all(struct hpre_ctx *ctx, struct scatterlist *dst, struct scatterlist *src) { - struct device *dev = HPRE_DEV(ctx); + struct device *dev = ctx->dev; struct hpre_sqe *sqe = &req->req; dma_addr_t tmp; @@ -325,7 +324,6 @@ static void hpre_hw_data_clr_all(struct hpre_ctx *ctx, static int hpre_alg_res_post_hf(struct hpre_ctx *ctx, struct hpre_sqe *sqe, void **kreq) { - struct device *dev = HPRE_DEV(ctx); struct hpre_asym_request *req; unsigned int err, done, alg; int id; @@ -350,7 +348,7 @@ static int hpre_alg_res_post_hf(struct hpre_ctx *ctx, struct hpre_sqe *sqe, return 0; alg = le32_to_cpu(sqe->dw0) & HREE_ALG_TYPE_MASK; - dev_err_ratelimited(dev, "alg[0x%x] error: done[0x%x], etype[0x%x]\n", + dev_err_ratelimited(ctx->dev, "alg[0x%x] error: done[0x%x], etype[0x%x]\n", alg, done, err); return -EINVAL; @@ -365,6 +363,7 @@ static int hpre_ctx_set(struct hpre_ctx *ctx, struct hisi_qp *qp, int qlen) spin_lock_init(&ctx->req_lock); ctx->qp = qp; + ctx->dev = &qp->qm->pdev->dev; hpre = container_of(ctx->qp->qm, struct hpre, qm); ctx->hpre = hpre; @@ -631,7 +630,7 @@ static int hpre_is_dh_params_length_valid(unsigned int key_sz) static int hpre_dh_set_params(struct hpre_ctx *ctx, struct dh *params) { - struct device *dev = HPRE_DEV(ctx); + struct device *dev = ctx->dev; unsigned int sz; if (params->p_size > HPRE_DH_MAX_P_SZ) @@ -670,7 +669,7 @@ static int hpre_dh_set_params(struct hpre_ctx *ctx, struct dh *params) static void hpre_dh_clear_ctx(struct hpre_ctx *ctx, bool is_clear_all) { - struct device *dev = HPRE_DEV(ctx); + struct device *dev = ctx->dev; unsigned int sz = ctx->key_sz; if (is_clear_all) @@ -883,18 +882,18 @@ static int hpre_rsa_set_n(struct hpre_ctx *ctx, const char *value, if (!hpre_rsa_key_size_is_support(ctx->key_sz)) return 0; - ctx->rsa.pubkey = dma_alloc_coherent(HPRE_DEV(ctx), vlen << 1, + ctx->rsa.pubkey = dma_alloc_coherent(ctx->dev, vlen << 1, &ctx->rsa.dma_pubkey, GFP_KERNEL); if (!ctx->rsa.pubkey) return -ENOMEM; if (private) { - ctx->rsa.prikey = dma_alloc_coherent(HPRE_DEV(ctx), vlen << 1, + ctx->rsa.prikey = 
dma_alloc_coherent(ctx->dev, vlen << 1, &ctx->rsa.dma_prikey, GFP_KERNEL); if (!ctx->rsa.prikey) { - dma_free_coherent(HPRE_DEV(ctx), vlen << 1, + dma_free_coherent(ctx->dev, vlen << 1, ctx->rsa.pubkey, ctx->rsa.dma_pubkey); ctx->rsa.pubkey = NULL; @@ -956,7 +955,7 @@ static int hpre_crt_para_get(char *para, size_t para_sz, static int hpre_rsa_setkey_crt(struct hpre_ctx *ctx, struct rsa_key *rsa_key) { unsigned int hlf_ksz = ctx->key_sz >> 1; - struct device *dev = HPRE_DEV(ctx); + struct device *dev = ctx->dev; u64 offset; int ret; @@ -1014,7 +1013,7 @@ static int hpre_rsa_setkey_crt(struct hpre_ctx *ctx, struct rsa_key *rsa_key) static void hpre_rsa_clear_ctx(struct hpre_ctx *ctx, bool is_clear_all) { unsigned int half_key_sz = ctx->key_sz >> 1; - struct device *dev = HPRE_DEV(ctx); + struct device *dev = ctx->dev; if (is_clear_all) hisi_qm_stop_qp(ctx->qp); @@ -1185,7 +1184,7 @@ static void hpre_key_to_big_end(u8 *data, int len) static void hpre_ecc_clear_ctx(struct hpre_ctx *ctx, bool is_clear_all, bool is_ecdh) { - struct device *dev = HPRE_DEV(ctx); + struct device *dev = ctx->dev; unsigned int sz = ctx->key_sz; unsigned int shift = sz << 1; @@ -1287,7 +1286,7 @@ static unsigned int hpre_ecdh_get_curvesz(unsigned short id) static int hpre_ecdh_set_param(struct hpre_ctx *ctx, struct ecdh *params) { - struct device *dev = HPRE_DEV(ctx); + struct device *dev = ctx->dev; unsigned int sz, shift, curve_sz; int ret; @@ -1338,7 +1337,7 @@ static int hpre_ecdh_set_secret(struct crypto_kpp *tfm, const void *buf, unsigned int len) { struct hpre_ctx *ctx = kpp_tfm_ctx(tfm); - struct device *dev = HPRE_DEV(ctx); + struct device *dev = ctx->dev; unsigned int sz, sz_shift; struct ecdh params; int ret; @@ -1373,7 +1372,7 @@ static void hpre_ecdh_hw_data_clr_all(struct hpre_ctx *ctx, struct scatterlist *dst, struct scatterlist *src) { - struct device *dev = HPRE_DEV(ctx); + struct device *dev = ctx->dev; struct hpre_sqe *sqe = &req->req; dma_addr_t dma; @@ -1462,7 +1461,7 @@ static int hpre_ecdh_src_data_init(struct hpre_asym_request *hpre_req, { struct hpre_sqe *msg = &hpre_req->req; struct hpre_ctx *ctx = hpre_req->ctx; - struct device *dev = HPRE_DEV(ctx); + struct device *dev = ctx->dev; unsigned int tmpshift; dma_addr_t dma = 0; void *ptr; @@ -1492,7 +1491,7 @@ static int hpre_ecdh_dst_data_init(struct hpre_asym_request *hpre_req, { struct hpre_sqe *msg = &hpre_req->req; struct hpre_ctx *ctx = hpre_req->ctx; - struct device *dev = HPRE_DEV(ctx); + struct device *dev = ctx->dev; dma_addr_t dma = 0; if (unlikely(!data || !sg_is_last(data) || len != ctx->key_sz << 1)) { @@ -1515,7 +1514,7 @@ static int hpre_ecdh_compute_value(struct kpp_request *req) { struct crypto_kpp *tfm = crypto_kpp_reqtfm(req); struct hpre_ctx *ctx = kpp_tfm_ctx(tfm); - struct device *dev = HPRE_DEV(ctx); + struct device *dev = ctx->dev; void *tmp = kpp_request_ctx(req); struct hpre_asym_request *hpre_req = PTR_ALIGN(tmp, HPRE_ALIGN_SZ); struct hpre_sqe *msg = &hpre_req->req; @@ -1621,7 +1620,7 @@ static void hpre_curve25519_fill_curve(struct hpre_ctx *ctx, const void *buf, static int hpre_curve25519_set_param(struct hpre_ctx *ctx, const void *buf, unsigned int len) { - struct device *dev = HPRE_DEV(ctx); + struct device *dev = ctx->dev; unsigned int sz = ctx->key_sz; unsigned int shift = sz << 1; @@ -1646,7 +1645,7 @@ static int hpre_curve25519_set_secret(struct crypto_kpp *tfm, const void *buf, unsigned int len) { struct hpre_ctx *ctx = kpp_tfm_ctx(tfm); - struct device *dev = HPRE_DEV(ctx); + struct device *dev = ctx->dev; 
int ret = -EINVAL; if (len != CURVE25519_KEY_SIZE || @@ -1674,7 +1673,7 @@ static void hpre_curve25519_hw_data_clr_all(struct hpre_ctx *ctx, struct scatterlist *dst, struct scatterlist *src) { - struct device *dev = HPRE_DEV(ctx); + struct device *dev = ctx->dev; struct hpre_sqe *sqe = &req->req; dma_addr_t dma; @@ -1770,7 +1769,7 @@ static int hpre_curve25519_src_init(struct hpre_asym_request *hpre_req, { struct hpre_sqe *msg = &hpre_req->req; struct hpre_ctx *ctx = hpre_req->ctx; - struct device *dev = HPRE_DEV(ctx); + struct device *dev = ctx->dev; u8 p[CURVE25519_KEY_SIZE] = { 0 }; const struct ecc_curve *curve; dma_addr_t dma = 0; @@ -1825,7 +1824,7 @@ static int hpre_curve25519_dst_init(struct hpre_asym_request *hpre_req, { struct hpre_sqe *msg = &hpre_req->req; struct hpre_ctx *ctx = hpre_req->ctx; - struct device *dev = HPRE_DEV(ctx); + struct device *dev = ctx->dev; dma_addr_t dma = 0; if (!data || !sg_is_last(data) || len != ctx->key_sz) { @@ -1848,7 +1847,7 @@ static int hpre_curve25519_compute_value(struct kpp_request *req) { struct crypto_kpp *tfm = crypto_kpp_reqtfm(req); struct hpre_ctx *ctx = kpp_tfm_ctx(tfm); - struct device *dev = HPRE_DEV(ctx); + struct device *dev = ctx->dev; void *tmp = kpp_request_ctx(req); struct hpre_asym_request *hpre_req = PTR_ALIGN(tmp, HPRE_ALIGN_SZ); struct hpre_sqe *msg = &hpre_req->req; From 58be5ce3461e3fd623091d0bdc1080d0e4df2859 Mon Sep 17 00:00:00 2001 From: Hui Tang Date: Wed, 12 May 2021 14:27:08 +0800 Subject: [PATCH 039/142] crypto: hisilicon/hpre - delete redundant initialization Delete redundant variable initialization. Signed-off-by: Hui Tang Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/hpre/hpre_crypto.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/hisilicon/hpre/hpre_crypto.c b/drivers/crypto/hisilicon/hpre/hpre_crypto.c index 3d0832b9c6134..bc93cc926e225 100644 --- a/drivers/crypto/hisilicon/hpre/hpre_crypto.c +++ b/drivers/crypto/hisilicon/hpre/hpre_crypto.c @@ -1492,7 +1492,7 @@ static int hpre_ecdh_dst_data_init(struct hpre_asym_request *hpre_req, struct hpre_sqe *msg = &hpre_req->req; struct hpre_ctx *ctx = hpre_req->ctx; struct device *dev = ctx->dev; - dma_addr_t dma = 0; + dma_addr_t dma; if (unlikely(!data || !sg_is_last(data) || len != ctx->key_sz << 1)) { dev_err(dev, "data or data length is illegal!\n"); @@ -1825,7 +1825,7 @@ static int hpre_curve25519_dst_init(struct hpre_asym_request *hpre_req, struct hpre_sqe *msg = &hpre_req->req; struct hpre_ctx *ctx = hpre_req->ctx; struct device *dev = ctx->dev; - dma_addr_t dma = 0; + dma_addr_t dma; if (!data || !sg_is_last(data) || len != ctx->key_sz) { dev_err(dev, "data or data length is illegal!\n"); From 82119db8cacb3921ab95e3f078c08c4bffacef15 Mon Sep 17 00:00:00 2001 From: Hui Tang Date: Wed, 12 May 2021 14:27:09 +0800 Subject: [PATCH 040/142] crypto: hisilicon/hpre - use 'GENMASK' to generate mask value Use 'GENMASK' to generate the mask value; this just makes the code clearer.
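As background, GENMASK(h, l) from include/linux/bits.h builds an unsigned mask with bits h through l set, so each conversion in this patch is value-preserving. A small userspace sketch (assuming a 64-bit unsigned long; the macro body mirrors the kernel's definition) spot-checks a few of the masks:

    #include <stdio.h>

    /* Userspace model of the kernel's GENMASK(h, l); assumes 64-bit long. */
    #define GENMASK(h, l) \
            (((~0UL) - (1UL << (l)) + 1) & (~0UL >> (64 - 1 - (h))))

    int main(void)
    {
            printf("0x%lx\n", GENMASK(10, 0));  /* 0x7ff      == HREE_HW_ERR_MASK */
            printf("0x%lx\n", GENMASK(21, 0));  /* 0x3fffff   == HPRE_CORE_INT_DISABLE */
            printf("0x%lx\n", GENMASK(31, 1));  /* 0xfffffffe == HPRE_QM_USR_CFG_MASK */
            return 0;
    }

Reading the high and low bit positions directly off the definition is what makes the GENMASK form clearer than a raw hexadecimal constant.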
Signed-off-by: Hui Tang Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/hpre/hpre_crypto.c | 6 +++--- drivers/crypto/hisilicon/hpre/hpre_main.c | 14 +++++++------- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/crypto/hisilicon/hpre/hpre_crypto.c b/drivers/crypto/hisilicon/hpre/hpre_crypto.c index bc93cc926e225..7449632986c5f 100644 --- a/drivers/crypto/hisilicon/hpre/hpre_crypto.c +++ b/drivers/crypto/hisilicon/hpre/hpre_crypto.c @@ -330,9 +330,9 @@ static int hpre_alg_res_post_hf(struct hpre_ctx *ctx, struct hpre_sqe *sqe, #define HPRE_NO_HW_ERR 0 #define HPRE_HW_TASK_DONE 3 -#define HREE_HW_ERR_MASK 0x7ff -#define HREE_SQE_DONE_MASK 0x3 -#define HREE_ALG_TYPE_MASK 0x1f +#define HREE_HW_ERR_MASK GENMASK(10, 0) +#define HREE_SQE_DONE_MASK GENMASK(1, 0) +#define HREE_ALG_TYPE_MASK GENMASK(4, 0) id = (int)le16_to_cpu(sqe->tag); req = ctx->req_list[id]; hpre_rm_req_from_ctx(req); diff --git a/drivers/crypto/hisilicon/hpre/hpre_main.c b/drivers/crypto/hisilicon/hpre/hpre_main.c index 1e7d1fb382ed0..46c24f90693b3 100644 --- a/drivers/crypto/hisilicon/hpre/hpre_main.c +++ b/drivers/crypto/hisilicon/hpre/hpre_main.c @@ -36,7 +36,7 @@ #define HPRE_INT_MASK 0x301400 #define HPRE_INT_STATUS 0x301800 #define HPRE_CORE_INT_ENABLE 0 -#define HPRE_CORE_INT_DISABLE 0x003fffff +#define HPRE_CORE_INT_DISABLE GENMASK(21, 0) #define HPRE_RDCHN_INI_ST 0x301a00 #define HPRE_CLSTR_BASE 0x302000 #define HPRE_CORE_EN_OFFSET 0x04 @@ -69,12 +69,12 @@ #define HPRE_DBGFS_VAL_MAX_LEN 20 #define HPRE_PCI_DEVICE_ID 0xa258 #define HPRE_PCI_VF_DEVICE_ID 0xa259 -#define HPRE_QM_USR_CFG_MASK 0xfffffffe -#define HPRE_QM_AXI_CFG_MASK 0xffff -#define HPRE_QM_VFG_AX_MASK 0xff -#define HPRE_BD_USR_MASK 0x3 -#define HPRE_CLUSTER_CORE_MASK_V2 0xf -#define HPRE_CLUSTER_CORE_MASK_V3 0xff +#define HPRE_QM_USR_CFG_MASK GENMASK(31, 1) +#define HPRE_QM_AXI_CFG_MASK GENMASK(15, 0) +#define HPRE_QM_VFG_AX_MASK GENMASK(7, 0) +#define HPRE_BD_USR_MASK GENMASK(1, 0) +#define HPRE_CLUSTER_CORE_MASK_V2 GENMASK(3, 0) +#define HPRE_CLUSTER_CORE_MASK_V3 GENMASK(7, 0) #define HPRE_AM_OOO_SHUTDOWN_ENB 0x301044 #define HPRE_AM_OOO_SHUTDOWN_ENABLE BIT(0) From 0c176d8d7d970db6fed82db3495a73d10d2251fb Mon Sep 17 00:00:00 2001 From: Hui Tang Date: Wed, 12 May 2021 14:27:10 +0800 Subject: [PATCH 041/142] crypto: hisilicon/hpre - delete redundant macro definition Delete the redundant macro definition.
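For context, the macro removed by this patch was a duplicate name for an existing offset: HPRE_INT_STATUS and HPRE_HAC_INT_STATUS both named register 0x301800. A two-line sketch with hypothetical names shows why such aliases are worth deleting:

    #define FOO_INT_STATUS     0x301800
    #define FOO_HAC_INT_STATUS 0x301800  /* alias: if one is updated alone, readers of the other silently diverge */

With a single name, every readl() of the status register is guaranteed to mean the same thing.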
Signed-off-by: Hui Tang Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/hpre/hpre_main.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/crypto/hisilicon/hpre/hpre_main.c b/drivers/crypto/hisilicon/hpre/hpre_main.c index 46c24f90693b3..31515ae054f8a 100644 --- a/drivers/crypto/hisilicon/hpre/hpre_main.c +++ b/drivers/crypto/hisilicon/hpre/hpre_main.c @@ -57,7 +57,6 @@ #define HPRE_CORE_INI_STATUS (HPRE_CLSTR_BASE + HPRE_CORE_INI_STATUS_OFFSET) #define HPRE_HAC_ECC1_CNT 0x301a04 #define HPRE_HAC_ECC2_CNT 0x301a08 -#define HPRE_HAC_INT_STATUS 0x301800 #define HPRE_HAC_SOURCE_INT 0x301600 #define HPRE_CLSTR_ADDR_INTRVL 0x1000 #define HPRE_CLUSTER_INQURY 0x100 @@ -822,7 +821,7 @@ static void hpre_log_hw_error(struct hisi_qm *qm, u32 err_sts) static u32 hpre_get_hw_err_status(struct hisi_qm *qm) { - return readl(qm->io_base + HPRE_HAC_INT_STATUS); + return readl(qm->io_base + HPRE_INT_STATUS); } static void hpre_clear_hw_err_status(struct hisi_qm *qm, u32 err_sts) From 302e909cb22b5456ae71a9fd54b98ee0e6505613 Mon Sep 17 00:00:00 2001 From: Hui Tang Date: Wed, 12 May 2021 14:27:11 +0800 Subject: [PATCH 042/142] crypto: hisilicon/hpre - add 'default' for switch statement Return an error immediately if execution reaches the 'default' path. Signed-off-by: Hui Tang Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/hpre/hpre_crypto.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/hisilicon/hpre/hpre_crypto.c b/drivers/crypto/hisilicon/hpre/hpre_crypto.c index 7449632986c5f..294c3688aabb0 100644 --- a/drivers/crypto/hisilicon/hpre/hpre_crypto.c +++ b/drivers/crypto/hisilicon/hpre/hpre_crypto.c @@ -623,9 +623,9 @@ static int hpre_is_dh_params_length_valid(unsigned int key_sz) case _HPRE_DH_GRP15: case _HPRE_DH_GRP16: return 0; + default: + return -EINVAL; } - - return -EINVAL; } static int hpre_dh_set_params(struct hpre_ctx *ctx, struct dh *params) From 5c93a2ebc7ad85046fab23f8ac297fc3a86bc903 Mon Sep 17 00:00:00 2001 From: Suman Anna Date: Fri, 14 May 2021 11:12:41 -0500 Subject: [PATCH 043/142] hwrng: omap - Enable driver for TI K3 family The TI K3 family of SoCs has a SA2UL IP that contains a SafeXcel IP-76 RNG block, which is supported by the OMAP RNG driver. Allow this driver to be built for the TI K3 family as well. Signed-off-by: Suman Anna Reviewed-by: Tero Kristo Signed-off-by: Herbert Xu --- drivers/char/hw_random/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/char/hw_random/Kconfig b/drivers/char/hw_random/Kconfig index 1fe006f3f12fa..6450074c0ad7d 100644 --- a/drivers/char/hw_random/Kconfig +++ b/drivers/char/hw_random/Kconfig @@ -165,7 +165,7 @@ config HW_RANDOM_IXP4XX config HW_RANDOM_OMAP tristate "OMAP Random Number Generator support" - depends on ARCH_OMAP16XX || ARCH_OMAP2PLUS || ARCH_MVEBU + depends on ARCH_OMAP16XX || ARCH_OMAP2PLUS || ARCH_MVEBU || ARCH_K3 default HW_RANDOM help This driver provides kernel-side support for the Random Number From 4c0716ee1d973f6504d13f0e8d4d10350c85ad37 Mon Sep 17 00:00:00 2001 From: Suman Anna Date: Fri, 14 May 2021 11:12:42 -0500 Subject: [PATCH 044/142] crypto: sa2ul - Fix leaks on failure paths with sa_dma_init() The sa_dma_init() function doesn't release the requested dma channels on all failure paths. Any failure in this function also ends up leaking the dma pool created in sa_init_mem() in the sa_ul_probe() function. Fix all of these issues.
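The shape of the fix is the kernel's standard unwind ladder: acquire resources in order, and on failure jump to a label that releases everything acquired so far, in reverse order. A generic, self-contained sketch of the idiom (hypothetical acquire()/release() API, not the sa2ul code itself):

    struct demo { void *rx1, *rx2, *tx; };

    static int demo_init(struct demo *d)
    {
            int ret;

            d->rx1 = acquire("rx1");
            if (!d->rx1)
                    return -ENODEV;

            d->rx2 = acquire("rx2");
            if (!d->rx2) {
                    ret = -ENODEV;
                    goto err_rx1;
            }

            d->tx = acquire("tx");
            if (!d->tx) {
                    ret = -ENODEV;
                    goto err_rx2;
            }

            return 0;

    err_rx2:
            release(d->rx2);  /* labels run in reverse acquisition order */
    err_rx1:
            release(d->rx1);
            return ret;
    }

Each new resource adds one label, and every failure path then frees exactly what was set up before it.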
Fixes: 7694b6ca649f ("crypto: sa2ul - Add crypto driver") Signed-off-by: Suman Anna Reviewed-by: Tero Kristo Signed-off-by: Herbert Xu --- drivers/crypto/sa2ul.c | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/drivers/crypto/sa2ul.c b/drivers/crypto/sa2ul.c index 1c6929fb3a131..3d6f0af2f9388 100644 --- a/drivers/crypto/sa2ul.c +++ b/drivers/crypto/sa2ul.c @@ -2300,9 +2300,9 @@ static int sa_dma_init(struct sa_crypto_data *dd) dd->dma_rx2 = dma_request_chan(dd->dev, "rx2"); if (IS_ERR(dd->dma_rx2)) { - dma_release_channel(dd->dma_rx1); - return dev_err_probe(dd->dev, PTR_ERR(dd->dma_rx2), - "Unable to request rx2 DMA channel\n"); + ret = dev_err_probe(dd->dev, PTR_ERR(dd->dma_rx2), + "Unable to request rx2 DMA channel\n"); + goto err_dma_rx2; } dd->dma_tx = dma_request_chan(dd->dev, "tx"); @@ -2323,28 +2323,31 @@ static int sa_dma_init(struct sa_crypto_data *dd) if (ret) { dev_err(dd->dev, "can't configure IN dmaengine slave: %d\n", ret); - return ret; + goto err_dma_config; } ret = dmaengine_slave_config(dd->dma_rx2, &cfg); if (ret) { dev_err(dd->dev, "can't configure IN dmaengine slave: %d\n", ret); - return ret; + goto err_dma_config; } ret = dmaengine_slave_config(dd->dma_tx, &cfg); if (ret) { dev_err(dd->dev, "can't configure OUT dmaengine slave: %d\n", ret); - return ret; + goto err_dma_config; } return 0; +err_dma_config: + dma_release_channel(dd->dma_tx); err_dma_tx: - dma_release_channel(dd->dma_rx1); dma_release_channel(dd->dma_rx2); +err_dma_rx2: + dma_release_channel(dd->dma_rx1); return ret; } @@ -2414,7 +2417,7 @@ static int sa_ul_probe(struct platform_device *pdev) sa_init_mem(dev_data); ret = sa_dma_init(dev_data); if (ret) - goto disable_pm_runtime; + goto destroy_dma_pool; match = of_match_node(of_match, dev->of_node); if (!match) { @@ -2454,9 +2457,9 @@ static int sa_ul_probe(struct platform_device *pdev) dma_release_channel(dev_data->dma_rx1); dma_release_channel(dev_data->dma_tx); +destroy_dma_pool: dma_pool_destroy(dev_data->sc_pool); -disable_pm_runtime: pm_runtime_put_sync(&pdev->dev); pm_runtime_disable(&pdev->dev); From 5c8552325e013cbdabc443cd1f1b4d03c4a2e64e Mon Sep 17 00:00:00 2001 From: Suman Anna Date: Fri, 14 May 2021 11:12:43 -0500 Subject: [PATCH 045/142] crypto: sa2ul - Fix pm_runtime enable in sa_ul_probe() The pm_runtime APIs added first in commit 7694b6ca649f ("crypto: sa2ul - Add crypto driver") were not unwound properly and were only partially fixed up in commit 13343badae09 ("crypto: sa2ul - Fix PM reference leak in sa_ul_probe()"). This fixed up the pm_runtime usage count but not the state. Fix this properly. Fixes: 13343badae09 ("crypto: sa2ul - Fix PM reference leak in sa_ul_probe()") Signed-off-by: Suman Anna Reviewed-by: Tero Kristo Signed-off-by: Herbert Xu --- drivers/crypto/sa2ul.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/crypto/sa2ul.c b/drivers/crypto/sa2ul.c index 3d6f0af2f9388..a215daedf78af 100644 --- a/drivers/crypto/sa2ul.c +++ b/drivers/crypto/sa2ul.c @@ -2411,6 +2411,7 @@ static int sa_ul_probe(struct platform_device *pdev) if (ret < 0) { dev_err(&pdev->dev, "%s: failed to get sync: %d\n", __func__, ret); + pm_runtime_disable(dev); return ret; } From d699c5d0bd811e48de72aeeb8e3872c63e957745 Mon Sep 17 00:00:00 2001 From: Suman Anna Date: Fri, 14 May 2021 11:12:44 -0500 Subject: [PATCH 046/142] crypto: sa2ul - Use of_device_get_match_data() helper Simplify the probe function by using the of_device_get_match_data() helper instead of open coding.
The logic is also moved up to fix the missing pm_runtime cleanup in case of a match failure. Fixes: 0bc42311cdff ("crypto: sa2ul - Add support for AM64") Signed-off-by: Suman Anna Reviewed-by: Tero Kristo Signed-off-by: Herbert Xu --- drivers/crypto/sa2ul.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/drivers/crypto/sa2ul.c b/drivers/crypto/sa2ul.c index a215daedf78af..9f077ec9dbb7f 100644 --- a/drivers/crypto/sa2ul.c +++ b/drivers/crypto/sa2ul.c @@ -2388,7 +2388,6 @@ MODULE_DEVICE_TABLE(of, of_match); static int sa_ul_probe(struct platform_device *pdev) { - const struct of_device_id *match; struct device *dev = &pdev->dev; struct device_node *node = dev->of_node; struct resource *res; @@ -2400,6 +2399,10 @@ static int sa_ul_probe(struct platform_device *pdev) if (!dev_data) return -ENOMEM; + dev_data->match_data = of_device_get_match_data(dev); + if (!dev_data->match_data) + return -ENODEV; + sa_k3_dev = dev; dev_data->dev = dev; dev_data->pdev = pdev; @@ -2420,13 +2423,6 @@ static int sa_ul_probe(struct platform_device *pdev) if (ret) goto destroy_dma_pool; - match = of_match_node(of_match, dev->of_node); - if (!match) { - dev_err(dev, "No compatible match found\n"); - return -ENODEV; - } - dev_data->match_data = match->data; - spin_lock_init(&dev_data->scid_lock); res = platform_get_resource(pdev, IORESOURCE_MEM, 0); saul_base = devm_ioremap_resource(dev, res); From daeec7388eb2c5dbff17630b76c22786ffa1e55a Mon Sep 17 00:00:00 2001 From: Suman Anna Date: Fri, 14 May 2021 11:12:45 -0500 Subject: [PATCH 047/142] crypto: sa2ul - Use devm_platform_ioremap_resource() Simplify the platform_get_resource() and devm_ioremap_resource() calls with devm_platform_ioremap_resource(). Also add error checking and move up this block to simplify the cleanup in sa_ul_probe(). Signed-off-by: Suman Anna Reviewed-by: Tero Kristo Signed-off-by: Herbert Xu --- drivers/crypto/sa2ul.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/crypto/sa2ul.c b/drivers/crypto/sa2ul.c index 9f077ec9dbb7f..216702fef9451 100644 --- a/drivers/crypto/sa2ul.c +++ b/drivers/crypto/sa2ul.c @@ -2390,7 +2390,6 @@ static int sa_ul_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; struct device_node *node = dev->of_node; - struct resource *res; static void __iomem *saul_base; struct sa_crypto_data *dev_data; int ret; @@ -2403,9 +2402,14 @@ static int sa_ul_probe(struct platform_device *pdev) if (!dev_data->match_data) return -ENODEV; + saul_base = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(saul_base)) + return PTR_ERR(saul_base); + sa_k3_dev = dev; dev_data->dev = dev; dev_data->pdev = pdev; + dev_data->base = saul_base; platform_set_drvdata(pdev, dev_data); dev_set_drvdata(sa_k3_dev, dev_data); @@ -2424,10 +2428,6 @@ static int sa_ul_probe(struct platform_device *pdev) goto destroy_dma_pool; spin_lock_init(&dev_data->scid_lock); - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - saul_base = devm_ioremap_resource(dev, res); - - dev_data->base = saul_base; if (!dev_data->match_data->skip_engine_control) { u32 val = SA_EEC_ENCSS_EN | SA_EEC_AUTHSS_EN | SA_EEC_CTXCACH_EN | From c858401cb4a884b840fa3214b8999e8feba3a59b Mon Sep 17 00:00:00 2001 From: Suman Anna Date: Fri, 14 May 2021 11:12:46 -0500 Subject: [PATCH 048/142] crypto: sa2ul - Remove child devices in remove The sa_ul_probe creates child devices using of_platform_populate(), but these are not cleaned up in driver remove. 
Clean these up by removing the child devices using of_platform_depopulate(). Signed-off-by: Suman Anna Reviewed-by: Tero Kristo Signed-off-by: Herbert Xu --- drivers/crypto/sa2ul.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/crypto/sa2ul.c b/drivers/crypto/sa2ul.c index 216702fef9451..51bb69bc573c3 100644 --- a/drivers/crypto/sa2ul.c +++ b/drivers/crypto/sa2ul.c @@ -2467,6 +2467,8 @@ static int sa_ul_remove(struct platform_device *pdev) { struct sa_crypto_data *dev_data = platform_get_drvdata(pdev); + of_platform_depopulate(&pdev->dev); + sa_unregister_algos(&pdev->dev); dma_release_channel(dev_data->dma_rx2); From dbbc5c06955cb9a56aed51170040a3967b79371d Mon Sep 17 00:00:00 2001 From: Weili Qian Date: Sat, 15 May 2021 18:44:37 +0800 Subject: [PATCH 049/142] crypto: hisilicon/qm - initialize the device before doing tasks After a PF reset, the device needs to be initialized first; only then can the queue be restarted to execute tasks. Signed-off-by: Weili Qian Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/qm.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c index ce439a0c66c9e..6a9c18f948769 100644 --- a/drivers/crypto/hisilicon/qm.c +++ b/drivers/crypto/hisilicon/qm.c @@ -4085,6 +4085,14 @@ void hisi_qm_reset_done(struct pci_dev *pdev) struct hisi_qm *qm = pci_get_drvdata(pdev); int ret; + if (qm->fun_type == QM_HW_PF) { + ret = qm_dev_hw_init(qm); + if (ret) { + pci_err(pdev, "Failed to init PF, ret = %d.\n", ret); + goto flr_done; + } + } + hisi_qm_dev_err_init(pf_qm); ret = qm_restart(qm); @@ -4094,12 +4102,6 @@ void hisi_qm_reset_done(struct pci_dev *pdev) } if (qm->fun_type == QM_HW_PF) { - ret = qm_dev_hw_init(qm); - if (ret) { - pci_err(pdev, "Failed to init PF, ret = %d.\n", ret); - goto flr_done; - } - if (!qm->vfs_num) goto flr_done; From 3121f021c00aeed599d6f5d1c737b1bc8e6a05d8 Mon Sep 17 00:00:00 2001 From: Weili Qian Date: Sat, 15 May 2021 18:44:38 +0800 Subject: [PATCH 050/142] crypto: hisilicon/qm - modify 'QM_RESETTING' clearing error Before a device reset, the driver sets the 'QM_RESETTING' flag, but after the reset the wrong flag is cleared. This patch fixes the inconsistent flag handling. Signed-off-by: Weili Qian Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/qm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c index 6a9c18f948769..09f0370881ab8 100644 --- a/drivers/crypto/hisilicon/qm.c +++ b/drivers/crypto/hisilicon/qm.c @@ -4122,7 +4122,7 @@ void hisi_qm_reset_done(struct pci_dev *pdev) if (qm_flr_reset_complete(pdev)) pci_info(pdev, "FLR reset complete\n"); - clear_bit(QM_RESETTING, &qm->misc_ctl); + clear_bit(QM_RESETTING, &pf_qm->misc_ctl); } EXPORT_SYMBOL_GPL(hisi_qm_reset_done); From 3b9c24dec891d418e26032709d6f01fe3757a4a6 Mon Sep 17 00:00:00 2001 From: Weili Qian Date: Sat, 15 May 2021 18:44:39 +0800 Subject: [PATCH 051/142] crypto: hisilicon/qm - adjust order of device error configuration To avoid an exception being reported while its error type is not yet configured, the driver needs to configure the error type first and only then enable the error interrupt. Hardware error initialization is needed before executing tasks, so that the hardware can detect errors in time.
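The ordering rule this patch applies is: configure what counts as an error first, unmask the interrupt last (and the mirror image on the disable side). In outline, with placeholder register names rather than the real offsets:

    static void demo_hw_error_enable(struct hisi_qm *qm)
    {
            /* 1. clear any stale error source bits */
            writel(INT_CLEAR_ALL, qm->io_base + ERR_SOURCE_REG);

            /* 2. classify events as CE/NFE/FE before they can fire */
            writel(CE_MASK,  qm->io_base + RAS_CE_ENB);
            writel(NFE_MASK, qm->io_base + RAS_NFE_ENB);
            writel(FE_MASK,  qm->io_base + RAS_FE_ENB);

            /* 3. only now unmask the error interrupt */
            writel(INT_ENABLE, qm->io_base + INT_MASK_REG);
    }

This guarantees that by the time an interrupt can be delivered, the hardware already knows how to report it.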
Signed-off-by: Weili Qian Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/hpre/hpre_main.c | 6 ++++-- drivers/crypto/hisilicon/qm.c | 2 +- drivers/crypto/hisilicon/sec2/sec_main.c | 24 ++++++++++------------- drivers/crypto/hisilicon/zip/zip_main.c | 6 +++--- 4 files changed, 18 insertions(+), 20 deletions(-) diff --git a/drivers/crypto/hisilicon/hpre/hpre_main.c b/drivers/crypto/hisilicon/hpre/hpre_main.c index 31515ae054f8a..1f47bb5e39902 100644 --- a/drivers/crypto/hisilicon/hpre/hpre_main.c +++ b/drivers/crypto/hisilicon/hpre/hpre_main.c @@ -466,8 +466,7 @@ static void hpre_hw_error_enable(struct hisi_qm *qm) /* clear HPRE hw error source if having */ writel(HPRE_CORE_INT_DISABLE, qm->io_base + HPRE_HAC_SOURCE_INT); - /* enable hpre hw error interrupts */ - writel(HPRE_CORE_INT_ENABLE, qm->io_base + HPRE_INT_MASK); + /* configure error type */ writel(HPRE_HAC_RAS_CE_ENABLE, qm->io_base + HPRE_RAS_CE_ENB); writel(HPRE_HAC_RAS_NFE_ENABLE, qm->io_base + HPRE_RAS_NFE_ENB); writel(HPRE_HAC_RAS_FE_ENABLE, qm->io_base + HPRE_RAS_FE_ENB); @@ -476,6 +475,9 @@ static void hpre_hw_error_enable(struct hisi_qm *qm) val = readl(qm->io_base + HPRE_AM_OOO_SHUTDOWN_ENB); val |= HPRE_AM_OOO_SHUTDOWN_ENABLE; writel(val, qm->io_base + HPRE_AM_OOO_SHUTDOWN_ENB); + + /* enable hpre hw error interrupts */ + writel(HPRE_CORE_INT_ENABLE, qm->io_base + HPRE_INT_MASK); } static inline struct hisi_qm *hpre_file_to_qm(struct hpre_debugfs_file *file) diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c index 09f0370881ab8..04c09053ea11c 100644 --- a/drivers/crypto/hisilicon/qm.c +++ b/drivers/crypto/hisilicon/qm.c @@ -3917,6 +3917,7 @@ static int qm_controller_reset_done(struct hisi_qm *qm) } qm_restart_prepare(qm); + hisi_qm_dev_err_init(qm); ret = qm_restart(qm); if (ret) { @@ -3938,7 +3939,6 @@ static int qm_controller_reset_done(struct hisi_qm *qm) return -EPERM; } - hisi_qm_dev_err_init(qm); qm_restart_done(qm); clear_bit(QM_RESETTING, &qm->misc_ctl); diff --git a/drivers/crypto/hisilicon/sec2/sec_main.c b/drivers/crypto/hisilicon/sec2/sec_main.c index 6f0062d4408c3..e57167da6be0f 100644 --- a/drivers/crypto/hisilicon/sec2/sec_main.c +++ b/drivers/crypto/hisilicon/sec2/sec_main.c @@ -440,43 +440,39 @@ static void sec_hw_error_enable(struct hisi_qm *qm) return; } - val = readl(qm->io_base + SEC_CONTROL_REG); - /* clear SEC hw error source if having */ writel(SEC_CORE_INT_CLEAR, qm->io_base + SEC_CORE_INT_SOURCE); - /* enable SEC hw error interrupts */ - writel(SEC_CORE_INT_ENABLE, qm->io_base + SEC_CORE_INT_MASK); - /* enable RAS int */ writel(SEC_RAS_CE_ENB_MSK, qm->io_base + SEC_RAS_CE_REG); writel(SEC_RAS_FE_ENB_MSK, qm->io_base + SEC_RAS_FE_REG); writel(SEC_RAS_NFE_ENB_MSK, qm->io_base + SEC_RAS_NFE_REG); /* enable SEC block master OOO when m-bit error occur */ + val = readl(qm->io_base + SEC_CONTROL_REG); val = val | SEC_AXI_SHUTDOWN_ENABLE; - writel(val, qm->io_base + SEC_CONTROL_REG); + + /* enable SEC hw error interrupts */ + writel(SEC_CORE_INT_ENABLE, qm->io_base + SEC_CORE_INT_MASK); } static void sec_hw_error_disable(struct hisi_qm *qm) { u32 val; - val = readl(qm->io_base + SEC_CONTROL_REG); - - /* disable RAS int */ - writel(SEC_RAS_DISABLE, qm->io_base + SEC_RAS_CE_REG); - writel(SEC_RAS_DISABLE, qm->io_base + SEC_RAS_FE_REG); - writel(SEC_RAS_DISABLE, qm->io_base + SEC_RAS_NFE_REG); - /* disable SEC hw error interrupts */ writel(SEC_CORE_INT_DISABLE, qm->io_base + SEC_CORE_INT_MASK); /* disable SEC block master OOO when m-bit error occur */ + val = readl(qm->io_base 
+ SEC_CONTROL_REG); val = val & SEC_AXI_SHUTDOWN_DISABLE; - writel(val, qm->io_base + SEC_CONTROL_REG); + + /* disable RAS int */ + writel(SEC_RAS_DISABLE, qm->io_base + SEC_RAS_CE_REG); + writel(SEC_RAS_DISABLE, qm->io_base + SEC_RAS_FE_REG); + writel(SEC_RAS_DISABLE, qm->io_base + SEC_RAS_NFE_REG); } static u32 sec_clear_enable_read(struct sec_debug_file *file) diff --git a/drivers/crypto/hisilicon/zip/zip_main.c b/drivers/crypto/hisilicon/zip/zip_main.c index 2178b40e9f825..8e3a52218774a 100644 --- a/drivers/crypto/hisilicon/zip/zip_main.c +++ b/drivers/crypto/hisilicon/zip/zip_main.c @@ -333,13 +333,13 @@ static void hisi_zip_hw_error_enable(struct hisi_qm *qm) writel(HZIP_CORE_INT_RAS_NFE_ENABLE, qm->io_base + HZIP_CORE_INT_RAS_NFE_ENB); - /* enable ZIP hw error interrupts */ - writel(0, qm->io_base + HZIP_CORE_INT_MASK_REG); - /* enable ZIP block master OOO when m-bit error occur */ val = readl(qm->io_base + HZIP_SOFT_CTRL_ZIP_CONTROL); val = val | HZIP_AXI_SHUTDOWN_ENABLE; writel(val, qm->io_base + HZIP_SOFT_CTRL_ZIP_CONTROL); + + /* enable ZIP hw error interrupts */ + writel(0, qm->io_base + HZIP_CORE_INT_MASK_REG); } static void hisi_zip_hw_error_disable(struct hisi_qm *qm) From b7da13d092a4919823c2b260ca7ea6ef1690b80b Mon Sep 17 00:00:00 2001 From: Weili Qian Date: Sat, 15 May 2021 18:44:40 +0800 Subject: [PATCH 052/142] crypto: hisilicon/qm - enable to close master ooo when NFE occurs Kunpeng930 is able to close master ooo when an NFE occurs, which prevents the device from accessing memory and executing tasks. This ensures that errors do not spread. This patch enables the hardware to close master ooo when an error occurs by writing the hardware registers, and ensures that the driver will not drain the qp because the hardware empties the tasks automatically.
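Each engine driver in this patch grows the same shaped helper: a single function takes a bool, sets or clears the AXI shutdown bit, and programs the new OOO_SHUTDOWN_SEL register only on hardware newer than V2. A condensed sketch of the common pattern (abbreviated placeholder names; the per-driver versions below use the real registers):

    static void demo_master_ooo_ctrl(struct hisi_qm *qm, bool enable)
    {
            u32 ctrl = readl(qm->io_base + CONTROL_REG);
            u32 sel = enable ? RAS_NFE_ENB_MASK : 0;

            if (enable)
                    ctrl |= AXI_SHUTDOWN_ENABLE;
            else
                    ctrl &= ~AXI_SHUTDOWN_ENABLE;

            /* Kunpeng930 (hw V3) selects which NFEs close master ooo */
            if (qm->ver > QM_HW_V2)
                    writel(sel, qm->io_base + OOO_SHUTDOWN_SEL);

            writel(ctrl, qm->io_base + CONTROL_REG);
    }

Folding enable and disable into one helper keeps the two paths from drifting apart, which is exactly the class of inconsistency the earlier interrupt-ordering patch had to fix.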
Signed-off-by: Weili Qian Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/hpre/hpre_main.c | 36 ++++--- drivers/crypto/hisilicon/qm.c | 118 ++++++++++++++-------- drivers/crypto/hisilicon/sec2/sec_main.c | 36 ++++--- drivers/crypto/hisilicon/zip/zip_main.c | 36 ++++--- 4 files changed, 150 insertions(+), 76 deletions(-) diff --git a/drivers/crypto/hisilicon/hpre/hpre_main.c b/drivers/crypto/hisilicon/hpre/hpre_main.c index 1f47bb5e39902..13323baf393e8 100644 --- a/drivers/crypto/hisilicon/hpre/hpre_main.c +++ b/drivers/crypto/hisilicon/hpre/hpre_main.c @@ -50,6 +50,7 @@ #define HPRE_RAS_NFE_ENB 0x301414 #define HPRE_HAC_RAS_NFE_ENABLE 0x3ffffe #define HPRE_RAS_FE_ENB 0x301418 +#define HPRE_OOO_SHUTDOWN_SEL 0x301a3c #define HPRE_HAC_RAS_FE_ENABLE 0 #define HPRE_CORE_ENB (HPRE_CLSTR_BASE + HPRE_CORE_EN_OFFSET) @@ -446,23 +447,36 @@ static void hpre_cnt_regs_clear(struct hisi_qm *qm) hisi_qm_debug_regs_clear(qm); } -static void hpre_hw_error_disable(struct hisi_qm *qm) +static void hpre_master_ooo_ctrl(struct hisi_qm *qm, bool enable) { - u32 val; + u32 val1, val2; + + val1 = readl(qm->io_base + HPRE_AM_OOO_SHUTDOWN_ENB); + if (enable) { + val1 |= HPRE_AM_OOO_SHUTDOWN_ENABLE; + val2 = HPRE_HAC_RAS_NFE_ENABLE; + } else { + val1 &= ~HPRE_AM_OOO_SHUTDOWN_ENABLE; + val2 = 0x0; + } + + if (qm->ver > QM_HW_V2) + writel(val2, qm->io_base + HPRE_OOO_SHUTDOWN_SEL); + writel(val1, qm->io_base + HPRE_AM_OOO_SHUTDOWN_ENB); +} + +static void hpre_hw_error_disable(struct hisi_qm *qm) +{ /* disable hpre hw error interrupts */ writel(HPRE_CORE_INT_DISABLE, qm->io_base + HPRE_INT_MASK); - /* disable HPRE block master OOO when m-bit error occur */ - val = readl(qm->io_base + HPRE_AM_OOO_SHUTDOWN_ENB); - val &= ~HPRE_AM_OOO_SHUTDOWN_ENABLE; - writel(val, qm->io_base + HPRE_AM_OOO_SHUTDOWN_ENB); + /* disable HPRE block master OOO when nfe occurs on Kunpeng930 */ + hpre_master_ooo_ctrl(qm, false); } static void hpre_hw_error_enable(struct hisi_qm *qm) { - u32 val; - /* clear HPRE hw error source if having */ writel(HPRE_CORE_INT_DISABLE, qm->io_base + HPRE_HAC_SOURCE_INT); @@ -471,10 +485,8 @@ static void hpre_hw_error_enable(struct hisi_qm *qm) writel(HPRE_HAC_RAS_NFE_ENABLE, qm->io_base + HPRE_RAS_NFE_ENB); writel(HPRE_HAC_RAS_FE_ENABLE, qm->io_base + HPRE_RAS_FE_ENB); - /* enable HPRE block master OOO when m-bit error occur */ - val = readl(qm->io_base + HPRE_AM_OOO_SHUTDOWN_ENB); - val |= HPRE_AM_OOO_SHUTDOWN_ENABLE; - writel(val, qm->io_base + HPRE_AM_OOO_SHUTDOWN_ENB); + /* enable HPRE block master OOO when nfe occurs on Kunpeng930 */ + hpre_master_ooo_ctrl(qm, true); /* enable hpre hw error interrupts */ writel(HPRE_CORE_INT_ENABLE, qm->io_base + HPRE_INT_MASK); diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c index 04c09053ea11c..8f7ea504ce80c 100644 --- a/drivers/crypto/hisilicon/qm.c +++ b/drivers/crypto/hisilicon/qm.c @@ -155,6 +155,7 @@ #define QM_RAS_CE_THRESHOLD 0x1000f8 #define QM_RAS_CE_TIMES_PER_IRQ 1 #define QM_RAS_MSI_INT_SEL 0x1040f4 +#define QM_OOO_SHUTDOWN_SEL 0x1040f8 #define QM_RESET_WAIT_TIMEOUT 400 #define QM_PEH_VENDOR_ID 0x1000d8 @@ -1623,13 +1624,9 @@ static void qm_hw_error_init_v1(struct hisi_qm *qm, u32 ce, u32 nfe, u32 fe) writel(QM_ABNORMAL_INT_MASK_VALUE, qm->io_base + QM_ABNORMAL_INT_MASK); } -static void qm_hw_error_init_v2(struct hisi_qm *qm, u32 ce, u32 nfe, u32 fe) +static void qm_hw_error_cfg(struct hisi_qm *qm, u32 ce, u32 nfe, u32 fe) { - u32 irq_enable = ce | nfe | fe; - u32 irq_unmask = ~irq_enable; - qm->error_mask = ce | nfe | fe; - 
/* clear QM hw residual error source */ writel(QM_ABNORMAL_INT_SOURCE_CLR, qm->io_base + QM_ABNORMAL_INT_SOURCE); @@ -1639,6 +1636,14 @@ static void qm_hw_error_init_v2(struct hisi_qm *qm, u32 ce, u32 nfe, u32 fe) writel(QM_RAS_CE_TIMES_PER_IRQ, qm->io_base + QM_RAS_CE_THRESHOLD); writel(nfe, qm->io_base + QM_RAS_NFE_ENABLE); writel(fe, qm->io_base + QM_RAS_FE_ENABLE); +} + +static void qm_hw_error_init_v2(struct hisi_qm *qm, u32 ce, u32 nfe, u32 fe) +{ + u32 irq_enable = ce | nfe | fe; + u32 irq_unmask = ~irq_enable; + + qm_hw_error_cfg(qm, ce, nfe, fe); irq_unmask &= readl(qm->io_base + QM_ABNORMAL_INT_MASK); writel(irq_unmask, qm->io_base + QM_ABNORMAL_INT_MASK); @@ -1649,6 +1654,28 @@ static void qm_hw_error_uninit_v2(struct hisi_qm *qm) writel(QM_ABNORMAL_INT_MASK_VALUE, qm->io_base + QM_ABNORMAL_INT_MASK); } +static void qm_hw_error_init_v3(struct hisi_qm *qm, u32 ce, u32 nfe, u32 fe) +{ + u32 irq_enable = ce | nfe | fe; + u32 irq_unmask = ~irq_enable; + + qm_hw_error_cfg(qm, ce, nfe, fe); + + /* enable close master ooo when hardware error happened */ + writel(nfe & (~QM_DB_RANDOM_INVALID), qm->io_base + QM_OOO_SHUTDOWN_SEL); + + irq_unmask &= readl(qm->io_base + QM_ABNORMAL_INT_MASK); + writel(irq_unmask, qm->io_base + QM_ABNORMAL_INT_MASK); +} + +static void qm_hw_error_uninit_v3(struct hisi_qm *qm) +{ + writel(QM_ABNORMAL_INT_MASK_VALUE, qm->io_base + QM_ABNORMAL_INT_MASK); + + /* disable close master ooo when hardware error happened */ + writel(0x0, qm->io_base + QM_OOO_SHUTDOWN_SEL); +} + static void qm_log_hw_error(struct hisi_qm *qm, u32 error_status) { const struct hisi_qm_hw_error *err; @@ -1715,6 +1742,35 @@ static enum acc_err_result qm_hw_error_handle_v2(struct hisi_qm *qm) return ACC_ERR_RECOVERED; } +static u32 qm_get_hw_error_status(struct hisi_qm *qm) +{ + return readl(qm->io_base + QM_ABNORMAL_INT_STATUS); +} + +static u32 qm_get_dev_err_status(struct hisi_qm *qm) +{ + return qm->err_ini->get_dev_hw_err_status(qm); +} + +/* Check if the error causes the master ooo block */ +static int qm_check_dev_error(struct hisi_qm *qm) +{ + u32 val, dev_val; + + if (qm->fun_type == QM_HW_VF) + return 0; + + val = qm_get_hw_error_status(qm); + dev_val = qm_get_dev_err_status(qm); + + if (qm->ver < QM_HW_V3) + return (val & QM_ECC_MBIT) || + (dev_val & qm->err_info.ecc_2bits_mask); + + return (val & readl(qm->io_base + QM_OOO_SHUTDOWN_SEL)) || + (dev_val & (~qm->err_info.dev_ce_mask)); +} + static int qm_stop_qp(struct hisi_qp *qp) { return qm_mb(qp->qm, QM_MB_CMD_STOP_QP, 0, qp->qp_id, 0); @@ -1739,8 +1795,8 @@ static const struct hisi_qm_hw_ops qm_hw_ops_v3 = { .get_vft = qm_get_vft_v2, .qm_db = qm_db_v2, .get_irq_num = qm_get_irq_num_v2, - .hw_error_init = qm_hw_error_init_v2, - .hw_error_uninit = qm_hw_error_uninit_v2, + .hw_error_init = qm_hw_error_init_v3, + .hw_error_uninit = qm_hw_error_uninit_v3, .hw_error_handle = qm_hw_error_handle_v2, .stop_qp = qm_stop_qp, }; @@ -2017,11 +2073,8 @@ static int qm_drain_qp(struct hisi_qp *qp) int ret = 0, i = 0; void *addr; - /* - * No need to judge if ECC multi-bit error occurs because the - * master OOO will be blocked. - */ - if (qm->err_status.is_qm_ecc_mbit || qm->err_status.is_dev_ecc_mbit) + /* No need to judge if master OOO is blocked. 
*/ + if (qm_check_dev_error(qm)) return 0; /* Kunpeng930 supports drain qp by device */ @@ -3527,11 +3580,6 @@ pci_ers_result_t hisi_qm_dev_err_detected(struct pci_dev *pdev, } EXPORT_SYMBOL_GPL(hisi_qm_dev_err_detected); -static u32 qm_get_hw_error_status(struct hisi_qm *qm) -{ - return readl(qm->io_base + QM_ABNORMAL_INT_STATUS); -} - static int qm_check_req_recv(struct hisi_qm *qm) { struct pci_dev *pdev = qm->pdev; @@ -3712,6 +3760,10 @@ static void qm_dev_ecc_mbit_handle(struct hisi_qm *qm) { u32 nfe_enb = 0; + /* Kunpeng930 hardware automatically close master ooo when NFE occurs */ + if (qm->ver >= QM_HW_V3) + return; + if (!qm->err_status.is_dev_ecc_mbit && qm->err_status.is_qm_ecc_mbit && qm->err_ini->close_axi_master_ooo) { @@ -3830,11 +3882,6 @@ static int qm_vf_reset_done(struct hisi_qm *qm) return ret; } -static u32 qm_get_dev_err_status(struct hisi_qm *qm) -{ - return qm->err_ini->get_dev_hw_err_status(qm); -} - static int qm_dev_hw_init(struct hisi_qm *qm) { return qm->err_ini->hw_init(qm); @@ -3844,6 +3891,9 @@ static void qm_restart_prepare(struct hisi_qm *qm) { u32 value; + if (qm->ver >= QM_HW_V3) + return; + if (!qm->err_status.is_qm_ecc_mbit && !qm->err_status.is_dev_ecc_mbit) return; @@ -3863,15 +3913,15 @@ static void qm_restart_prepare(struct hisi_qm *qm) /* clear AM Reorder Buffer ecc mbit source */ writel(ACC_ROB_ECC_ERR_MULTPL, qm->io_base + ACC_AM_ROB_ECC_INT_STS); - - if (qm->err_ini->open_axi_master_ooo) - qm->err_ini->open_axi_master_ooo(qm); } static void qm_restart_done(struct hisi_qm *qm) { u32 value; + if (qm->ver >= QM_HW_V3) + goto clear_flags; + if (!qm->err_status.is_qm_ecc_mbit && !qm->err_status.is_dev_ecc_mbit) return; @@ -3881,6 +3931,7 @@ static void qm_restart_done(struct hisi_qm *qm) value |= qm->err_info.msi_wr_port; writel(value, qm->io_base + ACC_AM_CFG_PORT_WR_EN); +clear_flags: qm->err_status.is_qm_ecc_mbit = false; qm->err_status.is_dev_ecc_mbit = false; } @@ -3918,6 +3969,8 @@ static int qm_controller_reset_done(struct hisi_qm *qm) qm_restart_prepare(qm); hisi_qm_dev_err_init(qm); + if (qm->err_ini->open_axi_master_ooo) + qm->err_ini->open_axi_master_ooo(qm); ret = qm_restart(qm); if (ret) { @@ -4005,21 +4058,6 @@ pci_ers_result_t hisi_qm_dev_slot_reset(struct pci_dev *pdev) } EXPORT_SYMBOL_GPL(hisi_qm_dev_slot_reset); -/* check the interrupt is ecc-mbit error or not */ -static int qm_check_dev_error(struct hisi_qm *qm) -{ - int ret; - - if (qm->fun_type == QM_HW_VF) - return 0; - - ret = qm_get_hw_error_status(qm) & QM_ECC_MBIT; - if (ret) - return ret; - - return (qm_get_dev_err_status(qm) & qm->err_info.ecc_2bits_mask); -} - void hisi_qm_reset_prepare(struct pci_dev *pdev) { struct hisi_qm *pf_qm = pci_get_drvdata(pci_physfn(pdev)); diff --git a/drivers/crypto/hisilicon/sec2/sec_main.c b/drivers/crypto/hisilicon/sec2/sec_main.c index e57167da6be0f..6a4408ea18c1c 100644 --- a/drivers/crypto/hisilicon/sec2/sec_main.c +++ b/drivers/crypto/hisilicon/sec2/sec_main.c @@ -52,6 +52,7 @@ #define SEC_RAS_CE_ENB_MSK 0x88 #define SEC_RAS_FE_ENB_MSK 0x0 #define SEC_RAS_NFE_ENB_MSK 0x7c177 +#define SEC_OOO_SHUTDOWN_SEL 0x301014 #define SEC_RAS_DISABLE 0x0 #define SEC_MEM_START_INIT_REG 0x301100 #define SEC_MEM_INIT_DONE_REG 0x301104 @@ -430,10 +431,27 @@ static void sec_debug_regs_clear(struct hisi_qm *qm) hisi_qm_debug_regs_clear(qm); } -static void sec_hw_error_enable(struct hisi_qm *qm) +static void sec_master_ooo_ctrl(struct hisi_qm *qm, bool enable) { - u32 val; + u32 val1, val2; + + val1 = readl(qm->io_base + SEC_CONTROL_REG); + if (enable) { 
+ val1 |= SEC_AXI_SHUTDOWN_ENABLE; + val2 = SEC_RAS_NFE_ENB_MSK; + } else { + val1 &= SEC_AXI_SHUTDOWN_DISABLE; + val2 = 0x0; + } + + if (qm->ver > QM_HW_V2) + writel(val2, qm->io_base + SEC_OOO_SHUTDOWN_SEL); + + writel(val1, qm->io_base + SEC_CONTROL_REG); +} +static void sec_hw_error_enable(struct hisi_qm *qm) +{ if (qm->ver == QM_HW_V1) { writel(SEC_CORE_INT_DISABLE, qm->io_base + SEC_CORE_INT_MASK); pci_info(qm->pdev, "V1 not support hw error handle\n"); @@ -448,10 +466,8 @@ static void sec_hw_error_enable(struct hisi_qm *qm) writel(SEC_RAS_FE_ENB_MSK, qm->io_base + SEC_RAS_FE_REG); writel(SEC_RAS_NFE_ENB_MSK, qm->io_base + SEC_RAS_NFE_REG); - /* enable SEC block master OOO when m-bit error occur */ - val = readl(qm->io_base + SEC_CONTROL_REG); - val = val | SEC_AXI_SHUTDOWN_ENABLE; - writel(val, qm->io_base + SEC_CONTROL_REG); + /* enable SEC block master OOO when nfe occurs on Kunpeng930 */ + sec_master_ooo_ctrl(qm, true); /* enable SEC hw error interrupts */ writel(SEC_CORE_INT_ENABLE, qm->io_base + SEC_CORE_INT_MASK); @@ -459,15 +475,11 @@ static void sec_hw_error_enable(struct hisi_qm *qm) static void sec_hw_error_disable(struct hisi_qm *qm) { - u32 val; - /* disable SEC hw error interrupts */ writel(SEC_CORE_INT_DISABLE, qm->io_base + SEC_CORE_INT_MASK); - /* disable SEC block master OOO when m-bit error occur */ - val = readl(qm->io_base + SEC_CONTROL_REG); - val = val & SEC_AXI_SHUTDOWN_DISABLE; - writel(val, qm->io_base + SEC_CONTROL_REG); + /* disable SEC block master OOO when nfe occurs on Kunpeng930 */ + sec_master_ooo_ctrl(qm, false); /* disable RAS int */ writel(SEC_RAS_DISABLE, qm->io_base + SEC_RAS_CE_REG); diff --git a/drivers/crypto/hisilicon/zip/zip_main.c b/drivers/crypto/hisilicon/zip/zip_main.c index 8e3a52218774a..3e23f2a1cf5a5 100644 --- a/drivers/crypto/hisilicon/zip/zip_main.c +++ b/drivers/crypto/hisilicon/zip/zip_main.c @@ -68,6 +68,7 @@ #define HZIP_CORE_INT_RAS_CE_ENABLE 0x1 #define HZIP_CORE_INT_RAS_NFE_ENB 0x301164 #define HZIP_CORE_INT_RAS_FE_ENB 0x301168 +#define HZIP_OOO_SHUTDOWN_SEL 0x30120C #define HZIP_CORE_INT_RAS_NFE_ENABLE 0x1FFE #define HZIP_SRAM_ECC_ERR_NUM_SHIFT 16 #define HZIP_SRAM_ECC_ERR_ADDR_SHIFT 24 @@ -312,10 +313,27 @@ static int hisi_zip_set_user_domain_and_cache(struct hisi_qm *qm) return 0; } -static void hisi_zip_hw_error_enable(struct hisi_qm *qm) +static void hisi_zip_master_ooo_ctrl(struct hisi_qm *qm, bool enable) { - u32 val; + u32 val1, val2; + + val1 = readl(qm->io_base + HZIP_SOFT_CTRL_ZIP_CONTROL); + if (enable) { + val1 |= HZIP_AXI_SHUTDOWN_ENABLE; + val2 = HZIP_CORE_INT_RAS_NFE_ENABLE; + } else { + val1 &= ~HZIP_AXI_SHUTDOWN_ENABLE; + val2 = 0x0; + } + + if (qm->ver > QM_HW_V2) + writel(val2, qm->io_base + HZIP_OOO_SHUTDOWN_SEL); + + writel(val1, qm->io_base + HZIP_SOFT_CTRL_ZIP_CONTROL); +} +static void hisi_zip_hw_error_enable(struct hisi_qm *qm) +{ if (qm->ver == QM_HW_V1) { writel(HZIP_CORE_INT_MASK_ALL, qm->io_base + HZIP_CORE_INT_MASK_REG); @@ -333,10 +351,8 @@ static void hisi_zip_hw_error_enable(struct hisi_qm *qm) writel(HZIP_CORE_INT_RAS_NFE_ENABLE, qm->io_base + HZIP_CORE_INT_RAS_NFE_ENB); - /* enable ZIP block master OOO when m-bit error occur */ - val = readl(qm->io_base + HZIP_SOFT_CTRL_ZIP_CONTROL); - val = val | HZIP_AXI_SHUTDOWN_ENABLE; - writel(val, qm->io_base + HZIP_SOFT_CTRL_ZIP_CONTROL); + /* enable ZIP block master OOO when nfe occurs on Kunpeng930 */ + hisi_zip_master_ooo_ctrl(qm, true); /* enable ZIP hw error interrupts */ writel(0, qm->io_base + HZIP_CORE_INT_MASK_REG); @@ -344,15 +360,11 @@ 
static void hisi_zip_hw_error_enable(struct hisi_qm *qm) static void hisi_zip_hw_error_disable(struct hisi_qm *qm) { - u32 val; - /* disable ZIP hw error interrupts */ writel(HZIP_CORE_INT_MASK_ALL, qm->io_base + HZIP_CORE_INT_MASK_REG); - /* disable ZIP block master OOO when m-bit error occur */ - val = readl(qm->io_base + HZIP_SOFT_CTRL_ZIP_CONTROL); - val = val & ~HZIP_AXI_SHUTDOWN_ENABLE; - writel(val, qm->io_base + HZIP_SOFT_CTRL_ZIP_CONTROL); + /* disable ZIP block master OOO when nfe occurs on Kunpeng930 */ + hisi_zip_master_ooo_ctrl(qm, false); } static inline struct hisi_qm *file_to_qm(struct ctrl_debug_file *file) From a6f8e68e238a15bb15f1726b35c695136c64eaba Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 16 May 2021 08:58:04 +0200 Subject: [PATCH 053/142] crypto: ccp - Fix a resource leak in an error handling path If an error occurs after calling 'sp_get_irqs()', 'sp_free_irqs()' must be called as already done in the error handling path. Fixes: f4d18d656f88 ("crypto: ccp - Abstract interrupt registeration") Signed-off-by: Christophe JAILLET Acked-by: John Allen Signed-off-by: Herbert Xu --- drivers/crypto/ccp/sp-pci.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/ccp/sp-pci.c b/drivers/crypto/ccp/sp-pci.c index f468594ef8afa..6fb6ba35f89d4 100644 --- a/drivers/crypto/ccp/sp-pci.c +++ b/drivers/crypto/ccp/sp-pci.c @@ -222,7 +222,7 @@ static int sp_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (ret) { dev_err(dev, "dma_set_mask_and_coherent failed (%d)\n", ret); - goto e_err; + goto free_irqs; } } @@ -230,10 +230,12 @@ static int sp_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) ret = sp_init(sp); if (ret) - goto e_err; + goto free_irqs; return 0; +free_irqs: + sp_free_irqs(sp); e_err: dev_notice(dev, "initialization failed\n"); return ret; From fc9818e6c54d19f1189bbda906042af3aaf93d80 Mon Sep 17 00:00:00 2001 From: Jack Xu Date: Mon, 17 May 2021 05:13:12 -0400 Subject: [PATCH 054/142] crypto: qat - return error when failing to map FW Save the return value of the qat_uclo_map_auth_fw() function so that qat_uclo_wr_mimage() can return the correct value. This way, adf_gen2_ae_fw_load() can stop and exit properly by checking the return value of qat_uclo_wr_mimage(). Signed-off-by: Jack Xu Co-developed-by: Zhehui Xiang Signed-off-by: Zhehui Xiang Reviewed-by: Giovanni Cabiddu Signed-off-by: Herbert Xu --- drivers/crypto/qat/qat_common/qat_uclo.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/crypto/qat/qat_common/qat_uclo.c b/drivers/crypto/qat/qat_common/qat_uclo.c index 1fb5fc852f6b8..d2c2db58c93ff 100644 --- a/drivers/crypto/qat/qat_common/qat_uclo.c +++ b/drivers/crypto/qat/qat_common/qat_uclo.c @@ -1546,7 +1546,8 @@ int qat_uclo_wr_mimage(struct icp_qat_fw_loader_handle *handle, int status = 0; if (handle->chip_info->fw_auth) { - if (!qat_uclo_map_auth_fw(handle, addr_ptr, mem_size, &desc)) + status = qat_uclo_map_auth_fw(handle, addr_ptr, mem_size, &desc); + if (!status) status = qat_uclo_auth_fw(handle, desc); qat_uclo_ummap_auth_fw(handle, &desc); } else { From b9f7c36de97814d13c31aba560d520d61b86be0e Mon Sep 17 00:00:00 2001 From: Jack Xu Date: Mon, 17 May 2021 05:13:13 -0400 Subject: [PATCH 055/142] crypto: qat - check MMP size before writing to the SRAM Change "sram_visible" to "mmp_sram_size" and compare it with the MMP size to prevent an overly large MMP file being written to SRAM.
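Replacing the bool with a size keeps the old meaning (0 still reads as "SRAM not visible") while making a real bound available for validation; a one-bit flag cannot express "visible but too small". Roughly, the state moves from a yes/no flag to a capacity (field name as in the patch; map_sram_bar() is a stand-in for the BAR setup done in qat_hal_chip_init()):

    struct icp_qat_fw_loader_chip_info {
            int mmp_sram_size;  /* 0: no SRAM window; >0: usable bytes */
            /* ... */
    };

    /* 'visible' checks become range checks for free: */
    if (handle->chip_info->mmp_sram_size > 0)
            map_sram_bar(handle);  /* was: if (chip_info->sram_visible) */

The follow-up patch uses the same field to reject images that do not fit.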
Signed-off-by: Jack Xu Co-developed-by: Zhehui Xiang Signed-off-by: Zhehui Xiang Reviewed-by: Giovanni Cabiddu Signed-off-by: Herbert Xu --- drivers/crypto/qat/qat_common/icp_qat_fw_loader_handle.h | 2 +- drivers/crypto/qat/qat_common/qat_hal.c | 8 ++++---- drivers/crypto/qat/qat_common/qat_uclo.c | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/crypto/qat/qat_common/icp_qat_fw_loader_handle.h b/drivers/crypto/qat/qat_common/icp_qat_fw_loader_handle.h index b8f3463be6ef4..7eb5daef4f885 100644 --- a/drivers/crypto/qat/qat_common/icp_qat_fw_loader_handle.h +++ b/drivers/crypto/qat/qat_common/icp_qat_fw_loader_handle.h @@ -24,7 +24,7 @@ struct icp_qat_fw_loader_hal_handle { }; struct icp_qat_fw_loader_chip_info { - bool sram_visible; + int mmp_sram_size; bool nn; bool lm2lm3; u32 lm_size; diff --git a/drivers/crypto/qat/qat_common/qat_hal.c b/drivers/crypto/qat/qat_common/qat_hal.c index bd3028126cbe6..ed9b813471443 100644 --- a/drivers/crypto/qat/qat_common/qat_hal.c +++ b/drivers/crypto/qat/qat_common/qat_hal.c @@ -696,7 +696,7 @@ static int qat_hal_chip_init(struct icp_qat_fw_loader_handle *handle, handle->pci_dev = pci_info->pci_dev; switch (handle->pci_dev->device) { case ADF_4XXX_PCI_DEVICE_ID: - handle->chip_info->sram_visible = false; + handle->chip_info->mmp_sram_size = 0; handle->chip_info->nn = false; handle->chip_info->lm2lm3 = true; handle->chip_info->lm_size = ICP_QAT_UCLO_MAX_LMEM_REG_2X; @@ -730,7 +730,7 @@ static int qat_hal_chip_init(struct icp_qat_fw_loader_handle *handle, break; case PCI_DEVICE_ID_INTEL_QAT_C62X: case PCI_DEVICE_ID_INTEL_QAT_C3XXX: - handle->chip_info->sram_visible = false; + handle->chip_info->mmp_sram_size = 0; handle->chip_info->nn = true; handle->chip_info->lm2lm3 = false; handle->chip_info->lm_size = ICP_QAT_UCLO_MAX_LMEM_REG; @@ -763,7 +763,7 @@ static int qat_hal_chip_init(struct icp_qat_fw_loader_handle *handle, + LOCAL_TO_XFER_REG_OFFSET); break; case PCI_DEVICE_ID_INTEL_QAT_DH895XCC: - handle->chip_info->sram_visible = true; + handle->chip_info->mmp_sram_size = 0x40000; handle->chip_info->nn = true; handle->chip_info->lm2lm3 = false; handle->chip_info->lm_size = ICP_QAT_UCLO_MAX_LMEM_REG; @@ -800,7 +800,7 @@ static int qat_hal_chip_init(struct icp_qat_fw_loader_handle *handle, goto out_err; } - if (handle->chip_info->sram_visible) { + if (handle->chip_info->mmp_sram_size > 0) { sram_bar = &pci_info->pci_bars[hw_data->get_sram_bar_id(hw_data)]; handle->hal_sram_addr_v = sram_bar->virt_addr; diff --git a/drivers/crypto/qat/qat_common/qat_uclo.c b/drivers/crypto/qat/qat_common/qat_uclo.c index d2c2db58c93ff..8adf25769128a 100644 --- a/drivers/crypto/qat/qat_common/qat_uclo.c +++ b/drivers/crypto/qat/qat_common/qat_uclo.c @@ -1551,7 +1551,7 @@ int qat_uclo_wr_mimage(struct icp_qat_fw_loader_handle *handle, status = qat_uclo_auth_fw(handle, desc); qat_uclo_ummap_auth_fw(handle, &desc); } else { - if (!handle->chip_info->sram_visible) { + if (handle->chip_info->mmp_sram_size < mem_size) { dev_dbg(&handle->pci_dev->dev, "QAT MMP fw not loaded for device 0x%x", handle->pci_dev->device); From 78b4267a1524849202d4ab45d32ca561775e0f23 Mon Sep 17 00:00:00 2001 From: Jack Xu Date: Mon, 17 May 2021 05:13:14 -0400 Subject: [PATCH 056/142] crypto: qat - report an error if MMP file size is too large Change the return status to error if MMP file size is too large so the driver load fails early if a large MMP firmware is loaded. 
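Returning -EFBIG (the conventional errno for exceeding a size limit) instead of logging at dev_dbg() and returning 0 means the failure now propagates. A condensed sketch of the effect at a call site (hypothetical caller shape, not the actual adf_gen2_ae_fw_load() code):

    ret = qat_uclo_wr_mimage(handle, addr_ptr, mem_size);
    if (ret)             /* now trips when the MMP image is oversized */
            return ret;  /* previously ret stayed 0 and loading went on */

This continues the direction of the earlier patch in this series that made qat_uclo_wr_mimage() return the qat_uclo_map_auth_fw() status.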
Signed-off-by: Jack Xu Co-developed-by: Zhehui Xiang Signed-off-by: Zhehui Xiang Reviewed-by: Giovanni Cabiddu Signed-off-by: Herbert Xu --- drivers/crypto/qat/qat_common/qat_uclo.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/crypto/qat/qat_common/qat_uclo.c b/drivers/crypto/qat/qat_common/qat_uclo.c index 8adf25769128a..ed1343bb36ac7 100644 --- a/drivers/crypto/qat/qat_common/qat_uclo.c +++ b/drivers/crypto/qat/qat_common/qat_uclo.c @@ -1552,10 +1552,8 @@ int qat_uclo_wr_mimage(struct icp_qat_fw_loader_handle *handle, qat_uclo_ummap_auth_fw(handle, &desc); } else { if (handle->chip_info->mmp_sram_size < mem_size) { - dev_dbg(&handle->pci_dev->dev, - "QAT MMP fw not loaded for device 0x%x", - handle->pci_dev->device); - return status; + pr_err("QAT: MMP size is too large: 0x%x\n", mem_size); + return -EFBIG; } qat_uclo_wr_sram_by_words(handle, 0, addr_ptr, mem_size); } From 96b57229209490c8bca4335b01a426a96173dc56 Mon Sep 17 00:00:00 2001 From: Jack Xu Date: Mon, 17 May 2021 05:13:15 -0400 Subject: [PATCH 057/142] crypto: qat - check return code of qat_hal_rd_rel_reg() Check the return code of the function qat_hal_rd_rel_reg() and return it to the caller. This is to fix the following warning when compiling the driver with clang scan-build: drivers/crypto/qat/qat_common/qat_hal.c:1436:2: warning: 6th function call argument is an uninitialized value Signed-off-by: Jack Xu Co-developed-by: Zhehui Xiang Signed-off-by: Zhehui Xiang Reviewed-by: Giovanni Cabiddu Signed-off-by: Herbert Xu --- drivers/crypto/qat/qat_common/qat_hal.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/crypto/qat/qat_common/qat_hal.c b/drivers/crypto/qat/qat_common/qat_hal.c index ed9b813471443..12ca6b8764aaa 100644 --- a/drivers/crypto/qat/qat_common/qat_hal.c +++ b/drivers/crypto/qat/qat_common/qat_hal.c @@ -1417,7 +1417,11 @@ static int qat_hal_put_rel_wr_xfer(struct icp_qat_fw_loader_handle *handle, pr_err("QAT: bad xfrAddr=0x%x\n", xfr_addr); return -EINVAL; } - qat_hal_rd_rel_reg(handle, ae, ctx, ICP_GPB_REL, gprnum, &gprval); + status = qat_hal_rd_rel_reg(handle, ae, ctx, ICP_GPB_REL, gprnum, &gprval); + if (status) { + pr_err("QAT: failed to read register"); + return status; + } gpr_addr = qat_hal_get_reg_addr(ICP_GPB_REL, gprnum); data16low = 0xffff & data; data16hi = 0xffff & (data >> 0x10); From 9afe77cf25d9670e61b489fd52cc6f75fd7f6803 Mon Sep 17 00:00:00 2001 From: Jack Xu Date: Mon, 17 May 2021 05:13:16 -0400 Subject: [PATCH 058/142] crypto: qat - remove unused macro in FW loader Remove the unused macro ICP_DH895XCC_PESRAM_BAR_SIZE in the firmware loader. 
This is to fix the following warning when compiling the driver using the clang compiler with CC=clang W=2: drivers/crypto/qat/qat_common/qat_uclo.c:345:9: warning: macro is not used [-Wunused-macros] Signed-off-by: Jack Xu Co-developed-by: Zhehui Xiang Signed-off-by: Zhehui Xiang Reviewed-by: Giovanni Cabiddu Signed-off-by: Herbert Xu --- drivers/crypto/qat/qat_common/qat_uclo.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/crypto/qat/qat_common/qat_uclo.c b/drivers/crypto/qat/qat_common/qat_uclo.c index ed1343bb36ac7..2026cc6be8f0e 100644 --- a/drivers/crypto/qat/qat_common/qat_uclo.c +++ b/drivers/crypto/qat/qat_common/qat_uclo.c @@ -342,7 +342,6 @@ static int qat_uclo_init_umem_seg(struct icp_qat_fw_loader_handle *handle, return 0; } -#define ICP_DH895XCC_PESRAM_BAR_SIZE 0x80000 static int qat_uclo_init_ae_memory(struct icp_qat_fw_loader_handle *handle, struct icp_qat_uof_initmem *init_mem) { From 11e0ca8f028c24cade0022888599e1273ce31694 Mon Sep 17 00:00:00 2001 From: Juerg Haefliger Date: Mon, 17 May 2021 11:58:31 +0200 Subject: [PATCH 059/142] hwrng: Kconfig - Remove leading spaces Remove leading spaces before tabs in Kconfig file(s) by running the following command: $ find drivers/char/hw_random -name 'Kconfig*' | xargs sed -r -i 's/^[ ]+\t/\t/' Signed-off-by: Juerg Haefliger Signed-off-by: Herbert Xu --- drivers/char/hw_random/Kconfig | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/char/hw_random/Kconfig b/drivers/char/hw_random/Kconfig index 6450074c0ad7d..c11f12d4ab534 100644 --- a/drivers/char/hw_random/Kconfig +++ b/drivers/char/hw_random/Kconfig @@ -168,14 +168,14 @@ config HW_RANDOM_OMAP depends on ARCH_OMAP16XX || ARCH_OMAP2PLUS || ARCH_MVEBU || ARCH_K3 default HW_RANDOM help - This driver provides kernel-side support for the Random Number + This driver provides kernel-side support for the Random Number Generator hardware found on OMAP16xx, OMAP2/3/4/5, AM33xx/AM43xx multimedia processors, and Marvell Armada 7k/8k SoCs. To compile this driver as a module, choose M here: the module will be called omap-rng. - If unsure, say Y. + If unsure, say Y. config HW_RANDOM_OMAP3_ROM tristate "OMAP3 ROM Random Number Generator support" @@ -485,13 +485,13 @@ config HW_RANDOM_NPCM depends on ARCH_NPCM || COMPILE_TEST default HW_RANDOM help - This driver provides support for the Random Number + This driver provides support for the Random Number Generator hardware available in Nuvoton NPCM SoCs. To compile this driver as a module, choose M here: the module will be called npcm-rng. - If unsure, say Y. + If unsure, say Y.
config HW_RANDOM_KEYSTONE depends on ARCH_KEYSTONE || COMPILE_TEST From 155f7d321f021c084595d33efafa66f197fb2c00 Mon Sep 17 00:00:00 2001 From: Zhen Lei Date: Mon, 17 May 2021 19:02:34 +0800 Subject: [PATCH 060/142] crypto: header - Fix spelling errors Fix some spelling mistakes in comments: cipherntext ==> ciphertext syncronise ==> synchronise feeded ==> fed Signed-off-by: Zhen Lei Signed-off-by: Herbert Xu --- include/crypto/aead.h | 2 +- include/crypto/engine.h | 2 +- include/crypto/hash.h | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/include/crypto/aead.h b/include/crypto/aead.h index e728469c4cccb..5af914c1ab8ee 100644 --- a/include/crypto/aead.h +++ b/include/crypto/aead.h @@ -490,7 +490,7 @@ static inline void aead_request_set_callback(struct aead_request *req, * The memory structure for cipher operation has the following structure: * * - AEAD encryption input: assoc data || plaintext - * - AEAD encryption output: assoc data || cipherntext || auth tag + * - AEAD encryption output: assoc data || ciphertext || auth tag * - AEAD decryption input: assoc data || ciphertext || auth tag * - AEAD decryption output: assoc data || plaintext * diff --git a/include/crypto/engine.h b/include/crypto/engine.h index 3f06e40d063a6..26cac19b0f46c 100644 --- a/include/crypto/engine.h +++ b/include/crypto/engine.h @@ -28,7 +28,7 @@ * of a failed backlog request * crypto-engine, in head position to keep order * @list: link with the global crypto engine list - * @queue_lock: spinlock to syncronise access to request queue + * @queue_lock: spinlock to synchronise access to request queue * @queue: the crypto queue of the engine * @rt: whether this queue is set to run as a realtime task * @prepare_crypt_hardware: a request will soon arrive from the queue diff --git a/include/crypto/hash.h b/include/crypto/hash.h index b2bc1e46e86a7..f140e4643949b 100644 --- a/include/crypto/hash.h +++ b/include/crypto/hash.h @@ -458,7 +458,7 @@ int crypto_ahash_finup(struct ahash_request *req); * * Return: * 0 if the message digest was successfully calculated; - * -EINPROGRESS if data is feeded into hardware (DMA) or queued for later; + * -EINPROGRESS if data is fed into hardware (DMA) or queued for later; * -EBUSY if queue is full and request should be resubmitted later; * other < 0 if an error occurred */ From 308365483351fad2c2c15e173df60c7168c828a5 Mon Sep 17 00:00:00 2001 From: Hui Tang Date: Wed, 19 May 2021 13:59:44 +0800 Subject: [PATCH 061/142] crypto: khazad,wp512 - remove leading spaces before tabs There are a few leading spaces before tabs; remove them by running the following command: $ find .
-name '*.c' | xargs sed -r -i 's/^[ ]+\t/\t/' At the same time, fix two warnings by running checkpatch.pl: WARNING: suspect code indent for conditional statements (16, 16) WARNING: braces {} are not necessary for single statement blocks Signed-off-by: Hui Tang Signed-off-by: Herbert Xu --- crypto/khazad.c | 2 +- crypto/wp512.c | 40 +++++++++++++++++++--------------------- 2 files changed, 20 insertions(+), 22 deletions(-) diff --git a/crypto/khazad.c b/crypto/khazad.c index 14ca7f1631c79..f19339954c89e 100644 --- a/crypto/khazad.c +++ b/crypto/khazad.c @@ -819,7 +819,7 @@ static void khazad_crypt(const u64 roundKey[KHAZAD_ROUNDS + 1], T6[(int)(state >> 8) & 0xff] ^ T7[(int)(state ) & 0xff] ^ roundKey[r]; - } + } state = (T0[(int)(state >> 56) ] & 0xff00000000000000ULL) ^ (T1[(int)(state >> 48) & 0xff] & 0x00ff000000000000ULL) ^ diff --git a/crypto/wp512.c b/crypto/wp512.c index feadc13ccae06..bf79fbb2340fa 100644 --- a/crypto/wp512.c +++ b/crypto/wp512.c @@ -1066,33 +1066,31 @@ static int wp512_final(struct shash_desc *desc, u8 *out) { struct wp512_ctx *wctx = shash_desc_ctx(desc); int i; - u8 *buffer = wctx->buffer; - u8 *bitLength = wctx->bitLength; - int bufferBits = wctx->bufferBits; - int bufferPos = wctx->bufferPos; + u8 *buffer = wctx->buffer; + u8 *bitLength = wctx->bitLength; + int bufferBits = wctx->bufferBits; + int bufferPos = wctx->bufferPos; __be64 *digest = (__be64 *)out; - buffer[bufferPos] |= 0x80U >> (bufferBits & 7); - bufferPos++; - if (bufferPos > WP512_BLOCK_SIZE - WP512_LENGTHBYTES) { - if (bufferPos < WP512_BLOCK_SIZE) { - memset(&buffer[bufferPos], 0, WP512_BLOCK_SIZE - bufferPos); - } - wp512_process_buffer(wctx); - bufferPos = 0; - } - if (bufferPos < WP512_BLOCK_SIZE - WP512_LENGTHBYTES) { - memset(&buffer[bufferPos], 0, + buffer[bufferPos] |= 0x80U >> (bufferBits & 7); + bufferPos++; + if (bufferPos > WP512_BLOCK_SIZE - WP512_LENGTHBYTES) { + if (bufferPos < WP512_BLOCK_SIZE) + memset(&buffer[bufferPos], 0, WP512_BLOCK_SIZE - bufferPos); + wp512_process_buffer(wctx); + bufferPos = 0; + } + if (bufferPos < WP512_BLOCK_SIZE - WP512_LENGTHBYTES) + memset(&buffer[bufferPos], 0, (WP512_BLOCK_SIZE - WP512_LENGTHBYTES) - bufferPos); - } - bufferPos = WP512_BLOCK_SIZE - WP512_LENGTHBYTES; - memcpy(&buffer[WP512_BLOCK_SIZE - WP512_LENGTHBYTES], + bufferPos = WP512_BLOCK_SIZE - WP512_LENGTHBYTES; + memcpy(&buffer[WP512_BLOCK_SIZE - WP512_LENGTHBYTES], bitLength, WP512_LENGTHBYTES); - wp512_process_buffer(wctx); + wp512_process_buffer(wctx); for (i = 0; i < WP512_DIGEST_SIZE/8; i++) digest[i] = cpu_to_be64(wctx->hash[i]); - wctx->bufferBits = bufferBits; - wctx->bufferPos = bufferPos; + wctx->bufferBits = bufferBits; + wctx->bufferPos = bufferPos; return 0; } From 5c0ecc2e81ecfd9eba8a4945d49c401615c167ca Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Wed, 19 May 2021 14:16:50 +0000 Subject: [PATCH 062/142] crypto: qce - Fix some error handling path Fix to return negative error code from the error handling cases instead of 0.
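The bug class being fixed deserves spelling out: a local 'ret' still holds 0 from an earlier successful call, so jumping to the error label without assigning a new code makes the function return success on failure. A minimal sketch of the corrected pattern (hypothetical names, not the driver's exact code):

	sg = prepare_dst_buf(req);	/* returns an ERR_PTR() on failure */
	if (IS_ERR(sg)) {
		ret = PTR_ERR(sg);	/* without this, ret would still be 0 */
		goto err_free;
	}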
Fixes: 9363efb4181c ("crypto: qce - Add support for AEAD algorithms") Reported-by: Hulk Robot Signed-off-by: Wei Yongjun Signed-off-by: Herbert Xu --- drivers/crypto/qce/aead.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/crypto/qce/aead.c b/drivers/crypto/qce/aead.c index 6d06a19b48e49..d47f4171ad830 100644 --- a/drivers/crypto/qce/aead.c +++ b/drivers/crypto/qce/aead.c @@ -280,8 +280,10 @@ qce_aead_ccm_prepare_buf_assoclen(struct aead_request *req) if (diff_dst) { sg = qce_aead_prepare_dst_buf(req); - if (IS_ERR(sg)) + if (IS_ERR(sg)) { + ret = PTR_ERR(sg); goto err_free; + } } else { if (IS_ENCRYPT(rctx->flags)) rctx->dst_nents = rctx->src_nents + 1; @@ -448,13 +450,17 @@ qce_aead_async_req_handle(struct crypto_async_request *async_req) if (ret) return ret; dst_nents = dma_map_sg(qce->dev, rctx->dst_sg, rctx->dst_nents, dir_dst); - if (dst_nents < 0) + if (dst_nents < 0) { + ret = dst_nents; goto error_free; + } if (diff_dst) { src_nents = dma_map_sg(qce->dev, rctx->src_sg, rctx->src_nents, dir_src); - if (src_nents < 0) + if (src_nents < 0) { + ret = src_nents; goto error_unmap_dst; + } } else { if (IS_CCM(rctx->flags) && IS_DECRYPT(rctx->flags)) src_nents = dst_nents; From cb028f1662a9910d4b8e3fbe9eb38f7a545540a3 Mon Sep 17 00:00:00 2001 From: Shaokun Zhang Date: Thu, 20 May 2021 15:56:11 +0800 Subject: [PATCH 063/142] hwrng: core - remove redundant initialization of variable err 'err' will be assigned later, so clean up the redundant initialization. Cc: PrasannaKumar Muralidharan Cc: Herbert Xu Signed-off-by: Shaokun Zhang Signed-off-by: Herbert Xu --- drivers/char/hw_random/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c index adb3c2bd7783e..322e3d0ea98ca 100644 --- a/drivers/char/hw_random/core.c +++ b/drivers/char/hw_random/core.c @@ -323,7 +323,7 @@ static ssize_t hwrng_attr_current_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t len) { - int err = -ENODEV; + int err; struct hwrng *rng, *old_rng, *new_rng; err = mutex_lock_interruptible(&rng_mutex); From c4d7d31874a7a8aa804721e082ffe1491f279dd2 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Thu, 20 May 2021 10:27:31 +0100 Subject: [PATCH 064/142] crypto: cavium: Fix a bunch of kernel-doc related issues Fixes the following W=1 kernel build warning(s): drivers/crypto/cavium/nitrox/nitrox_main.c:41: warning: cannot understand function prototype: 'const struct pci_device_id nitrox_pci_tbl[] = ' drivers/crypto/cavium/nitrox/nitrox_main.c:73: warning: Function parameter or member 'ndev' not described in 'write_to_ucd_unit' drivers/crypto/cavium/nitrox/nitrox_main.c:73: warning: Function parameter or member 'ucode_size' not described in 'write_to_ucd_unit' drivers/crypto/cavium/nitrox/nitrox_main.c:73: warning: Function parameter or member 'ucode_data' not described in 'write_to_ucd_unit' drivers/crypto/cavium/nitrox/nitrox_main.c:73: warning: Function parameter or member 'block_num' not described in 'write_to_ucd_unit' drivers/crypto/cavium/nitrox/nitrox_reqmgr.c:46: warning: Function parameter or member 'index' not described in 'incr_index' drivers/crypto/cavium/nitrox/nitrox_reqmgr.c:46: warning: Function parameter or member 'count' not described in 'incr_index' drivers/crypto/cavium/nitrox/nitrox_reqmgr.c:46: warning: Function parameter or member 'max' not described in 'incr_index' drivers/crypto/cavium/nitrox/nitrox_reqmgr.c:46: warning: expecting prototype for Response
codes from SE microcode(). Prototype was for incr_index() instead drivers/crypto/cavium/nitrox/nitrox_reqmgr.c:287: warning: Function parameter or member 'cmdq' not described in 'post_se_instr' drivers/crypto/cavium/nitrox/nitrox_reqmgr.c:381: warning: Function parameter or member 'callback' not described in 'nitrox_process_se_request' drivers/crypto/cavium/nitrox/nitrox_reqmgr.c:381: warning: Function parameter or member 'cb_arg' not described in 'nitrox_process_se_request' drivers/crypto/cavium/nitrox/nitrox_reqmgr.c:381: warning: expecting prototype for nitrox_se_request(). Prototype was for nitrox_process_se_request() instead drivers/crypto/cavium/nitrox/nitrox_reqmgr.c:535: warning: Function parameter or member 'cmdq' not described in 'process_response_list' drivers/crypto/cavium/nitrox/nitrox_reqmgr.c:535: warning: expecting prototype for process_request_list(). Prototype was for process_response_list() instead drivers/crypto/cavium/nitrox/nitrox_reqmgr.c:584: warning: Function parameter or member 'data' not described in 'pkt_slc_resp_tasklet' drivers/crypto/cavium/nitrox/nitrox_mbx.c:14: warning: cannot understand function prototype: 'enum mbx_msg_type ' drivers/crypto/cavium/nitrox/nitrox_mbx.c:24: warning: cannot understand function prototype: 'enum mbx_msg_opcode ' drivers/crypto/cavium/nitrox/nitrox_skcipher.c:26: warning: cannot understand function prototype: 'const struct nitrox_cipher flexi_cipher_table[] = ' drivers/crypto/cavium/cpt/cptpf_main.c:411: warning: Function parameter or member 'cpt' not described in 'cpt_unload_microcode' drivers/crypto/cavium/cpt/cptpf_main.c:411: warning: expecting prototype for Ensure all cores are disengaged from all groups by(). Prototype was for cpt_unload_microcode() instead drivers/crypto/cavium/cpt/cptvf_reqmanager.c:17: warning: Function parameter or member 'q' not described in 'get_free_pending_entry' drivers/crypto/cavium/cpt/cptvf_reqmanager.c:17: warning: Function parameter or member 'qlen' not described in 'get_free_pending_entry' Cc: George Cherian Cc: Herbert Xu Cc: "David S. Miller" Cc: linux-crypto@vger.kernel.org Signed-off-by: Lee Jones Signed-off-by: Herbert Xu --- drivers/crypto/cavium/cpt/cptpf_main.c | 2 +- drivers/crypto/cavium/cpt/cptvf_reqmanager.c | 4 ++-- drivers/crypto/cavium/nitrox/nitrox_main.c | 4 ++-- drivers/crypto/cavium/nitrox/nitrox_mbx.c | 4 ++-- drivers/crypto/cavium/nitrox/nitrox_reqmgr.c | 12 +++++++----- drivers/crypto/cavium/nitrox/nitrox_skcipher.c | 2 +- 6 files changed, 15 insertions(+), 13 deletions(-) diff --git a/drivers/crypto/cavium/cpt/cptpf_main.c b/drivers/crypto/cavium/cpt/cptpf_main.c index 06ee42e8a2458..8c32d0eb8fcf2 100644 --- a/drivers/crypto/cavium/cpt/cptpf_main.c +++ b/drivers/crypto/cavium/cpt/cptpf_main.c @@ -401,7 +401,7 @@ static void cpt_disable_all_cores(struct cpt_device *cpt) cpt_write_csr64(cpt->reg_base, CPTX_PF_EXE_CTL(0), 0); } -/** +/* * Ensure all cores are disengaged from all groups by * calling cpt_disable_all_cores() before calling this * function. 
diff --git a/drivers/crypto/cavium/cpt/cptvf_reqmanager.c b/drivers/crypto/cavium/cpt/cptvf_reqmanager.c index feb0f76783dda..153004bdfb5cd 100644 --- a/drivers/crypto/cavium/cpt/cptvf_reqmanager.c +++ b/drivers/crypto/cavium/cpt/cptvf_reqmanager.c @@ -9,8 +9,8 @@ /** * get_free_pending_entry - get free entry from pending queue - * @param pqinfo: pending_qinfo structure - * @param qno: queue number + * @q: pending queue + * @qlen: queue length */ static struct pending_entry *get_free_pending_entry(struct pending_queue *q, int qlen) diff --git a/drivers/crypto/cavium/nitrox/nitrox_main.c b/drivers/crypto/cavium/nitrox/nitrox_main.c index facc8e6bc5801..6af05df281a98 100644 --- a/drivers/crypto/cavium/nitrox/nitrox_main.c +++ b/drivers/crypto/cavium/nitrox/nitrox_main.c @@ -35,7 +35,7 @@ static LIST_HEAD(ndevlist); static DEFINE_MUTEX(devlist_lock); static unsigned int num_devices; -/** +/* * nitrox_pci_tbl - PCI Device ID Table */ static const struct pci_device_id nitrox_pci_tbl[] = { @@ -65,7 +65,7 @@ struct ucode { u64 code[]; }; -/** +/* * write_to_ucd_unit - Write Firmware to NITROX UCD unit */ static void write_to_ucd_unit(struct nitrox_device *ndev, u32 ucode_size, diff --git a/drivers/crypto/cavium/nitrox/nitrox_mbx.c b/drivers/crypto/cavium/nitrox/nitrox_mbx.c index c1af9d4fca6e3..2e9c0d2143632 100644 --- a/drivers/crypto/cavium/nitrox/nitrox_mbx.c +++ b/drivers/crypto/cavium/nitrox/nitrox_mbx.c @@ -8,7 +8,7 @@ #define RING_TO_VFNO(_x, _y) ((_x) / (_y)) -/** +/* * mbx_msg_type - Mailbox message types */ enum mbx_msg_type { @@ -18,7 +18,7 @@ enum mbx_msg_type { MBX_MSG_TYPE_NACK, }; -/** +/* * mbx_msg_opcode - Mailbox message opcodes */ enum mbx_msg_opcode { diff --git a/drivers/crypto/cavium/nitrox/nitrox_reqmgr.c b/drivers/crypto/cavium/nitrox/nitrox_reqmgr.c index 4434c92d6229f..55c18da4a5007 100644 --- a/drivers/crypto/cavium/nitrox/nitrox_reqmgr.c +++ b/drivers/crypto/cavium/nitrox/nitrox_reqmgr.c @@ -19,7 +19,7 @@ #define REQ_BACKLOG 2 #define REQ_POSTED 3 -/** +/* * Response codes from SE microcode * 0x00 - Success * Completion with no error @@ -279,6 +279,7 @@ static inline bool cmdq_full(struct nitrox_cmdq *cmdq, int qlen) /** * post_se_instr - Post SE instruction to Packet Input ring * @sr: Request structure + * @cmdq: Command queue structure * * Returns 0 if successful or a negative error code, * if no space in ring. @@ -372,6 +373,8 @@ static int nitrox_enqueue_request(struct nitrox_softreq *sr) * nitrox_process_se_request - Send request to SE core * @ndev: NITROX device * @req: Crypto request + * @callback: Completion callback + * @cb_arg: Completion callback arguments * * Returns 0 on success, or a negative error code. */ @@ -526,9 +529,8 @@ static bool sr_completed(struct nitrox_softreq *sr) } /** - * process_request_list - process completed requests - * @ndev: N5 device - * @qno: queue to operate + * process_response_list - process completed requests + * @cmdq: Command queue structure * * Returns the number of responses processed. 
*/ @@ -578,7 +580,7 @@ static void process_response_list(struct nitrox_cmdq *cmdq) } } -/** +/* * pkt_slc_resp_tasklet - post processing of SE responses */ void pkt_slc_resp_tasklet(unsigned long data) diff --git a/drivers/crypto/cavium/nitrox/nitrox_skcipher.c b/drivers/crypto/cavium/nitrox/nitrox_skcipher.c index a553ac65f3249..248b4fff1c729 100644 --- a/drivers/crypto/cavium/nitrox/nitrox_skcipher.c +++ b/drivers/crypto/cavium/nitrox/nitrox_skcipher.c @@ -20,7 +20,7 @@ struct nitrox_cipher { enum flexi_cipher value; }; -/** +/* * supported cipher list */ static const struct nitrox_cipher flexi_cipher_table[] = { From c215b513513386afd82a099047474c67f5b8f45c Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Thu, 20 May 2021 10:27:32 +0100 Subject: [PATCH 065/142] crypto: nx: nx-aes-gcm: Kernel-doc formatting should not be used for headers Fixes the following W=1 kernel build warning(s): drivers/crypto/nx/nx-aes-gcm.c:26: warning: Function parameter or member 'tfm' not described in 'gcm_aes_nx_set_key' drivers/crypto/nx/nx-aes-gcm.c:26: warning: Function parameter or member 'in_key' not described in 'gcm_aes_nx_set_key' drivers/crypto/nx/nx-aes-gcm.c:26: warning: Function parameter or member 'key_len' not described in 'gcm_aes_nx_set_key' drivers/crypto/nx/nx-aes-gcm.c:26: warning: expecting prototype for Nest Accelerators driver(). Prototype was for gcm_aes_nx_set_key() instead drivers/crypto/nx/nx-aes-ecb.c:24: warning: Function parameter or member 'tfm' not described in 'ecb_aes_nx_set_key' drivers/crypto/nx/nx-aes-ecb.c:24: warning: Function parameter or member 'in_key' not described in 'ecb_aes_nx_set_key' drivers/crypto/nx/nx-aes-ecb.c:24: warning: Function parameter or member 'key_len' not described in 'ecb_aes_nx_set_key' drivers/crypto/nx/nx-aes-ecb.c:24: warning: expecting prototype for Nest Accelerators driver(). Prototype was for ecb_aes_nx_set_key() instead drivers/crypto/nx/nx-aes-ccm.c:26: warning: Function parameter or member 'tfm' not described in 'ccm_aes_nx_set_key' drivers/crypto/nx/nx-aes-ccm.c:26: warning: Function parameter or member 'in_key' not described in 'ccm_aes_nx_set_key' drivers/crypto/nx/nx-aes-ccm.c:26: warning: Function parameter or member 'key_len' not described in 'ccm_aes_nx_set_key' drivers/crypto/nx/nx-aes-ccm.c:26: warning: expecting prototype for Nest Accelerators driver(). Prototype was for ccm_aes_nx_set_key() instead drivers/crypto/nx/nx-aes-ctr.c:25: warning: Function parameter or member 'tfm' not described in 'ctr_aes_nx_set_key' drivers/crypto/nx/nx-aes-ctr.c:25: warning: Function parameter or member 'in_key' not described in 'ctr_aes_nx_set_key' drivers/crypto/nx/nx-aes-ctr.c:25: warning: Function parameter or member 'key_len' not described in 'ctr_aes_nx_set_key' drivers/crypto/nx/nx-aes-ctr.c:25: warning: expecting prototype for Nest Accelerators driver(). Prototype was for ctr_aes_nx_set_key() instead drivers/crypto/nx/nx-aes-xcbc.c:22: warning: cannot understand function prototype: 'struct xcbc_state ' drivers/crypto/nx/nx-sha256.c:21: warning: Function parameter or member 'tfm' not described in 'nx_crypto_ctx_sha256_init' drivers/crypto/nx/nx-sha256.c:21: warning: expecting prototype for SHA(). Prototype was for nx_crypto_ctx_sha256_init() instead drivers/crypto/nx/nx-sha512.c:20: warning: Function parameter or member 'tfm' not described in 'nx_crypto_ctx_sha512_init' drivers/crypto/nx/nx-sha512.c:20: warning: expecting prototype for SHA(). 
Prototype was for nx_crypto_ctx_sha512_init() instead drivers/crypto/nx/nx-842-pseries.c:280: warning: Function parameter or member 'wmem' not described in 'nx842_pseries_compress' drivers/crypto/nx/nx-842-pseries.c:280: warning: Excess function parameter 'wrkmem' description in 'nx842_pseries_compress' drivers/crypto/nx/nx-842-pseries.c:410: warning: Function parameter or member 'wmem' not described in 'nx842_pseries_decompress' drivers/crypto/nx/nx-842-pseries.c:410: warning: Excess function parameter 'wrkmem' description in 'nx842_pseries_decompress' drivers/crypto/nx/nx-842-pseries.c:523: warning: Function parameter or member 'devdata' not described in 'nx842_OF_set_defaults' drivers/crypto/nx/nx-842-pseries.c:548: warning: Function parameter or member 'prop' not described in 'nx842_OF_upd_status' drivers/crypto/nx/nx-842-pseries.c:582: warning: Function parameter or member 'devdata' not described in 'nx842_OF_upd_maxsglen' drivers/crypto/nx/nx-842-pseries.c:582: warning: Function parameter or member 'prop' not described in 'nx842_OF_upd_maxsglen' drivers/crypto/nx/nx-842-pseries.c:630: warning: Function parameter or member 'devdata' not described in 'nx842_OF_upd_maxsyncop' drivers/crypto/nx/nx-842-pseries.c:630: warning: Function parameter or member 'prop' not described in 'nx842_OF_upd_maxsyncop' drivers/crypto/nx/nx-842-pseries.c:692: warning: Cannot understand * drivers/crypto/nx/nx-842-pseries.c:825: warning: Function parameter or member 'data' not described in 'nx842_OF_notifier' drivers/crypto/nx/nx-842-pseries.c:825: warning: Excess function parameter 'update' description in 'nx842_OF_notifier' Cc: Haren Myneni Cc: Herbert Xu Cc: "David S. Miller" Cc: Michael Ellerman Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Robert Jennings Cc: Seth Jennings Cc: linux-crypto@vger.kernel.org Cc: linuxppc-dev@lists.ozlabs.org Signed-off-by: Lee Jones Signed-off-by: Herbert Xu --- drivers/crypto/nx/nx-842-pseries.c | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/drivers/crypto/nx/nx-842-pseries.c b/drivers/crypto/nx/nx-842-pseries.c index 8ee547ee378ec..67caff73f058f 100644 --- a/drivers/crypto/nx/nx-842-pseries.c +++ b/drivers/crypto/nx/nx-842-pseries.c @@ -264,8 +264,8 @@ static int nx842_validate_result(struct device *dev, * @inlen: Length of input buffer * @out: Pointer to output buffer * @outlen: Length of output buffer - * @wrkmem: ptr to buffer for working memory, size determined by - * nx842_pseries_driver.workmem_size + * @wmem: ptr to buffer for working memory, size determined by + * nx842_pseries_driver.workmem_size * * Returns: * 0 Success, output of length @outlen stored in the buffer at @out @@ -393,8 +393,8 @@ static int nx842_pseries_compress(const unsigned char *in, unsigned int inlen, * @inlen: Length of input buffer * @out: Pointer to output buffer * @outlen: Length of output buffer - * @wrkmem: ptr to buffer for working memory, size determined by - * nx842_pseries_driver.workmem_size + * @wmem: ptr to buffer for working memory, size determined by + * nx842_pseries_driver.workmem_size * * Returns: * 0 Success, output of length @outlen stored in the buffer at @out @@ -513,7 +513,7 @@ static int nx842_pseries_decompress(const unsigned char *in, unsigned int inlen, /** * nx842_OF_set_defaults -- Set default (disabled) values for devdata * - * @devdata - struct nx842_devdata to update + * @devdata: struct nx842_devdata to update * * Returns: * 0 on success @@ -538,7 +538,7 @@ static int nx842_OF_set_defaults(struct nx842_devdata 
*devdata) * The status field indicates if the device is enabled when the status * is 'okay'. Otherwise the device driver will be disabled. * - * @prop - struct property point containing the maxsyncop for the update + * @prop: struct property point containing the maxsyncop for the update * * Returns: * 0 - Device is available @@ -571,8 +571,8 @@ static int nx842_OF_upd_status(struct property *prop) * In this example, the maximum byte length of a scatter list is * 0x0ff0 (4,080). * - * @devdata - struct nx842_devdata to update - * @prop - struct property point containing the maxsyncop for the update + * @devdata: struct nx842_devdata to update + * @prop: struct property point containing the maxsyncop for the update * * Returns: * 0 on success @@ -619,8 +619,8 @@ static int nx842_OF_upd_maxsglen(struct nx842_devdata *devdata, * 0x1000 (4,096) data byte length and 0x1f3 (510) total scatter list * elements. * - * @devdata - struct nx842_devdata to update - * @prop - struct property point containing the maxsyncop for the update + * @devdata: struct nx842_devdata to update + * @prop: struct property point containing the maxsyncop for the update * * Returns: * 0 on success @@ -689,7 +689,6 @@ static int nx842_OF_upd_maxsyncop(struct nx842_devdata *devdata, } /** - * * nx842_OF_upd -- Handle OF properties updates for the device. * * Set all properties from the OF tree. Optionally, a new property @@ -812,8 +811,7 @@ static int nx842_OF_upd(struct property *new_prop) * * @np: notifier block * @action: notifier action - * @update: struct pSeries_reconfig_prop_update pointer if action is - * PSERIES_UPDATE_PROPERTY + * @data: struct of_reconfig_data pointer * * Returns: * NOTIFY_OK on success From 01df08b93e400ce45d86ef8dd7dd849f44b0e9d3 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Thu, 20 May 2021 10:27:33 +0100 Subject: [PATCH 066/142] crypto: ccp: ccp-dev: Fix a little doc-rot Fixes the following W=1 kernel build warning(s): drivers/crypto/ccp/ccp-dev.c:476: warning: Function parameter or member 'sp' not described in 'ccp_alloc_struct' drivers/crypto/ccp/ccp-dev.c:476: warning: Excess function parameter 'dev' description in 'ccp_alloc_struct' drivers/crypto/ccp/ccp-dev.c:476: warning: Function parameter or member 'sp' not described in 'ccp_alloc_struct' drivers/crypto/ccp/ccp-dev.c:476: warning: Excess function parameter 'dev' description in 'ccp_alloc_struct' Cc: Tom Lendacky Cc: John Allen Cc: Herbert Xu Cc: "David S. Miller" Cc: Gary R Hook Cc: linux-crypto@vger.kernel.org Signed-off-by: Lee Jones Signed-off-by: Herbert Xu --- drivers/crypto/ccp/ccp-dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/crypto/ccp/ccp-dev.c b/drivers/crypto/ccp/ccp-dev.c index 6777582aa1cee..9ce4b68e9c483 100644 --- a/drivers/crypto/ccp/ccp-dev.c +++ b/drivers/crypto/ccp/ccp-dev.c @@ -470,7 +470,7 @@ int ccp_cmd_queue_thread(void *data) /** * ccp_alloc_struct - allocate and initialize the ccp_device struct * - * @dev: device struct of the CCP + * @sp: sp_device struct of the CCP */ struct ccp_device *ccp_alloc_struct(struct sp_device *sp) { From aa22cd7f67807eb8047221e57f8a327432ab8309 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 20 May 2021 12:40:00 +0200 Subject: [PATCH 067/142] crypto: tcrypt - enable tests for xxhash and blake2 Fill some of the recently freed up slots in tcrypt with xxhash64 and blake2b/blake2s, so we can easily benchmark their kernel implementations from user space. 
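As a usage sketch (assuming the standard tcrypt interface): tcrypt runs the selected test at module load time and then deliberately refuses to stay loaded so it can be re-invoked with a different mode, with results reported in the kernel log. With the slots added above:

	$ modprobe tcrypt mode=39		# xxhash64 self-test
	$ modprobe tcrypt mode=316 sec=1	# blake2s-256 speed test, 1s per block size
	$ dmesg | tail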
Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- crypto/tcrypt.c | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c index 6b7c158dc5087..f8d06da78e4f3 100644 --- a/crypto/tcrypt.c +++ b/crypto/tcrypt.c @@ -1847,10 +1847,22 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb) ret += tcrypt_test("cts(cbc(aes))"); break; + case 39: + ret += tcrypt_test("xxhash64"); + break; + case 40: ret += tcrypt_test("rmd160"); break; + case 41: + ret += tcrypt_test("blake2s-256"); + break; + + case 42: + ret += tcrypt_test("blake2b-512"); + break; + case 43: ret += tcrypt_test("ecb(seed)"); break; @@ -2356,10 +2368,22 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb) test_hash_speed("sha224", sec, generic_hash_speed_template); if (mode > 300 && mode < 400) break; fallthrough; + case 314: + test_hash_speed("xxhash64", sec, generic_hash_speed_template); + if (mode > 300 && mode < 400) break; + fallthrough; case 315: test_hash_speed("rmd160", sec, generic_hash_speed_template); if (mode > 300 && mode < 400) break; fallthrough; + case 316: + test_hash_speed("blake2s-256", sec, generic_hash_speed_template); + if (mode > 300 && mode < 400) break; + fallthrough; + case 317: + test_hash_speed("blake2b-512", sec, generic_hash_speed_template); + if (mode > 300 && mode < 400) break; + fallthrough; case 318: klen = 16; test_hash_speed("ghash", sec, generic_hash_speed_template); @@ -2456,10 +2480,22 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb) test_ahash_speed("sha224", sec, generic_hash_speed_template); if (mode > 400 && mode < 500) break; fallthrough; + case 414: + test_ahash_speed("xxhash64", sec, generic_hash_speed_template); + if (mode > 400 && mode < 500) break; + fallthrough; case 415: test_ahash_speed("rmd160", sec, generic_hash_speed_template); if (mode > 400 && mode < 500) break; fallthrough; + case 416: + test_ahash_speed("blake2s-256", sec, generic_hash_speed_template); + if (mode > 400 && mode < 500) break; + fallthrough; + case 417: + test_ahash_speed("blake2b-512", sec, generic_hash_speed_template); + if (mode > 400 && mode < 500) break; + fallthrough; case 418: test_ahash_speed("sha3-224", sec, generic_hash_speed_template); if (mode > 400 && mode < 500) break; From 98f481f22de235b5356f9fa94b0fcffeacc772d8 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Thu, 20 May 2021 21:57:13 +0800 Subject: [PATCH 068/142] hwrng: core - Use DEVICE_ATTR_ macro Use DEVICE_ATTR_RW()/DEVICE_ATTR_RO() helper instead of plain DEVICE_ATTR, which makes the code a bit shorter and easier to read. Signed-off-by: YueHaibing Signed-off-by: Herbert Xu --- drivers/char/hw_random/core.c | 36 +++++++++++++++-------------------- 1 file changed, 15 insertions(+), 21 deletions(-) diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c index 322e3d0ea98ca..a3db27916256d 100644 --- a/drivers/char/hw_random/core.c +++ b/drivers/char/hw_random/core.c @@ -319,9 +319,9 @@ static int enable_best_rng(void) return ret; } -static ssize_t hwrng_attr_current_store(struct device *dev, - struct device_attribute *attr, - const char *buf, size_t len) +static ssize_t rng_current_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t len) { int err; struct hwrng *rng, *old_rng, *new_rng; @@ -354,9 +354,9 @@ static ssize_t hwrng_attr_current_store(struct device *dev, return err ? 
: len; } -static ssize_t hwrng_attr_current_show(struct device *dev, - struct device_attribute *attr, - char *buf) +static ssize_t rng_current_show(struct device *dev, + struct device_attribute *attr, + char *buf) { ssize_t ret; struct hwrng *rng; @@ -371,9 +371,9 @@ static ssize_t hwrng_attr_current_show(struct device *dev, return ret; } -static ssize_t hwrng_attr_available_show(struct device *dev, - struct device_attribute *attr, - char *buf) +static ssize_t rng_available_show(struct device *dev, + struct device_attribute *attr, + char *buf) { int err; struct hwrng *rng; @@ -392,22 +392,16 @@ static ssize_t hwrng_attr_available_show(struct device *dev, return strlen(buf); } -static ssize_t hwrng_attr_selected_show(struct device *dev, - struct device_attribute *attr, - char *buf) +static ssize_t rng_selected_show(struct device *dev, + struct device_attribute *attr, + char *buf) { return sysfs_emit(buf, "%d\n", cur_rng_set_by_user); } -static DEVICE_ATTR(rng_current, S_IRUGO | S_IWUSR, - hwrng_attr_current_show, - hwrng_attr_current_store); -static DEVICE_ATTR(rng_available, S_IRUGO, - hwrng_attr_available_show, - NULL); -static DEVICE_ATTR(rng_selected, S_IRUGO, - hwrng_attr_selected_show, - NULL); +static DEVICE_ATTR_RW(rng_current); +static DEVICE_ATTR_RO(rng_available); +static DEVICE_ATTR_RO(rng_selected); static struct attribute *rng_dev_attrs[] = { &dev_attr_rng_current.attr, From 9b7b94683a9b9c42a743d591e48b9f51f505dd1f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Stephan=20M=C3=BCller?= Date: Thu, 20 May 2021 21:31:11 +0200 Subject: [PATCH 069/142] crypto: DRBG - switch to HMAC SHA512 DRBG as default DRBG The default DRBG is the one that has the highest priority. The priority is defined based on the order of the list drbg_cores[] where the highest priority is given to the last entry by drbg_fill_array. With this patch the default DRBG is switched from HMAC SHA256 to HMAC SHA512 to support compliance with SP800-90B and SP800-90C (current draft). The user of the crypto API is completely unaffected by the change. Signed-off-by: Stephan Mueller Acked-by: simo Sorce Signed-off-by: Herbert Xu --- crypto/drbg.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/crypto/drbg.c b/crypto/drbg.c index 1b4587e0ddad8..ea85d4a0fe9e9 100644 --- a/crypto/drbg.c +++ b/crypto/drbg.c @@ -176,18 +176,18 @@ static const struct drbg_core drbg_cores[] = { .blocklen_bytes = 48, .cra_name = "hmac_sha384", .backend_cra_name = "hmac(sha384)", - }, { - .flags = DRBG_HMAC | DRBG_STRENGTH256, - .statelen = 64, /* block length of cipher */ - .blocklen_bytes = 64, - .cra_name = "hmac_sha512", - .backend_cra_name = "hmac(sha512)", }, { .flags = DRBG_HMAC | DRBG_STRENGTH256, .statelen = 32, /* block length of cipher */ .blocklen_bytes = 32, .cra_name = "hmac_sha256", .backend_cra_name = "hmac(sha256)", + }, { + .flags = DRBG_HMAC | DRBG_STRENGTH256, + .statelen = 64, /* block length of cipher */ + .blocklen_bytes = 64, + .cra_name = "hmac_sha512", + .backend_cra_name = "hmac(sha512)", }, #endif /* CONFIG_CRYPTO_DRBG_HMAC */ }; From 1339a7c3ba05137a2d2fe75f602311bbfc6fab33 Mon Sep 17 00:00:00 2001 From: Thara Gopinath Date: Thu, 20 May 2021 22:20:23 -0400 Subject: [PATCH 070/142] crypto: qce: skcipher: Fix incorrect sg count for dma transfers Use the sg count returned by dma_map_sg to call into dmaengine_prep_slave_sg rather than using the original sg count. dma_map_sg can merge consecutive sglist entries, thus making the original sg count wrong. 
This is a fix for memory corruption issues observed while testing encryption/decryption of large messages using the libkcapi framework. Patch has been tested further by running the full suite of tcrypt.ko tests including fuzz tests. Signed-off-by: Thara Gopinath Signed-off-by: Herbert Xu --- drivers/crypto/qce/skcipher.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/drivers/crypto/qce/skcipher.c b/drivers/crypto/qce/skcipher.c index c0a0d8c4fce19..2594184792272 100644 --- a/drivers/crypto/qce/skcipher.c +++ b/drivers/crypto/qce/skcipher.c @@ -72,7 +72,7 @@ qce_skcipher_async_req_handle(struct crypto_async_request *async_req) struct scatterlist *sg; bool diff_dst; gfp_t gfp; - int ret; + int dst_nents, src_nents, ret; rctx->iv = req->iv; rctx->ivsize = crypto_skcipher_ivsize(skcipher); @@ -123,21 +123,22 @@ qce_skcipher_async_req_handle(struct crypto_async_request *async_req) sg_mark_end(sg); rctx->dst_sg = rctx->dst_tbl.sgl; - ret = dma_map_sg(qce->dev, rctx->dst_sg, rctx->dst_nents, dir_dst); - if (ret < 0) + dst_nents = dma_map_sg(qce->dev, rctx->dst_sg, rctx->dst_nents, dir_dst); + if (dst_nents < 0) goto error_free; if (diff_dst) { - ret = dma_map_sg(qce->dev, req->src, rctx->src_nents, dir_src); - if (ret < 0) + src_nents = dma_map_sg(qce->dev, req->src, rctx->src_nents, dir_src); + if (src_nents < 0) goto error_unmap_dst; rctx->src_sg = req->src; } else { rctx->src_sg = rctx->dst_sg; + src_nents = dst_nents - 1; } - ret = qce_dma_prep_sgs(&qce->dma, rctx->src_sg, rctx->src_nents, - rctx->dst_sg, rctx->dst_nents, + ret = qce_dma_prep_sgs(&qce->dma, rctx->src_sg, src_nents, + rctx->dst_sg, dst_nents, qce_skcipher_done, async_req); if (ret) goto error_unmap_src; From abf790a9b52d91750a07bfe055aaf0f152f6d4ac Mon Sep 17 00:00:00 2001 From: Thara Gopinath Date: Thu, 20 May 2021 22:58:44 -0400 Subject: [PATCH 071/142] MAINTAINERS: Add maintainer for Qualcomm crypto drivers There is no maintainer for Qualcomm crypto drivers and we are seeing more development in this area. Add myself as the maintainer so that I can help in reviewing the changes submitted to these drivers. Signed-off-by: Thara Gopinath Signed-off-by: Herbert Xu --- MAINTAINERS | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 6df5a401ff92f..d478f44be7ce6 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -15136,6 +15136,13 @@ S: Maintained F: Documentation/devicetree/bindings/opp/qcom-nvmem-cpufreq.txt F: drivers/cpufreq/qcom-cpufreq-nvmem.c +QUALCOMM CRYPTO DRIVERS +M: Thara Gopinath +L: linux-crypto@vger.kernel.org +L: linux-arm-msm@vger.kernel.org +S: Maintained +F: drivers/crypto/qce/ + QUALCOMM EMAC GIGABIT ETHERNET DRIVER M: Timur Tabi L: netdev@vger.kernel.org From 3f4a8567b50e47da075f3ca676a899954d4c3d8d Mon Sep 17 00:00:00 2001 From: Shaokun Zhang Date: Fri, 21 May 2021 16:41:47 +0800 Subject: [PATCH 072/142] crypto: nx - Fix typo in comment Fix typo '@workmem' -> '@wmem'.
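The underlying rule: kernel-doc requires each @name line to match a parameter in the prototype exactly, otherwise W=1 builds emit "Function parameter or member ... not described" and "Excess function parameter" warnings. An illustrative example of the expected form (function and parameter names invented):

	/**
	 * foo_compress() - compress a buffer
	 * @in: input buffer pointer
	 * @wmem: working memory, sized by the driver's workmem_size
	 */
	static int foo_compress(const u8 *in, void *wmem);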
Cc: Herbert Xu Signed-off-by: Shaokun Zhang Signed-off-by: Herbert Xu --- drivers/crypto/nx/nx-common-powernv.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/nx/nx-common-powernv.c b/drivers/crypto/nx/nx-common-powernv.c index 446f611726df5..655361ba91070 100644 --- a/drivers/crypto/nx/nx-common-powernv.c +++ b/drivers/crypto/nx/nx-common-powernv.c @@ -660,8 +660,8 @@ static int nx842_powernv_compress(const unsigned char *in, unsigned int inlen, * @inlen: input buffer size * @out: output buffer pointer * @outlenp: output buffer size pointer - * @workmem: working memory buffer pointer, size determined by - * nx842_powernv_driver.workmem_size + * @wmem: working memory buffer pointer, size determined by + * nx842_powernv_driver.workmem_size * * Returns: see @nx842_powernv_exec() */ From e5764377aa54b32bfcb651f8188729e7b35e7a7c Mon Sep 17 00:00:00 2001 From: Jiapeng Chong Date: Fri, 21 May 2021 17:44:52 +0800 Subject: [PATCH 073/142] crypto: qce - Fix inconsistent indenting Eliminate the following smatch warning: drivers/crypto/qce/aead.c:85 qce_aead_done() warn: inconsistent indenting. Reported-by: Abaci Robot Signed-off-by: Jiapeng Chong Signed-off-by: Herbert Xu --- drivers/crypto/qce/aead.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/crypto/qce/aead.c b/drivers/crypto/qce/aead.c index d47f4171ad830..290e2446a2f35 100644 --- a/drivers/crypto/qce/aead.c +++ b/drivers/crypto/qce/aead.c @@ -82,7 +82,7 @@ static void qce_aead_done(void *data) ret = memcmp(result_buf->auth_iv, tag, ctx->authsize); if (ret) { pr_err("Bad message error\n"); - error = -EBADMSG; + error = -EBADMSG; } } From dc11803409fbf8bc5a326ddd9f24cde620b3519d Mon Sep 17 00:00:00 2001 From: Kai Ye Date: Fri, 21 May 2021 18:02:43 +0800 Subject: [PATCH 074/142] crypto: hisilicon/qm - add dfx log if not use hardware crypto algs Print the necessary information when the hardware crypto algorithms are not used. Signed-off-by: Kai Ye Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/qm.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c index 8f7ea504ce80c..deb104e2bd244 100644 --- a/drivers/crypto/hisilicon/qm.c +++ b/drivers/crypto/hisilicon/qm.c @@ -4252,11 +4252,14 @@ static void hisi_qm_controller_reset(struct work_struct *rst_work) */ int hisi_qm_alg_register(struct hisi_qm *qm, struct hisi_qm_list *qm_list) { + struct device *dev = &qm->pdev->dev; int flag = 0; int ret = 0; - /* HW V2 not support both use uacce sva mode and hardware crypto algs */ - if (qm->ver <= QM_HW_V2 && qm->use_sva) + + if (qm->ver <= QM_HW_V2 && qm->use_sva) { + dev_info(dev, "HW V2 not both use uacce sva mode and hardware crypto algs.\n"); return 0; + } mutex_lock(&qm_list->lock); if (list_empty(&qm_list->list)) From 0dbcf1a24e6875d51c290a174a7f2526498e2836 Mon Sep 17 00:00:00 2001 From: Kai Ye Date: Fri, 21 May 2021 18:02:44 +0800 Subject: [PATCH 075/142] crypto: hisilicon/qm - fix the process of VF's list adding If Kunpeng 920 has the SVA mode enabled, the "qm alg register" process returns directly. As a result, the VF is never added to the QM list.
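The fix, visible in the hunks that follow, is pure reordering, roughly:

	mutex_lock(&qm_list->lock);
	list_add_tail(&qm->list, &qm_list->list);	/* always track the function */
	mutex_unlock(&qm_list->lock);

	if (qm->ver <= QM_HW_V2 && qm->use_sva)
		return 0;	/* skip only the algorithm registration */

and the unregister path likewise moves list_del() ahead of the corresponding early return.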
Signed-off-by: Kai Ye Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/qm.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c index deb104e2bd244..c671f9433716f 100644 --- a/drivers/crypto/hisilicon/qm.c +++ b/drivers/crypto/hisilicon/qm.c @@ -4256,17 +4256,17 @@ int hisi_qm_alg_register(struct hisi_qm *qm, struct hisi_qm_list *qm_list) int flag = 0; int ret = 0; - if (qm->ver <= QM_HW_V2 && qm->use_sva) { - dev_info(dev, "HW V2 not both use uacce sva mode and hardware crypto algs.\n"); - return 0; - } - mutex_lock(&qm_list->lock); if (list_empty(&qm_list->list)) flag = 1; list_add_tail(&qm->list, &qm_list->list); mutex_unlock(&qm_list->lock); + if (qm->ver <= QM_HW_V2 && qm->use_sva) { + dev_info(dev, "HW V2 not both use uacce sva mode and hardware crypto algs.\n"); + return 0; + } + if (flag) { ret = qm_list->register_to_crypto(qm); if (ret) { @@ -4291,13 +4291,13 @@ EXPORT_SYMBOL_GPL(hisi_qm_alg_register); */ void hisi_qm_alg_unregister(struct hisi_qm *qm, struct hisi_qm_list *qm_list) { - if (qm->ver <= QM_HW_V2 && qm->use_sva) - return; - mutex_lock(&qm_list->lock); list_del(&qm->list); mutex_unlock(&qm_list->lock); + if (qm->ver <= QM_HW_V2 && qm->use_sva) + return; + if (list_empty(&qm_list->list)) qm_list->unregister_from_crypto(qm); } From 6889fc2104e5d20899b91e61daf07a7524b2010d Mon Sep 17 00:00:00 2001 From: Hui Tang Date: Sat, 22 May 2021 10:44:28 +0800 Subject: [PATCH 076/142] crypto: ecdh - fix ecdh-nist-p192's entry in testmgr Add a comment that p192 will fail to register in FIPS mode. Fix ecdh-nist-p192's entry in testmgr by removing the ifdefs and not setting fips_allowed. Signed-off-by: Hui Tang Signed-off-by: Herbert Xu --- crypto/ecdh.c | 1 + crypto/testmgr.c | 3 --- crypto/testmgr.h | 2 -- 3 files changed, 1 insertion(+), 5 deletions(-) diff --git a/crypto/ecdh.c b/crypto/ecdh.c index 07eb34fef25b7..1974675093239 100644 --- a/crypto/ecdh.c +++ b/crypto/ecdh.c @@ -179,6 +179,7 @@ static int ecdh_init(void) { int ret; + /* NIST p192 will fail to register in FIPS mode */ ret = crypto_register_kpp(&ecdh_nist_p192); ecdh_nist_p192_registered = ret == 0; diff --git a/crypto/testmgr.c b/crypto/testmgr.c index 10c5b3b01ec47..26e40dba9ad29 100644 --- a/crypto/testmgr.c +++ b/crypto/testmgr.c @@ -4899,15 +4899,12 @@ static const struct alg_test_desc alg_test_descs[] = { } }, { #endif -#ifndef CONFIG_CRYPTO_FIPS .alg = "ecdh-nist-p192", .test = alg_test_kpp, - .fips_allowed = 1, .suite = { .kpp = __VECS(ecdh_p192_tv_template) } }, { -#endif .alg = "ecdh-nist-p256", .test = alg_test_kpp, .fips_allowed = 1, diff --git a/crypto/testmgr.h b/crypto/testmgr.h index aead75d904933..b9cf5b815532a 100644 --- a/crypto/testmgr.h +++ b/crypto/testmgr.h @@ -2685,7 +2685,6 @@ static const struct kpp_testvec curve25519_tv_template[] = { } }; -#ifndef CONFIG_CRYPTO_FIPS static const struct kpp_testvec ecdh_p192_tv_template[] = { { .secret = @@ -2725,7 +2724,6 @@ static const struct kpp_testvec ecdh_p192_tv_template[] = { .expected_ss_size = 24 } }; -#endif static const struct kpp_testvec ecdh_p256_tv_template[] = { { From 8fd28fa5046b377039d5bbc0ab2f625dec703980 Mon Sep 17 00:00:00 2001 From: Hui Tang Date: Sat, 22 May 2021 10:44:29 +0800 Subject: [PATCH 077/142] crypto: ecdh - fix 'ecdh_init' NIST P192 is not unregistered if the registration of NIST P256 fails; the init path needs to unregister the algorithms already registered.
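This is the usual unwind idiom for init functions that register several algorithms; a generic sketch (names invented for illustration):

	static int __init my_init(void)
	{
		int ret;

		ret = crypto_register_kpp(&alg_a);
		if (ret)
			return ret;

		ret = crypto_register_kpp(&alg_b);
		if (ret)
			goto err_unreg_a;

		return 0;

	err_unreg_a:
		crypto_unregister_kpp(&alg_a);
		return ret;
	}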
Signed-off-by: Hui Tang Signed-off-by: Herbert Xu --- crypto/ecdh.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/crypto/ecdh.c b/crypto/ecdh.c index 1974675093239..b6f493e828128 100644 --- a/crypto/ecdh.c +++ b/crypto/ecdh.c @@ -183,7 +183,16 @@ static int ecdh_init(void) ret = crypto_register_kpp(&ecdh_nist_p192); ecdh_nist_p192_registered = ret == 0; - return crypto_register_kpp(&ecdh_nist_p256); + ret = crypto_register_kpp(&ecdh_nist_p256); + if (ret) + goto nist_p256_error; + + return 0; + +nist_p256_error: + if (ecdh_nist_p192_registered) + crypto_unregister_kpp(&ecdh_nist_p192); + return ret; } static void ecdh_exit(void) From 8154132521e9cd6d28a7e9778c4ae23b716994bf Mon Sep 17 00:00:00 2001 From: Hui Tang Date: Sat, 22 May 2021 10:44:30 +0800 Subject: [PATCH 078/142] crypto: ecdh - register NIST P384 tfm Add ecdh_nist_p384_init_tfm, and register and unregister the P384 tfm. Signed-off-by: Hui Tang Signed-off-by: Herbert Xu --- crypto/ecdh.c | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/crypto/ecdh.c b/crypto/ecdh.c index b6f493e828128..c6f61c2211dc7 100644 --- a/crypto/ecdh.c +++ b/crypto/ecdh.c @@ -173,6 +173,31 @@ static struct kpp_alg ecdh_nist_p256 = { }, }; +static int ecdh_nist_p384_init_tfm(struct crypto_kpp *tfm) +{ + struct ecdh_ctx *ctx = ecdh_get_ctx(tfm); + + ctx->curve_id = ECC_CURVE_NIST_P384; + ctx->ndigits = ECC_CURVE_NIST_P384_DIGITS; + + return 0; +} + +static struct kpp_alg ecdh_nist_p384 = { + .set_secret = ecdh_set_secret, + .generate_public_key = ecdh_compute_value, + .compute_shared_secret = ecdh_compute_value, + .max_size = ecdh_max_size, + .init = ecdh_nist_p384_init_tfm, + .base = { + .cra_name = "ecdh-nist-p384", + .cra_driver_name = "ecdh-nist-p384-generic", + .cra_priority = 100, + .cra_module = THIS_MODULE, + .cra_ctxsize = sizeof(struct ecdh_ctx), + }, +}; + static bool ecdh_nist_p192_registered; static int ecdh_init(void) @@ -187,8 +212,15 @@ static int ecdh_init(void) if (ret) goto nist_p256_error; + ret = crypto_register_kpp(&ecdh_nist_p384); + if (ret) + goto nist_p384_error; + return 0; +nist_p384_error: + crypto_unregister_kpp(&ecdh_nist_p256); + nist_p256_error: if (ecdh_nist_p192_registered) crypto_unregister_kpp(&ecdh_nist_p192); @@ -200,6 +232,7 @@ static void ecdh_exit(void) if (ecdh_nist_p192_registered) crypto_unregister_kpp(&ecdh_nist_p192); crypto_unregister_kpp(&ecdh_nist_p256); + crypto_unregister_kpp(&ecdh_nist_p384); } subsys_initcall(ecdh_init); From 8e568fc2a71d097a5549043a39984a46262b6035 Mon Sep 17 00:00:00 2001 From: Hui Tang Date: Sat, 22 May 2021 10:44:31 +0800 Subject: [PATCH 079/142] crypto: ecdh - add test suite for NIST P384 Add test vector parameters for NIST P384, and add a NIST P384 entry to the table of test vectors.
Vector param from: https://datatracker.ietf.org/doc/html/rfc5903#section-3.1 Signed-off-by: Hui Tang Signed-off-by: Herbert Xu --- crypto/testmgr.c | 7 ++++++ crypto/testmgr.h | 61 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 68 insertions(+) diff --git a/crypto/testmgr.c b/crypto/testmgr.c index 26e40dba9ad29..1f7f63e836ae2 100644 --- a/crypto/testmgr.c +++ b/crypto/testmgr.c @@ -4911,6 +4911,13 @@ static const struct alg_test_desc alg_test_descs[] = { .suite = { .kpp = __VECS(ecdh_p256_tv_template) } + }, { + .alg = "ecdh-nist-p384", + .test = alg_test_kpp, + .fips_allowed = 1, + .suite = { + .kpp = __VECS(ecdh_p384_tv_template) + } }, { .alg = "ecdsa-nist-p192", .test = alg_test_akcipher, diff --git a/crypto/testmgr.h b/crypto/testmgr.h index b9cf5b815532a..96eb7ce9f81be 100644 --- a/crypto/testmgr.h +++ b/crypto/testmgr.h @@ -2811,6 +2811,67 @@ static const struct kpp_testvec ecdh_p256_tv_template[] = { } }; +/* + * NIST P384 test vectors from RFC5903 + */ +static const struct kpp_testvec ecdh_p384_tv_template[] = { + { + .secret = +#ifdef __LITTLE_ENDIAN + "\x02\x00" /* type */ + "\x36\x00" /* len */ + "\x30\x00" /* key_size */ +#else + "\x00\x02" /* type */ + "\x00\x36" /* len */ + "\x00\x30" /* key_size */ +#endif + "\x09\x9F\x3C\x70\x34\xD4\xA2\xC6" + "\x99\x88\x4D\x73\xA3\x75\xA6\x7F" + "\x76\x24\xEF\x7C\x6B\x3C\x0F\x16" + "\x06\x47\xB6\x74\x14\xDC\xE6\x55" + "\xE3\x5B\x53\x80\x41\xE6\x49\xEE" + "\x3F\xAE\xF8\x96\x78\x3A\xB1\x94", + .b_public = + "\xE5\x58\xDB\xEF\x53\xEE\xCD\xE3" + "\xD3\xFC\xCF\xC1\xAE\xA0\x8A\x89" + "\xA9\x87\x47\x5D\x12\xFD\x95\x0D" + "\x83\xCF\xA4\x17\x32\xBC\x50\x9D" + "\x0D\x1A\xC4\x3A\x03\x36\xDE\xF9" + "\x6F\xDA\x41\xD0\x77\x4A\x35\x71" + "\xDC\xFB\xEC\x7A\xAC\xF3\x19\x64" + "\x72\x16\x9E\x83\x84\x30\x36\x7F" + "\x66\xEE\xBE\x3C\x6E\x70\xC4\x16" + "\xDD\x5F\x0C\x68\x75\x9D\xD1\xFF" + "\xF8\x3F\xA4\x01\x42\x20\x9D\xFF" + "\x5E\xAA\xD9\x6D\xB9\xE6\x38\x6C", + .expected_a_public = + "\x66\x78\x42\xD7\xD1\x80\xAC\x2C" + "\xDE\x6F\x74\xF3\x75\x51\xF5\x57" + "\x55\xC7\x64\x5C\x20\xEF\x73\xE3" + "\x16\x34\xFE\x72\xB4\xC5\x5E\xE6" + "\xDE\x3A\xC8\x08\xAC\xB4\xBD\xB4" + "\xC8\x87\x32\xAE\xE9\x5F\x41\xAA" + "\x94\x82\xED\x1F\xC0\xEE\xB9\xCA" + "\xFC\x49\x84\x62\x5C\xCF\xC2\x3F" + "\x65\x03\x21\x49\xE0\xE1\x44\xAD" + "\xA0\x24\x18\x15\x35\xA0\xF3\x8E" + "\xEB\x9F\xCF\xF3\xC2\xC9\x47\xDA" + "\xE6\x9B\x4C\x63\x45\x73\xA8\x1C", + .expected_ss = + "\x11\x18\x73\x31\xC2\x79\x96\x2D" + "\x93\xD6\x04\x24\x3F\xD5\x92\xCB" + "\x9D\x0A\x92\x6F\x42\x2E\x47\x18" + "\x75\x21\x28\x7E\x71\x56\xC5\xC4" + "\xD6\x03\x13\x55\x69\xB9\xE9\xD0" + "\x9C\xF5\xD4\xA2\x70\xF5\x97\x46", + .secret_size = 54, + .b_public_size = 96, + .expected_a_public_size = 96, + .expected_ss_size = 48 + } +}; + /* * MD4 test vectors from RFC1320 */ From 9b75e311acadb978001c81400a6ba64f48bf00e8 Mon Sep 17 00:00:00 2001 From: Weili Qian Date: Sat, 22 May 2021 14:49:21 +0800 Subject: [PATCH 080/142] crypto: hisilicon/qm - add MSI detection steps on Kunpeng930 Compared with Kunpeng920, Kunpeng930 adds MSI configuration steps to wait for the interrupt to be emptied. In order to be compatible with the kunpeng920 driver, 'set_msi' callback is added in 'hisi_qm_hw_ops' to configure hardware register. Call 'set_msi' to disable or enable MSI during reset. 
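The draining logic added for Kunpeng930 leans on readl_relaxed_poll_timeout() from linux/iopoll.h, which rereads a register until a condition becomes true or a timeout expires. A generic sketch of its shape (register and mask names invented):

	u32 val;
	int ret;

	/* poll every 10us, give up after 1000us */
	ret = readl_relaxed_poll_timeout(base + STATUS_REG, val,
					 !(val & BUSY_MASK), 10, 1000);
	if (ret)
		pr_warn("device did not go idle\n");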
Signed-off-by: Weili Qian Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/qm.c | 124 +++++++++++++++++++++++++++------- 1 file changed, 100 insertions(+), 24 deletions(-) diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c index c671f9433716f..a7cd314073c24 100644 --- a/drivers/crypto/hisilicon/qm.c +++ b/drivers/crypto/hisilicon/qm.c @@ -161,6 +161,9 @@ #define QM_PEH_VENDOR_ID 0x1000d8 #define ACC_VENDOR_ID_VALUE 0x5a5a #define QM_PEH_DFX_INFO0 0x1000fc +#define QM_PEH_DFX_INFO1 0x100100 +#define QM_PEH_DFX_MASK (BIT(0) | BIT(2)) +#define QM_PEH_MSI_FINISH_MASK GENMASK(19, 16) #define ACC_PEH_SRIOV_CTRL_VF_MSE_SHIFT 3 #define ACC_PEH_MSI_DISABLE GENMASK(31, 0) #define ACC_MASTER_GLOBAL_CTRL_SHUTDOWN 0x1 @@ -171,6 +174,7 @@ #define QM_RAS_NFE_MBIT_DISABLE ~QM_ECC_MBIT #define ACC_AM_ROB_ECC_INT_STS 0x300104 #define ACC_ROB_ECC_ERR_MULTPL BIT(1) +#define QM_MSI_CAP_ENABLE BIT(16) #define QM_DFX_MB_CNT_VF 0x104010 #define QM_DFX_DB_CNT_VF 0x104020 @@ -352,6 +356,7 @@ struct hisi_qm_hw_ops { void (*hw_error_uninit)(struct hisi_qm *qm); enum acc_err_result (*hw_error_handle)(struct hisi_qm *qm); int (*stop_qp)(struct hisi_qp *qp); + int (*set_msi)(struct hisi_qm *qm, bool set); }; struct qm_dfx_item { @@ -1776,10 +1781,98 @@ static int qm_stop_qp(struct hisi_qp *qp) return qm_mb(qp->qm, QM_MB_CMD_STOP_QP, 0, qp->qp_id, 0); } +static int qm_set_msi(struct hisi_qm *qm, bool set) +{ + struct pci_dev *pdev = qm->pdev; + + if (set) { + pci_write_config_dword(pdev, pdev->msi_cap + PCI_MSI_MASK_64, + 0); + } else { + pci_write_config_dword(pdev, pdev->msi_cap + PCI_MSI_MASK_64, + ACC_PEH_MSI_DISABLE); + if (qm->err_status.is_qm_ecc_mbit || + qm->err_status.is_dev_ecc_mbit) + return 0; + + mdelay(1); + if (readl(qm->io_base + QM_PEH_DFX_INFO0)) + return -EFAULT; + } + + return 0; +} + +static void qm_wait_msi_finish(struct hisi_qm *qm) +{ + struct pci_dev *pdev = qm->pdev; + u32 cmd = ~0; + int cnt = 0; + u32 val; + int ret; + + while (true) { + pci_read_config_dword(pdev, pdev->msi_cap + + PCI_MSI_PENDING_64, &cmd); + if (!cmd) + break; + + if (++cnt > MAX_WAIT_COUNTS) { + pci_warn(pdev, "failed to empty MSI PENDING!\n"); + break; + } + + udelay(1); + } + + ret = readl_relaxed_poll_timeout(qm->io_base + QM_PEH_DFX_INFO0, + val, !(val & QM_PEH_DFX_MASK), + POLL_PERIOD, POLL_TIMEOUT); + if (ret) + pci_warn(pdev, "failed to empty PEH MSI!\n"); + + ret = readl_relaxed_poll_timeout(qm->io_base + QM_PEH_DFX_INFO1, + val, !(val & QM_PEH_MSI_FINISH_MASK), + POLL_PERIOD, POLL_TIMEOUT); + if (ret) + pci_warn(pdev, "failed to finish MSI operation!\n"); +} + +static int qm_set_msi_v3(struct hisi_qm *qm, bool set) +{ + struct pci_dev *pdev = qm->pdev; + int ret = -ETIMEDOUT; + u32 cmd, i; + + pci_read_config_dword(pdev, pdev->msi_cap, &cmd); + if (set) + cmd |= QM_MSI_CAP_ENABLE; + else + cmd &= ~QM_MSI_CAP_ENABLE; + + pci_write_config_dword(pdev, pdev->msi_cap, cmd); + if (set) { + for (i = 0; i < MAX_WAIT_COUNTS; i++) { + pci_read_config_dword(pdev, pdev->msi_cap, &cmd); + if (cmd & QM_MSI_CAP_ENABLE) + return 0; + + udelay(1); + } + } else { + udelay(WAIT_PERIOD_US_MIN); + qm_wait_msi_finish(qm); + ret = 0; + } + + return ret; +} + static const struct hisi_qm_hw_ops qm_hw_ops_v1 = { .qm_db = qm_db_v1, .get_irq_num = qm_get_irq_num_v1, .hw_error_init = qm_hw_error_init_v1, + .set_msi = qm_set_msi, }; static const struct hisi_qm_hw_ops qm_hw_ops_v2 = { @@ -1789,6 +1882,7 @@ static const struct hisi_qm_hw_ops qm_hw_ops_v2 = { .hw_error_init = qm_hw_error_init_v2, .hw_error_uninit = 
qm_hw_error_uninit_v2, .hw_error_handle = qm_hw_error_handle_v2, + .set_msi = qm_set_msi, }; static const struct hisi_qm_hw_ops qm_hw_ops_v3 = { @@ -1799,6 +1893,7 @@ static const struct hisi_qm_hw_ops qm_hw_ops_v3 = { .hw_error_uninit = qm_hw_error_uninit_v3, .hw_error_handle = qm_hw_error_handle_v2, .stop_qp = qm_stop_qp, + .set_msi = qm_set_msi_v3, }; static void *qm_get_avail_sqe(struct hisi_qp *qp) @@ -3586,6 +3681,9 @@ static int qm_check_req_recv(struct hisi_qm *qm) int ret; u32 val; + if (qm->ver >= QM_HW_V3) + return 0; + writel(ACC_VENDOR_ID_VALUE, qm->io_base + QM_PEH_VENDOR_ID); ret = readl_relaxed_poll_timeout(qm->io_base + QM_PEH_VENDOR_ID, val, (val == ACC_VENDOR_ID_VALUE), @@ -3656,28 +3754,6 @@ static int qm_set_vf_mse(struct hisi_qm *qm, bool set) return -ETIMEDOUT; } -static int qm_set_msi(struct hisi_qm *qm, bool set) -{ - struct pci_dev *pdev = qm->pdev; - - if (set) { - pci_write_config_dword(pdev, pdev->msi_cap + PCI_MSI_MASK_64, - 0); - } else { - pci_write_config_dword(pdev, pdev->msi_cap + PCI_MSI_MASK_64, - ACC_PEH_MSI_DISABLE); - if (qm->err_status.is_qm_ecc_mbit || - qm->err_status.is_dev_ecc_mbit) - return 0; - - mdelay(1); - if (readl(qm->io_base + QM_PEH_DFX_INFO0)) - return -EFAULT; - } - - return 0; -} - static int qm_vf_reset_prepare(struct hisi_qm *qm, enum qm_stop_reason stop_reason) { @@ -3800,7 +3876,7 @@ static int qm_soft_reset(struct hisi_qm *qm) } } - ret = qm_set_msi(qm, false); + ret = qm->ops->set_msi(qm, false); if (ret) { pci_err(pdev, "Fails to disable PEH MSI bit.\n"); return ret; @@ -3941,7 +4017,7 @@ static int qm_controller_reset_done(struct hisi_qm *qm) struct pci_dev *pdev = qm->pdev; int ret; - ret = qm_set_msi(qm, true); + ret = qm->ops->set_msi(qm, true); if (ret) { pci_err(pdev, "Fails to enable PEH MSI bit!\n"); return ret; From a5c164b195a89aedc8179d68cedf00e7f8baa58e Mon Sep 17 00:00:00 2001 From: Longfang Liu Date: Sat, 22 May 2021 15:30:04 +0800 Subject: [PATCH 081/142] crypto: hisilicon/qm - support address prefetching Kunpeng930 hardware supports address prefetching to improve performance before doing tasks in SVA scenario. This patch enables this function in device initialization by writing hardware registers. In the process of reset, address prefetching is disabled to avoid the failure of interaction between accelerator device and SMMU. 
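Note the pairing in the hunks that follow: close_sva_prefetch is invoked from qm_soft_reset(), next to the MSI and MSE disabling, and open_sva_prefetch is invoked again from qm_restart_prepare(), so prefetch is never active while device and SMMU state is being torn down. The new qm_init_prefetch() helper also tells the QM the host page size, encoding 4K/16K/64K as 0x0/0x1/0x2 in the QM_PAGE_SIZE register.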
Signed-off-by: Longfang Liu Signed-off-by: Weili Qian Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/hpre/hpre_main.c | 50 +++++++++++++++++++++++ drivers/crypto/hisilicon/qm.c | 35 ++++++++++++++++ drivers/crypto/hisilicon/qm.h | 2 + drivers/crypto/hisilicon/sec2/sec_main.c | 45 ++++++++++++++++++++ drivers/crypto/hisilicon/zip/zip_main.c | 50 +++++++++++++++++++++++ 5 files changed, 182 insertions(+) diff --git a/drivers/crypto/hisilicon/hpre/hpre_main.c b/drivers/crypto/hisilicon/hpre/hpre_main.c index 13323baf393e8..37c5296008474 100644 --- a/drivers/crypto/hisilicon/hpre/hpre_main.c +++ b/drivers/crypto/hisilicon/hpre/hpre_main.c @@ -75,6 +75,11 @@ #define HPRE_BD_USR_MASK GENMASK(1, 0) #define HPRE_CLUSTER_CORE_MASK_V2 GENMASK(3, 0) #define HPRE_CLUSTER_CORE_MASK_V3 GENMASK(7, 0) +#define HPRE_PREFETCH_CFG 0x301130 +#define HPRE_SVA_PREFTCH_DFX 0x30115C +#define HPRE_PREFETCH_ENABLE (~(BIT(0) | BIT(30))) +#define HPRE_PREFETCH_DISABLE BIT(30) +#define HPRE_SVA_DISABLE_READY (BIT(4) | BIT(8)) #define HPRE_AM_OOO_SHUTDOWN_ENB 0x301044 #define HPRE_AM_OOO_SHUTDOWN_ENABLE BIT(0) @@ -370,6 +375,47 @@ static void disable_flr_of_bme(struct hisi_qm *qm) writel(PEH_AXUSER_CFG_ENABLE, qm->io_base + QM_PEH_AXUSER_CFG_ENABLE); } +static void hpre_open_sva_prefetch(struct hisi_qm *qm) +{ + u32 val; + int ret; + + if (qm->ver < QM_HW_V3) + return; + + /* Enable prefetch */ + val = readl_relaxed(qm->io_base + HPRE_PREFETCH_CFG); + val &= HPRE_PREFETCH_ENABLE; + writel(val, qm->io_base + HPRE_PREFETCH_CFG); + + ret = readl_relaxed_poll_timeout(qm->io_base + HPRE_PREFETCH_CFG, + val, !(val & HPRE_PREFETCH_DISABLE), + HPRE_REG_RD_INTVRL_US, + HPRE_REG_RD_TMOUT_US); + if (ret) + pci_err(qm->pdev, "failed to open sva prefetch\n"); +} + +static void hpre_close_sva_prefetch(struct hisi_qm *qm) +{ + u32 val; + int ret; + + if (qm->ver < QM_HW_V3) + return; + + val = readl_relaxed(qm->io_base + HPRE_PREFETCH_CFG); + val |= HPRE_PREFETCH_DISABLE; + writel(val, qm->io_base + HPRE_PREFETCH_CFG); + + ret = readl_relaxed_poll_timeout(qm->io_base + HPRE_SVA_PREFTCH_DFX, + val, !(val & HPRE_SVA_DISABLE_READY), + HPRE_REG_RD_INTVRL_US, + HPRE_REG_RD_TMOUT_US); + if (ret) + pci_err(qm->pdev, "failed to close sva prefetch\n"); +} + static int hpre_set_user_domain_and_cache(struct hisi_qm *qm) { struct device *dev = &qm->pdev->dev; @@ -876,6 +922,8 @@ static const struct hisi_qm_err_ini hpre_err_ini = { .clear_dev_hw_err_status = hpre_clear_hw_err_status, .log_dev_hw_err = hpre_log_hw_error, .open_axi_master_ooo = hpre_open_axi_master_ooo, + .open_sva_prefetch = hpre_open_sva_prefetch, + .close_sva_prefetch = hpre_close_sva_prefetch, .err_info_init = hpre_err_info_init, }; @@ -888,6 +936,8 @@ static int hpre_pf_probe_init(struct hpre *hpre) if (ret) return ret; + hpre_open_sva_prefetch(qm); + qm->err_ini = &hpre_err_ini; qm->err_ini->err_info_init(qm); hisi_qm_dev_err_init(qm); diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c index a7cd314073c24..fe35ea949a5bb 100644 --- a/drivers/crypto/hisilicon/qm.c +++ b/drivers/crypto/hisilicon/qm.c @@ -95,6 +95,7 @@ #define QM_DOORBELL_SQ_CQ_BASE_V2 0x1000 #define QM_DOORBELL_EQ_AEQ_BASE_V2 0x2000 #define QM_QUE_ISO_CFG_V 0x0030 +#define QM_PAGE_SIZE 0x0034 #define QM_QUE_ISO_EN 0x100154 #define QM_CAPBILITY 0x100158 #define QM_QP_NUN_MASK GENMASK(10, 0) @@ -796,6 +797,32 @@ static void qm_init_qp_status(struct hisi_qp *qp) atomic_set(&qp_status->used, 0); } +static void qm_init_prefetch(struct hisi_qm *qm) +{ + struct device *dev = 
&qm->pdev->dev; + u32 page_type = 0x0; + + if (qm->ver < QM_HW_V3) + return; + + switch (PAGE_SIZE) { + case SZ_4K: + page_type = 0x0; + break; + case SZ_16K: + page_type = 0x1; + break; + case SZ_64K: + page_type = 0x2; + break; + default: + dev_err(dev, "system page size is not support: %lu, default set to 4KB", + PAGE_SIZE); + } + + writel(page_type, qm->io_base + QM_PAGE_SIZE); +} + static void qm_vft_data_cfg(struct hisi_qm *qm, enum vft_type type, u32 base, u32 number) { @@ -2974,6 +3001,8 @@ static int __hisi_qm_start(struct hisi_qm *qm) if (ret) return ret; + qm_init_prefetch(qm); + writel(0x0, qm->io_base + QM_VF_EQ_INT_MASK); writel(0x0, qm->io_base + QM_VF_AEQ_INT_MASK); @@ -3898,6 +3927,9 @@ static int qm_soft_reset(struct hisi_qm *qm) return ret; } + if (qm->err_ini->close_sva_prefetch) + qm->err_ini->close_sva_prefetch(qm); + ret = qm_set_pf_mse(qm, false); if (ret) { pci_err(pdev, "Fails to disable pf MSE bit.\n"); @@ -3967,6 +3999,9 @@ static void qm_restart_prepare(struct hisi_qm *qm) { u32 value; + if (qm->err_ini->open_sva_prefetch) + qm->err_ini->open_sva_prefetch(qm); + if (qm->ver >= QM_HW_V3) return; diff --git a/drivers/crypto/hisilicon/qm.h b/drivers/crypto/hisilicon/qm.h index acefdf8b3a50e..9048aa6e5f8ab 100644 --- a/drivers/crypto/hisilicon/qm.h +++ b/drivers/crypto/hisilicon/qm.h @@ -188,6 +188,8 @@ struct hisi_qm_err_ini { void (*clear_dev_hw_err_status)(struct hisi_qm *qm, u32 err_sts); void (*open_axi_master_ooo)(struct hisi_qm *qm); void (*close_axi_master_ooo)(struct hisi_qm *qm); + void (*open_sva_prefetch)(struct hisi_qm *qm); + void (*close_sva_prefetch)(struct hisi_qm *qm); void (*log_dev_hw_err)(struct hisi_qm *qm, u32 err_sts); void (*err_info_init)(struct hisi_qm *qm); }; diff --git a/drivers/crypto/hisilicon/sec2/sec_main.c b/drivers/crypto/hisilicon/sec2/sec_main.c index 6a4408ea18c1c..8ab4e67b8a417 100644 --- a/drivers/crypto/hisilicon/sec2/sec_main.c +++ b/drivers/crypto/hisilicon/sec2/sec_main.c @@ -85,6 +85,12 @@ #define SEC_USER1_SMMU_MASK (~SEC_USER1_SVA_SET) #define SEC_CORE_INT_STATUS_M_ECC BIT(2) +#define SEC_PREFETCH_CFG 0x301130 +#define SEC_SVA_TRANS 0x301EC4 +#define SEC_PREFETCH_ENABLE (~(BIT(0) | BIT(1) | BIT(11))) +#define SEC_PREFETCH_DISABLE BIT(1) +#define SEC_SVA_DISABLE_READY (BIT(7) | BIT(11)) + #define SEC_DELAY_10_US 10 #define SEC_POLL_TIMEOUT_US 1000 #define SEC_DBGFS_VAL_MAX_LEN 20 @@ -332,6 +338,42 @@ static u8 sec_get_endian(struct hisi_qm *qm) return SEC_64BE; } +static void sec_open_sva_prefetch(struct hisi_qm *qm) +{ + u32 val; + int ret; + + if (qm->ver < QM_HW_V3) + return; + + /* Enable prefetch */ + val = readl_relaxed(qm->io_base + SEC_PREFETCH_CFG); + val &= SEC_PREFETCH_ENABLE; + writel(val, qm->io_base + SEC_PREFETCH_CFG); + + ret = readl_relaxed_poll_timeout(qm->io_base + SEC_PREFETCH_CFG, + val, !(val & SEC_PREFETCH_DISABLE), + SEC_DELAY_10_US, SEC_POLL_TIMEOUT_US); + if (ret) + pci_err(qm->pdev, "failed to open sva prefetch\n"); +} + +static void sec_close_sva_prefetch(struct hisi_qm *qm) +{ + u32 val; + int ret; + + val = readl_relaxed(qm->io_base + SEC_PREFETCH_CFG); + val |= SEC_PREFETCH_DISABLE; + writel(val, qm->io_base + SEC_PREFETCH_CFG); + + ret = readl_relaxed_poll_timeout(qm->io_base + SEC_SVA_TRANS, + val, !(val & SEC_SVA_DISABLE_READY), + SEC_DELAY_10_US, SEC_POLL_TIMEOUT_US); + if (ret) + pci_err(qm->pdev, "failed to close sva prefetch\n"); +} + static int sec_engine_init(struct hisi_qm *qm) { int ret; @@ -751,6 +793,8 @@ static const struct hisi_qm_err_ini sec_err_ini = { 
.clear_dev_hw_err_status = sec_clear_hw_err_status, .log_dev_hw_err = sec_log_hw_error, .open_axi_master_ooo = sec_open_axi_master_ooo, + .open_sva_prefetch = sec_open_sva_prefetch, + .close_sva_prefetch = sec_close_sva_prefetch, .err_info_init = sec_err_info_init, }; @@ -766,6 +810,7 @@ static int sec_pf_probe_init(struct sec_dev *sec) if (ret) return ret; + sec_open_sva_prefetch(qm); hisi_qm_dev_err_init(qm); sec_debug_regs_clear(qm); diff --git a/drivers/crypto/hisilicon/zip/zip_main.c b/drivers/crypto/hisilicon/zip/zip_main.c index 3e23f2a1cf5a5..9e4c49cd6f3ab 100644 --- a/drivers/crypto/hisilicon/zip/zip_main.c +++ b/drivers/crypto/hisilicon/zip/zip_main.c @@ -97,6 +97,14 @@ #define HZIP_RD_CNT_CLR_CE_EN (HZIP_CNT_CLR_CE_EN | \ HZIP_RO_CNT_CLR_CE_EN) +#define HZIP_PREFETCH_CFG 0x3011B0 +#define HZIP_SVA_TRANS 0x3011C4 +#define HZIP_PREFETCH_ENABLE (~(BIT(26) | BIT(17) | BIT(0))) +#define HZIP_SVA_PREFETCH_DISABLE BIT(26) +#define HZIP_SVA_DISABLE_READY (BIT(26) | BIT(30)) +#define HZIP_DELAY_1_US 1 +#define HZIP_POLL_TIMEOUT_US 1000 + static const char hisi_zip_name[] = "hisi_zip"; static struct dentry *hzip_debugfs_root; @@ -263,6 +271,45 @@ int zip_create_qps(struct hisi_qp **qps, int qp_num, int node) return hisi_qm_alloc_qps_node(&zip_devices, qp_num, 0, node, qps); } +static void hisi_zip_open_sva_prefetch(struct hisi_qm *qm) +{ + u32 val; + int ret; + + if (qm->ver < QM_HW_V3) + return; + + /* Enable prefetch */ + val = readl_relaxed(qm->io_base + HZIP_PREFETCH_CFG); + val &= HZIP_PREFETCH_ENABLE; + writel(val, qm->io_base + HZIP_PREFETCH_CFG); + + ret = readl_relaxed_poll_timeout(qm->io_base + HZIP_PREFETCH_CFG, + val, !(val & HZIP_SVA_PREFETCH_DISABLE), + HZIP_DELAY_1_US, HZIP_POLL_TIMEOUT_US); + if (ret) + pci_err(qm->pdev, "failed to open sva prefetch\n"); +} + +static void hisi_zip_close_sva_prefetch(struct hisi_qm *qm) +{ + u32 val; + int ret; + + if (qm->ver < QM_HW_V3) + return; + + val = readl_relaxed(qm->io_base + HZIP_PREFETCH_CFG); + val |= HZIP_SVA_PREFETCH_DISABLE; + writel(val, qm->io_base + HZIP_PREFETCH_CFG); + + ret = readl_relaxed_poll_timeout(qm->io_base + HZIP_SVA_TRANS, + val, !(val & HZIP_SVA_DISABLE_READY), + HZIP_DELAY_1_US, HZIP_POLL_TIMEOUT_US); + if (ret) + pci_err(qm->pdev, "failed to close sva prefetch\n"); +} + static int hisi_zip_set_user_domain_and_cache(struct hisi_qm *qm) { void __iomem *base = qm->io_base; @@ -696,6 +743,8 @@ static const struct hisi_qm_err_ini hisi_zip_err_ini = { .log_dev_hw_err = hisi_zip_log_hw_error, .open_axi_master_ooo = hisi_zip_open_axi_master_ooo, .close_axi_master_ooo = hisi_zip_close_axi_master_ooo, + .open_sva_prefetch = hisi_zip_open_sva_prefetch, + .close_sva_prefetch = hisi_zip_close_sva_prefetch, .err_info_init = hisi_zip_err_info_init, }; @@ -714,6 +763,7 @@ static int hisi_zip_pf_probe_init(struct hisi_zip *hisi_zip) qm->err_ini->err_info_init(qm); hisi_zip_set_user_domain_and_cache(qm); + hisi_zip_open_sva_prefetch(qm); hisi_qm_dev_err_init(qm); hisi_zip_debug_regs_clear(qm); From e7662cb9e99ef0fd15b8a0dcb3e5d7b32f9812d4 Mon Sep 17 00:00:00 2001 From: Zou Wei Date: Mon, 24 May 2021 17:47:08 +0800 Subject: [PATCH 082/142] crypto: hisilicon - switch to memdup_user_nul() Use memdup_user_nul() helper instead of open-coding to simplify the code. 
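memdup_user_nul() folds the allocate/copy/terminate steps into one call and reports failure through an ERR_PTR. A rough sketch of what the helper does internally (the real implementation lives in mm/util.c and differs in allocation details):

static void *memdup_user_nul_sketch(const void __user *src, size_t len)
{
	char *p;

	/* One extra byte for the terminating NUL */
	p = kmalloc(len + 1, GFP_KERNEL);
	if (!p)
		return ERR_PTR(-ENOMEM);

	if (copy_from_user(p, src, len)) {
		kfree(p);
		return ERR_PTR(-EFAULT);
	}
	p[len] = '\0';

	return p;
}

Callers then test the result with IS_ERR() and propagate PTR_ERR(), as the hunk below does.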
v1-->v2: fixed patch title error v2-->v3: return the actual error Reported-by: Hulk Robot Signed-off-by: Zou Wei Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/qm.c | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c index fe35ea949a5bb..7c1f8ab28f995 100644 --- a/drivers/crypto/hisilicon/qm.c +++ b/drivers/crypto/hisilicon/qm.c @@ -1603,16 +1603,9 @@ static ssize_t qm_cmd_write(struct file *filp, const char __user *buffer, if (count > QM_DBG_WRITE_LEN) return -ENOSPC; - cmd_buf = kzalloc(count + 1, GFP_KERNEL); - if (!cmd_buf) - return -ENOMEM; - - if (copy_from_user(cmd_buf, buffer, count)) { - kfree(cmd_buf); - return -EFAULT; - } - - cmd_buf[count] = '\0'; + cmd_buf = memdup_user_nul(buffer, count); + if (IS_ERR(cmd_buf)) + return PTR_ERR(cmd_buf); cmd_buf_tmp = strchr(cmd_buf, '\n'); if (cmd_buf_tmp) { From 5d0421d65be8c02bdde7a44f153babeaf004db7a Mon Sep 17 00:00:00 2001 From: Tian Tao Date: Mon, 24 May 2021 19:53:38 +0800 Subject: [PATCH 083/142] hwrng: exynos - Use pm_runtime_resume_and_get() to replace open coding MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use pm_runtime_resume_and_get() to replace pm_runtime_get_sync() and pm_runtime_put_noidle(). This change just simplifies the code; there are no functional changes. Signed-off-by: Tian Tao Reviewed-by: Krzysztof Kozlowski Acked-by: Łukasz Stelmach Signed-off-by: Herbert Xu --- drivers/char/hw_random/exynos-trng.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/char/hw_random/exynos-trng.c b/drivers/char/hw_random/exynos-trng.c index c8db62bc5ff72..9cc3d542dd0f4 100644 --- a/drivers/char/hw_random/exynos-trng.c +++ b/drivers/char/hw_random/exynos-trng.c @@ -196,10 +196,9 @@ static int __maybe_unused exynos_trng_resume(struct device *dev) { int ret; - ret = pm_runtime_get_sync(dev); + ret = pm_runtime_resume_and_get(dev); if (ret < 0) { dev_err(dev, "Could not get runtime PM.\n"); - pm_runtime_put_noidle(dev); return ret; } From b21d14d9885ace8587a5b5b36cdcda9d8814f313 Mon Sep 17 00:00:00 2001 From: Tian Tao Date: Mon, 24 May 2021 20:20:57 +0800 Subject: [PATCH 084/142] hwrng: omap - Use pm_runtime_resume_and_get() to replace open coding Use pm_runtime_resume_and_get() to replace pm_runtime_get_sync() and pm_runtime_put_noidle(). This change just simplifies the code; there are no functional changes.
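The reason the pm_runtime_put_noidle() call can be dropped at each call site is that pm_runtime_resume_and_get() already drops the usage count itself when the resume fails; it behaves roughly like this sketch (the real helper lives in include/linux/pm_runtime.h):

static inline int pm_runtime_resume_and_get_sketch(struct device *dev)
{
	int ret;

	ret = pm_runtime_get_sync(dev);	/* bump the usage count, resume */
	if (ret < 0) {
		pm_runtime_put_noidle(dev);	/* drop the count on failure */
		return ret;
	}

	return 0;	/* unlike get_sync(), never returns 1 */
}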
Signed-off-by: Tian Tao Signed-off-by: Herbert Xu --- drivers/char/hw_random/omap-rng.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/char/hw_random/omap-rng.c b/drivers/char/hw_random/omap-rng.c index cede9f1591029..00ff96703dd25 100644 --- a/drivers/char/hw_random/omap-rng.c +++ b/drivers/char/hw_random/omap-rng.c @@ -454,10 +454,9 @@ static int omap_rng_probe(struct platform_device *pdev) } pm_runtime_enable(&pdev->dev); - ret = pm_runtime_get_sync(&pdev->dev); + ret = pm_runtime_resume_and_get(&pdev->dev); if (ret < 0) { dev_err(&pdev->dev, "Failed to runtime_get device: %d\n", ret); - pm_runtime_put_noidle(&pdev->dev); goto err_ioremap; } @@ -543,10 +542,9 @@ static int __maybe_unused omap_rng_resume(struct device *dev) struct omap_rng_dev *priv = dev_get_drvdata(dev); int ret; - ret = pm_runtime_get_sync(dev); + ret = pm_runtime_resume_and_get(dev); if (ret < 0) { dev_err(dev, "Failed to runtime_get device: %d\n", ret); - pm_runtime_put_noidle(dev); return ret; } From e9009fb227fa66a66cef02a36fb51c288f411e0d Mon Sep 17 00:00:00 2001 From: Tian Tao Date: Mon, 24 May 2021 20:28:38 +0800 Subject: [PATCH 085/142] hwrng: ks-sa - Use pm_runtime_resume_and_get() to replace open coding Use pm_runtime_resume_and_get() to replace pm_runtime_get_sync() and pm_runtime_put_noidle(). This change just simplifies the code; there are no functional changes. Signed-off-by: Tian Tao Signed-off-by: Herbert Xu --- drivers/char/hw_random/ks-sa-rng.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/char/hw_random/ks-sa-rng.c b/drivers/char/hw_random/ks-sa-rng.c index 8f1d47ff97996..2f2f21f1b659e 100644 --- a/drivers/char/hw_random/ks-sa-rng.c +++ b/drivers/char/hw_random/ks-sa-rng.c @@ -241,10 +241,9 @@ static int ks_sa_rng_probe(struct platform_device *pdev) } pm_runtime_enable(dev); - ret = pm_runtime_get_sync(dev); + ret = pm_runtime_resume_and_get(dev); if (ret < 0) { dev_err(dev, "Failed to enable SA power-domain\n"); - pm_runtime_put_noidle(dev); pm_runtime_disable(dev); return ret; } From 7551a074700a4093f5556a5ae51c1f83ea6b96ba Mon Sep 17 00:00:00 2001 From: Wu Bo Date: Tue, 25 May 2021 16:15:19 +0800 Subject: [PATCH 086/142] crypto: af_alg - use DIV_ROUND_UP helper macro for calculations Replace open coded divisor calculations with the DIV_ROUND_UP kernel macro for better readability. Signed-off-by: Wu Bo Signed-off-by: Herbert Xu --- crypto/af_alg.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crypto/af_alg.c b/crypto/af_alg.c index 18cc82dc4a42f..8bd288d2b089b 100644 --- a/crypto/af_alg.c +++ b/crypto/af_alg.c @@ -411,7 +411,7 @@ int af_alg_make_sg(struct af_alg_sgl *sgl, struct iov_iter *iter, int len) if (n < 0) return n; - npages = (off + n + PAGE_SIZE - 1) >> PAGE_SHIFT; + npages = DIV_ROUND_UP(off + n, PAGE_SIZE); if (WARN_ON(npages == 0)) return -EINVAL; /* Add one extra for linking */ From f5a6bf077126a1ac8a5c489022531e72a088603e Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 25 May 2021 10:30:46 +0200 Subject: [PATCH 087/142] crypto: ixp4xx - convert to platform driver The ixp4xx_crypto driver traditionally registers a bare platform device without attaching it to a driver, and detects the hardware at module init time by reading an SoC specific hardware register. Change this to the conventional method of registering the platform device from the platform code itself when the device is present, turning the module_init/module_exit functions into probe/release driver callbacks.
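After the conversion the driver takes the conventional shape sketched below (simplified from the diff that follows):

static int ixp_crypto_probe(struct platform_device *pdev)
{
	/* former module_init() body; the device is known to exist here */
	return 0;
}

static int ixp_crypto_remove(struct platform_device *pdev)
{
	/* former module_exit() body */
	return 0;
}

static struct platform_driver ixp_crypto_driver = {
	.probe = ixp_crypto_probe,
	.remove = ixp_crypto_remove,
	.driver = { .name = "ixp4xx_crypto" },
};
module_platform_driver(ixp_crypto_driver);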
This enables compile-testing as well as potentially having ixp4xx coexist with other ARMv5 platforms in the same kernel in the future. Cc: Corentin Labbe Tested-by: Corentin Labbe Signed-off-by: Arnd Bergmann Signed-off-by: Linus Walleij Signed-off-by: Herbert Xu --- arch/arm/mach-ixp4xx/common.c | 26 ++++++++++++++++++++++++ drivers/crypto/ixp4xx_crypto.c | 37 ++++++++++++---------------------- 2 files changed, 39 insertions(+), 24 deletions(-) diff --git a/arch/arm/mach-ixp4xx/common.c b/arch/arm/mach-ixp4xx/common.c index 000f672a94c97..007a44412e240 100644 --- a/arch/arm/mach-ixp4xx/common.c +++ b/arch/arm/mach-ixp4xx/common.c @@ -233,12 +233,38 @@ static struct platform_device *ixp46x_devices[] __initdata = { unsigned long ixp4xx_exp_bus_size; EXPORT_SYMBOL(ixp4xx_exp_bus_size); +static struct platform_device_info ixp_dev_info __initdata = { + .name = "ixp4xx_crypto", + .id = 0, + .dma_mask = DMA_BIT_MASK(32), +}; + +static int __init ixp_crypto_register(void) +{ + struct platform_device *pdev; + + if (!(~(*IXP4XX_EXP_CFG2) & (IXP4XX_FEATURE_HASH | + IXP4XX_FEATURE_AES | IXP4XX_FEATURE_DES))) { + printk(KERN_ERR "ixp_crypto: No HW crypto available\n"); + return -ENODEV; + } + + pdev = platform_device_register_full(&ixp_dev_info); + if (IS_ERR(pdev)) + return PTR_ERR(pdev); + + return 0; +} + void __init ixp4xx_sys_init(void) { ixp4xx_exp_bus_size = SZ_16M; platform_add_devices(ixp4xx_devices, ARRAY_SIZE(ixp4xx_devices)); + if (IS_ENABLED(CONFIG_CRYPTO_DEV_IXP4XX)) + ixp_crypto_register(); + if (cpu_is_ixp46x()) { int region; diff --git a/drivers/crypto/ixp4xx_crypto.c b/drivers/crypto/ixp4xx_crypto.c index b38650b0fea10..76099d6cfff9a 100644 --- a/drivers/crypto/ixp4xx_crypto.c +++ b/drivers/crypto/ixp4xx_crypto.c @@ -229,8 +229,6 @@ static dma_addr_t crypt_phys; static int support_aes = 1; -#define DRIVER_NAME "ixp4xx_crypto" - static struct platform_device *pdev; static inline dma_addr_t crypt_virt2phys(struct crypt_ctl *virt) @@ -453,11 +451,6 @@ static int init_ixp_crypto(struct device *dev) int ret = -ENODEV; u32 msg[2] = { 0, 0 }; - if (! 
( ~(*IXP4XX_EXP_CFG2) & (IXP4XX_FEATURE_HASH | - IXP4XX_FEATURE_AES | IXP4XX_FEATURE_DES))) { - dev_err(dev, "ixp_crypto: No HW crypto available\n"); - return ret; - } npe_c = npe_request(NPE_ID); if (!npe_c) return ret; @@ -1441,26 +1434,17 @@ static struct ixp_aead_alg ixp4xx_aeads[] = { #define IXP_POSTFIX "-ixp4xx" -static const struct platform_device_info ixp_dev_info __initdata = { - .name = DRIVER_NAME, - .id = 0, - .dma_mask = DMA_BIT_MASK(32), -}; - -static int __init ixp_module_init(void) +static int ixp_crypto_probe(struct platform_device *_pdev) { int num = ARRAY_SIZE(ixp4xx_algos); int i, err; - pdev = platform_device_register_full(&ixp_dev_info); - if (IS_ERR(pdev)) - return PTR_ERR(pdev); + pdev = _pdev; err = init_ixp_crypto(&pdev->dev); - if (err) { - platform_device_unregister(pdev); + if (err) return err; - } + for (i = 0; i < num; i++) { struct skcipher_alg *cra = &ixp4xx_algos[i].crypto; @@ -1531,7 +1515,7 @@ static int __init ixp_module_init(void) return 0; } -static void __exit ixp_module_exit(void) +static int ixp_crypto_remove(struct platform_device *pdev) { int num = ARRAY_SIZE(ixp4xx_algos); int i; @@ -1546,11 +1530,16 @@ static void __exit ixp_module_exit(void) crypto_unregister_skcipher(&ixp4xx_algos[i].crypto); } release_ixp_crypto(&pdev->dev); - platform_device_unregister(pdev); + + return 0; } -module_init(ixp_module_init); -module_exit(ixp_module_exit); +static struct platform_driver ixp_crypto_driver = { + .probe = ixp_crypto_probe, + .remove = ixp_crypto_remove, + .driver = { .name = "ixp4xx_crypto" }, +}; +module_platform_driver(ixp_crypto_driver); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Christian Hohnstaedt "); From 937264905aa21655cb1142146997f211153e6e27 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Tue, 25 May 2021 10:48:46 +0200 Subject: [PATCH 088/142] crypto: ixp4xx - Add DT bindings This adds device tree bindings for the ixp4xx crypto engine. Cc: Corentin Labbe Cc: devicetree@vger.kernel.org Signed-off-by: Linus Walleij Reviewed-by: Rob Herring Signed-off-by: Herbert Xu --- .../bindings/crypto/intel,ixp4xx-crypto.yaml | 47 +++++++++++++++++++ ...ntel,ixp4xx-network-processing-engine.yaml | 22 +++++++-- 2 files changed, 65 insertions(+), 4 deletions(-) create mode 100644 Documentation/devicetree/bindings/crypto/intel,ixp4xx-crypto.yaml diff --git a/Documentation/devicetree/bindings/crypto/intel,ixp4xx-crypto.yaml b/Documentation/devicetree/bindings/crypto/intel,ixp4xx-crypto.yaml new file mode 100644 index 0000000000000..9c53c27bd20ac --- /dev/null +++ b/Documentation/devicetree/bindings/crypto/intel,ixp4xx-crypto.yaml @@ -0,0 +1,47 @@ +# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) +# Copyright 2018 Linaro Ltd. +%YAML 1.2 +--- +$id: "http://devicetree.org/schemas/crypto/intel,ixp4xx-crypto.yaml#" +$schema: "http://devicetree.org/meta-schemas/core.yaml#" + +title: Intel IXP4xx cryptographic engine + +maintainers: + - Linus Walleij + +description: | + The Intel IXP4xx cryptographic engine makes use of the IXP4xx NPE + (Network Processing Engine). Since it is not a device on its own + it is defined as a subnode of the NPE, if crypto support is + available on the platform. + +properties: + compatible: + const: intel,ixp4xx-crypto + + intel,npe-handle: + $ref: '/schemas/types.yaml#/definitions/phandle-array' + maxItems: 1 + description: phandle to the NPE this crypto engine is using, the cell + describing the NPE instance to be used. 
+ + queue-rx: + $ref: /schemas/types.yaml#/definitions/phandle-array + maxItems: 1 + description: phandle to the RX queue on the NPE, the cell describing + the queue instance to be used. + + queue-txready: + $ref: /schemas/types.yaml#/definitions/phandle-array + maxItems: 1 + description: phandle to the TX READY queue on the NPE, the cell describing + the queue instance to be used. + +required: + - compatible + - intel,npe-handle + - queue-rx + - queue-txready + +additionalProperties: false diff --git a/Documentation/devicetree/bindings/firmware/intel,ixp4xx-network-processing-engine.yaml b/Documentation/devicetree/bindings/firmware/intel,ixp4xx-network-processing-engine.yaml index 1bd2870c3a9c9..c435c9f369a41 100644 --- a/Documentation/devicetree/bindings/firmware/intel,ixp4xx-network-processing-engine.yaml +++ b/Documentation/devicetree/bindings/firmware/intel,ixp4xx-network-processing-engine.yaml @@ -26,9 +26,16 @@ properties: reg: items: - - description: NPE0 register range - - description: NPE1 register range - - description: NPE2 register range + - description: NPE0 (NPE-A) register range + - description: NPE1 (NPE-B) register range + - description: NPE2 (NPE-C) register range + + crypto: + $ref: /schemas/crypto/intel,ixp4xx-crypto.yaml# + type: object + description: Optional node for the embedded crypto engine, the node + should be named with the instance number of the NPE engine used for + the crypto engine. required: - compatible @@ -38,8 +45,15 @@ additionalProperties: false examples: - | - npe@c8006000 { + npe: npe@c8006000 { compatible = "intel,ixp4xx-network-processing-engine"; reg = <0xc8006000 0x1000>, <0xc8007000 0x1000>, <0xc8008000 0x1000>; + + crypto { + compatible = "intel,ixp4xx-crypto"; + intel,npe-handle = <&npe 2>; + queue-rx = <&qmgr 30>; + queue-txready = <&qmgr 29>; + }; }; ... From 76f24b4f46b8ca380d6e2c91bd84e0e47a9f4bcd Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Tue, 25 May 2021 10:50:56 +0200 Subject: [PATCH 089/142] crypto: ixp4xx - Add device tree support This makes the IXP4xx driver probe from the device tree and retrieve the NPE and the two queue manager handles used to process crypto. As the crypto engine is topologically a part of the NPE hardware, we augment the NPE driver to spawn the crypto engine as a child. The platform data probe path is going away in due time; for now it is an isolated else clause.
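As a sketch of how the driver side consumes these bindings, a phandle plus one argument cell such as intel,npe-handle = <&npe 2> is resolved with of_parse_phandle_with_fixed_args(), and the argument cell selects the NPE instance (error handling trimmed; the full version is in the diff below, where np is the device's of_node):

struct of_phandle_args npe_spec;
u32 npe_id;
int ret;

/* one fixed argument cell follows the phandle */
ret = of_parse_phandle_with_fixed_args(np, "intel,npe-handle",
				       1, 0, &npe_spec);
if (!ret)
	npe_id = npe_spec.args[0];	/* 2 selects NPE-C in the example */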
Cc: Corentin Labbe Signed-off-by: Linus Walleij Signed-off-by: Herbert Xu --- drivers/crypto/ixp4xx_crypto.c | 107 +++++++++++++++++++++++--------- drivers/soc/ixp4xx/ixp4xx-npe.c | 7 +++ 2 files changed, 86 insertions(+), 28 deletions(-) diff --git a/drivers/crypto/ixp4xx_crypto.c b/drivers/crypto/ixp4xx_crypto.c index 76099d6cfff9a..35fc5ee704915 100644 --- a/drivers/crypto/ixp4xx_crypto.c +++ b/drivers/crypto/ixp4xx_crypto.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -71,15 +72,11 @@ #define MOD_AES256 (0x0a00 | KEYLEN_256) #define MAX_IVLEN 16 -#define NPE_ID 2 /* NPE C */ #define NPE_QLEN 16 /* Space for registering when the first * NPE_QLEN crypt_ctl are busy */ #define NPE_QLEN_TOTAL 64 -#define SEND_QID 29 -#define RECV_QID 30 - #define CTL_FLAG_UNUSED 0x0000 #define CTL_FLAG_USED 0x1000 #define CTL_FLAG_PERFORM_ABLK 0x0001 @@ -221,6 +218,9 @@ static const struct ix_hash_algo hash_alg_sha1 = { }; static struct npe *npe_c; + +static unsigned int send_qid; +static unsigned int recv_qid; static struct dma_pool *buffer_pool; static struct dma_pool *ctx_pool; @@ -437,8 +437,7 @@ static void crypto_done_action(unsigned long arg) int i; for (i = 0; i < 4; i++) { - dma_addr_t phys = qmgr_get_entry(RECV_QID); - + dma_addr_t phys = qmgr_get_entry(recv_qid); if (!phys) return; one_packet(phys); @@ -448,10 +447,52 @@ static void crypto_done_action(unsigned long arg) static int init_ixp_crypto(struct device *dev) { - int ret = -ENODEV; + struct device_node *np = dev->of_node; u32 msg[2] = { 0, 0 }; + int ret = -ENODEV; + u32 npe_id; + + dev_info(dev, "probing...\n"); + + /* Locate the NPE and queue manager to use from device tree */ + if (IS_ENABLED(CONFIG_OF) && np) { + struct of_phandle_args queue_spec; + struct of_phandle_args npe_spec; + + ret = of_parse_phandle_with_fixed_args(np, "intel,npe-handle", + 1, 0, &npe_spec); + if (ret) { + dev_err(dev, "no NPE engine specified\n"); + return -ENODEV; + } + npe_id = npe_spec.args[0]; - npe_c = npe_request(NPE_ID); + ret = of_parse_phandle_with_fixed_args(np, "queue-rx", 1, 0, + &queue_spec); + if (ret) { + dev_err(dev, "no rx queue phandle\n"); + return -ENODEV; + } + recv_qid = queue_spec.args[0]; + + ret = of_parse_phandle_with_fixed_args(np, "queue-txready", 1, 0, + &queue_spec); + if (ret) { + dev_err(dev, "no txready queue phandle\n"); + return -ENODEV; + } + send_qid = queue_spec.args[0]; + } else { + /* + * Hardcoded engine when using platform data, this goes away + * when we switch to using DT only. 
+ */ + npe_id = 2; + send_qid = 29; + recv_qid = 30; + } + + npe_c = npe_request(npe_id); if (!npe_c) return ret; @@ -497,20 +538,20 @@ static int init_ixp_crypto(struct device *dev) if (!ctx_pool) goto err; - ret = qmgr_request_queue(SEND_QID, NPE_QLEN_TOTAL, 0, 0, + ret = qmgr_request_queue(send_qid, NPE_QLEN_TOTAL, 0, 0, "ixp_crypto:out", NULL); if (ret) goto err; - ret = qmgr_request_queue(RECV_QID, NPE_QLEN, 0, 0, + ret = qmgr_request_queue(recv_qid, NPE_QLEN, 0, 0, "ixp_crypto:in", NULL); if (ret) { - qmgr_release_queue(SEND_QID); + qmgr_release_queue(send_qid); goto err; } - qmgr_set_irq(RECV_QID, QUEUE_IRQ_SRC_NOT_EMPTY, irqhandler, NULL); + qmgr_set_irq(recv_qid, QUEUE_IRQ_SRC_NOT_EMPTY, irqhandler, NULL); tasklet_init(&crypto_done_tasklet, crypto_done_action, 0); - qmgr_enable_irq(RECV_QID); + qmgr_enable_irq(recv_qid); return 0; npe_error: @@ -526,11 +567,11 @@ static int init_ixp_crypto(struct device *dev) static void release_ixp_crypto(struct device *dev) { - qmgr_disable_irq(RECV_QID); + qmgr_disable_irq(recv_qid); tasklet_kill(&crypto_done_tasklet); - qmgr_release_queue(SEND_QID); - qmgr_release_queue(RECV_QID); + qmgr_release_queue(send_qid); + qmgr_release_queue(recv_qid); dma_pool_destroy(ctx_pool); dma_pool_destroy(buffer_pool); @@ -682,8 +723,8 @@ static int register_chain_var(struct crypto_tfm *tfm, u8 xpad, u32 target, buf->phys_addr = pad_phys; atomic_inc(&ctx->configuring); - qmgr_put_entry(SEND_QID, crypt_virt2phys(crypt)); - BUG_ON(qmgr_stat_overflow(SEND_QID)); + qmgr_put_entry(send_qid, crypt_virt2phys(crypt)); + BUG_ON(qmgr_stat_overflow(send_qid)); return 0; } @@ -757,8 +798,8 @@ static int gen_rev_aes_key(struct crypto_tfm *tfm) crypt->ctl_flags |= CTL_FLAG_GEN_REVAES; atomic_inc(&ctx->configuring); - qmgr_put_entry(SEND_QID, crypt_virt2phys(crypt)); - BUG_ON(qmgr_stat_overflow(SEND_QID)); + qmgr_put_entry(send_qid, crypt_virt2phys(crypt)); + BUG_ON(qmgr_stat_overflow(send_qid)); return 0; } @@ -943,7 +984,7 @@ static int ablk_perform(struct skcipher_request *req, int encrypt) if (sg_nents(req->src) > 1 || sg_nents(req->dst) > 1) return ixp4xx_cipher_fallback(req, encrypt); - if (qmgr_stat_full(SEND_QID)) + if (qmgr_stat_full(send_qid)) return -EAGAIN; if (atomic_read(&ctx->configuring)) return -EAGAIN; @@ -993,8 +1034,8 @@ static int ablk_perform(struct skcipher_request *req, int encrypt) req_ctx->src = src_hook.next; crypt->src_buf = src_hook.phys_next; crypt->ctl_flags |= CTL_FLAG_PERFORM_ABLK; - qmgr_put_entry(SEND_QID, crypt_virt2phys(crypt)); - BUG_ON(qmgr_stat_overflow(SEND_QID)); + qmgr_put_entry(send_qid, crypt_virt2phys(crypt)); + BUG_ON(qmgr_stat_overflow(send_qid)); return -EINPROGRESS; free_buf_src: @@ -1057,7 +1098,7 @@ static int aead_perform(struct aead_request *req, int encrypt, enum dma_data_direction src_direction = DMA_BIDIRECTIONAL; unsigned int lastlen; - if (qmgr_stat_full(SEND_QID)) + if (qmgr_stat_full(send_qid)) return -EAGAIN; if (atomic_read(&ctx->configuring)) return -EAGAIN; @@ -1141,8 +1182,8 @@ static int aead_perform(struct aead_request *req, int encrypt, } crypt->ctl_flags |= CTL_FLAG_PERFORM_AEAD; - qmgr_put_entry(SEND_QID, crypt_virt2phys(crypt)); - BUG_ON(qmgr_stat_overflow(SEND_QID)); + qmgr_put_entry(send_qid, crypt_virt2phys(crypt)); + BUG_ON(qmgr_stat_overflow(send_qid)); return -EINPROGRESS; free_buf_dst: @@ -1436,12 +1477,13 @@ static struct ixp_aead_alg ixp4xx_aeads[] = { static int ixp_crypto_probe(struct platform_device *_pdev) { + struct device *dev = &_pdev->dev; int num = ARRAY_SIZE(ixp4xx_algos); int i, err; 
pdev = _pdev; - err = init_ixp_crypto(&pdev->dev); + err = init_ixp_crypto(dev); if (err) return err; @@ -1533,11 +1575,20 @@ static int ixp_crypto_remove(struct platform_device *pdev) return 0; } +static const struct of_device_id ixp4xx_crypto_of_match[] = { + { + .compatible = "intel,ixp4xx-crypto", + }, + {}, +}; static struct platform_driver ixp_crypto_driver = { .probe = ixp_crypto_probe, .remove = ixp_crypto_remove, - .driver = { .name = "ixp4xx_crypto" }, + .driver = { + .name = "ixp4xx_crypto", + .of_match_table = ixp4xx_crypto_of_match, + }, }; module_platform_driver(ixp_crypto_driver); diff --git a/drivers/soc/ixp4xx/ixp4xx-npe.c b/drivers/soc/ixp4xx/ixp4xx-npe.c index ec90b44fa0cd3..3c158251a58b6 100644 --- a/drivers/soc/ixp4xx/ixp4xx-npe.c +++ b/drivers/soc/ixp4xx/ixp4xx-npe.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include @@ -679,6 +680,7 @@ static int ixp4xx_npe_probe(struct platform_device *pdev) { int i, found = 0; struct device *dev = &pdev->dev; + struct device_node *np = dev->of_node; struct resource *res; for (i = 0; i < NPE_COUNT; i++) { @@ -711,6 +713,11 @@ static int ixp4xx_npe_probe(struct platform_device *pdev) if (!found) return -ENODEV; + + /* Spawn crypto subdevice if using device tree */ + if (IS_ENABLED(CONFIG_OF) && np) + devm_of_platform_populate(dev); + return 0; } From 4cd8c3152edeb0a580e0552317606a1f90bc59ab Mon Sep 17 00:00:00 2001 From: Srujana Challa Date: Tue, 25 May 2021 16:57:15 +0530 Subject: [PATCH 090/142] crypto: octeontx2 - Add mailbox support for CN10K Mailbox region configuration on the CN10K platform differs from the OcteonTX2 (CN9XX) platform. On the CN10K platform: The DRAM region allocated to the PF is enumerated as PF BAR4 memory. PF BAR4 contains the AF-PF mbox region followed by its VFs' mbox regions. The AF-PF mbox region base address is configured at RVU_AF_PFX_BAR4_ADDR. The PF-VF mailbox base address is configured at RVU_PF(x)_VF_MBOX_ADDR = RVU_AF_PF()_BAR4_ADDR + 64KB. The PF accesses its mbox region via BAR4, whereas a VF accesses the PF-VF DRAM mailboxes via BAR2 indirect access. On the CN9XX platform: The mailbox region in DRAM is divided into two parts, the AF-PF mbox region and the PF-VF mbox region, i.e. all PF mbox regions are contiguous, and similarly all VF regions. The base address of the AF-PF mbox region is configured at RVU_AF_PF_BAR4_ADDR, so the AF-PF1 mbox address can be calculated as RVU_AF_PF_BAR4_ADDR + 1 * mbox size. This patch changes mbox initialization to support both the CN9XX and CN10K platforms. This patch also removes the platform specific name from the PF/VF driver name to make it appropriate for all supported platforms.
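The two layouts reduce to a different source for the mailbox base address; a condensed sketch of the selection this patch makes (taken from the PF-side hunk below):

/*
 * CN9XX: one contiguous DRAM region; PF N's AF-PF mbox sits at
 *        RVU_AF_PF_BAR4_ADDR + N * MBOX_SIZE.
 * CN10K: every PF has its own BAR4 region; the AF-PF mbox is at its
 *        start and the PF-VF mboxes follow at RVU_PF(x)_VF_MBOX_ADDR.
 */
if (test_bit(CN10K_MBOX, &cptpf->cap_flag))
	vfpf_mbox_base = readq(cptpf->reg_base + RVU_PF_VF_MBOX_ADDR);
else
	vfpf_mbox_base = readq(cptpf->reg_base + RVU_PF_VF_BAR4_ADDR);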
Signed-off-by: Srujana Challa Signed-off-by: Herbert Xu --- drivers/crypto/marvell/octeontx2/Makefile | 12 ++--- .../marvell/octeontx2/otx2_cpt_common.h | 20 +++++++++ .../marvell/octeontx2/otx2_cpt_hw_types.h | 3 ++ drivers/crypto/marvell/octeontx2/otx2_cptpf.h | 1 + .../marvell/octeontx2/otx2_cptpf_main.c | 35 +++++++++------ drivers/crypto/marvell/octeontx2/otx2_cptvf.h | 3 ++ .../marvell/octeontx2/otx2_cptvf_main.c | 45 +++++++++++++------ .../marvell/octeontx2/otx2_cptvf_mbox.c | 43 ++++++++++++++++++ 8 files changed, 129 insertions(+), 33 deletions(-) diff --git a/drivers/crypto/marvell/octeontx2/Makefile b/drivers/crypto/marvell/octeontx2/Makefile index b9c6201019e0c..10e1fe056a9e5 100644 --- a/drivers/crypto/marvell/octeontx2/Makefile +++ b/drivers/crypto/marvell/octeontx2/Makefile @@ -1,10 +1,10 @@ # SPDX-License-Identifier: GPL-2.0-only -obj-$(CONFIG_CRYPTO_DEV_OCTEONTX2_CPT) += octeontx2-cpt.o octeontx2-cptvf.o +obj-$(CONFIG_CRYPTO_DEV_OCTEONTX2_CPT) += rvu_cptpf.o rvu_cptvf.o -octeontx2-cpt-objs := otx2_cptpf_main.o otx2_cptpf_mbox.o \ - otx2_cpt_mbox_common.o otx2_cptpf_ucode.o otx2_cptlf.o -octeontx2-cptvf-objs := otx2_cptvf_main.o otx2_cptvf_mbox.o otx2_cptlf.o \ - otx2_cpt_mbox_common.o otx2_cptvf_reqmgr.o \ - otx2_cptvf_algs.o +rvu_cptpf-objs := otx2_cptpf_main.o otx2_cptpf_mbox.o \ + otx2_cpt_mbox_common.o otx2_cptpf_ucode.o otx2_cptlf.o +rvu_cptvf-objs := otx2_cptvf_main.o otx2_cptvf_mbox.o otx2_cptlf.o \ + otx2_cpt_mbox_common.o otx2_cptvf_reqmgr.o \ + otx2_cptvf_algs.o ccflags-y += -I$(srctree)/drivers/net/ethernet/marvell/octeontx2/af diff --git a/drivers/crypto/marvell/octeontx2/otx2_cpt_common.h b/drivers/crypto/marvell/octeontx2/otx2_cpt_common.h index ecedd91a8d859..414427dcfa61b 100644 --- a/drivers/crypto/marvell/octeontx2/otx2_cpt_common.h +++ b/drivers/crypto/marvell/octeontx2/otx2_cpt_common.h @@ -25,6 +25,9 @@ #define OTX2_CPT_NAME_LENGTH 64 #define OTX2_CPT_DMA_MINALIGN 128 +/* HW capability flags */ +#define CN10K_MBOX 0 + #define BAD_OTX2_CPT_ENG_TYPE OTX2_CPT_MAX_ENG_TYPES enum otx2_cpt_eng_type { @@ -116,6 +119,23 @@ static inline u64 otx2_cpt_read64(void __iomem *reg_base, u64 blk, u64 slot, OTX2_CPT_RVU_FUNC_ADDR_S(blk, slot, offs)); } +static inline bool is_dev_otx2(struct pci_dev *pdev) +{ + if (pdev->device == OTX2_CPT_PCI_PF_DEVICE_ID || + pdev->device == OTX2_CPT_PCI_VF_DEVICE_ID) + return true; + + return false; +} + +static inline void otx2_cpt_set_hw_caps(struct pci_dev *pdev, + unsigned long *cap_flag) +{ + if (!is_dev_otx2(pdev)) + __set_bit(CN10K_MBOX, cap_flag); +} + + int otx2_cpt_send_ready_msg(struct otx2_mbox *mbox, struct pci_dev *pdev); int otx2_cpt_send_mbox_msg(struct otx2_mbox *mbox, struct pci_dev *pdev); diff --git a/drivers/crypto/marvell/octeontx2/otx2_cpt_hw_types.h b/drivers/crypto/marvell/octeontx2/otx2_cpt_hw_types.h index ecafc42f37a26..391a457f71163 100644 --- a/drivers/crypto/marvell/octeontx2/otx2_cpt_hw_types.h +++ b/drivers/crypto/marvell/octeontx2/otx2_cpt_hw_types.h @@ -10,6 +10,8 @@ /* Device IDs */ #define OTX2_CPT_PCI_PF_DEVICE_ID 0xA0FD #define OTX2_CPT_PCI_VF_DEVICE_ID 0xA0FE +#define CN10K_CPT_PCI_PF_DEVICE_ID 0xA0F2 +#define CN10K_CPT_PCI_VF_DEVICE_ID 0xA0F3 /* Mailbox interrupts offset */ #define OTX2_CPT_PF_MBOX_INT 6 @@ -25,6 +27,7 @@ */ #define OTX2_CPT_VF_MSIX_VECTORS 1 #define OTX2_CPT_VF_INTR_MBOX_MASK BIT(0) +#define CN10K_CPT_VF_MBOX_REGION (0xC0000) /* CPT LF MSIX vectors */ #define OTX2_CPT_LF_MSIX_VECTORS 2 diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptpf.h 
b/drivers/crypto/marvell/octeontx2/otx2_cptpf.h index e19af1356f123..5ebba86c65d93 100644 --- a/drivers/crypto/marvell/octeontx2/otx2_cptpf.h +++ b/drivers/crypto/marvell/octeontx2/otx2_cptpf.h @@ -47,6 +47,7 @@ struct otx2_cptpf_dev { struct workqueue_struct *flr_wq; struct cptpf_flr_work *flr_work; + unsigned long cap_flag; u8 pf_id; /* RVU PF number */ u8 max_vfs; /* Maximum number of VFs supported by CPT */ u8 enabled_vfs; /* Number of enabled VFs */ diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptpf_main.c b/drivers/crypto/marvell/octeontx2/otx2_cptpf_main.c index 58f47e3ab62e7..d341aecd3dd2f 100644 --- a/drivers/crypto/marvell/octeontx2/otx2_cptpf_main.c +++ b/drivers/crypto/marvell/octeontx2/otx2_cptpf_main.c @@ -8,8 +8,8 @@ #include "otx2_cptpf.h" #include "rvu_reg.h" -#define OTX2_CPT_DRV_NAME "octeontx2-cpt" -#define OTX2_CPT_DRV_STRING "Marvell OcteonTX2 CPT Physical Function Driver" +#define OTX2_CPT_DRV_NAME "rvu_cptpf" +#define OTX2_CPT_DRV_STRING "Marvell RVU CPT Physical Function Driver" static void cptpf_enable_vfpf_mbox_intr(struct otx2_cptpf_dev *cptpf, int num_vfs) @@ -284,7 +284,11 @@ static int cptpf_vfpf_mbox_init(struct otx2_cptpf_dev *cptpf, int num_vfs) return -ENOMEM; /* Map VF-PF mailbox memory */ - vfpf_mbox_base = readq(cptpf->reg_base + RVU_PF_VF_BAR4_ADDR); + if (test_bit(CN10K_MBOX, &cptpf->cap_flag)) + vfpf_mbox_base = readq(cptpf->reg_base + RVU_PF_VF_MBOX_ADDR); + else + vfpf_mbox_base = readq(cptpf->reg_base + RVU_PF_VF_BAR4_ADDR); + if (!vfpf_mbox_base) { dev_err(dev, "VF-PF mailbox address not configured\n"); err = -ENOMEM; @@ -365,6 +369,8 @@ static int cptpf_register_afpf_mbox_intr(struct otx2_cptpf_dev *cptpf) static int cptpf_afpf_mbox_init(struct otx2_cptpf_dev *cptpf) { + struct pci_dev *pdev = cptpf->pdev; + resource_size_t offset; int err; cptpf->afpf_mbox_wq = alloc_workqueue("cpt_afpf_mailbox", @@ -373,8 +379,17 @@ static int cptpf_afpf_mbox_init(struct otx2_cptpf_dev *cptpf) if (!cptpf->afpf_mbox_wq) return -ENOMEM; + offset = pci_resource_start(pdev, PCI_MBOX_BAR_NUM); + /* Map AF-PF mailbox memory */ + cptpf->afpf_mbox_base = devm_ioremap_wc(&pdev->dev, offset, MBOX_SIZE); + if (!cptpf->afpf_mbox_base) { + dev_err(&pdev->dev, "Unable to map BAR4\n"); + err = -ENOMEM; + goto error; + } + err = otx2_mbox_init(&cptpf->afpf_mbox, cptpf->afpf_mbox_base, - cptpf->pdev, cptpf->reg_base, MBOX_DIR_PFAF, 1); + pdev, cptpf->reg_base, MBOX_DIR_PFAF, 1); if (err) goto error; @@ -607,7 +622,6 @@ static int otx2_cptpf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) { struct device *dev = &pdev->dev; - resource_size_t offset, size; struct otx2_cptpf_dev *cptpf; int err; @@ -644,15 +658,6 @@ static int otx2_cptpf_probe(struct pci_dev *pdev, if (err) goto clear_drvdata; - offset = pci_resource_start(pdev, PCI_MBOX_BAR_NUM); - size = pci_resource_len(pdev, PCI_MBOX_BAR_NUM); - /* Map AF-PF mailbox memory */ - cptpf->afpf_mbox_base = devm_ioremap_wc(dev, offset, size); - if (!cptpf->afpf_mbox_base) { - dev_err(&pdev->dev, "Unable to map BAR4\n"); - err = -ENODEV; - goto clear_drvdata; - } err = pci_alloc_irq_vectors(pdev, RVU_PF_INT_VEC_CNT, RVU_PF_INT_VEC_CNT, PCI_IRQ_MSIX); if (err < 0) { @@ -660,6 +665,7 @@ static int otx2_cptpf_probe(struct pci_dev *pdev, RVU_PF_INT_VEC_CNT); goto clear_drvdata; } + otx2_cpt_set_hw_caps(pdev, &cptpf->cap_flag); /* Initialize AF-PF mailbox */ err = cptpf_afpf_mbox_init(cptpf); if (err) @@ -719,6 +725,7 @@ static void otx2_cptpf_remove(struct pci_dev *pdev) /* Supported devices */ static const struct 
pci_device_id otx2_cpt_id_table[] = { { PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, OTX2_CPT_PCI_PF_DEVICE_ID) }, + { PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, CN10K_CPT_PCI_PF_DEVICE_ID) }, { 0, } /* end of table */ }; diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptvf.h b/drivers/crypto/marvell/octeontx2/otx2_cptvf.h index 4f0a169fddbd0..4207e2236903e 100644 --- a/drivers/crypto/marvell/octeontx2/otx2_cptvf.h +++ b/drivers/crypto/marvell/octeontx2/otx2_cptvf.h @@ -19,11 +19,14 @@ struct otx2_cptvf_dev { struct otx2_mbox pfvf_mbox; struct work_struct pfvf_mbox_work; struct workqueue_struct *pfvf_mbox_wq; + void *bbuf_base; + unsigned long cap_flag; }; irqreturn_t otx2_cptvf_pfvf_mbox_intr(int irq, void *arg); void otx2_cptvf_pfvf_mbox_handler(struct work_struct *work); int otx2_cptvf_send_eng_grp_num_msg(struct otx2_cptvf_dev *cptvf, int eng_type); int otx2_cptvf_send_kvf_limits_msg(struct otx2_cptvf_dev *cptvf); +int otx2_cpt_mbox_bbuf_init(struct otx2_cptvf_dev *cptvf, struct pci_dev *pdev); #endif /* __OTX2_CPTVF_H */ diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptvf_main.c b/drivers/crypto/marvell/octeontx2/otx2_cptvf_main.c index 47f3787310243..5178e0688d755 100644 --- a/drivers/crypto/marvell/octeontx2/otx2_cptvf_main.c +++ b/drivers/crypto/marvell/octeontx2/otx2_cptvf_main.c @@ -7,7 +7,7 @@ #include "otx2_cptvf_algs.h" #include -#define OTX2_CPTVF_DRV_NAME "octeontx2-cptvf" +#define OTX2_CPTVF_DRV_NAME "rvu_cptvf" static void cptvf_enable_pfvf_mbox_intrs(struct otx2_cptvf_dev *cptvf) { @@ -70,6 +70,8 @@ static int cptvf_register_interrupts(struct otx2_cptvf_dev *cptvf) static int cptvf_pfvf_mbox_init(struct otx2_cptvf_dev *cptvf) { + struct pci_dev *pdev = cptvf->pdev; + resource_size_t offset, size; int ret; cptvf->pfvf_mbox_wq = alloc_workqueue("cpt_pfvf_mailbox", @@ -78,14 +80,39 @@ static int cptvf_pfvf_mbox_init(struct otx2_cptvf_dev *cptvf) if (!cptvf->pfvf_mbox_wq) return -ENOMEM; + if (test_bit(CN10K_MBOX, &cptvf->cap_flag)) { + /* For cn10k platform, VF mailbox region is in its BAR2 + * register space + */ + cptvf->pfvf_mbox_base = cptvf->reg_base + + CN10K_CPT_VF_MBOX_REGION; + } else { + offset = pci_resource_start(pdev, PCI_MBOX_BAR_NUM); + size = pci_resource_len(pdev, PCI_MBOX_BAR_NUM); + /* Map PF-VF mailbox memory */ + cptvf->pfvf_mbox_base = devm_ioremap_wc(&pdev->dev, offset, + size); + if (!cptvf->pfvf_mbox_base) { + dev_err(&pdev->dev, "Unable to map BAR4\n"); + ret = -ENOMEM; + goto free_wqe; + } + } + ret = otx2_mbox_init(&cptvf->pfvf_mbox, cptvf->pfvf_mbox_base, - cptvf->pdev, cptvf->reg_base, MBOX_DIR_VFPF, 1); + pdev, cptvf->reg_base, MBOX_DIR_VFPF, 1); if (ret) goto free_wqe; + ret = otx2_cpt_mbox_bbuf_init(cptvf, pdev); + if (ret) + goto destroy_mbox; + INIT_WORK(&cptvf->pfvf_mbox_work, otx2_cptvf_pfvf_mbox_handler); return 0; +destroy_mbox: + otx2_mbox_destroy(&cptvf->pfvf_mbox); free_wqe: destroy_workqueue(cptvf->pfvf_mbox_wq); return ret; @@ -305,7 +332,6 @@ static int otx2_cptvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) { struct device *dev = &pdev->dev; - resource_size_t offset, size; struct otx2_cptvf_dev *cptvf; int ret; @@ -337,15 +363,7 @@ static int otx2_cptvf_probe(struct pci_dev *pdev, cptvf->reg_base = pcim_iomap_table(pdev)[PCI_PF_REG_BAR_NUM]; - offset = pci_resource_start(pdev, PCI_MBOX_BAR_NUM); - size = pci_resource_len(pdev, PCI_MBOX_BAR_NUM); - /* Map PF-VF mailbox memory */ - cptvf->pfvf_mbox_base = devm_ioremap_wc(dev, offset, size); - if (!cptvf->pfvf_mbox_base) { - dev_err(&pdev->dev, "Unable to map BAR4\n"); - ret = 
-ENODEV; - goto clear_drvdata; - } + otx2_cpt_set_hw_caps(pdev, &cptvf->cap_flag); /* Initialize PF<=>VF mailbox */ ret = cptvf_pfvf_mbox_init(cptvf); if (ret) @@ -392,6 +410,7 @@ static void otx2_cptvf_remove(struct pci_dev *pdev) /* Supported devices */ static const struct pci_device_id otx2_cptvf_id_table[] = { {PCI_VDEVICE(CAVIUM, OTX2_CPT_PCI_VF_DEVICE_ID), 0}, + {PCI_VDEVICE(CAVIUM, CN10K_CPT_PCI_VF_DEVICE_ID), 0}, { 0, } /* end of table */ }; @@ -405,6 +424,6 @@ static struct pci_driver otx2_cptvf_pci_driver = { module_pci_driver(otx2_cptvf_pci_driver); MODULE_AUTHOR("Marvell"); -MODULE_DESCRIPTION("Marvell OcteonTX2 CPT Virtual Function Driver"); +MODULE_DESCRIPTION("Marvell RVU CPT Virtual Function Driver"); MODULE_LICENSE("GPL v2"); MODULE_DEVICE_TABLE(pci, otx2_cptvf_id_table); diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptvf_mbox.c b/drivers/crypto/marvell/octeontx2/otx2_cptvf_mbox.c index 5d73b711cba61..02cb9e44afd81 100644 --- a/drivers/crypto/marvell/octeontx2/otx2_cptvf_mbox.c +++ b/drivers/crypto/marvell/octeontx2/otx2_cptvf_mbox.c @@ -5,6 +5,48 @@ #include "otx2_cptvf.h" #include +int otx2_cpt_mbox_bbuf_init(struct otx2_cptvf_dev *cptvf, struct pci_dev *pdev) +{ + struct otx2_mbox_dev *mdev; + struct otx2_mbox *otx2_mbox; + + cptvf->bbuf_base = devm_kmalloc(&pdev->dev, MBOX_SIZE, GFP_KERNEL); + if (!cptvf->bbuf_base) + return -ENOMEM; + /* + * Overwrite mbox mbase to point to bounce buffer, so that PF/VF + * prepare all mbox messages in bounce buffer instead of directly + * in hw mbox memory. + */ + otx2_mbox = &cptvf->pfvf_mbox; + mdev = &otx2_mbox->dev[0]; + mdev->mbase = cptvf->bbuf_base; + + return 0; +} + +static void otx2_cpt_sync_mbox_bbuf(struct otx2_mbox *mbox, int devid) +{ + u16 msgs_offset = ALIGN(sizeof(struct mbox_hdr), MBOX_MSG_ALIGN); + void *hw_mbase = mbox->hwbase + (devid * MBOX_SIZE); + struct otx2_mbox_dev *mdev = &mbox->dev[devid]; + struct mbox_hdr *hdr; + u64 msg_size; + + if (mdev->mbase == hw_mbase) + return; + + hdr = hw_mbase + mbox->rx_start; + msg_size = hdr->msg_size; + + if (msg_size > mbox->rx_size - msgs_offset) + msg_size = mbox->rx_size - msgs_offset; + + /* Copy mbox messages from mbox memory to bounce buffer */ + memcpy(mdev->mbase + mbox->rx_start, + hw_mbase + mbox->rx_start, msg_size + msgs_offset); +} + irqreturn_t otx2_cptvf_pfvf_mbox_intr(int __always_unused irq, void *arg) { struct otx2_cptvf_dev *cptvf = arg; @@ -106,6 +148,7 @@ void otx2_cptvf_pfvf_mbox_handler(struct work_struct *work) cptvf = container_of(work, struct otx2_cptvf_dev, pfvf_mbox_work); pfvf_mbox = &cptvf->pfvf_mbox; + otx2_cpt_sync_mbox_bbuf(pfvf_mbox, 0); mdev = &pfvf_mbox->dev[0]; rsp_hdr = (struct mbox_hdr *)(mdev->mbase + pfvf_mbox->rx_start); if (rsp_hdr->num_msgs == 0) From eb33cd9116b2f1d193352c77bd829b61b1249b00 Mon Sep 17 00:00:00 2001 From: Srujana Challa Date: Tue, 25 May 2021 16:57:16 +0530 Subject: [PATCH 091/142] crypto: octeontx2 - add support to map LMTST region for CN10K On the CN10K platform, transmit/receive buffer alloc and free from/to hardware has changed to support burst operation, whereas previous silicon only supports freeing a single buffer at a time. To support this, firmware allocates a DRAM region for each PF/VF for storing LMTLINES. These LMTLINES are used to send CPT commands to HW. The PF/VF LMTST region is accessed via BAR4. The PF's LMTST region is followed by its VFs' mbox memory. The size of the region varies from 2KB to 256KB based on the number of LMTLINES configured. This patch adds support for mapping of the PF/VF LMTST region.
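Once the region is mapped, each CPT LF's LMTLINE is just a fixed-size slot inside it; a condensed sketch of the lookup the LF setup code performs in the next patch (LMTLINE_SIZE is 128 bytes there):

/* slot n's LMTLINE inside the mapped PF/VF LMTST region */
lf->lmtline = lfs->lmt_base + (slot * LMTLINE_SIZE);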
Signed-off-by: Srujana Challa Signed-off-by: Herbert Xu --- drivers/crypto/marvell/octeontx2/Makefile | 5 +- drivers/crypto/marvell/octeontx2/cn10k_cpt.c | 53 +++++++++++++++++++ drivers/crypto/marvell/octeontx2/cn10k_cpt.h | 13 +++++ .../marvell/octeontx2/otx2_cpt_common.h | 5 +- drivers/crypto/marvell/octeontx2/otx2_cptlf.h | 2 + .../marvell/octeontx2/otx2_cptpf_main.c | 5 ++ .../marvell/octeontx2/otx2_cptvf_main.c | 6 +++ 7 files changed, 86 insertions(+), 3 deletions(-) create mode 100644 drivers/crypto/marvell/octeontx2/cn10k_cpt.c create mode 100644 drivers/crypto/marvell/octeontx2/cn10k_cpt.h diff --git a/drivers/crypto/marvell/octeontx2/Makefile b/drivers/crypto/marvell/octeontx2/Makefile index 10e1fe056a9e5..c242d22008c33 100644 --- a/drivers/crypto/marvell/octeontx2/Makefile +++ b/drivers/crypto/marvell/octeontx2/Makefile @@ -2,9 +2,10 @@ obj-$(CONFIG_CRYPTO_DEV_OCTEONTX2_CPT) += rvu_cptpf.o rvu_cptvf.o rvu_cptpf-objs := otx2_cptpf_main.o otx2_cptpf_mbox.o \ - otx2_cpt_mbox_common.o otx2_cptpf_ucode.o otx2_cptlf.o + otx2_cpt_mbox_common.o otx2_cptpf_ucode.o otx2_cptlf.o \ + cn10k_cpt.o rvu_cptvf-objs := otx2_cptvf_main.o otx2_cptvf_mbox.o otx2_cptlf.o \ otx2_cpt_mbox_common.o otx2_cptvf_reqmgr.o \ - otx2_cptvf_algs.o + otx2_cptvf_algs.o cn10k_cpt.o ccflags-y += -I$(srctree)/drivers/net/ethernet/marvell/octeontx2/af diff --git a/drivers/crypto/marvell/octeontx2/cn10k_cpt.c b/drivers/crypto/marvell/octeontx2/cn10k_cpt.c new file mode 100644 index 0000000000000..57cf156934ab1 --- /dev/null +++ b/drivers/crypto/marvell/octeontx2/cn10k_cpt.c @@ -0,0 +1,53 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (C) 2021 Marvell. */ + +#include "otx2_cptpf.h" +#include "otx2_cptvf.h" +#include "otx2_cptlf.h" +#include "cn10k_cpt.h" + +int cn10k_cptpf_lmtst_init(struct otx2_cptpf_dev *cptpf) +{ + struct pci_dev *pdev = cptpf->pdev; + resource_size_t size; + u64 lmt_base; + + if (!test_bit(CN10K_LMTST, &cptpf->cap_flag)) + return 0; + + lmt_base = readq(cptpf->reg_base + RVU_PF_LMTLINE_ADDR); + if (!lmt_base) { + dev_err(&pdev->dev, "PF LMTLINE address not configured\n"); + return -ENOMEM; + } + size = pci_resource_len(pdev, PCI_MBOX_BAR_NUM); + size -= ((1 + cptpf->max_vfs) * MBOX_SIZE); + cptpf->lfs.lmt_base = devm_ioremap_wc(&pdev->dev, lmt_base, size); + if (!cptpf->lfs.lmt_base) { + dev_err(&pdev->dev, + "Mapping of PF LMTLINE address failed\n"); + return -ENOMEM; + } + + return 0; +} + +int cn10k_cptvf_lmtst_init(struct otx2_cptvf_dev *cptvf) +{ + struct pci_dev *pdev = cptvf->pdev; + resource_size_t offset, size; + + if (!test_bit(CN10K_LMTST, &cptvf->cap_flag)) + return 0; + + offset = pci_resource_start(pdev, PCI_MBOX_BAR_NUM); + size = pci_resource_len(pdev, PCI_MBOX_BAR_NUM); + /* Map VF LMILINE region */ + cptvf->lfs.lmt_base = devm_ioremap_wc(&pdev->dev, offset, size); + if (!cptvf->lfs.lmt_base) { + dev_err(&pdev->dev, "Unable to map BAR4\n"); + return -ENOMEM; + } + + return 0; +} diff --git a/drivers/crypto/marvell/octeontx2/cn10k_cpt.h b/drivers/crypto/marvell/octeontx2/cn10k_cpt.h new file mode 100644 index 0000000000000..b9a8c463eaf37 --- /dev/null +++ b/drivers/crypto/marvell/octeontx2/cn10k_cpt.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0-only + * Copyright (C) 2021 Marvell. 
+ */ +#ifndef __CN10K_CPT_H +#define __CN10K_CPT_H + +#include "otx2_cptpf.h" +#include "otx2_cptvf.h" + +int cn10k_cptpf_lmtst_init(struct otx2_cptpf_dev *cptpf); +int cn10k_cptvf_lmtst_init(struct otx2_cptvf_dev *cptvf); + +#endif /* __CN10K_CPTLF_H */ diff --git a/drivers/crypto/marvell/octeontx2/otx2_cpt_common.h b/drivers/crypto/marvell/octeontx2/otx2_cpt_common.h index 414427dcfa61b..c5445b05f53c2 100644 --- a/drivers/crypto/marvell/octeontx2/otx2_cpt_common.h +++ b/drivers/crypto/marvell/octeontx2/otx2_cpt_common.h @@ -27,6 +27,7 @@ /* HW capability flags */ #define CN10K_MBOX 0 +#define CN10K_LMTST 1 #define BAD_OTX2_CPT_ENG_TYPE OTX2_CPT_MAX_ENG_TYPES @@ -131,8 +132,10 @@ static inline bool is_dev_otx2(struct pci_dev *pdev) static inline void otx2_cpt_set_hw_caps(struct pci_dev *pdev, unsigned long *cap_flag) { - if (!is_dev_otx2(pdev)) + if (!is_dev_otx2(pdev)) { __set_bit(CN10K_MBOX, cap_flag); + __set_bit(CN10K_LMTST, cap_flag); + } } diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptlf.h b/drivers/crypto/marvell/octeontx2/otx2_cptlf.h index ab1678fc564d6..c87c18e311710 100644 --- a/drivers/crypto/marvell/octeontx2/otx2_cptlf.h +++ b/drivers/crypto/marvell/octeontx2/otx2_cptlf.h @@ -87,6 +87,8 @@ struct otx2_cptlf_info { struct otx2_cptlfs_info { /* Registers start address of VF/PF LFs are attached to */ void __iomem *reg_base; +#define LMTLINE_SIZE 128 + void __iomem *lmt_base; struct pci_dev *pdev; /* Device LFs are attached to */ struct otx2_cptlf_info lf[OTX2_CPT_MAX_LFS_NUM]; struct otx2_mbox *mbox; diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptpf_main.c b/drivers/crypto/marvell/octeontx2/otx2_cptpf_main.c index d341aecd3dd2f..4ec3a4613e74a 100644 --- a/drivers/crypto/marvell/octeontx2/otx2_cptpf_main.c +++ b/drivers/crypto/marvell/octeontx2/otx2_cptpf_main.c @@ -6,6 +6,7 @@ #include "otx2_cpt_common.h" #include "otx2_cptpf_ucode.h" #include "otx2_cptpf.h" +#include "cn10k_cpt.h" #include "rvu_reg.h" #define OTX2_CPT_DRV_NAME "rvu_cptpf" @@ -677,6 +678,10 @@ static int otx2_cptpf_probe(struct pci_dev *pdev, cptpf->max_vfs = pci_sriov_get_totalvfs(pdev); + err = cn10k_cptpf_lmtst_init(cptpf); + if (err) + goto unregister_intr; + /* Initialize CPT PF device */ err = cptpf_device_init(cptpf); if (err) diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptvf_main.c b/drivers/crypto/marvell/octeontx2/otx2_cptvf_main.c index 5178e0688d755..3411e664cf50c 100644 --- a/drivers/crypto/marvell/octeontx2/otx2_cptvf_main.c +++ b/drivers/crypto/marvell/octeontx2/otx2_cptvf_main.c @@ -5,6 +5,7 @@ #include "otx2_cptvf.h" #include "otx2_cptlf.h" #include "otx2_cptvf_algs.h" +#include "cn10k_cpt.h" #include #define OTX2_CPTVF_DRV_NAME "rvu_cptvf" @@ -364,6 +365,11 @@ static int otx2_cptvf_probe(struct pci_dev *pdev, cptvf->reg_base = pcim_iomap_table(pdev)[PCI_PF_REG_BAR_NUM]; otx2_cpt_set_hw_caps(pdev, &cptvf->cap_flag); + + ret = cn10k_cptvf_lmtst_init(cptvf); + if (ret) + goto clear_drvdata; + /* Initialize PF<=>VF mailbox */ ret = cptvf_pfvf_mbox_init(cptvf); if (ret) From 40a645f753b32346f1ab3953e769479561a19b8d Mon Sep 17 00:00:00 2001 From: Srujana Challa Date: Tue, 25 May 2021 16:57:17 +0530 Subject: [PATCH 092/142] crypto: octeontx2 - add support for CPT operations on CN10K CPT result format had changed for CN10K HW to accommodate more fields. This patch adds support to use new result format and new LMTST lines for CPT operations on CN10K platform. 
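Because the result word layout differs between the two silicons (CN9XX packs compcode:8/uc_compcode:8/doneint:1, while CN10K uses compcode:7/doneint:1/uc_compcode:8 plus rlen, spi and esn fields), the patch decodes completion codes through per-silicon ops instead of one fixed bitfield; a condensed sketch of the dispatch, using the names from the diff below:

/* ops picked at probe time: otx2_hw_ops on CN9XX, cn10k_hw_ops on CN10K */
u8 ccode = lfs->ops->cpt_get_compcode(cpt_status);
u8 uc_ccode = lfs->ops->cpt_get_uc_compcode(cpt_status);
lfs->ops->send_cmd(&cptinst, 1, lf);	/* LMTST-based path on CN10K */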
Signed-off-by: Srujana Challa Signed-off-by: Herbert Xu --- drivers/crypto/marvell/octeontx2/cn10k_cpt.c | 44 ++++++++++++++++++- drivers/crypto/marvell/octeontx2/cn10k_cpt.h | 23 ++++++++++ .../marvell/octeontx2/otx2_cpt_hw_types.h | 13 +++++- drivers/crypto/marvell/octeontx2/otx2_cptlf.c | 9 +++- drivers/crypto/marvell/octeontx2/otx2_cptlf.h | 8 ++++ .../marvell/octeontx2/otx2_cptpf_main.c | 2 +- .../marvell/octeontx2/otx2_cptpf_ucode.c | 32 +++++++++++--- .../marvell/octeontx2/otx2_cptpf_ucode.h | 8 ++-- .../marvell/octeontx2/otx2_cptvf_reqmgr.c | 17 ++++--- 9 files changed, 134 insertions(+), 22 deletions(-) diff --git a/drivers/crypto/marvell/octeontx2/cn10k_cpt.c b/drivers/crypto/marvell/octeontx2/cn10k_cpt.c index 57cf156934ab1..1499ef75b5c22 100644 --- a/drivers/crypto/marvell/octeontx2/cn10k_cpt.c +++ b/drivers/crypto/marvell/octeontx2/cn10k_cpt.c @@ -1,20 +1,57 @@ // SPDX-License-Identifier: GPL-2.0-only /* Copyright (C) 2021 Marvell. */ +#include #include "otx2_cptpf.h" #include "otx2_cptvf.h" #include "otx2_cptlf.h" #include "cn10k_cpt.h" +static struct cpt_hw_ops otx2_hw_ops = { + .send_cmd = otx2_cpt_send_cmd, + .cpt_get_compcode = otx2_cpt_get_compcode, + .cpt_get_uc_compcode = otx2_cpt_get_uc_compcode, +}; + +static struct cpt_hw_ops cn10k_hw_ops = { + .send_cmd = cn10k_cpt_send_cmd, + .cpt_get_compcode = cn10k_cpt_get_compcode, + .cpt_get_uc_compcode = cn10k_cpt_get_uc_compcode, +}; + +void cn10k_cpt_send_cmd(union otx2_cpt_inst_s *cptinst, u32 insts_num, + struct otx2_cptlf_info *lf) +{ + void __iomem *lmtline = lf->lmtline; + u64 val = (lf->slot & 0x7FF); + u64 tar_addr = 0; + + /* tar_addr<6:4> = Size of first LMTST - 1 in units of 128b. */ + tar_addr |= (__force u64)lf->ioreg | + (((OTX2_CPT_INST_SIZE/16) - 1) & 0x7) << 4; + /* + * Make sure memory areas pointed in CPT_INST_S + * are flushed before the instruction is sent to CPT + */ + dma_wmb(); + + /* Copy CPT command to LMTLINE */ + memcpy_toio(lmtline, cptinst, insts_num * OTX2_CPT_INST_SIZE); + cn10k_lmt_flush(val, tar_addr); +} + int cn10k_cptpf_lmtst_init(struct otx2_cptpf_dev *cptpf) { struct pci_dev *pdev = cptpf->pdev; resource_size_t size; u64 lmt_base; - if (!test_bit(CN10K_LMTST, &cptpf->cap_flag)) + if (!test_bit(CN10K_LMTST, &cptpf->cap_flag)) { + cptpf->lfs.ops = &otx2_hw_ops; return 0; + } + cptpf->lfs.ops = &cn10k_hw_ops; lmt_base = readq(cptpf->reg_base + RVU_PF_LMTLINE_ADDR); if (!lmt_base) { dev_err(&pdev->dev, "PF LMTLINE address not configured\n"); @@ -37,9 +74,12 @@ int cn10k_cptvf_lmtst_init(struct otx2_cptvf_dev *cptvf) struct pci_dev *pdev = cptvf->pdev; resource_size_t offset, size; - if (!test_bit(CN10K_LMTST, &cptvf->cap_flag)) + if (!test_bit(CN10K_LMTST, &cptvf->cap_flag)) { + cptvf->lfs.ops = &otx2_hw_ops; return 0; + } + cptvf->lfs.ops = &cn10k_hw_ops; offset = pci_resource_start(pdev, PCI_MBOX_BAR_NUM); size = pci_resource_len(pdev, PCI_MBOX_BAR_NUM); /* Map VF LMILINE region */ diff --git a/drivers/crypto/marvell/octeontx2/cn10k_cpt.h b/drivers/crypto/marvell/octeontx2/cn10k_cpt.h index b9a8c463eaf37..c091392b47e0f 100644 --- a/drivers/crypto/marvell/octeontx2/cn10k_cpt.h +++ b/drivers/crypto/marvell/octeontx2/cn10k_cpt.h @@ -4,9 +4,32 @@ #ifndef __CN10K_CPT_H #define __CN10K_CPT_H +#include "otx2_cpt_common.h" #include "otx2_cptpf.h" #include "otx2_cptvf.h" +static inline u8 cn10k_cpt_get_compcode(union otx2_cpt_res_s *result) +{ + return ((struct cn10k_cpt_res_s *)result)->compcode; +} + +static inline u8 cn10k_cpt_get_uc_compcode(union otx2_cpt_res_s *result) +{ + return ((struct 
cn10k_cpt_res_s *)result)->uc_compcode; +} + +static inline u8 otx2_cpt_get_compcode(union otx2_cpt_res_s *result) +{ + return ((struct cn9k_cpt_res_s *)result)->compcode; +} + +static inline u8 otx2_cpt_get_uc_compcode(union otx2_cpt_res_s *result) +{ + return ((struct cn9k_cpt_res_s *)result)->uc_compcode; +} + +void cn10k_cpt_send_cmd(union otx2_cpt_inst_s *cptinst, u32 insts_num, + struct otx2_cptlf_info *lf); int cn10k_cptpf_lmtst_init(struct otx2_cptpf_dev *cptpf); int cn10k_cptvf_lmtst_init(struct otx2_cptvf_dev *cptvf); diff --git a/drivers/crypto/marvell/octeontx2/otx2_cpt_hw_types.h b/drivers/crypto/marvell/octeontx2/otx2_cpt_hw_types.h index 391a457f71163..6f947978e4e89 100644 --- a/drivers/crypto/marvell/octeontx2/otx2_cpt_hw_types.h +++ b/drivers/crypto/marvell/octeontx2/otx2_cpt_hw_types.h @@ -138,7 +138,7 @@ enum otx2_cpt_comp_e { OTX2_CPT_COMP_E_FAULT = 0x02, OTX2_CPT_COMP_E_HWERR = 0x04, OTX2_CPT_COMP_E_INSTERR = 0x05, - OTX2_CPT_COMP_E_LAST_ENTRY = 0x06 + OTX2_CPT_COMP_E_WARN = 0x06 }; /* @@ -269,13 +269,22 @@ union otx2_cpt_inst_s { union otx2_cpt_res_s { u64 u[2]; - struct { + struct cn9k_cpt_res_s { u64 compcode:8; u64 uc_compcode:8; u64 doneint:1; u64 reserved_17_63:47; u64 reserved_64_127; } s; + + struct cn10k_cpt_res_s { + u64 compcode:7; + u64 doneint:1; + u64 uc_compcode:8; + u64 rlen:16; + u64 spi:32; + u64 esn; + } cn10k; }; /* diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptlf.c b/drivers/crypto/marvell/octeontx2/otx2_cptlf.c index 34aba15327612..c8350fcd60fab 100644 --- a/drivers/crypto/marvell/octeontx2/otx2_cptlf.c +++ b/drivers/crypto/marvell/octeontx2/otx2_cptlf.c @@ -379,9 +379,14 @@ int otx2_cptlf_init(struct otx2_cptlfs_info *lfs, u8 eng_grp_mask, int pri, for (slot = 0; slot < lfs->lfs_num; slot++) { lfs->lf[slot].lfs = lfs; lfs->lf[slot].slot = slot; - lfs->lf[slot].lmtline = lfs->reg_base + - OTX2_CPT_RVU_FUNC_ADDR_S(BLKADDR_LMT, slot, + if (lfs->lmt_base) + lfs->lf[slot].lmtline = lfs->lmt_base + + (slot * LMTLINE_SIZE); + else + lfs->lf[slot].lmtline = lfs->reg_base + + OTX2_CPT_RVU_FUNC_ADDR_S(BLKADDR_LMT, slot, OTX2_CPT_LMT_LF_LMTLINEX(0)); + lfs->lf[slot].ioreg = lfs->reg_base + OTX2_CPT_RVU_FUNC_ADDR_S(BLKADDR_CPT0, slot, OTX2_CPT_LF_NQX(0)); diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptlf.h b/drivers/crypto/marvell/octeontx2/otx2_cptlf.h index c87c18e311710..b691b6c1d5c45 100644 --- a/drivers/crypto/marvell/octeontx2/otx2_cptlf.h +++ b/drivers/crypto/marvell/octeontx2/otx2_cptlf.h @@ -84,6 +84,13 @@ struct otx2_cptlf_info { struct otx2_cptlf_wqe *wqe; /* Tasklet work info */ }; +struct cpt_hw_ops { + void (*send_cmd)(union otx2_cpt_inst_s *cptinst, u32 insts_num, + struct otx2_cptlf_info *lf); + u8 (*cpt_get_compcode)(union otx2_cpt_res_s *result); + u8 (*cpt_get_uc_compcode)(union otx2_cpt_res_s *result); +}; + struct otx2_cptlfs_info { /* Registers start address of VF/PF LFs are attached to */ void __iomem *reg_base; @@ -92,6 +99,7 @@ struct otx2_cptlfs_info { struct pci_dev *pdev; /* Device LFs are attached to */ struct otx2_cptlf_info lf[OTX2_CPT_MAX_LFS_NUM]; struct otx2_mbox *mbox; + struct cpt_hw_ops *ops; u8 are_lfs_attached; /* Whether CPT LFs are attached */ u8 lfs_num; /* Number of CPT LFs */ u8 kcrypto_eng_grp_num; /* Kernel crypto engine group number */ diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptpf_main.c b/drivers/crypto/marvell/octeontx2/otx2_cptpf_main.c index 4ec3a4613e74a..1fb04f9bb7ac4 100644 --- a/drivers/crypto/marvell/octeontx2/otx2_cptpf_main.c +++ 
b/drivers/crypto/marvell/octeontx2/otx2_cptpf_main.c @@ -586,7 +586,7 @@ static int cptpf_sriov_enable(struct pci_dev *pdev, int num_vfs) if (ret) goto disable_intr; - ret = otx2_cpt_create_eng_grps(cptpf->pdev, &cptpf->eng_grps); + ret = otx2_cpt_create_eng_grps(cptpf, &cptpf->eng_grps); if (ret) goto disable_intr; diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptpf_ucode.c b/drivers/crypto/marvell/octeontx2/otx2_cptpf_ucode.c index a531f4c8b4414..dff34b3ec09e1 100644 --- a/drivers/crypto/marvell/octeontx2/otx2_cptpf_ucode.c +++ b/drivers/crypto/marvell/octeontx2/otx2_cptpf_ucode.c @@ -16,6 +16,8 @@ #define LOADFVC_MAJOR_OP 0x01 #define LOADFVC_MINOR_OP 0x08 +#define CTX_FLUSH_TIMER_CNT 0xFFFFFF + struct fw_info_t { struct list_head ucodes; }; @@ -666,7 +668,8 @@ static int reserve_engines(struct device *dev, static void ucode_unload(struct device *dev, struct otx2_cpt_ucode *ucode) { if (ucode->va) { - dma_free_coherent(dev, ucode->size, ucode->va, ucode->dma); + dma_free_coherent(dev, OTX2_CPT_UCODE_SZ, ucode->va, + ucode->dma); ucode->va = NULL; ucode->dma = 0; ucode->size = 0; @@ -685,7 +688,7 @@ static int copy_ucode_to_dma_mem(struct device *dev, u32 i; /* Allocate DMAable space */ - ucode->va = dma_alloc_coherent(dev, ucode->size, &ucode->dma, + ucode->va = dma_alloc_coherent(dev, OTX2_CPT_UCODE_SZ, &ucode->dma, GFP_KERNEL); if (!ucode->va) return -ENOMEM; @@ -1100,11 +1103,12 @@ int otx2_cpt_get_eng_grp(struct otx2_cpt_eng_grps *eng_grps, int eng_type) return eng_grp_num; } -int otx2_cpt_create_eng_grps(struct pci_dev *pdev, +int otx2_cpt_create_eng_grps(struct otx2_cptpf_dev *cptpf, struct otx2_cpt_eng_grps *eng_grps) { struct otx2_cpt_uc_info_t *uc_info[OTX2_CPT_MAX_ETYPES_PER_GRP] = { }; struct otx2_cpt_engines engs[OTX2_CPT_MAX_ETYPES_PER_GRP] = { {0} }; + struct pci_dev *pdev = cptpf->pdev; struct fw_info_t fw_info; int ret; @@ -1180,6 +1184,23 @@ int otx2_cpt_create_eng_grps(struct pci_dev *pdev, eng_grps->is_grps_created = true; cpt_ucode_release_fw(&fw_info); + + if (is_dev_otx2(pdev)) + return 0; + /* + * Configure engine group mask to allow context prefetching + * for the groups. + */ + otx2_cpt_write_af_reg(&cptpf->afpf_mbox, pdev, CPT_AF_CTL, + OTX2_CPT_ALL_ENG_GRPS_MASK << 3 | BIT_ULL(16), + BLKADDR_CPT0); + /* + * Set interval to periodically flush dirty data for the next + * CTX cache entry. Set the interval count to maximum supported + * value. 
+ */ + otx2_cpt_write_af_reg(&cptpf->afpf_mbox, pdev, CPT_AF_CTX_FLUSH_TIMER, + CTX_FLUSH_TIMER_CNT, BLKADDR_CPT0); return 0; delete_eng_grp: @@ -1460,9 +1481,10 @@ int otx2_cpt_discover_eng_capabilities(struct otx2_cptpf_dev *cptpf) iq_cmd.cptr.s.grp = otx2_cpt_get_eng_grp(&cptpf->eng_grps, etype); otx2_cpt_fill_inst(&inst, &iq_cmd, rptr_baddr); - otx2_cpt_send_cmd(&inst, 1, &cptpf->lfs.lf[0]); + lfs->ops->send_cmd(&inst, 1, &cptpf->lfs.lf[0]); - while (result->s.compcode == OTX2_CPT_COMPLETION_CODE_INIT) + while (lfs->ops->cpt_get_compcode(result) == + OTX2_CPT_COMPLETION_CODE_INIT) cpu_relax(); cptpf->eng_caps[etype].u = be64_to_cpup(rptr); diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptpf_ucode.h b/drivers/crypto/marvell/octeontx2/otx2_cptpf_ucode.h index 6b0d432de0afa..fe019ab730b2d 100644 --- a/drivers/crypto/marvell/octeontx2/otx2_cptpf_ucode.h +++ b/drivers/crypto/marvell/octeontx2/otx2_cptpf_ucode.h @@ -23,11 +23,13 @@ /* Microcode version string length */ #define OTX2_CPT_UCODE_VER_STR_SZ 44 -/* Maximum number of supported engines/cores on OcteonTX2 platform */ -#define OTX2_CPT_MAX_ENGINES 128 +/* Maximum number of supported engines/cores on OcteonTX2/CN10K platform */ +#define OTX2_CPT_MAX_ENGINES 144 #define OTX2_CPT_ENGS_BITMASK_LEN BITS_TO_LONGS(OTX2_CPT_MAX_ENGINES) +#define OTX2_CPT_UCODE_SZ (64 * 1024) + /* Microcode types */ enum otx2_cpt_ucode_type { OTX2_CPT_AE_UC_TYPE = 1, /* AE-MAIN */ @@ -153,7 +155,7 @@ int otx2_cpt_init_eng_grps(struct pci_dev *pdev, struct otx2_cpt_eng_grps *eng_grps); void otx2_cpt_cleanup_eng_grps(struct pci_dev *pdev, struct otx2_cpt_eng_grps *eng_grps); -int otx2_cpt_create_eng_grps(struct pci_dev *pdev, +int otx2_cpt_create_eng_grps(struct otx2_cptpf_dev *cptpf, struct otx2_cpt_eng_grps *eng_grps); int otx2_cpt_disable_all_cores(struct otx2_cptpf_dev *cptpf); int otx2_cpt_get_eng_grp(struct otx2_cpt_eng_grps *eng_grps, int eng_type); diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptvf_reqmgr.c b/drivers/crypto/marvell/octeontx2/otx2_cptvf_reqmgr.c index d5c1c1b7c7e4b..811ded72ce5fb 100644 --- a/drivers/crypto/marvell/octeontx2/otx2_cptvf_reqmgr.c +++ b/drivers/crypto/marvell/octeontx2/otx2_cptvf_reqmgr.c @@ -320,7 +320,7 @@ static int process_request(struct pci_dev *pdev, struct otx2_cpt_req_info *req, cpt_req->dlen, false); /* Send CPT command */ - otx2_cpt_send_cmd(&cptinst, 1, lf); + lf->lfs->ops->send_cmd(&cptinst, 1, lf); /* * We allocate and prepare pending queue entry in critical section @@ -349,13 +349,14 @@ int otx2_cpt_do_request(struct pci_dev *pdev, struct otx2_cpt_req_info *req, &lfs->lf[cpu_num]); } -static int cpt_process_ccode(struct pci_dev *pdev, +static int cpt_process_ccode(struct otx2_cptlfs_info *lfs, union otx2_cpt_res_s *cpt_status, struct otx2_cpt_inst_info *info, u32 *res_code) { - u8 uc_ccode = cpt_status->s.uc_compcode; - u8 ccode = cpt_status->s.compcode; + u8 uc_ccode = lfs->ops->cpt_get_uc_compcode(cpt_status); + u8 ccode = lfs->ops->cpt_get_compcode(cpt_status); + struct pci_dev *pdev = lfs->pdev; switch (ccode) { case OTX2_CPT_COMP_E_FAULT: @@ -389,6 +390,7 @@ static int cpt_process_ccode(struct pci_dev *pdev, return 1; case OTX2_CPT_COMP_E_GOOD: + case OTX2_CPT_COMP_E_WARN: /* * Check microcode completion code, it is only valid * when completion code is CPT_COMP_E::GOOD @@ -426,7 +428,7 @@ static int cpt_process_ccode(struct pci_dev *pdev, return 0; } -static inline void process_pending_queue(struct pci_dev *pdev, +static inline void process_pending_queue(struct otx2_cptlfs_info *lfs, struct 
otx2_cpt_pending_queue *pqueue) { struct otx2_cpt_pending_entry *resume_pentry = NULL; @@ -436,6 +438,7 @@ static inline void process_pending_queue(struct pci_dev *pdev, struct otx2_cpt_inst_info *info = NULL; struct otx2_cpt_req_info *req = NULL; struct crypto_async_request *areq; + struct pci_dev *pdev = lfs->pdev; u32 res_code, resume_index; while (1) { @@ -476,7 +479,7 @@ static inline void process_pending_queue(struct pci_dev *pdev, goto process_pentry; } - if (cpt_process_ccode(pdev, cpt_status, info, &res_code)) { + if (cpt_process_ccode(lfs, cpt_status, info, &res_code)) { spin_unlock_bh(&pqueue->lock); return; } @@ -529,7 +532,7 @@ static inline void process_pending_queue(struct pci_dev *pdev, void otx2_cpt_post_process(struct otx2_cptlf_wqe *wqe) { - process_pending_queue(wqe->lfs->pdev, + process_pending_queue(wqe->lfs, &wqe->lfs->lf[wqe->lf_num].pqueue); } From 76c1f4e0efd8abeaa3c7789d10ef9c82d950bedd Mon Sep 17 00:00:00 2001 From: Srujana Challa Date: Tue, 25 May 2021 16:57:18 +0530 Subject: [PATCH 093/142] crypto: octeontx2 - enable and handle ME interrupts Adds master enable (ME) interrupt handler in PF. Upon receiving ME interrupt for a VF, PF clears it's transaction pending bit. Signed-off-by: Srujana Challa Signed-off-by: Herbert Xu --- .../marvell/octeontx2/otx2_cptpf_main.c | 118 ++++++++++++++---- 1 file changed, 95 insertions(+), 23 deletions(-) diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptpf_main.c b/drivers/crypto/marvell/octeontx2/otx2_cptpf_main.c index 1fb04f9bb7ac4..146a55ac4b9b0 100644 --- a/drivers/crypto/marvell/octeontx2/otx2_cptpf_main.c +++ b/drivers/crypto/marvell/octeontx2/otx2_cptpf_main.c @@ -63,45 +63,66 @@ static void cptpf_disable_vfpf_mbox_intr(struct otx2_cptpf_dev *cptpf, } } -static void cptpf_enable_vf_flr_intrs(struct otx2_cptpf_dev *cptpf) +static void cptpf_enable_vf_flr_me_intrs(struct otx2_cptpf_dev *cptpf, + int num_vfs) { - /* Clear interrupt if any */ + /* Clear FLR interrupt if any */ otx2_cpt_write64(cptpf->reg_base, BLKADDR_RVUM, 0, RVU_PF_VFFLR_INTX(0), - ~0x0ULL); - otx2_cpt_write64(cptpf->reg_base, BLKADDR_RVUM, 0, RVU_PF_VFFLR_INTX(1), - ~0x0ULL); + INTR_MASK(num_vfs)); /* Enable VF FLR interrupts */ otx2_cpt_write64(cptpf->reg_base, BLKADDR_RVUM, 0, - RVU_PF_VFFLR_INT_ENA_W1SX(0), ~0x0ULL); + RVU_PF_VFFLR_INT_ENA_W1SX(0), INTR_MASK(num_vfs)); + /* Clear ME interrupt if any */ + otx2_cpt_write64(cptpf->reg_base, BLKADDR_RVUM, 0, RVU_PF_VFME_INTX(0), + INTR_MASK(num_vfs)); + /* Enable VF ME interrupts */ + otx2_cpt_write64(cptpf->reg_base, BLKADDR_RVUM, 0, + RVU_PF_VFME_INT_ENA_W1SX(0), INTR_MASK(num_vfs)); + + if (num_vfs <= 64) + return; + + otx2_cpt_write64(cptpf->reg_base, BLKADDR_RVUM, 0, RVU_PF_VFFLR_INTX(1), + INTR_MASK(num_vfs - 64)); otx2_cpt_write64(cptpf->reg_base, BLKADDR_RVUM, 0, - RVU_PF_VFFLR_INT_ENA_W1SX(1), ~0x0ULL); + RVU_PF_VFFLR_INT_ENA_W1SX(1), INTR_MASK(num_vfs - 64)); + + otx2_cpt_write64(cptpf->reg_base, BLKADDR_RVUM, 0, RVU_PF_VFME_INTX(1), + INTR_MASK(num_vfs - 64)); + otx2_cpt_write64(cptpf->reg_base, BLKADDR_RVUM, 0, + RVU_PF_VFME_INT_ENA_W1SX(1), INTR_MASK(num_vfs - 64)); } -static void cptpf_disable_vf_flr_intrs(struct otx2_cptpf_dev *cptpf, +static void cptpf_disable_vf_flr_me_intrs(struct otx2_cptpf_dev *cptpf, int num_vfs) { int vector; /* Disable VF FLR interrupts */ otx2_cpt_write64(cptpf->reg_base, BLKADDR_RVUM, 0, - RVU_PF_VFFLR_INT_ENA_W1CX(0), ~0x0ULL); + RVU_PF_VFFLR_INT_ENA_W1CX(0), INTR_MASK(num_vfs)); + vector = pci_irq_vector(cptpf->pdev, RVU_PF_INT_VEC_VFFLR0); + 
free_irq(vector, cptpf); + + /* Disable VF ME interrupts */ otx2_cpt_write64(cptpf->reg_base, BLKADDR_RVUM, 0, - RVU_PF_VFFLR_INT_ENA_W1CX(1), ~0x0ULL); + RVU_PF_VFME_INT_ENA_W1CX(0), INTR_MASK(num_vfs)); + vector = pci_irq_vector(cptpf->pdev, RVU_PF_INT_VEC_VFME0); + free_irq(vector, cptpf); - /* Clear interrupt if any */ - otx2_cpt_write64(cptpf->reg_base, BLKADDR_RVUM, 0, RVU_PF_VFFLR_INTX(0), - ~0x0ULL); - otx2_cpt_write64(cptpf->reg_base, BLKADDR_RVUM, 0, RVU_PF_VFFLR_INTX(1), - ~0x0ULL); + if (num_vfs <= 64) + return; - vector = pci_irq_vector(cptpf->pdev, RVU_PF_INT_VEC_VFFLR0); + otx2_cpt_write64(cptpf->reg_base, BLKADDR_RVUM, 0, + RVU_PF_VFFLR_INT_ENA_W1CX(1), INTR_MASK(num_vfs - 64)); + vector = pci_irq_vector(cptpf->pdev, RVU_PF_INT_VEC_VFFLR1); free_irq(vector, cptpf); - if (num_vfs > 64) { - vector = pci_irq_vector(cptpf->pdev, RVU_PF_INT_VEC_VFFLR1); - free_irq(vector, cptpf); - } + otx2_cpt_write64(cptpf->reg_base, BLKADDR_RVUM, 0, + RVU_PF_VFME_INT_ENA_W1CX(1), INTR_MASK(num_vfs - 64)); + vector = pci_irq_vector(cptpf->pdev, RVU_PF_INT_VEC_VFME1); + free_irq(vector, cptpf); } static void cptpf_flr_wq_handler(struct work_struct *work) @@ -173,11 +194,38 @@ static irqreturn_t cptpf_vf_flr_intr(int __always_unused irq, void *arg) return IRQ_HANDLED; } +static irqreturn_t cptpf_vf_me_intr(int __always_unused irq, void *arg) +{ + struct otx2_cptpf_dev *cptpf = arg; + int reg, vf, num_reg = 1; + u64 intr; + + if (cptpf->max_vfs > 64) + num_reg = 2; + + for (reg = 0; reg < num_reg; reg++) { + intr = otx2_cpt_read64(cptpf->reg_base, BLKADDR_RVUM, 0, + RVU_PF_VFME_INTX(reg)); + if (!intr) + continue; + for (vf = 0; vf < 64; vf++) { + if (!(intr & BIT_ULL(vf))) + continue; + otx2_cpt_write64(cptpf->reg_base, BLKADDR_RVUM, 0, + RVU_PF_VFTRPENDX(reg), BIT_ULL(vf)); + /* Clear interrupt */ + otx2_cpt_write64(cptpf->reg_base, BLKADDR_RVUM, 0, + RVU_PF_VFME_INTX(reg), BIT_ULL(vf)); + } + } + return IRQ_HANDLED; +} + static void cptpf_unregister_vfpf_intr(struct otx2_cptpf_dev *cptpf, int num_vfs) { cptpf_disable_vfpf_mbox_intr(cptpf, num_vfs); - cptpf_disable_vf_flr_intrs(cptpf, num_vfs); + cptpf_disable_vf_flr_me_intrs(cptpf, num_vfs); } static int cptpf_register_vfpf_intr(struct otx2_cptpf_dev *cptpf, int num_vfs) @@ -203,6 +251,15 @@ static int cptpf_register_vfpf_intr(struct otx2_cptpf_dev *cptpf, int num_vfs) "IRQ registration failed for VFFLR0 irq\n"); goto free_mbox0_irq; } + vector = pci_irq_vector(pdev, RVU_PF_INT_VEC_VFME0); + /* Register VF ME interrupt handler */ + ret = request_irq(vector, cptpf_vf_me_intr, 0, "CPTPF ME0", cptpf); + if (ret) { + dev_err(dev, + "IRQ registration failed for PFVF mbox0 irq\n"); + goto free_flr0_irq; + } + if (num_vfs > 64) { vector = pci_irq_vector(pdev, RVU_PF_INT_VEC_VFPF_MBOX1); ret = request_irq(vector, otx2_cptpf_vfpf_mbox_intr, 0, @@ -210,7 +267,7 @@ static int cptpf_register_vfpf_intr(struct otx2_cptpf_dev *cptpf, int num_vfs) if (ret) { dev_err(dev, "IRQ registration failed for PFVF mbox1 irq\n"); - goto free_flr0_irq; + goto free_me0_irq; } vector = pci_irq_vector(pdev, RVU_PF_INT_VEC_VFFLR1); /* Register VF FLR interrupt handler */ @@ -221,15 +278,30 @@ static int cptpf_register_vfpf_intr(struct otx2_cptpf_dev *cptpf, int num_vfs) "IRQ registration failed for VFFLR1 irq\n"); goto free_mbox1_irq; } + vector = pci_irq_vector(pdev, RVU_PF_INT_VEC_VFME1); + /* Register VF FLR interrupt handler */ + ret = request_irq(vector, cptpf_vf_me_intr, 0, "CPTPF ME1", + cptpf); + if (ret) { + dev_err(dev, + "IRQ registration failed for VFFLR1 
irq\n"); + goto free_flr1_irq; + } } cptpf_enable_vfpf_mbox_intr(cptpf, num_vfs); - cptpf_enable_vf_flr_intrs(cptpf); + cptpf_enable_vf_flr_me_intrs(cptpf, num_vfs); return 0; +free_flr1_irq: + vector = pci_irq_vector(pdev, RVU_PF_INT_VEC_VFFLR1); + free_irq(vector, cptpf); free_mbox1_irq: vector = pci_irq_vector(pdev, RVU_PF_INT_VEC_VFPF_MBOX1); free_irq(vector, cptpf); +free_me0_irq: + vector = pci_irq_vector(pdev, RVU_PF_INT_VEC_VFME0); + free_irq(vector, cptpf); free_flr0_irq: vector = pci_irq_vector(pdev, RVU_PF_INT_VEC_VFFLR0); free_irq(vector, cptpf); From d5c1477b2f39173a988c01694d9bfafc771fa6ef Mon Sep 17 00:00:00 2001 From: Kai Ye Date: Fri, 28 May 2021 18:26:13 +0800 Subject: [PATCH 094/142] crypto: hisilicon/sec - add new type of SQE Add new type of sqe for Kunpeng930, which is the next generation of SEC accelerator hardware. The hardware adds a new SQE data structure. Signed-off-by: Kai Ye Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/sec2/sec.h | 5 +- drivers/crypto/hisilicon/sec2/sec_crypto.h | 174 +++++++++++++++++++++ 2 files changed, 178 insertions(+), 1 deletion(-) diff --git a/drivers/crypto/hisilicon/sec2/sec.h b/drivers/crypto/hisilicon/sec2/sec.h index dfdce2f21e658..28679cf4e4683 100644 --- a/drivers/crypto/hisilicon/sec2/sec.h +++ b/drivers/crypto/hisilicon/sec2/sec.h @@ -38,7 +38,10 @@ struct sec_aead_req { /* SEC request of Crypto */ struct sec_req { - struct sec_sqe sec_sqe; + union { + struct sec_sqe sec_sqe; + struct sec_sqe3 sec_sqe3; + }; struct sec_ctx *ctx; struct sec_qp_ctx *qp_ctx; diff --git a/drivers/crypto/hisilicon/sec2/sec_crypto.h b/drivers/crypto/hisilicon/sec2/sec_crypto.h index 9c78edac56a4b..3b64e1705479f 100644 --- a/drivers/crypto/hisilicon/sec2/sec_crypto.h +++ b/drivers/crypto/hisilicon/sec2/sec_crypto.h @@ -44,6 +44,7 @@ enum sec_ckey_type { enum sec_bd_type { SEC_BD_TYPE1 = 0x1, SEC_BD_TYPE2 = 0x2, + SEC_BD_TYPE3 = 0x3, }; enum sec_auth { @@ -63,6 +64,16 @@ enum sec_addr_type { SEC_PRP = 0x2, }; +enum { + AUTHPAD_PAD, + AUTHPAD_NOPAD, +}; + +enum { + AIGEN_GEN, + AIGEN_NOGEN, +}; + struct sec_sqe_type2 { /* * mac_len: 0~4 bits @@ -209,6 +220,169 @@ struct sec_sqe { struct sec_sqe_type2 type2; }; +struct bd3_auth_ivin { + __le64 a_ivin_addr; + __le32 rsvd0; + __le32 rsvd1; +} __packed __aligned(4); + +struct bd3_skip_data { + __le32 rsvd0; + + /* + * gran_num: 0~15 bits + * reserved: 16~31 bits + */ + __le32 gran_num; + + /* + * src_skip_data_len: 0~24 bits + * reserved: 25~31 bits + */ + __le32 src_skip_data_len; + + /* + * dst_skip_data_len: 0~24 bits + * reserved: 25~31 bits + */ + __le32 dst_skip_data_len; +}; + +struct bd3_stream_scene { + __le64 c_ivin_addr; + __le64 long_a_data_len; + + /* + * auth_pad: 0~1 bits + * stream_protocol: 2~4 bits + * reserved: 5~7 bits + */ + __u8 stream_auth_pad; + __u8 plaintext_type; + __le16 pad_len_1p3; +} __packed __aligned(4); + +struct bd3_no_scene { + __le64 c_ivin_addr; + __le32 rsvd0; + __le32 rsvd1; + __le32 rsvd2; +} __packed __aligned(4); + +struct bd3_check_sum { + __u8 rsvd0; + __u8 hac_sva_status; + __le16 check_sum_i; +}; + +struct bd3_tls_type_back { + __u8 tls_1p3_type_back; + __u8 hac_sva_status; + __le16 pad_len_1p3_back; +}; + +struct sec_sqe3 { + /* + * type: 0~3 bit + * bd_invalid: 4 bit + * scene: 5~8 bit + * de: 9~10 bit + * src_addr_type: 11~13 bit + * dst_addr_type: 14~16 bit + * mac_addr_type: 17~19 bit + * reserved: 20~31 bits + */ + __le32 bd_param; + + /* + * cipher: 0~1 bits + * ci_gen: 2~3 bit + * c_icv_len: 4~9 bit + * c_width: 10~12 bits + * c_key_len: 13~15 
bits + */ + __le16 c_icv_key; + + /* + * c_mode : 0~3 bits + * c_alg : 4~7 bits + */ + __u8 c_mode_alg; + + /* + * nonce_len : 0~3 bits + * huk : 4 bits + * cal_iv_addr_en : 5 bits + * seq : 6 bits + * reserved : 7 bits + */ + __u8 huk_iv_seq; + + __le64 tag; + __le64 data_src_addr; + __le64 a_key_addr; + union { + struct bd3_auth_ivin auth_ivin; + struct bd3_skip_data skip_data; + }; + + __le64 c_key_addr; + + /* + * auth: 0~1 bits + * ai_gen: 2~3 bits + * mac_len: 4~8 bits + * akey_len: 9~14 bits + * a_alg: 15~20 bits + * key_sel: 21~24 bits + * updata_key: 25 bits + * reserved: 26~31 bits + */ + __le32 auth_mac_key; + __le32 salt; + __le16 auth_src_offset; + __le16 cipher_src_offset; + + /* + * auth_len: 0~23 bit + * auth_key_offset: 24~31 bits + */ + __le32 a_len_key; + + /* + * cipher_len: 0~23 bit + * auth_ivin_offset: 24~31 bits + */ + __le32 c_len_ivin; + __le64 data_dst_addr; + __le64 mac_addr; + union { + struct bd3_stream_scene stream_scene; + struct bd3_no_scene no_scene; + }; + + /* + * done: 0 bit + * icv: 1~3 bit + * csc: 4~6 bit + * flag: 7~10 bit + * reserved: 11~15 bit + */ + __le16 done_flag; + __u8 error_type; + __u8 warning_type; + union { + __le32 mac_i; + __le32 kek_key_addr_l; + }; + union { + __le32 kek_key_addr_h; + struct bd3_check_sum check_sum; + struct bd3_tls_type_back tls_type_back; + }; + __le32 counter; +} __packed __aligned(4); + int sec_register_to_crypto(struct hisi_qm *qm); void sec_unregister_from_crypto(struct hisi_qm *qm); #endif From adc3f65a7806dda12894870731509b6778735319 Mon Sep 17 00:00:00 2001 From: Kai Ye Date: Fri, 28 May 2021 18:26:14 +0800 Subject: [PATCH 095/142] crypto: hisilicon/sec - driver adapt to new SQE Due to Kunpeng930 adds new SQE data structure, the SEC driver needs to be upgraded. It mainly includes bd parsing process and bd filling process. 
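A point worth calling out in the hunks that follow: the two SQE generations return the request to the completion handler differently. The v2 SQE's 16-bit tag only fits an index into the per-queue req_list, while the v3 SQE's 64-bit tag is wide enough to carry the request pointer itself, so pre_parse_finished_bd3()/sec_req_cb() skip the table lookup. A minimal standalone sketch of that pointer-in-tag round trip (plain C; endianness conversions and the real struct sec_req are omitted):

#include <stdint.h>
#include <stdio.h>

struct sec_req_demo { int id; };	/* stand-in for struct sec_req */

int main(void)
{
	struct sec_req_demo req = { .id = 42 };

	/* Submit side (v3): the 64-bit tag stores the pointer,
	 * mirroring sec_sqe3->tag = cpu_to_le64(req) in the patch. */
	uint64_t tag = (uintptr_t)&req;

	/* Completion side (v3): recover the request with no req_list
	 * lookup, as in req = (void *)(uintptr_t)status.tag below. */
	struct sec_req_demo *back = (struct sec_req_demo *)(uintptr_t)tag;

	printf("recovered id = %d\n", back->id);	/* prints 42 */
	return 0;
}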
Signed-off-by: Kai Ye Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/sec2/sec.h | 1 + drivers/crypto/hisilicon/sec2/sec_crypto.c | 291 ++++++++++++++++++--- drivers/crypto/hisilicon/sec2/sec_crypto.h | 7 + 3 files changed, 256 insertions(+), 43 deletions(-) diff --git a/drivers/crypto/hisilicon/sec2/sec.h b/drivers/crypto/hisilicon/sec2/sec.h index 28679cf4e4683..14ba66da75855 100644 --- a/drivers/crypto/hisilicon/sec2/sec.h +++ b/drivers/crypto/hisilicon/sec2/sec.h @@ -140,6 +140,7 @@ struct sec_ctx { bool pbuf_supported; struct sec_cipher_ctx c_ctx; struct sec_auth_ctx a_ctx; + u8 type_supported; struct device *dev; }; diff --git a/drivers/crypto/hisilicon/sec2/sec_crypto.c b/drivers/crypto/hisilicon/sec2/sec_crypto.c index 133aede8bf078..f4b1c8cbb4d51 100644 --- a/drivers/crypto/hisilicon/sec2/sec_crypto.c +++ b/drivers/crypto/hisilicon/sec2/sec_crypto.c @@ -37,10 +37,22 @@ #define SEC_AEAD_ALG_OFFSET 11 #define SEC_AUTH_OFFSET 6 +#define SEC_DE_OFFSET_V3 9 +#define SEC_SCENE_OFFSET_V3 5 +#define SEC_CKEY_OFFSET_V3 13 +#define SEC_SRC_SGL_OFFSET_V3 11 +#define SEC_DST_SGL_OFFSET_V3 14 +#define SEC_CALG_OFFSET_V3 4 +#define SEC_AKEY_OFFSET_V3 9 +#define SEC_MAC_OFFSET_V3 4 +#define SEC_AUTH_ALG_OFFSET_V3 15 +#define SEC_CIPHER_AUTH_V3 0xbf +#define SEC_AUTH_CIPHER_V3 0x40 #define SEC_FLAG_OFFSET 7 #define SEC_FLAG_MASK 0x0780 #define SEC_TYPE_MASK 0x0F #define SEC_DONE_MASK 0x0001 +#define SEC_SQE_LEN_RATE_MASK 0x3 #define SEC_TOTAL_IV_SZ (SEC_IV_SIZE * QM_Q_DEPTH) #define SEC_SGL_SGE_NR 128 @@ -145,44 +157,90 @@ static int sec_aead_verify(struct sec_req *req) return 0; } +static u8 pre_parse_finished_bd(struct bd_status *status, void *resp) +{ + struct sec_sqe *bd = resp; + + status->done = le16_to_cpu(bd->type2.done_flag) & SEC_DONE_MASK; + status->flag = (le16_to_cpu(bd->type2.done_flag) & + SEC_FLAG_MASK) >> SEC_FLAG_OFFSET; + status->tag = le16_to_cpu(bd->type2.tag); + status->err_type = bd->type2.error_type; + + return bd->type_cipher_auth & SEC_TYPE_MASK; +} + +static u8 pre_parse_finished_bd3(struct bd_status *status, void *resp) +{ + struct sec_sqe3 *bd3 = resp; + + status->done = le16_to_cpu(bd3->done_flag) & SEC_DONE_MASK; + status->flag = (le16_to_cpu(bd3->done_flag) & + SEC_FLAG_MASK) >> SEC_FLAG_OFFSET; + status->tag = le64_to_cpu(bd3->tag); + status->err_type = bd3->error_type; + + return le32_to_cpu(bd3->bd_param) & SEC_TYPE_MASK; +} + +static int sec_cb_status_check(struct sec_req *req, + struct bd_status *status) +{ + struct sec_ctx *ctx = req->ctx; + + if (unlikely(req->err_type || status->done != SEC_SQE_DONE)) { + dev_err_ratelimited(ctx->dev, "err_type[%d], done[%u]\n", + req->err_type, status->done); + return -EIO; + } + + if (unlikely(ctx->alg_type == SEC_SKCIPHER)) { + if (unlikely(status->flag != SEC_SQE_CFLAG)) { + dev_err_ratelimited(ctx->dev, "flag[%u]\n", + status->flag); + return -EIO; + } + } + + return 0; +} + static void sec_req_cb(struct hisi_qp *qp, void *resp) { struct sec_qp_ctx *qp_ctx = qp->qp_ctx; struct sec_dfx *dfx = &qp_ctx->ctx->sec->debug.dfx; - struct sec_sqe *bd = resp; + u8 type_supported = qp_ctx->ctx->type_supported; + struct bd_status status; struct sec_ctx *ctx; struct sec_req *req; - u16 done, flag; - int err = 0; + int err; u8 type; - type = bd->type_cipher_auth & SEC_TYPE_MASK; - if (unlikely(type != SEC_BD_TYPE2)) { + if (type_supported == SEC_BD_TYPE2) { + type = pre_parse_finished_bd(&status, resp); + req = qp_ctx->req_list[status.tag]; + } else { + type = pre_parse_finished_bd3(&status, resp); + req = (void 
*)(uintptr_t)status.tag; + } + + if (unlikely(type != type_supported)) { atomic64_inc(&dfx->err_bd_cnt); pr_err("err bd type [%d]\n", type); return; } - req = qp_ctx->req_list[le16_to_cpu(bd->type2.tag)]; if (unlikely(!req)) { atomic64_inc(&dfx->invalid_req_cnt); atomic_inc(&qp->qp_status.used); return; } - req->err_type = bd->type2.error_type; + + req->err_type = status.err_type; ctx = req->ctx; - done = le16_to_cpu(bd->type2.done_flag) & SEC_DONE_MASK; - flag = (le16_to_cpu(bd->type2.done_flag) & - SEC_FLAG_MASK) >> SEC_FLAG_OFFSET; - if (unlikely(req->err_type || done != SEC_SQE_DONE || - (ctx->alg_type == SEC_SKCIPHER && flag != SEC_SQE_CFLAG) || - (ctx->alg_type == SEC_AEAD && flag != SEC_SQE_AEAD_FLAG))) { - dev_err_ratelimited(ctx->dev, - "err_type[%d],done[%d],flag[%d]\n", - req->err_type, done, flag); - err = -EIO; + err = sec_cb_status_check(req, &status); + if (err) atomic64_inc(&dfx->done_flag_cnt); - } if (ctx->alg_type == SEC_AEAD && !req->c_req.encrypt) err = sec_aead_verify(req); @@ -382,10 +440,11 @@ static int sec_create_qp_ctx(struct hisi_qm *qm, struct sec_ctx *ctx, qp = ctx->qps[qp_ctx_id]; qp->req_type = 0; qp->qp_ctx = qp_ctx; - qp->req_cb = sec_req_cb; qp_ctx->qp = qp; qp_ctx->ctx = ctx; + qp->req_cb = sec_req_cb; + mutex_init(&qp_ctx->req_lock); idr_init(&qp_ctx->req_idr); INIT_LIST_HEAD(&qp_ctx->backlog); @@ -615,19 +674,25 @@ static int sec_skcipher_aes_sm4_setkey(struct sec_cipher_ctx *c_ctx, return -EINVAL; } } else { - switch (keylen) { - case AES_KEYSIZE_128: - c_ctx->c_key_len = SEC_CKEY_128BIT; - break; - case AES_KEYSIZE_192: - c_ctx->c_key_len = SEC_CKEY_192BIT; - break; - case AES_KEYSIZE_256: - c_ctx->c_key_len = SEC_CKEY_256BIT; - break; - default: - pr_err("hisi_sec2: aes key error!\n"); + if (c_ctx->c_alg == SEC_CALG_SM4 && + keylen != AES_KEYSIZE_128) { + pr_err("hisi_sec2: sm4 key error!\n"); return -EINVAL; + } else { + switch (keylen) { + case AES_KEYSIZE_128: + c_ctx->c_key_len = SEC_CKEY_128BIT; + break; + case AES_KEYSIZE_192: + c_ctx->c_key_len = SEC_CKEY_192BIT; + break; + case AES_KEYSIZE_256: + c_ctx->c_key_len = SEC_CKEY_256BIT; + break; + default: + pr_err("hisi_sec2: aes key error!\n"); + return -EINVAL; + } } } @@ -915,6 +980,12 @@ static int sec_aead_setkey(struct crypto_aead *tfm, const u8 *key, goto bad_key; } + if ((ctx->a_ctx.mac_len & SEC_SQE_LEN_RATE_MASK) || + (ctx->a_ctx.a_key_len & SEC_SQE_LEN_RATE_MASK)) { + dev_err(dev, "MAC or AUTH key length error!\n"); + goto bad_key; + } + return 0; bad_key: @@ -1014,29 +1085,75 @@ static int sec_skcipher_bd_fill(struct sec_ctx *ctx, struct sec_req *req) cipher = SEC_CIPHER_DEC << SEC_CIPHER_OFFSET; sec_sqe->type_cipher_auth = bd_type | cipher; - if (req->use_pbuf) + /* Set destination and source address type */ + if (req->use_pbuf) { sa_type = SEC_PBUF << SEC_SRC_SGL_OFFSET; - else + da_type = SEC_PBUF << SEC_DST_SGL_OFFSET; + } else { sa_type = SEC_SGL << SEC_SRC_SGL_OFFSET; + da_type = SEC_SGL << SEC_DST_SGL_OFFSET; + } + + sec_sqe->sdm_addr_type |= da_type; scene = SEC_COMM_SCENE << SEC_SCENE_OFFSET; if (c_req->c_in_dma != c_req->c_out_dma) de = 0x1 << SEC_DE_OFFSET; sec_sqe->sds_sa_type = (de | scene | sa_type); - /* Just set DST address type */ - if (req->use_pbuf) - da_type = SEC_PBUF << SEC_DST_SGL_OFFSET; - else - da_type = SEC_SGL << SEC_DST_SGL_OFFSET; - sec_sqe->sdm_addr_type |= da_type; - sec_sqe->type2.clen_ivhlen |= cpu_to_le32(c_req->c_len); sec_sqe->type2.tag = cpu_to_le16((u16)req->req_id); return 0; } +static int sec_skcipher_bd_fill_v3(struct sec_ctx *ctx, struct 
sec_req *req) +{ + struct sec_sqe3 *sec_sqe3 = &req->sec_sqe3; + struct sec_cipher_ctx *c_ctx = &ctx->c_ctx; + struct sec_cipher_req *c_req = &req->c_req; + u32 bd_param = 0; + u16 cipher; + + memset(sec_sqe3, 0, sizeof(struct sec_sqe3)); + + sec_sqe3->c_key_addr = cpu_to_le64(c_ctx->c_key_dma); + sec_sqe3->no_scene.c_ivin_addr = cpu_to_le64(c_req->c_ivin_dma); + sec_sqe3->data_src_addr = cpu_to_le64(c_req->c_in_dma); + sec_sqe3->data_dst_addr = cpu_to_le64(c_req->c_out_dma); + + sec_sqe3->c_mode_alg = ((u8)c_ctx->c_alg << SEC_CALG_OFFSET_V3) | + c_ctx->c_mode; + sec_sqe3->c_icv_key |= cpu_to_le16(((u16)c_ctx->c_key_len) << + SEC_CKEY_OFFSET_V3); + + if (c_req->encrypt) + cipher = SEC_CIPHER_ENC; + else + cipher = SEC_CIPHER_DEC; + sec_sqe3->c_icv_key |= cpu_to_le16(cipher); + + if (req->use_pbuf) { + bd_param |= SEC_PBUF << SEC_SRC_SGL_OFFSET_V3; + bd_param |= SEC_PBUF << SEC_DST_SGL_OFFSET_V3; + } else { + bd_param |= SEC_SGL << SEC_SRC_SGL_OFFSET_V3; + bd_param |= SEC_SGL << SEC_DST_SGL_OFFSET_V3; + } + + bd_param |= SEC_COMM_SCENE << SEC_SCENE_OFFSET_V3; + if (c_req->c_in_dma != c_req->c_out_dma) + bd_param |= 0x1 << SEC_DE_OFFSET_V3; + + bd_param |= SEC_BD_TYPE3; + sec_sqe3->bd_param = cpu_to_le32(bd_param); + + sec_sqe3->c_len_ivin |= cpu_to_le32(c_req->c_len); + sec_sqe3->tag = cpu_to_le64(req); + + return 0; +} + static void sec_update_iv(struct sec_req *req, enum sec_alg_type alg_type) { struct aead_request *aead_req = req->aead_req.aead_req; @@ -1170,6 +1287,57 @@ static int sec_aead_bd_fill(struct sec_ctx *ctx, struct sec_req *req) return 0; } +static void sec_auth_bd_fill_ex_v3(struct sec_auth_ctx *ctx, int dir, + struct sec_req *req, struct sec_sqe3 *sqe3) +{ + struct sec_aead_req *a_req = &req->aead_req; + struct sec_cipher_req *c_req = &req->c_req; + struct aead_request *aq = a_req->aead_req; + + sqe3->a_key_addr = cpu_to_le64(ctx->a_key_dma); + + sqe3->auth_mac_key |= + cpu_to_le32((u32)(ctx->mac_len / + SEC_SQE_LEN_RATE) << SEC_MAC_OFFSET_V3); + + sqe3->auth_mac_key |= + cpu_to_le32((u32)(ctx->a_key_len / + SEC_SQE_LEN_RATE) << SEC_AKEY_OFFSET_V3); + + sqe3->auth_mac_key |= + cpu_to_le32((u32)(ctx->a_alg) << SEC_AUTH_ALG_OFFSET_V3); + + if (dir) { + sqe3->auth_mac_key |= cpu_to_le32((u32)SEC_AUTH_TYPE1); + sqe3->huk_iv_seq &= SEC_CIPHER_AUTH_V3; + } else { + sqe3->auth_mac_key |= cpu_to_le32((u32)SEC_AUTH_TYPE1); + sqe3->huk_iv_seq |= SEC_AUTH_CIPHER_V3; + } + sqe3->a_len_key = cpu_to_le32(c_req->c_len + aq->assoclen); + + sqe3->cipher_src_offset = cpu_to_le16((u16)aq->assoclen); + + sqe3->mac_addr = cpu_to_le64(a_req->out_mac_dma); +} + +static int sec_aead_bd_fill_v3(struct sec_ctx *ctx, struct sec_req *req) +{ + struct sec_auth_ctx *auth_ctx = &ctx->a_ctx; + struct sec_sqe3 *sec_sqe3 = &req->sec_sqe3; + int ret; + + ret = sec_skcipher_bd_fill_v3(ctx, req); + if (unlikely(ret)) { + dev_err(ctx->dev, "skcipher bd3 fill is error!\n"); + return ret; + } + + sec_auth_bd_fill_ex_v3(auth_ctx, req->c_req.encrypt, req, sec_sqe3); + + return 0; +} + static void sec_aead_callback(struct sec_ctx *c, struct sec_req *req, int err) { struct aead_request *a_req = req->aead_req.aead_req; @@ -1303,13 +1471,44 @@ static const struct sec_req_op sec_aead_req_ops = { .process = sec_process, }; +static const struct sec_req_op sec_skcipher_req_ops_v3 = { + .buf_map = sec_skcipher_sgl_map, + .buf_unmap = sec_skcipher_sgl_unmap, + .do_transfer = sec_skcipher_copy_iv, + .bd_fill = sec_skcipher_bd_fill_v3, + .bd_send = sec_bd_send, + .callback = sec_skcipher_callback, + .process = sec_process, +}; 
+ +static const struct sec_req_op sec_aead_req_ops_v3 = { + .buf_map = sec_aead_sgl_map, + .buf_unmap = sec_aead_sgl_unmap, + .do_transfer = sec_aead_copy_iv, + .bd_fill = sec_aead_bd_fill_v3, + .bd_send = sec_bd_send, + .callback = sec_aead_callback, + .process = sec_process, +}; + static int sec_skcipher_ctx_init(struct crypto_skcipher *tfm) { struct sec_ctx *ctx = crypto_skcipher_ctx(tfm); + int ret; - ctx->req_op = &sec_skcipher_req_ops; + ret = sec_skcipher_init(tfm); + if (ret) + return ret; + + if (ctx->sec->qm.ver < QM_HW_V3) { + ctx->type_supported = SEC_BD_TYPE2; + ctx->req_op = &sec_skcipher_req_ops; + } else { + ctx->type_supported = SEC_BD_TYPE3; + ctx->req_op = &sec_skcipher_req_ops_v3; + } - return sec_skcipher_init(tfm); + return ret; } static void sec_skcipher_ctx_exit(struct crypto_skcipher *tfm) @@ -1330,10 +1529,16 @@ static int sec_aead_init(struct crypto_aead *tfm) return -EINVAL; } - ctx->req_op = &sec_aead_req_ops; ret = sec_ctx_base_init(ctx); if (ret) return ret; + if (ctx->sec->qm.ver < QM_HW_V3) { + ctx->type_supported = SEC_BD_TYPE2; + ctx->req_op = &sec_aead_req_ops; + } else { + ctx->type_supported = SEC_BD_TYPE3; + ctx->req_op = &sec_aead_req_ops_v3; + } ret = sec_auth_init(ctx); if (ret) diff --git a/drivers/crypto/hisilicon/sec2/sec_crypto.h b/drivers/crypto/hisilicon/sec2/sec_crypto.h index 3b64e1705479f..163e8134bb3d5 100644 --- a/drivers/crypto/hisilicon/sec2/sec_crypto.h +++ b/drivers/crypto/hisilicon/sec2/sec_crypto.h @@ -64,6 +64,13 @@ enum sec_addr_type { SEC_PRP = 0x2, }; +struct bd_status { + u64 tag; + u8 done; + u8 err_type; + u16 flag; +}; + enum { AUTHPAD_PAD, AUTHPAD_NOPAD, From 7b44c0eecd6ade576bfb7a104dcdae5580237420 Mon Sep 17 00:00:00 2001 From: Kai Ye Date: Fri, 28 May 2021 19:42:04 +0800 Subject: [PATCH 096/142] crypto: hisilicon/sec - add new skcipher mode for SEC Add new skcipher algorithms for Kunpeng930 SEC: OFB(AES), CFB(AES), CTR(AES), OFB(SM4), CFB(SM4), CTR(SM4). 
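For the CTR algorithms added below, the driver keeps the software IV in step with the hardware by adding the number of processed blocks to the big-endian counter: sec_update_iv() rounds the block count up and calls ctr_iv_inc(). A standalone copy of that helper with a quick carry-propagation check; note the second parameter, named "bits" in the patch, is actually the IV size in bytes, so it is renamed here:

#include <stdint.h>
#include <stdio.h>

static void ctr_iv_inc(uint8_t *counter, uint8_t bytes, uint32_t nums)
{
	do {
		--bytes;
		nums += counter[bytes];
		counter[bytes] = nums & 0xFF;	/* BITS_MASK */
		nums >>= 8;			/* BYTE_BITS */
	} while (bytes && nums);
}

int main(void)
{
	uint8_t iv[16] = { [15] = 0xFF };	/* low byte about to carry */

	ctr_iv_inc(iv, sizeof(iv), 2);		/* advance by two blocks */
	printf("%02x %02x\n", iv[14], iv[15]);	/* prints: 01 01 */
	return 0;
}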
Signed-off-by: Kai Ye Signed-off-by: Wenkai Lin Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/sec2/sec_crypto.c | 103 ++++++++++++++++++--- drivers/crypto/hisilicon/sec2/sec_crypto.h | 3 + 2 files changed, 94 insertions(+), 12 deletions(-) diff --git a/drivers/crypto/hisilicon/sec2/sec_crypto.c b/drivers/crypto/hisilicon/sec2/sec_crypto.c index f4b1c8cbb4d51..f4b77d0ce8f3e 100644 --- a/drivers/crypto/hisilicon/sec2/sec_crypto.c +++ b/drivers/crypto/hisilicon/sec2/sec_crypto.c @@ -78,6 +78,9 @@ #define SEC_SQE_CFLAG 2 #define SEC_SQE_AEAD_FLAG 3 #define SEC_SQE_DONE 0x1 +#define MAX_INPUT_DATA_LEN 0xFFFE00 +#define BITS_MASK 0xFF +#define BYTE_BITS 0x8 /* Get an en/de-cipher queue cyclically to balance load over queues of TFM */ static inline int sec_alloc_queue_id(struct sec_ctx *ctx, struct sec_req *req) @@ -751,12 +754,16 @@ static int sec_setkey_##name(struct crypto_skcipher *tfm, const u8 *key,\ GEN_SEC_SETKEY_FUNC(aes_ecb, SEC_CALG_AES, SEC_CMODE_ECB) GEN_SEC_SETKEY_FUNC(aes_cbc, SEC_CALG_AES, SEC_CMODE_CBC) GEN_SEC_SETKEY_FUNC(aes_xts, SEC_CALG_AES, SEC_CMODE_XTS) - +GEN_SEC_SETKEY_FUNC(aes_ofb, SEC_CALG_AES, SEC_CMODE_OFB) +GEN_SEC_SETKEY_FUNC(aes_cfb, SEC_CALG_AES, SEC_CMODE_CFB) +GEN_SEC_SETKEY_FUNC(aes_ctr, SEC_CALG_AES, SEC_CMODE_CTR) GEN_SEC_SETKEY_FUNC(3des_ecb, SEC_CALG_3DES, SEC_CMODE_ECB) GEN_SEC_SETKEY_FUNC(3des_cbc, SEC_CALG_3DES, SEC_CMODE_CBC) - GEN_SEC_SETKEY_FUNC(sm4_xts, SEC_CALG_SM4, SEC_CMODE_XTS) GEN_SEC_SETKEY_FUNC(sm4_cbc, SEC_CALG_SM4, SEC_CMODE_CBC) +GEN_SEC_SETKEY_FUNC(sm4_ofb, SEC_CALG_SM4, SEC_CMODE_OFB) +GEN_SEC_SETKEY_FUNC(sm4_cfb, SEC_CALG_SM4, SEC_CMODE_CFB) +GEN_SEC_SETKEY_FUNC(sm4_ctr, SEC_CALG_SM4, SEC_CMODE_CTR) static int sec_cipher_pbuf_map(struct sec_ctx *ctx, struct sec_req *req, struct scatterlist *src) @@ -1154,6 +1161,17 @@ static int sec_skcipher_bd_fill_v3(struct sec_ctx *ctx, struct sec_req *req) return 0; } +/* increment counter (128-bit int) */ +static void ctr_iv_inc(__u8 *counter, __u8 bits, __u32 nums) +{ + do { + --bits; + nums += counter[bits]; + counter[bits] = nums & BITS_MASK; + nums >>= BYTE_BITS; + } while (bits && nums); +} + static void sec_update_iv(struct sec_req *req, enum sec_alg_type alg_type) { struct aead_request *aead_req = req->aead_req.aead_req; @@ -1177,10 +1195,17 @@ static void sec_update_iv(struct sec_req *req, enum sec_alg_type alg_type) cryptlen = aead_req->cryptlen; } - sz = sg_pcopy_to_buffer(sgl, sg_nents(sgl), iv, iv_size, - cryptlen - iv_size); - if (unlikely(sz != iv_size)) - dev_err(req->ctx->dev, "copy output iv error!\n"); + if (req->ctx->c_ctx.c_mode == SEC_CMODE_CBC) { + sz = sg_pcopy_to_buffer(sgl, sg_nents(sgl), iv, iv_size, + cryptlen - iv_size); + if (unlikely(sz != iv_size)) + dev_err(req->ctx->dev, "copy output iv error!\n"); + } else { + sz = cryptlen / iv_size; + if (cryptlen % iv_size) + sz += 1; + ctr_iv_inc(iv, iv_size, sz); + } } static struct sec_req *sec_back_req_clear(struct sec_ctx *ctx, @@ -1211,8 +1236,9 @@ static void sec_skcipher_callback(struct sec_ctx *ctx, struct sec_req *req, sec_free_req_id(req); - /* IV output at encrypto of CBC mode */ - if (!err && ctx->c_ctx.c_mode == SEC_CMODE_CBC && req->c_req.encrypt) + /* IV output at encrypto of CBC/CTR mode */ + if (!err && (ctx->c_ctx.c_mode == SEC_CMODE_CBC || + ctx->c_ctx.c_mode == SEC_CMODE_CTR) && req->c_req.encrypt) sec_update_iv(req, SEC_SKCIPHER); while (1) { @@ -1422,7 +1448,8 @@ static int sec_process(struct sec_ctx *ctx, struct sec_req *req) goto err_uninit_req; /* Output IV as decrypto */ - if 
(ctx->c_ctx.c_mode == SEC_CMODE_CBC && !req->c_req.encrypt) + if (!req->c_req.encrypt && (ctx->c_ctx.c_mode == SEC_CMODE_CBC || + ctx->c_ctx.c_mode == SEC_CMODE_CTR)) sec_update_iv(req, ctx->alg_type); ret = ctx->req_op->bd_send(ctx, req); @@ -1634,6 +1661,14 @@ static int sec_skcipher_cryptlen_ckeck(struct sec_ctx *ctx, ret = -EINVAL; } break; + case SEC_CMODE_CFB: + case SEC_CMODE_OFB: + case SEC_CMODE_CTR: + if (unlikely(ctx->sec->qm.ver < QM_HW_V3)) { + dev_err(dev, "skcipher HW version error!\n"); + ret = -EINVAL; + } + break; default: ret = -EINVAL; } @@ -1647,7 +1682,8 @@ static int sec_skcipher_param_check(struct sec_ctx *ctx, struct sec_req *sreq) struct device *dev = ctx->dev; u8 c_alg = ctx->c_ctx.c_alg; - if (unlikely(!sk_req->src || !sk_req->dst)) { + if (unlikely(!sk_req->src || !sk_req->dst || + sk_req->cryptlen > MAX_INPUT_DATA_LEN)) { dev_err(dev, "skcipher input param error!\n"); return -EINVAL; } @@ -1762,6 +1798,32 @@ static struct skcipher_alg sec_skciphers[] = { AES_BLOCK_SIZE, AES_BLOCK_SIZE) }; +static struct skcipher_alg sec_skciphers_v3[] = { + SEC_SKCIPHER_ALG("ofb(aes)", sec_setkey_aes_ofb, + AES_MIN_KEY_SIZE, AES_MAX_KEY_SIZE, + SEC_MIN_BLOCK_SZ, AES_BLOCK_SIZE) + + SEC_SKCIPHER_ALG("cfb(aes)", sec_setkey_aes_cfb, + AES_MIN_KEY_SIZE, AES_MAX_KEY_SIZE, + SEC_MIN_BLOCK_SZ, AES_BLOCK_SIZE) + + SEC_SKCIPHER_ALG("ctr(aes)", sec_setkey_aes_ctr, + AES_MIN_KEY_SIZE, AES_MAX_KEY_SIZE, + SEC_MIN_BLOCK_SZ, AES_BLOCK_SIZE) + + SEC_SKCIPHER_ALG("ofb(sm4)", sec_setkey_sm4_ofb, + AES_MIN_KEY_SIZE, AES_MIN_KEY_SIZE, + SEC_MIN_BLOCK_SZ, AES_BLOCK_SIZE) + + SEC_SKCIPHER_ALG("cfb(sm4)", sec_setkey_sm4_cfb, + AES_MIN_KEY_SIZE, AES_MIN_KEY_SIZE, + SEC_MIN_BLOCK_SZ, AES_BLOCK_SIZE) + + SEC_SKCIPHER_ALG("ctr(sm4)", sec_setkey_sm4_ctr, + AES_MIN_KEY_SIZE, AES_MIN_KEY_SIZE, + SEC_MIN_BLOCK_SZ, AES_BLOCK_SIZE) +}; + static int sec_aead_param_check(struct sec_ctx *ctx, struct sec_req *sreq) { struct aead_request *req = sreq->aead_req.aead_req; @@ -1878,15 +1940,32 @@ int sec_register_to_crypto(struct hisi_qm *qm) if (ret) return ret; + if (qm->ver > QM_HW_V2) { + ret = crypto_register_skciphers(sec_skciphers_v3, + ARRAY_SIZE(sec_skciphers_v3)); + if (ret) + goto reg_skcipher_fail; + } ret = crypto_register_aeads(sec_aeads, ARRAY_SIZE(sec_aeads)); if (ret) - crypto_unregister_skciphers(sec_skciphers, - ARRAY_SIZE(sec_skciphers)); + goto reg_aead_fail; + return ret; + +reg_aead_fail: + if (qm->ver > QM_HW_V2) + crypto_unregister_skciphers(sec_skciphers_v3, + ARRAY_SIZE(sec_skciphers_v3)); +reg_skcipher_fail: + crypto_unregister_skciphers(sec_skciphers, + ARRAY_SIZE(sec_skciphers)); return ret; } void sec_unregister_from_crypto(struct hisi_qm *qm) { + if (qm->ver > QM_HW_V2) + crypto_unregister_skciphers(sec_skciphers_v3, + ARRAY_SIZE(sec_skciphers_v3)); crypto_unregister_skciphers(sec_skciphers, ARRAY_SIZE(sec_skciphers)); crypto_unregister_aeads(sec_aeads, ARRAY_SIZE(sec_aeads)); diff --git a/drivers/crypto/hisilicon/sec2/sec_crypto.h b/drivers/crypto/hisilicon/sec2/sec_crypto.h index 163e8134bb3d5..c9bfe75d32e36 100644 --- a/drivers/crypto/hisilicon/sec2/sec_crypto.h +++ b/drivers/crypto/hisilicon/sec2/sec_crypto.h @@ -7,6 +7,7 @@ #define SEC_IV_SIZE 24 #define SEC_MAX_KEY_SIZE 64 #define SEC_COMM_SCENE 0 +#define SEC_MIN_BLOCK_SZ 1 enum sec_calg { SEC_CALG_3DES = 0x1, @@ -29,6 +30,8 @@ enum sec_mac_len { enum sec_cmode { SEC_CMODE_ECB = 0x0, SEC_CMODE_CBC = 0x1, + SEC_CMODE_CFB = 0x2, + SEC_CMODE_OFB = 0x3, SEC_CMODE_CTR = 0x4, SEC_CMODE_XTS = 0x7, }; From 
5652d55a76f6f59f0c1cfc7b90050742738cd227 Mon Sep 17 00:00:00 2001 From: Kai Ye Date: Fri, 28 May 2021 19:42:05 +0800 Subject: [PATCH 097/142] crypto: hisilicon/sec - add fallback tfm supporting for XTS mode Add fallback tfm supporting for hisi_sec driver. Due to the hardware not supports 192bit key length when using XTS mode. So the driver needs to setting the soft fallback skcipher tfm for user. Signed-off-by: Kai Ye Signed-off-by: Longfang Liu Reported-by: kernel test robot Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/sec2/sec.h | 4 + drivers/crypto/hisilicon/sec2/sec_crypto.c | 85 +++++++++++++++++++++- 2 files changed, 86 insertions(+), 3 deletions(-) diff --git a/drivers/crypto/hisilicon/sec2/sec.h b/drivers/crypto/hisilicon/sec2/sec.h index 14ba66da75855..935d8d95dcb91 100644 --- a/drivers/crypto/hisilicon/sec2/sec.h +++ b/drivers/crypto/hisilicon/sec2/sec.h @@ -97,6 +97,10 @@ struct sec_cipher_ctx { u8 c_mode; u8 c_alg; u8 c_key_len; + + /* add software support */ + bool fallback; + struct crypto_sync_skcipher *fbtfm; }; /* SEC queue context which defines queue's relatives */ diff --git a/drivers/crypto/hisilicon/sec2/sec_crypto.c b/drivers/crypto/hisilicon/sec2/sec_crypto.c index f4b77d0ce8f3e..b91cf2b33b987 100644 --- a/drivers/crypto/hisilicon/sec2/sec_crypto.c +++ b/drivers/crypto/hisilicon/sec2/sec_crypto.c @@ -21,6 +21,7 @@ #define SEC_PRIORITY 4001 #define SEC_XTS_MIN_KEY_SIZE (2 * AES_MIN_KEY_SIZE) +#define SEC_XTS_MID_KEY_SIZE (3 * AES_MIN_KEY_SIZE) #define SEC_XTS_MAX_KEY_SIZE (2 * AES_MAX_KEY_SIZE) #define SEC_DES3_2KEY_SIZE (2 * DES_KEY_SIZE) #define SEC_DES3_3KEY_SIZE (3 * DES_KEY_SIZE) @@ -81,6 +82,7 @@ #define MAX_INPUT_DATA_LEN 0xFFFE00 #define BITS_MASK 0xFF #define BYTE_BITS 0x8 +#define SEC_XTS_NAME_SZ 0x3 /* Get an en/de-cipher queue cyclically to balance load over queues of TFM */ static inline int sec_alloc_queue_id(struct sec_ctx *ctx, struct sec_req *req) @@ -598,6 +600,26 @@ static void sec_auth_uninit(struct sec_ctx *ctx) a_ctx->a_key, a_ctx->a_key_dma); } +static int sec_skcipher_fbtfm_init(struct crypto_skcipher *tfm) +{ + const char *alg = crypto_tfm_alg_name(&tfm->base); + struct sec_ctx *ctx = crypto_skcipher_ctx(tfm); + struct sec_cipher_ctx *c_ctx = &ctx->c_ctx; + + c_ctx->fallback = false; + if (likely(strncmp(alg, "xts", SEC_XTS_NAME_SZ))) + return 0; + + c_ctx->fbtfm = crypto_alloc_sync_skcipher(alg, 0, + CRYPTO_ALG_NEED_FALLBACK); + if (IS_ERR(c_ctx->fbtfm)) { + pr_err("failed to alloc fallback tfm!\n"); + return PTR_ERR(c_ctx->fbtfm); + } + + return 0; +} + static int sec_skcipher_init(struct crypto_skcipher *tfm) { struct sec_ctx *ctx = crypto_skcipher_ctx(tfm); @@ -619,8 +641,14 @@ static int sec_skcipher_init(struct crypto_skcipher *tfm) if (ret) goto err_cipher_init; + ret = sec_skcipher_fbtfm_init(tfm); + if (ret) + goto err_fbtfm_init; + return 0; +err_fbtfm_init: + sec_cipher_uninit(ctx); err_cipher_init: sec_ctx_base_uninit(ctx); return ret; @@ -630,6 +658,9 @@ static void sec_skcipher_uninit(struct crypto_skcipher *tfm) { struct sec_ctx *ctx = crypto_skcipher_ctx(tfm); + if (ctx->c_ctx.fbtfm) + crypto_free_sync_skcipher(ctx->c_ctx.fbtfm); + sec_cipher_uninit(ctx); sec_ctx_base_uninit(ctx); } @@ -669,6 +700,9 @@ static int sec_skcipher_aes_sm4_setkey(struct sec_cipher_ctx *c_ctx, case SEC_XTS_MIN_KEY_SIZE: c_ctx->c_key_len = SEC_CKEY_128BIT; break; + case SEC_XTS_MID_KEY_SIZE: + c_ctx->fallback = true; + break; case SEC_XTS_MAX_KEY_SIZE: c_ctx->c_key_len = SEC_CKEY_256BIT; break; @@ -740,7 +774,13 @@ static int 
sec_skcipher_setkey(struct crypto_skcipher *tfm, const u8 *key, } memcpy(c_ctx->c_key, key, keylen); - + if (c_ctx->fallback) { + ret = crypto_sync_skcipher_setkey(c_ctx->fbtfm, key, keylen); + if (ret) { + dev_err(dev, "failed to set fallback skcipher key!\n"); + return ret; + } + } return 0; } @@ -1709,6 +1749,37 @@ static int sec_skcipher_param_check(struct sec_ctx *ctx, struct sec_req *sreq) return -EINVAL; } +static int sec_skcipher_soft_crypto(struct sec_ctx *ctx, + struct skcipher_request *sreq, bool encrypt) +{ + struct sec_cipher_ctx *c_ctx = &ctx->c_ctx; + struct device *dev = ctx->dev; + int ret; + + SYNC_SKCIPHER_REQUEST_ON_STACK(subreq, c_ctx->fbtfm); + + if (!c_ctx->fbtfm) { + dev_err(dev, "failed to check fallback tfm\n"); + return -EINVAL; + } + + skcipher_request_set_sync_tfm(subreq, c_ctx->fbtfm); + + /* software need sync mode to do crypto */ + skcipher_request_set_callback(subreq, sreq->base.flags, + NULL, NULL); + skcipher_request_set_crypt(subreq, sreq->src, sreq->dst, + sreq->cryptlen, sreq->iv); + if (encrypt) + ret = crypto_skcipher_encrypt(subreq); + else + ret = crypto_skcipher_decrypt(subreq); + + skcipher_request_zero(subreq); + + return ret; +} + static int sec_skcipher_crypto(struct skcipher_request *sk_req, bool encrypt) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(sk_req); @@ -1716,8 +1787,11 @@ static int sec_skcipher_crypto(struct skcipher_request *sk_req, bool encrypt) struct sec_ctx *ctx = crypto_skcipher_ctx(tfm); int ret; - if (!sk_req->cryptlen) + if (!sk_req->cryptlen) { + if (ctx->c_ctx.c_mode == SEC_CMODE_XTS) + return -EINVAL; return 0; + } req->flag = sk_req->base.flags; req->c_req.sk_req = sk_req; @@ -1728,6 +1802,9 @@ static int sec_skcipher_crypto(struct skcipher_request *sk_req, bool encrypt) if (unlikely(ret)) return -EINVAL; + if (unlikely(ctx->c_ctx.fallback)) + return sec_skcipher_soft_crypto(ctx, sk_req, encrypt); + return ctx->req_op->process(ctx, req); } @@ -1748,7 +1825,9 @@ static int sec_skcipher_decrypt(struct skcipher_request *sk_req) .cra_name = sec_cra_name,\ .cra_driver_name = "hisi_sec_"sec_cra_name,\ .cra_priority = SEC_PRIORITY,\ - .cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY,\ + .cra_flags = CRYPTO_ALG_ASYNC |\ + CRYPTO_ALG_ALLOCATES_MEMORY |\ + CRYPTO_ALG_NEED_FALLBACK,\ .cra_blocksize = blk_size,\ .cra_ctxsize = sizeof(struct sec_ctx),\ .cra_module = THIS_MODULE,\ From 6161f40c630bd7ced5f236cd5fbabec06e47afae Mon Sep 17 00:00:00 2001 From: Kai Ye Date: Fri, 28 May 2021 19:42:06 +0800 Subject: [PATCH 098/142] crypto: hisilicon/sec - fixup 3des minimum key size declaration Fixup the 3des algorithm minimum key size declaration. 
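For reference, the sizes behind the change (DES_KEY_SIZE is 8 bytes, per <crypto/des.h>): the old minimum, SEC_DES3_2KEY_SIZE, admitted 16-byte keys, i.e. two-key 3DES with K3 = K1; raising .min_keysize to SEC_DES3_3KEY_SIZE leaves only full three-key 3DES accepted.

#define DES_KEY_SIZE		8			/* <crypto/des.h> */
#define SEC_DES3_2KEY_SIZE	(2 * DES_KEY_SIZE)	/* 16: K1,K2 (K3 = K1) */
#define SEC_DES3_3KEY_SIZE	(3 * DES_KEY_SIZE)	/* 24: K1,K2,K3 */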
Signed-off-by: Kai Ye Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/sec2/sec_crypto.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/hisilicon/sec2/sec_crypto.c b/drivers/crypto/hisilicon/sec2/sec_crypto.c index b91cf2b33b987..5926b64d0d989 100644 --- a/drivers/crypto/hisilicon/sec2/sec_crypto.c +++ b/drivers/crypto/hisilicon/sec2/sec_crypto.c @@ -1861,11 +1861,11 @@ static struct skcipher_alg sec_skciphers[] = { AES_BLOCK_SIZE, AES_BLOCK_SIZE) SEC_SKCIPHER_ALG("ecb(des3_ede)", sec_setkey_3des_ecb, - SEC_DES3_2KEY_SIZE, SEC_DES3_3KEY_SIZE, + SEC_DES3_3KEY_SIZE, SEC_DES3_3KEY_SIZE, DES3_EDE_BLOCK_SIZE, 0) SEC_SKCIPHER_ALG("cbc(des3_ede)", sec_setkey_3des_cbc, - SEC_DES3_2KEY_SIZE, SEC_DES3_3KEY_SIZE, + SEC_DES3_3KEY_SIZE, SEC_DES3_3KEY_SIZE, DES3_EDE_BLOCK_SIZE, DES3_EDE_BLOCK_SIZE) SEC_SKCIPHER_ALG("xts(sm4)", sec_setkey_sm4_xts, From 1e609f5fb73b6b17af369a031f3a4c2b9b405854 Mon Sep 17 00:00:00 2001 From: Hui Tang Date: Sat, 29 May 2021 16:57:37 +0800 Subject: [PATCH 099/142] crypto: hisilicon/hpre - fix ecdh self test issue When the key length is zero, use stdrng to generate private key to pass the crypto ecdh-nist-p256 self test on vector 2. Signed-off-by: Hui Tang Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/hpre/hpre_crypto.c | 34 +++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/drivers/crypto/hisilicon/hpre/hpre_crypto.c b/drivers/crypto/hisilicon/hpre/hpre_crypto.c index 294c3688aabb0..6ba5d8af38755 100644 --- a/drivers/crypto/hisilicon/hpre/hpre_crypto.c +++ b/drivers/crypto/hisilicon/hpre/hpre_crypto.c @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -38,6 +39,9 @@ struct hpre_ctx; #define HPRE_DFX_SEC_TO_US 1000000 #define HPRE_DFX_US_TO_NS 1000 +/* due to nist p521 */ +#define HPRE_ECC_MAX_KSZ 66 + /* size in bytes of the n prime */ #define HPRE_ECC_NIST_P192_N_SIZE 24 #define HPRE_ECC_NIST_P256_N_SIZE 32 @@ -1333,11 +1337,32 @@ static bool hpre_key_is_zero(char *key, unsigned short key_sz) return true; } +static int ecdh_gen_privkey(struct hpre_ctx *ctx, struct ecdh *params) +{ + struct device *dev = ctx->dev; + int ret; + + ret = crypto_get_default_rng(); + if (ret) { + dev_err(dev, "failed to get default rng, ret = %d!\n", ret); + return ret; + } + + ret = crypto_rng_get_bytes(crypto_default_rng, (u8 *)params->key, + params->key_size); + crypto_put_default_rng(); + if (ret) + dev_err(dev, "failed to get rng, ret = %d!\n", ret); + + return ret; +} + static int hpre_ecdh_set_secret(struct crypto_kpp *tfm, const void *buf, unsigned int len) { struct hpre_ctx *ctx = kpp_tfm_ctx(tfm); struct device *dev = ctx->dev; + char key[HPRE_ECC_MAX_KSZ]; unsigned int sz, sz_shift; struct ecdh params; int ret; @@ -1347,6 +1372,15 @@ static int hpre_ecdh_set_secret(struct crypto_kpp *tfm, const void *buf, return -EINVAL; } + /* Use stdrng to generate private key */ + if (!params.key || !params.key_size) { + params.key = key; + params.key_size = hpre_ecdh_get_curvesz(ctx->curve_id); + ret = ecdh_gen_privkey(ctx, ¶ms); + if (ret) + return ret; + } + if (hpre_key_is_zero(params.key, params.key_size)) { dev_err(dev, "Invalid hpre key!\n"); return -EINVAL; From 9612581fc10919ef70aae1fa4dcf6e20d85a14a7 Mon Sep 17 00:00:00 2001 From: Hui Tang Date: Sat, 29 May 2021 16:58:19 +0800 Subject: [PATCH 100/142] crypto: hisilicon/hpre - add check before gx modulo p The result of gx modulo p is zero if gx is equal to p, so return error immediately if gx is equal to p. 
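The arithmetic behind the check: as the existing comment in hpre_curve25519_src_init() notes, the only out-of-range inputs are (2^255 - 19) through (2^255 - 1). Those reduce to 0 through 18 mod p, and only gx == p lands on 0, so a single equality test catches the invalid case before the modulo path runs. A standalone sketch of the resulting three-way comparison, with a toy 4-byte big-endian prime standing in for the 32-byte p:

#include <stdio.h>
#include <string.h>

static const unsigned char p[4] = { 0x7f, 0xff, 0xff, 0xed };

static const char *classify(const unsigned char *gx)
{
	int cmp = memcmp(gx, p, sizeof(p));

	if (cmp == 0)
		return "reject: gx mod p == 0";	/* the new -EINVAL path */
	if (cmp > 0)
		return "reduce gx mod p";	/* hpre_curve25519_src_modulo_p() */
	return "use gx as-is";
}

int main(void)
{
	const unsigned char below[4] = { 0x7f, 0xff, 0xff, 0xec };
	const unsigned char equal[4] = { 0x7f, 0xff, 0xff, 0xed };
	const unsigned char above[4] = { 0x7f, 0xff, 0xff, 0xee };

	printf("%s\n%s\n%s\n", classify(below), classify(equal),
	       classify(above));
	return 0;
}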
Signed-off-by: Hui Tang Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/hpre/hpre_crypto.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/crypto/hisilicon/hpre/hpre_crypto.c b/drivers/crypto/hisilicon/hpre/hpre_crypto.c index 6ba5d8af38755..323418bf66ab7 100644 --- a/drivers/crypto/hisilicon/hpre/hpre_crypto.c +++ b/drivers/crypto/hisilicon/hpre/hpre_crypto.c @@ -1841,8 +1841,12 @@ static int hpre_curve25519_src_init(struct hpre_asym_request *hpre_req, * When src_data equals (2^255 - 19) ~ (2^255 - 1), it is out of p, * we get its modulus to p, and then use it. */ - if (memcmp(ptr, p, ctx->key_sz) >= 0) + if (memcmp(ptr, p, ctx->key_sz) == 0) { + dev_err(dev, "gx is p!\n"); + return -EINVAL; + } else if (memcmp(ptr, p, ctx->key_sz) > 0) { hpre_curve25519_src_modulo_p(ptr); + } hpre_req->src = ptr; msg->in = cpu_to_le64(dma); From b981f7990e1ae61d9a48d717868df8f00f52bc08 Mon Sep 17 00:00:00 2001 From: Hui Tang Date: Sat, 29 May 2021 16:58:47 +0800 Subject: [PATCH 101/142] crypto: hisilicon/hpre - register ecdh NIST P384 Register ecdh NIST P384 curve and add the tfm initialization. Signed-off-by: Hui Tang Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/hpre/hpre_crypto.c | 56 +++++++++++++++++++-- 1 file changed, 52 insertions(+), 4 deletions(-) diff --git a/drivers/crypto/hisilicon/hpre/hpre_crypto.c b/drivers/crypto/hisilicon/hpre/hpre_crypto.c index 323418bf66ab7..a032c192ef1d6 100644 --- a/drivers/crypto/hisilicon/hpre/hpre_crypto.c +++ b/drivers/crypto/hisilicon/hpre/hpre_crypto.c @@ -45,9 +45,11 @@ struct hpre_ctx; /* size in bytes of the n prime */ #define HPRE_ECC_NIST_P192_N_SIZE 24 #define HPRE_ECC_NIST_P256_N_SIZE 32 +#define HPRE_ECC_NIST_P384_N_SIZE 48 /* size in bytes */ #define HPRE_ECC_HW256_KSZ_B 32 +#define HPRE_ECC_HW384_KSZ_B 48 typedef void (*hpre_cb)(struct hpre_ctx *ctx, void *sqe); @@ -1211,12 +1213,21 @@ static void hpre_ecc_clear_ctx(struct hpre_ctx *ctx, bool is_clear_all, hpre_ctx_clear(ctx, is_clear_all); } +/* + * The bits of 192/224/256/384/521 are supported by HPRE, + * and convert the bits like: + * bits<=256, bits=256; 256curve_id = ECC_CURVE_NIST_P384; + + return hpre_ctx_init(ctx, HPRE_V3_ECC_ALG_TYPE); +} + static void hpre_ecdh_exit_tfm(struct crypto_kpp *tfm) { struct hpre_ctx *ctx = kpp_tfm_ctx(tfm); @@ -2017,6 +2039,23 @@ static struct kpp_alg ecdh_nist_p256 = { }, }; +static struct kpp_alg ecdh_nist_p384 = { + .set_secret = hpre_ecdh_set_secret, + .generate_public_key = hpre_ecdh_compute_value, + .compute_shared_secret = hpre_ecdh_compute_value, + .max_size = hpre_ecdh_max_size, + .init = hpre_ecdh_nist_p384_init_tfm, + .exit = hpre_ecdh_exit_tfm, + .reqsize = sizeof(struct hpre_asym_request) + HPRE_ALIGN_SZ, + .base = { + .cra_ctxsize = sizeof(struct hpre_ctx), + .cra_priority = HPRE_CRYPTO_ALG_PRI, + .cra_name = "ecdh-nist-p384", + .cra_driver_name = "hpre-ecdh-nist-p384", + .cra_module = THIS_MODULE, + }, +}; + static struct kpp_alg curve25519_alg = { .set_secret = hpre_curve25519_set_secret, .generate_public_key = hpre_curve25519_compute_value, @@ -2044,16 +2083,25 @@ static int hpre_register_ecdh(void) return ret; ret = crypto_register_kpp(&ecdh_nist_p256); - if (ret) { - crypto_unregister_kpp(&ecdh_nist_p192); - return ret; - } + if (ret) + goto unregister_ecdh_p192; + + ret = crypto_register_kpp(&ecdh_nist_p384); + if (ret) + goto unregister_ecdh_p256; return 0; + +unregister_ecdh_p256: + crypto_unregister_kpp(&ecdh_nist_p256); +unregister_ecdh_p192: + crypto_unregister_kpp(&ecdh_nist_p192); + 
return ret; } static void hpre_unregister_ecdh(void) { + crypto_unregister_kpp(&ecdh_nist_p384); crypto_unregister_kpp(&ecdh_nist_p256); crypto_unregister_kpp(&ecdh_nist_p192); } From 38cd3968bf284929162665b002891de5c60d027a Mon Sep 17 00:00:00 2001 From: Weili Qian Date: Sat, 29 May 2021 22:15:34 +0800 Subject: [PATCH 102/142] crypto: hisilicon/qm - adjust reset interface Kunpeng930 hardware supports PF/VF communications. When the device is reset, PF can send message to VF to stop function and restart function. This patch adjusts the reset interface to support sending message through PF/VF communication. Signed-off-by: Weili Qian Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/qm.c | 140 +++++++++++++++++++++------------- 1 file changed, 89 insertions(+), 51 deletions(-) diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c index 7c1f8ab28f995..4af0650d4b187 100644 --- a/drivers/crypto/hisilicon/qm.c +++ b/drivers/crypto/hisilicon/qm.c @@ -1796,6 +1796,11 @@ static int qm_check_dev_error(struct hisi_qm *qm) (dev_val & (~qm->err_info.dev_ce_mask)); } +static int qm_wait_vf_prepare_finish(struct hisi_qm *qm) +{ + return 0; +} + static int qm_stop_qp(struct hisi_qp *qp) { return qm_mb(qp->qm, QM_MB_CMD_STOP_QP, 0, qp->qp_id, 0); @@ -3806,14 +3811,27 @@ static int qm_vf_reset_prepare(struct hisi_qm *qm, return ret; } -static int qm_reset_prepare_ready(struct hisi_qm *qm) +static int qm_try_stop_vfs(struct hisi_qm *qm, enum qm_stop_reason stop_reason) { struct pci_dev *pdev = qm->pdev; - struct hisi_qm *pf_qm = pci_get_drvdata(pci_physfn(pdev)); + int ret; + + if (!qm->vfs_num) + return 0; + + ret = qm_vf_reset_prepare(qm, stop_reason); + if (ret) + pci_err(pdev, "failed to prepare reset, ret = %d.\n", ret); + + return ret; +} + +static int qm_wait_reset_finish(struct hisi_qm *qm) +{ int delay = 0; /* All reset requests need to be queued for processing */ - while (test_and_set_bit(QM_RESETTING, &pf_qm->misc_ctl)) { + while (test_and_set_bit(QM_RESETTING, &qm->misc_ctl)) { msleep(++delay); if (delay > QM_RESET_WAIT_TIMEOUT) return -EBUSY; @@ -3822,6 +3840,22 @@ static int qm_reset_prepare_ready(struct hisi_qm *qm) return 0; } +static int qm_reset_prepare_ready(struct hisi_qm *qm) +{ + struct pci_dev *pdev = qm->pdev; + struct hisi_qm *pf_qm = pci_get_drvdata(pci_physfn(pdev)); + + return qm_wait_reset_finish(pf_qm); +} + +static void qm_reset_bit_clear(struct hisi_qm *qm) +{ + struct pci_dev *pdev = qm->pdev; + struct hisi_qm *pf_qm = pci_get_drvdata(pci_physfn(pdev)); + + clear_bit(QM_RESETTING, &pf_qm->misc_ctl); +} + static int qm_controller_reset_prepare(struct hisi_qm *qm) { struct pci_dev *pdev = qm->pdev; @@ -3833,22 +3867,21 @@ static int qm_controller_reset_prepare(struct hisi_qm *qm) return ret; } - if (qm->vfs_num) { - ret = qm_vf_reset_prepare(qm, QM_SOFT_RESET); - if (ret) { - pci_err(pdev, "Fails to stop VFs!\n"); - clear_bit(QM_RESETTING, &qm->misc_ctl); - return ret; - } - } + ret = qm_try_stop_vfs(qm, QM_SOFT_RESET); + if (ret) + pci_err(pdev, "failed to stop vfs by pf in soft reset.\n"); ret = hisi_qm_stop(qm, QM_SOFT_RESET); if (ret) { pci_err(pdev, "Fails to stop QM!\n"); - clear_bit(QM_RESETTING, &qm->misc_ctl); + qm_reset_bit_clear(qm); return ret; } + ret = qm_wait_vf_prepare_finish(qm); + if (ret) + pci_err(pdev, "failed to stop by vfs in soft reset!\n"); + clear_bit(QM_RST_SCHED, &qm->misc_ctl); return 0; @@ -3983,6 +4016,27 @@ static int qm_vf_reset_done(struct hisi_qm *qm) return ret; } +static int qm_try_start_vfs(struct hisi_qm *qm) +{ + 
struct pci_dev *pdev = qm->pdev; + int ret; + + if (!qm->vfs_num) + return 0; + + ret = qm_vf_q_assign(qm, qm->vfs_num); + if (ret) { + pci_err(pdev, "failed to assign VFs, ret = %d.\n", ret); + return ret; + } + + ret = qm_vf_reset_done(qm); + if (ret) + pci_warn(pdev, "failed to start vfs, ret = %d.\n", ret); + + return ret; +} + static int qm_dev_hw_init(struct hisi_qm *qm) { return qm->err_ini->hw_init(qm); @@ -4082,23 +4136,17 @@ static int qm_controller_reset_done(struct hisi_qm *qm) return ret; } - if (qm->vfs_num) { - ret = qm_vf_q_assign(qm, qm->vfs_num); - if (ret) { - pci_err(pdev, "Failed to assign queue!\n"); - return ret; - } - } + ret = qm_try_start_vfs(qm); + if (ret) + pci_err(pdev, "failed to start vfs by pf in soft reset.\n"); - ret = qm_vf_reset_done(qm); - if (ret) { - pci_err(pdev, "Failed to start VFs!\n"); - return -EPERM; - } + ret = qm_wait_vf_prepare_finish(qm); + if (ret) + pci_err(pdev, "failed to start by vfs in soft reset!\n"); qm_restart_done(qm); - clear_bit(QM_RESETTING, &qm->misc_ctl); + qm_reset_bit_clear(qm); return 0; } @@ -4119,13 +4167,13 @@ static int qm_controller_reset(struct hisi_qm *qm) ret = qm_soft_reset(qm); if (ret) { pci_err(pdev, "Controller reset failed (%d)\n", ret); - clear_bit(QM_RESETTING, &qm->misc_ctl); + qm_reset_bit_clear(qm); return ret; } ret = qm_controller_reset_done(qm); if (ret) { - clear_bit(QM_RESETTING, &qm->misc_ctl); + qm_reset_bit_clear(qm); return ret; } @@ -4187,14 +4235,9 @@ void hisi_qm_reset_prepare(struct pci_dev *pdev) return; } - if (qm->vfs_num) { - ret = qm_vf_reset_prepare(qm, QM_FLR); - if (ret) { - pci_err(pdev, "Failed to prepare reset, ret = %d.\n", - ret); - return; - } - } + ret = qm_try_stop_vfs(qm, QM_SOFT_RESET); + if (ret) + pci_err(pdev, "failed to stop vfs by pf in FLR.\n"); ret = hisi_qm_stop(qm, QM_FLR); if (ret) { @@ -4202,6 +4245,10 @@ void hisi_qm_reset_prepare(struct pci_dev *pdev) return; } + ret = qm_wait_vf_prepare_finish(qm); + if (ret) + pci_err(pdev, "failed to stop by vfs in FLR!\n"); + pci_info(pdev, "FLR resetting...\n"); } EXPORT_SYMBOL_GPL(hisi_qm_reset_prepare); @@ -4243,28 +4290,19 @@ void hisi_qm_reset_done(struct pci_dev *pdev) goto flr_done; } - if (qm->fun_type == QM_HW_PF) { - if (!qm->vfs_num) - goto flr_done; - - ret = qm_vf_q_assign(qm, qm->vfs_num); - if (ret) { - pci_err(pdev, "Failed to assign VFs, ret = %d.\n", ret); - goto flr_done; - } + ret = qm_try_start_vfs(qm); + if (ret) + pci_err(pdev, "failed to start vfs by pf in FLR.\n"); - ret = qm_vf_reset_done(qm); - if (ret) { - pci_err(pdev, "Failed to start VFs, ret = %d.\n", ret); - goto flr_done; - } - } + ret = qm_wait_vf_prepare_finish(qm); + if (ret) + pci_err(pdev, "failed to start by vfs in FLR!\n"); flr_done: if (qm_flr_reset_complete(pdev)) pci_info(pdev, "FLR reset complete\n"); - clear_bit(QM_RESETTING, &pf_qm->misc_ctl); + qm_reset_bit_clear(qm); } EXPORT_SYMBOL_GPL(hisi_qm_reset_done); From e3ac4d20e93664755ccea87ad1c71f264a6c9d74 Mon Sep 17 00:00:00 2001 From: Weili Qian Date: Sat, 29 May 2021 22:15:35 +0800 Subject: [PATCH 103/142] crypto: hisilicon/qm - enable PF and VFs communication Kunpeng930 hardware supports the communication between PF and VFs. This patch enables communication between PF and VFs by writing hardware registers, and requests an irq for communication. 
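The new qm_mb_cmd_irq() handler below follows the usual top-half/bottom-half split: in hard-irq context it only reads QM_IFC_INT_STATUS to decide whether the interrupt belongs to this function, returning IRQ_NONE otherwise, and defers the actual message retrieval to the cmd_process work item. A compilable miniature of that claim-then-defer shape, with the status register modeled as a plain variable:

#include <stdint.h>
#include <stdio.h>

#define QM_IFC_INT_STATUS_MASK	0x1

enum irqreturn { IRQ_NONE, IRQ_HANDLED };

static uint32_t ifc_int_status;	/* models readl(io_base + QM_IFC_INT_STATUS) */
static int work_scheduled;	/* models schedule_work(&qm->cmd_process) */

static enum irqreturn qm_mb_cmd_irq_demo(void)
{
	if (!(ifc_int_status & QM_IFC_INT_STATUS_MASK))
		return IRQ_NONE;	/* not our interrupt */

	work_scheduled = 1;		/* real work runs in process context */
	return IRQ_HANDLED;
}

int main(void)
{
	printf("idle:    %d\n", qm_mb_cmd_irq_demo());	/* 0 (IRQ_NONE) */
	ifc_int_status |= QM_IFC_INT_STATUS_MASK;
	printf("pending: %d\n", qm_mb_cmd_irq_demo());	/* 1 (IRQ_HANDLED) */
	return 0;
}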
Signed-off-by: Weili Qian Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/qm.c | 113 +++++++++++++++++++++++++++++++--- drivers/crypto/hisilicon/qm.h | 1 + 2 files changed, 106 insertions(+), 8 deletions(-) diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c index 4af0650d4b187..cd25f1fdd40b5 100644 --- a/drivers/crypto/hisilicon/qm.c +++ b/drivers/crypto/hisilicon/qm.c @@ -25,9 +25,11 @@ #define QM_IRQ_NUM_V1 1 #define QM_IRQ_NUM_PF_V2 4 #define QM_IRQ_NUM_VF_V2 2 +#define QM_IRQ_NUM_VF_V3 3 #define QM_EQ_EVENT_IRQ_VECTOR 0 #define QM_AEQ_EVENT_IRQ_VECTOR 1 +#define QM_CMD_EVENT_IRQ_VECTOR 2 #define QM_ABNORMAL_EVENT_IRQ_VECTOR 3 /* mailbox */ @@ -177,6 +179,16 @@ #define ACC_ROB_ECC_ERR_MULTPL BIT(1) #define QM_MSI_CAP_ENABLE BIT(16) +/* interfunction communication */ +#define QM_IFC_INT_SOURCE_P 0x100138 +#define QM_IFC_INT_SOURCE_V 0x0020 +#define QM_IFC_INT_MASK 0x0024 +#define QM_IFC_INT_STATUS 0x0028 +#define QM_IFC_INT_SOURCE_CLR GENMASK(63, 0) +#define QM_IFC_INT_SOURCE_MASK BIT(0) +#define QM_IFC_INT_DISABLE BIT(0) +#define QM_IFC_INT_STATUS_MASK BIT(0) + #define QM_DFX_MB_CNT_VF 0x104010 #define QM_DFX_DB_CNT_VF 0x104020 #define QM_DFX_SQE_CNT_VF_SQN 0x104030 @@ -633,6 +645,14 @@ static u32 qm_get_irq_num_v2(struct hisi_qm *qm) return QM_IRQ_NUM_VF_V2; } +static u32 qm_get_irq_num_v3(struct hisi_qm *qm) +{ + if (qm->fun_type == QM_HW_PF) + return QM_IRQ_NUM_PF_V2; + + return QM_IRQ_NUM_VF_V3; +} + static struct hisi_qp *qm_to_hisi_qp(struct hisi_qm *qm, struct qm_eqe *eqe) { u16 cqn = le32_to_cpu(eqe->dw0) & QM_EQE_CQN_MASK; @@ -737,6 +757,21 @@ static irqreturn_t qm_irq(int irq, void *data) return IRQ_NONE; } +static irqreturn_t qm_mb_cmd_irq(int irq, void *data) +{ + struct hisi_qm *qm = data; + u32 val; + + val = readl(qm->io_base + QM_IFC_INT_STATUS); + val &= QM_IFC_INT_STATUS_MASK; + if (!val) + return IRQ_NONE; + + schedule_work(&qm->cmd_process); + + return IRQ_HANDLED; +} + static irqreturn_t qm_aeq_irq(int irq, void *data) { struct hisi_qm *qm = data; @@ -777,14 +812,16 @@ static void qm_irq_unregister(struct hisi_qm *qm) free_irq(pci_irq_vector(pdev, QM_EQ_EVENT_IRQ_VECTOR), qm); - if (qm->ver == QM_HW_V1) - return; + if (qm->ver > QM_HW_V1) { + free_irq(pci_irq_vector(pdev, QM_AEQ_EVENT_IRQ_VECTOR), qm); - free_irq(pci_irq_vector(pdev, QM_AEQ_EVENT_IRQ_VECTOR), qm); + if (qm->fun_type == QM_HW_PF) + free_irq(pci_irq_vector(pdev, + QM_ABNORMAL_EVENT_IRQ_VECTOR), qm); + } - if (qm->fun_type == QM_HW_PF) - free_irq(pci_irq_vector(pdev, - QM_ABNORMAL_EVENT_IRQ_VECTOR), qm); + if (qm->ver > QM_HW_V2) + free_irq(pci_irq_vector(pdev, QM_CMD_EVENT_IRQ_VECTOR), qm); } static void qm_init_qp_status(struct hisi_qp *qp) @@ -1796,6 +1833,18 @@ static int qm_check_dev_error(struct hisi_qm *qm) (dev_val & (~qm->err_info.dev_ce_mask)); } +static void qm_clear_cmd_interrupt(struct hisi_qm *qm, u64 vf_mask) +{ + u32 val; + + if (qm->fun_type == QM_HW_PF) + writeq(vf_mask, qm->io_base + QM_IFC_INT_SOURCE_P); + + val = readl(qm->io_base + QM_IFC_INT_SOURCE_V); + val |= QM_IFC_INT_SOURCE_MASK; + writel(val, qm->io_base + QM_IFC_INT_SOURCE_V); +} + static int qm_wait_vf_prepare_finish(struct hisi_qm *qm) { return 0; @@ -1913,7 +1962,7 @@ static const struct hisi_qm_hw_ops qm_hw_ops_v2 = { static const struct hisi_qm_hw_ops qm_hw_ops_v3 = { .get_vft = qm_get_vft_v2, .qm_db = qm_db_v2, - .get_irq_num = qm_get_irq_num_v2, + .get_irq_num = qm_get_irq_num_v3, .hw_error_init = qm_hw_error_init_v3, .hw_error_uninit = qm_hw_error_uninit_v3, .hw_error_handle = 
qm_hw_error_handle_v2, @@ -2777,6 +2826,34 @@ static void hisi_qm_pre_init(struct hisi_qm *qm) qm->misc_ctl = false; } +static void qm_cmd_uninit(struct hisi_qm *qm) +{ + u32 val; + + if (qm->ver < QM_HW_V3) + return; + + val = readl(qm->io_base + QM_IFC_INT_MASK); + val |= QM_IFC_INT_DISABLE; + writel(val, qm->io_base + QM_IFC_INT_MASK); +} + +static void qm_cmd_init(struct hisi_qm *qm) +{ + u32 val; + + if (qm->ver < QM_HW_V3) + return; + + /* Clear communication interrupt source */ + qm_clear_cmd_interrupt(qm, QM_IFC_INT_SOURCE_CLR); + + /* Enable PF to VF communication reg. */ + val = readl(qm->io_base + QM_IFC_INT_MASK); + val &= ~QM_IFC_INT_DISABLE; + writel(val, qm->io_base + QM_IFC_INT_MASK); +} + static void qm_put_pci_res(struct hisi_qm *qm) { struct pci_dev *pdev = qm->pdev; @@ -2808,6 +2885,7 @@ void hisi_qm_uninit(struct hisi_qm *qm) struct pci_dev *pdev = qm->pdev; struct device *dev = &pdev->dev; + qm_cmd_uninit(qm); down_write(&qm->qps_lock); if (!qm_avail_state(qm, QM_CLOSE)) { @@ -4331,7 +4409,7 @@ static int qm_irq_register(struct hisi_qm *qm) if (ret) return ret; - if (qm->ver != QM_HW_V1) { + if (qm->ver > QM_HW_V1) { ret = request_irq(pci_irq_vector(pdev, QM_AEQ_EVENT_IRQ_VECTOR), qm_aeq_irq, 0, qm->dev_name, qm); if (ret) @@ -4346,8 +4424,18 @@ static int qm_irq_register(struct hisi_qm *qm) } } + if (qm->ver > QM_HW_V2) { + ret = request_irq(pci_irq_vector(pdev, QM_CMD_EVENT_IRQ_VECTOR), + qm_mb_cmd_irq, 0, qm->dev_name, qm); + if (ret) + goto err_mb_cmd_irq; + } + return 0; +err_mb_cmd_irq: + if (qm->fun_type == QM_HW_PF) + free_irq(pci_irq_vector(pdev, QM_ABNORMAL_EVENT_IRQ_VECTOR), qm); err_abonormal_irq: free_irq(pci_irq_vector(pdev, QM_AEQ_EVENT_IRQ_VECTOR), qm); err_aeq_irq: @@ -4384,6 +4472,11 @@ static void hisi_qm_controller_reset(struct work_struct *rst_work) } +static void qm_cmd_process(struct work_struct *cmd_process) +{ + /* handling messages sent by communication source */ +} + /** * hisi_qm_alg_register() - Register alg to crypto and add qm to qm_list. * @qm: The qm needs add. @@ -4615,6 +4708,10 @@ int hisi_qm_init(struct hisi_qm *qm) if (qm->fun_type == QM_HW_PF) INIT_WORK(&qm->rst_work, hisi_qm_controller_reset); + if (qm->ver >= QM_HW_V3) + INIT_WORK(&qm->cmd_process, qm_cmd_process); + + qm_cmd_init(qm); atomic_set(&qm->status.flags, QM_INIT); return 0; diff --git a/drivers/crypto/hisilicon/qm.h b/drivers/crypto/hisilicon/qm.h index 9048aa6e5f8ab..8a36bade103d8 100644 --- a/drivers/crypto/hisilicon/qm.h +++ b/drivers/crypto/hisilicon/qm.h @@ -250,6 +250,7 @@ struct hisi_qm { struct workqueue_struct *wq; struct work_struct work; struct work_struct rst_work; + struct work_struct cmd_process; const char *algs; bool use_sva; From 3cd53a27c2fc58da9dcf6f22f4ed5705e398a1b9 Mon Sep 17 00:00:00 2001 From: Weili Qian Date: Sat, 29 May 2021 22:15:36 +0800 Subject: [PATCH 104/142] crypto: hisilicon/qm - add callback to support communication This patch adds a 'ping_all_vfs' callback that lets the PF send a message to all VFs, and a 'ping_pf' callback that lets a VF send a message to the PF. After receiving the interrupt, the communication destination gets the message by issuing a mailbox command.
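[Editor's note: an illustrative fragment of how the destination side decodes a ping once its doorbell fires, mirroring qm_get_mb_cmd() in the diff below; the 64-bit message is reassembled from the two 32-bit mailbox data registers and the command code sits in the low word.]

	u64 msg;
	u32 cmd;

	/* a QM_MB_CMD_DST mailbox has been issued; read the payload back */
	msg = readl(qm->io_base + QM_MB_CMD_DATA_ADDR_L) |
	      ((u64)readl(qm->io_base + QM_MB_CMD_DATA_ADDR_H) << 32);
	cmd = msg & QM_MB_CMD_DATA_MASK;	/* QM_MB_CMD_DATA_MASK = GENMASK(31, 0) */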
Signed-off-by: Weili Qian Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/qm.c | 217 +++++++++++++++++++++++++++++----- 1 file changed, 190 insertions(+), 27 deletions(-) diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c index cd25f1fdd40b5..04560c3cdd78e 100644 --- a/drivers/crypto/hisilicon/qm.c +++ b/drivers/crypto/hisilicon/qm.c @@ -41,6 +41,8 @@ #define QM_MB_CMD_CQC_BT 0x5 #define QM_MB_CMD_SQC_VFT_V2 0x6 #define QM_MB_CMD_STOP_QP 0x8 +#define QM_MB_CMD_SRC 0xc +#define QM_MB_CMD_DST 0xd #define QM_MB_CMD_SEND_BASE 0x300 #define QM_MB_EVENT_SHIFT 8 @@ -48,6 +50,8 @@ #define QM_MB_OP_SHIFT 14 #define QM_MB_CMD_DATA_ADDR_L 0x304 #define QM_MB_CMD_DATA_ADDR_H 0x308 +#define QM_MB_PING_ALL_VFS 0xffff +#define QM_MB_CMD_DATA_MASK GENMASK(31, 0) /* sqc shift */ #define QM_SQ_HOP_NUM_SHIFT 0 @@ -180,14 +184,24 @@ #define QM_MSI_CAP_ENABLE BIT(16) /* interfunction communication */ +#define QM_IFC_READY_STATUS 0x100128 +#define QM_IFC_INT_SET_P 0x100130 +#define QM_IFC_INT_CFG 0x100134 #define QM_IFC_INT_SOURCE_P 0x100138 #define QM_IFC_INT_SOURCE_V 0x0020 #define QM_IFC_INT_MASK 0x0024 #define QM_IFC_INT_STATUS 0x0028 +#define QM_IFC_INT_SET_V 0x002C +#define QM_IFC_SEND_ALL_VFS GENMASK(6, 0) #define QM_IFC_INT_SOURCE_CLR GENMASK(63, 0) #define QM_IFC_INT_SOURCE_MASK BIT(0) #define QM_IFC_INT_DISABLE BIT(0) #define QM_IFC_INT_STATUS_MASK BIT(0) +#define QM_IFC_INT_SET_MASK BIT(0) +#define QM_WAIT_DST_ACK 10 +#define QM_MAX_PF_WAIT_COUNT 10 +#define QM_MAX_VF_WAIT_COUNT 40 + #define QM_DFX_MB_CNT_VF 0x104010 #define QM_DFX_DB_CNT_VF 0x104020 @@ -370,6 +384,8 @@ struct hisi_qm_hw_ops { enum acc_err_result (*hw_error_handle)(struct hisi_qm *qm); int (*stop_qp)(struct hisi_qp *qp); int (*set_msi)(struct hisi_qm *qm, bool set); + int (*ping_all_vfs)(struct hisi_qm *qm, u64 cmd); + int (*ping_pf)(struct hisi_qm *qm, u64 cmd); }; struct qm_dfx_item { @@ -510,6 +526,18 @@ static bool qm_qp_avail_state(struct hisi_qm *qm, struct hisi_qp *qp, return avail; } +static void qm_mb_pre_init(struct qm_mailbox *mailbox, u8 cmd, + u64 base, u16 queue, bool op) +{ + mailbox->w0 = cpu_to_le16((cmd) | + ((op) ? 0x1 << QM_MB_OP_SHIFT : 0) | + (0x1 << QM_MB_BUSY_SHIFT)); + mailbox->queue_num = cpu_to_le16(queue); + mailbox->base_l = cpu_to_le32(lower_32_bits(base)); + mailbox->base_h = cpu_to_le32(upper_32_bits(base)); + mailbox->rsvd = 0; +} + /* return 0 mailbox ready, -ETIMEDOUT hardware timeout */ static int qm_wait_mb_ready(struct hisi_qm *qm) { @@ -542,44 +570,42 @@ static void qm_mb_write(struct hisi_qm *qm, const void *src) : "memory"); } -static int qm_mb(struct hisi_qm *qm, u8 cmd, dma_addr_t dma_addr, u16 queue, - bool op) +static int qm_mb_nolock(struct hisi_qm *qm, struct qm_mailbox *mailbox) { - struct qm_mailbox mailbox; - int ret = 0; - - dev_dbg(&qm->pdev->dev, "QM mailbox request to q%u: %u-%llx\n", - queue, cmd, (unsigned long long)dma_addr); - - mailbox.w0 = cpu_to_le16(cmd | - (op ? 
0x1 << QM_MB_OP_SHIFT : 0) | - (0x1 << QM_MB_BUSY_SHIFT)); - mailbox.queue_num = cpu_to_le16(queue); - mailbox.base_l = cpu_to_le32(lower_32_bits(dma_addr)); - mailbox.base_h = cpu_to_le32(upper_32_bits(dma_addr)); - mailbox.rsvd = 0; - - mutex_lock(&qm->mailbox_lock); - if (unlikely(qm_wait_mb_ready(qm))) { - ret = -EBUSY; dev_err(&qm->pdev->dev, "QM mailbox is busy to start!\n"); - goto busy_unlock; + goto mb_busy; } - qm_mb_write(qm, &mailbox); + qm_mb_write(qm, mailbox); if (unlikely(qm_wait_mb_ready(qm))) { - ret = -EBUSY; dev_err(&qm->pdev->dev, "QM mailbox operation timeout!\n"); - goto busy_unlock; + goto mb_busy; } -busy_unlock: + return 0; + +mb_busy: + atomic64_inc(&qm->debug.dfx.mb_err_cnt); + return -EBUSY; +} + +static int qm_mb(struct hisi_qm *qm, u8 cmd, dma_addr_t dma_addr, u16 queue, + bool op) +{ + struct qm_mailbox mailbox; + int ret; + + dev_dbg(&qm->pdev->dev, "QM mailbox request to q%u: %u-%llx\n", + queue, cmd, (unsigned long long)dma_addr); + + qm_mb_pre_init(&mailbox, cmd, dma_addr, queue, op); + + mutex_lock(&qm->mailbox_lock); + ret = qm_mb_nolock(qm, &mailbox); mutex_unlock(&qm->mailbox_lock); - if (ret) - atomic64_inc(&qm->debug.dfx.mb_err_cnt); return ret; } @@ -1833,6 +1859,25 @@ static int qm_check_dev_error(struct hisi_qm *qm) (dev_val & (~qm->err_info.dev_ce_mask)); } +static int qm_get_mb_cmd(struct hisi_qm *qm, u64 *msg, u16 fun_num) +{ + struct qm_mailbox mailbox; + int ret; + + qm_mb_pre_init(&mailbox, QM_MB_CMD_DST, 0, fun_num, 0); + mutex_lock(&qm->mailbox_lock); + ret = qm_mb_nolock(qm, &mailbox); + if (ret) + goto err_unlock; + + *msg = readl(qm->io_base + QM_MB_CMD_DATA_ADDR_L) | + ((u64)readl(qm->io_base + QM_MB_CMD_DATA_ADDR_H) << 32); + +err_unlock: + mutex_unlock(&qm->mailbox_lock); + return ret; +} + static void qm_clear_cmd_interrupt(struct hisi_qm *qm, u64 vf_mask) { u32 val; @@ -1850,6 +1895,108 @@ static int qm_wait_vf_prepare_finish(struct hisi_qm *qm) return 0; } +static void qm_trigger_vf_interrupt(struct hisi_qm *qm, u32 fun_num) +{ + u32 val; + + val = readl(qm->io_base + QM_IFC_INT_CFG); + val |= ~QM_IFC_SEND_ALL_VFS; + val |= fun_num; + writel(val, qm->io_base + QM_IFC_INT_CFG); + + val = readl(qm->io_base + QM_IFC_INT_SET_P); + val |= QM_IFC_INT_SET_MASK; + writel(val, qm->io_base + QM_IFC_INT_SET_P); +} + +static void qm_trigger_pf_interrupt(struct hisi_qm *qm) +{ + u32 val; + + val = readl(qm->io_base + QM_IFC_INT_SET_V); + val |= QM_IFC_INT_SET_MASK; + writel(val, qm->io_base + QM_IFC_INT_SET_V); +} + +static int qm_ping_all_vfs(struct hisi_qm *qm, u64 cmd) +{ + struct device *dev = &qm->pdev->dev; + u32 vfs_num = qm->vfs_num; + struct qm_mailbox mailbox; + u64 val = 0; + int cnt = 0; + int ret; + u32 i; + + qm_mb_pre_init(&mailbox, QM_MB_CMD_SRC, cmd, QM_MB_PING_ALL_VFS, 0); + mutex_lock(&qm->mailbox_lock); + /* PF sends command to all VFs by mailbox */ + ret = qm_mb_nolock(qm, &mailbox); + if (ret) { + dev_err(dev, "failed to send command to VFs!\n"); + mutex_unlock(&qm->mailbox_lock); + return ret; + } + + qm_trigger_vf_interrupt(qm, QM_IFC_SEND_ALL_VFS); + while (true) { + msleep(QM_WAIT_DST_ACK); + val = readq(qm->io_base + QM_IFC_READY_STATUS); + /* If all VFs acked, PF notifies VFs successfully. */ + if (!(val & GENMASK(vfs_num, 1))) { + mutex_unlock(&qm->mailbox_lock); + return 0; + } + + if (++cnt > QM_MAX_PF_WAIT_COUNT) + break; + } + + mutex_unlock(&qm->mailbox_lock); + + /* Check which vf respond timeout. 
*/ + for (i = 1; i <= vfs_num; i++) { + if (val & BIT(i)) + dev_err(dev, "failed to get response from VF(%u)!\n", i); + } + + return -ETIMEDOUT; +} + +static int qm_ping_pf(struct hisi_qm *qm, u64 cmd) +{ + struct qm_mailbox mailbox; + int cnt = 0; + u32 val; + int ret; + + qm_mb_pre_init(&mailbox, QM_MB_CMD_SRC, cmd, 0, 0); + mutex_lock(&qm->mailbox_lock); + ret = qm_mb_nolock(qm, &mailbox); + if (ret) { + dev_err(&qm->pdev->dev, "failed to send command to PF!\n"); + goto unlock; + } + + qm_trigger_pf_interrupt(qm); + /* Waiting for PF response */ + while (true) { + msleep(QM_WAIT_DST_ACK); + val = readl(qm->io_base + QM_IFC_INT_SET_V); + if (!(val & QM_IFC_INT_STATUS_MASK)) + break; + + if (++cnt > QM_MAX_VF_WAIT_COUNT) { + ret = -ETIMEDOUT; + break; + } + } + +unlock: + mutex_unlock(&qm->mailbox_lock); + return ret; +} + static int qm_stop_qp(struct hisi_qp *qp) { return qm_mb(qp->qm, QM_MB_CMD_STOP_QP, 0, qp->qp_id, 0); @@ -1968,6 +2115,8 @@ static const struct hisi_qm_hw_ops qm_hw_ops_v3 = { .hw_error_handle = qm_hw_error_handle_v2, .stop_qp = qm_stop_qp, .set_msi = qm_set_msi_v3, + .ping_all_vfs = qm_ping_all_vfs, + .ping_pf = qm_ping_pf, }; static void *qm_get_avail_sqe(struct hisi_qp *qp) @@ -4474,7 +4623,21 @@ static void hisi_qm_controller_reset(struct work_struct *rst_work) static void qm_cmd_process(struct work_struct *cmd_process) { - /* handling messages sent by communication source */ + struct hisi_qm *qm = container_of(cmd_process, + struct hisi_qm, cmd_process); + struct device *dev = &qm->pdev->dev; + u64 msg; + int ret; + + /* + * Get the msg from the source by sending a mailbox. Whether or not the + * message is received successfully, the destination needs to ack the + * source by clearing the interrupt. + */ + ret = qm_get_mb_cmd(qm, &msg, 0); + qm_clear_cmd_interrupt(qm, 0); + if (ret) + dev_err(dev, "failed to get msg from source!\n"); + } /** From 760fe22cf5e9f5d0212aa4c9aef555625c167627 Mon Sep 17 00:00:00 2001 From: Weili Qian Date: Sat, 29 May 2021 22:15:37 +0800 Subject: [PATCH 105/142] crypto: hisilicon/qm - update reset flow This patch updates the reset flow based on PF/VF communications. VFs are stopped after receiving the reset message from the PF, and then wait for the reset to finish before being restarted.
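[Editor's note: the VF-side sequence this patch adds, condensed here for orientation; the ordering is taken from qm_pf_reset_vf_process() in the diff below and the wrapper name is illustrative.]

static void example_vf_reset_flow(struct hisi_qm *qm,
				  enum qm_stop_reason stop_reason)
{
	qm_cmd_uninit(qm);			/* poll registers while the PF resets */
	qm_pf_reset_vf_prepare(qm, stop_reason);	/* stop the QM, ack the PF */

	if (qm_wait_pf_reset_finish(qm)) {	/* poll QM_IFC_INT_SOURCE_V */
		qm_cmd_init(qm);
		qm_reset_bit_clear(qm);
		return;
	}

	qm_pf_reset_vf_done(qm);		/* restart the QM, ack the PF */
	qm_cmd_init(qm);			/* re-enable the doorbell IRQ */
}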
Signed-off-by: Weili Qian Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/qm.c | 279 +++++++++++++++++++++++++++++++--- 1 file changed, 262 insertions(+), 17 deletions(-) diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c index 04560c3cdd78e..efa14c9ee9763 100644 --- a/drivers/crypto/hisilicon/qm.c +++ b/drivers/crypto/hisilicon/qm.c @@ -201,7 +201,10 @@ #define QM_WAIT_DST_ACK 10 #define QM_MAX_PF_WAIT_COUNT 10 #define QM_MAX_VF_WAIT_COUNT 40 - +#define QM_VF_RESET_WAIT_US 20000 +#define QM_VF_RESET_WAIT_CNT 3000 +#define QM_VF_RESET_WAIT_TIMEOUT_US \ + (QM_VF_RESET_WAIT_US * QM_VF_RESET_WAIT_CNT) #define QM_DFX_MB_CNT_VF 0x104010 #define QM_DFX_DB_CNT_VF 0x104020 @@ -285,6 +288,16 @@ enum acc_err_result { ACC_ERR_RECOVERED, }; +enum qm_mb_cmd { + QM_PF_FLR_PREPARE = 0x01, + QM_PF_SRST_PREPARE, + QM_PF_RESET_DONE, + QM_VF_PREPARE_DONE, + QM_VF_PREPARE_FAIL, + QM_VF_START_DONE, + QM_VF_START_FAIL, +}; + struct qm_cqe { __le32 rsvd0; __le16 cmd_id; @@ -1890,9 +1903,74 @@ static void qm_clear_cmd_interrupt(struct hisi_qm *qm, u64 vf_mask) writel(val, qm->io_base + QM_IFC_INT_SOURCE_V); } +static void qm_handle_vf_msg(struct hisi_qm *qm, u32 vf_id) +{ + struct device *dev = &qm->pdev->dev; + u32 cmd; + u64 msg; + int ret; + + ret = qm_get_mb_cmd(qm, &msg, vf_id); + if (ret) { + dev_err(dev, "failed to get msg from VF(%u)!\n", vf_id); + return; + } + + cmd = msg & QM_MB_CMD_DATA_MASK; + switch (cmd) { + case QM_VF_PREPARE_FAIL: + dev_err(dev, "failed to stop VF(%u)!\n", vf_id); + break; + case QM_VF_START_FAIL: + dev_err(dev, "failed to start VF(%u)!\n", vf_id); + break; + case QM_VF_PREPARE_DONE: + case QM_VF_START_DONE: + break; + default: + dev_err(dev, "unsupported cmd %u sent by VF(%u)!\n", cmd, vf_id); + break; + } +} + static int qm_wait_vf_prepare_finish(struct hisi_qm *qm) { - return 0; + struct device *dev = &qm->pdev->dev; + u32 vfs_num = qm->vfs_num; + int cnt = 0; + int ret = 0; + u64 val; + u32 i; + + if (!qm->vfs_num || qm->ver < QM_HW_V3) + return 0; + + while (true) { + val = readq(qm->io_base + QM_IFC_INT_SOURCE_P); + /* All VFs send command to PF, break */ + if ((val & GENMASK(vfs_num, 1)) == GENMASK(vfs_num, 1)) + break; + + if (++cnt > QM_MAX_PF_WAIT_COUNT) { + ret = -EBUSY; + break; + } + + msleep(QM_WAIT_DST_ACK); + } + + /* PF check VFs msg */ + for (i = 1; i <= vfs_num; i++) { + if (val & BIT(i)) + qm_handle_vf_msg(qm, i); + else + dev_err(dev, "VF(%u) not ping PF!\n", i); + } + + /* PF clear interrupt to ack VFs */ + qm_clear_cmd_interrupt(qm, val); + + return ret; } static void qm_trigger_vf_interrupt(struct hisi_qm *qm, u32 fun_num) @@ -4038,7 +4116,8 @@ static int qm_vf_reset_prepare(struct hisi_qm *qm, return ret; } -static int qm_try_stop_vfs(struct hisi_qm *qm, enum qm_stop_reason stop_reason) +static int qm_try_stop_vfs(struct hisi_qm *qm, u64 cmd, + enum qm_stop_reason stop_reason) { struct pci_dev *pdev = qm->pdev; int ret; @@ -4046,9 +4125,16 @@ static int qm_try_stop_vfs(struct hisi_qm *qm, enum qm_stop_reason stop_reason) if (!qm->vfs_num) return 0; - ret = qm_vf_reset_prepare(qm, stop_reason); - if (ret) - pci_err(pdev, "failed to prepare reset, ret = %d.\n", ret); + /* Kunpeng930 supports to notify VFs to stop before PF reset */ + if (qm->ops->ping_all_vfs) { + ret = qm->ops->ping_all_vfs(qm, cmd); + if (ret) + pci_err(pdev, "failed to send cmd to all VFs before PF reset!\n"); + } else { + ret = qm_vf_reset_prepare(qm, stop_reason); + if (ret) + pci_err(pdev, "failed to prepare reset, ret = %d.\n", ret); + } return ret; } 
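[Editor's note: a worked example of the ack bitmask checked in qm_wait_vf_prepare_finish() above, on the assumption, consistent with the code, that function 0 is the PF and VF n reports through bit n of QM_IFC_INT_SOURCE_P. With vfs_num = 3, GENMASK(3, 1) = 0b1110:]

	u64 val = readq(qm->io_base + QM_IFC_INT_SOURCE_P);

	/* true once every VF 1..vfs_num has pinged the PF */
	bool all_acked = (val & GENMASK(qm->vfs_num, 1)) ==
			 GENMASK(qm->vfs_num, 1);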
@@ -4072,7 +4158,14 @@ static int qm_reset_prepare_ready(struct hisi_qm *qm) struct pci_dev *pdev = qm->pdev; struct hisi_qm *pf_qm = pci_get_drvdata(pci_physfn(pdev)); - return qm_wait_reset_finish(pf_qm); + /* + * PF and VF on the host do not support resetting at the + * same time on Kunpeng920. + */ + if (qm->ver < QM_HW_V3) + return qm_wait_reset_finish(pf_qm); + + return qm_wait_reset_finish(qm); } static void qm_reset_bit_clear(struct hisi_qm *qm) @@ -4080,7 +4173,10 @@ static void qm_reset_bit_clear(struct hisi_qm *qm) struct pci_dev *pdev = qm->pdev; struct hisi_qm *pf_qm = pci_get_drvdata(pci_physfn(pdev)); - clear_bit(QM_RESETTING, &pf_qm->misc_ctl); + if (qm->ver < QM_HW_V3) + clear_bit(QM_RESETTING, &pf_qm->misc_ctl); + + clear_bit(QM_RESETTING, &qm->misc_ctl); } static int qm_controller_reset_prepare(struct hisi_qm *qm) @@ -4094,7 +4190,11 @@ static int qm_controller_reset_prepare(struct hisi_qm *qm) return ret; } - ret = qm_try_stop_vfs(qm, QM_SOFT_RESET); + /* PF obtains the information of VF by querying the register. */ + qm_cmd_uninit(qm); + + /* Whether or not the VFs stop successfully, the soft reset continues. */ + ret = qm_try_stop_vfs(qm, QM_PF_SRST_PREPARE, QM_SOFT_RESET); if (ret) pci_err(pdev, "failed to stop vfs by pf in soft reset.\n"); @@ -4243,7 +4343,7 @@ static int qm_vf_reset_done(struct hisi_qm *qm) return ret; } -static int qm_try_start_vfs(struct hisi_qm *qm) +static int qm_try_start_vfs(struct hisi_qm *qm, enum qm_mb_cmd cmd) { struct pci_dev *pdev = qm->pdev; int ret; @@ -4257,9 +4357,16 @@ static int qm_try_start_vfs(struct hisi_qm *qm) return ret; } - ret = qm_vf_reset_done(qm); - if (ret) - pci_warn(pdev, "failed to start vfs, ret = %d.\n", ret); + /* Kunpeng930 supports notifying VFs to start after PF reset. */ + if (qm->ops->ping_all_vfs) { + ret = qm->ops->ping_all_vfs(qm, cmd); + if (ret) + pci_warn(pdev, "failed to send cmd to all VFs after PF reset!\n"); + } else { + ret = qm_vf_reset_done(qm); + if (ret) + pci_warn(pdev, "failed to start vfs, ret = %d.\n", ret); + } return ret; } @@ -4363,7 +4470,7 @@ static int qm_controller_reset_done(struct hisi_qm *qm) return ret; } - ret = qm_try_start_vfs(qm); + ret = qm_try_start_vfs(qm, QM_PF_RESET_DONE); if (ret) pci_err(pdev, "failed to start vfs by pf in soft reset.\n"); @@ -4371,6 +4478,7 @@ static int qm_controller_reset_done(struct hisi_qm *qm) if (ret) pci_err(pdev, "failed to start by vfs in soft reset!\n"); + qm_cmd_init(qm); qm_restart_done(qm); qm_reset_bit_clear(qm); @@ -4462,7 +4570,11 @@ void hisi_qm_reset_prepare(struct pci_dev *pdev) return; } - ret = qm_try_stop_vfs(qm, QM_SOFT_RESET); + /* PF obtains the information of VF by querying the register.
*/ + if (qm->fun_type == QM_HW_PF) + qm_cmd_uninit(qm); + + ret = qm_try_stop_vfs(qm, QM_PF_FLR_PREPARE, QM_FLR); if (ret) pci_err(pdev, "failed to stop vfs by pf in FLR.\n"); @@ -4517,7 +4629,7 @@ void hisi_qm_reset_done(struct pci_dev *pdev) goto flr_done; } - ret = qm_try_start_vfs(qm); + ret = qm_try_start_vfs(qm, QM_PF_RESET_DONE); if (ret) pci_err(pdev, "failed to start vfs by pf in FLR.\n"); @@ -4526,6 +4638,9 @@ void hisi_qm_reset_done(struct pci_dev *pdev) pci_err(pdev, "failed to start by vfs in FLR!\n"); flr_done: + if (qm->fun_type == QM_HW_PF) + qm_cmd_init(qm); + if (qm_flr_reset_complete(pdev)) pci_info(pdev, "FLR reset complete\n"); @@ -4621,12 +4736,128 @@ static void hisi_qm_controller_reset(struct work_struct *rst_work) } +static void qm_pf_reset_vf_prepare(struct hisi_qm *qm, + enum qm_stop_reason stop_reason) +{ + enum qm_mb_cmd cmd = QM_VF_PREPARE_DONE; + struct pci_dev *pdev = qm->pdev; + int ret; + + ret = qm_reset_prepare_ready(qm); + if (ret) { + dev_err(&pdev->dev, "reset prepare not ready!\n"); + atomic_set(&qm->status.flags, QM_STOP); + cmd = QM_VF_PREPARE_FAIL; + goto err_prepare; + } + + ret = hisi_qm_stop(qm, stop_reason); + if (ret) { + dev_err(&pdev->dev, "failed to stop QM, ret = %d.\n", ret); + atomic_set(&qm->status.flags, QM_STOP); + cmd = QM_VF_PREPARE_FAIL; + goto err_prepare; + } + +err_prepare: + pci_save_state(pdev); + ret = qm->ops->ping_pf(qm, cmd); + if (ret) + dev_warn(&pdev->dev, "PF responds timeout in reset prepare!\n"); +} + +static void qm_pf_reset_vf_done(struct hisi_qm *qm) +{ + enum qm_mb_cmd cmd = QM_VF_START_DONE; + struct pci_dev *pdev = qm->pdev; + int ret; + + pci_restore_state(pdev); + ret = hisi_qm_start(qm); + if (ret) { + dev_err(&pdev->dev, "failed to start QM, ret = %d.\n", ret); + cmd = QM_VF_START_FAIL; + } + + ret = qm->ops->ping_pf(qm, cmd); + if (ret) + dev_warn(&pdev->dev, "PF responds timeout in reset done!\n"); + + qm_reset_bit_clear(qm); +} + +static int qm_wait_pf_reset_finish(struct hisi_qm *qm) +{ + struct device *dev = &qm->pdev->dev; + u32 val, cmd; + u64 msg; + int ret; + + /* Wait for reset to finish */ + ret = readl_relaxed_poll_timeout(qm->io_base + QM_IFC_INT_SOURCE_V, val, + val == BIT(0), QM_VF_RESET_WAIT_US, + QM_VF_RESET_WAIT_TIMEOUT_US); + /* hardware completion status should be available by this time */ + if (ret) { + dev_err(dev, "couldn't get reset done status from PF, timeout!\n"); + return -ETIMEDOUT; + } + + /* + * Whether message is got successfully, + * VF needs to ack PF by clearing the interrupt. 
+ */ + ret = qm_get_mb_cmd(qm, &msg, 0); + qm_clear_cmd_interrupt(qm, 0); + if (ret) { + dev_err(dev, "failed to get msg from PF in reset done!\n"); + return ret; + } + + cmd = msg & QM_MB_CMD_DATA_MASK; + if (cmd != QM_PF_RESET_DONE) { + dev_err(dev, "the cmd(%u) is not reset done!\n", cmd); + ret = -EINVAL; + } + + return ret; +} + +static void qm_pf_reset_vf_process(struct hisi_qm *qm, + enum qm_stop_reason stop_reason) +{ + struct device *dev = &qm->pdev->dev; + int ret; + + dev_info(dev, "device reset start...\n"); + + /* The message is obtained by querying the register during resetting */ + qm_cmd_uninit(qm); + qm_pf_reset_vf_prepare(qm, stop_reason); + + ret = qm_wait_pf_reset_finish(qm); + if (ret) + goto err_get_status; + + qm_pf_reset_vf_done(qm); + qm_cmd_init(qm); + + dev_info(dev, "device reset done.\n"); + + return; + +err_get_status: + qm_cmd_init(qm); + qm_reset_bit_clear(qm); +} + static void qm_cmd_process(struct work_struct *cmd_process) { struct hisi_qm *qm = container_of(cmd_process, struct hisi_qm, cmd_process); struct device *dev = &qm->pdev->dev; u64 msg; + u32 cmd; int ret; /* @@ -4635,9 +4866,23 @@ static void qm_cmd_process(struct work_struct *cmd_process) */ ret = qm_get_mb_cmd(qm, &msg, 0); qm_clear_cmd_interrupt(qm, 0); - if (ret) + if (ret) { dev_err(dev, "failed to get msg from source!\n"); + return; + } + cmd = msg & QM_MB_CMD_DATA_MASK; + switch (cmd) { + case QM_PF_FLR_PREPARE: + qm_pf_reset_vf_process(qm, QM_FLR); + break; + case QM_PF_SRST_PREPARE: + qm_pf_reset_vf_process(qm, QM_SOFT_RESET); + break; + default: + dev_err(dev, "unsupported cmd %u sent by PF!\n", cmd); + break; + } } /** From 57c126661f50b884d3812e7db6e00f2e778eccfb Mon Sep 17 00:00:00 2001 From: Tong Tiangen Date: Tue, 1 Jun 2021 18:01:55 +0800 Subject: [PATCH 106/142] crypto: nitrox - fix unchecked variable in nitrox_register_interrupts Function nitrox_register_interrupts() leaves the variable 'nr_vecs' unchecked, which would later be used as a kcalloc() parameter. Fixes: 5155e118dda9 ("crypto: cavium/nitrox - use pci_alloc_irq_vectors() while enabling MSI-X.") Signed-off-by: Tong Tiangen Signed-off-by: Herbert Xu --- drivers/crypto/cavium/nitrox/nitrox_isr.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/crypto/cavium/nitrox/nitrox_isr.c b/drivers/crypto/cavium/nitrox/nitrox_isr.c index c288c4b51783d..f19e520da6d0c 100644 --- a/drivers/crypto/cavium/nitrox/nitrox_isr.c +++ b/drivers/crypto/cavium/nitrox/nitrox_isr.c @@ -307,6 +307,10 @@ int nitrox_register_interrupts(struct nitrox_device *ndev) * Entry 192: NPS_CORE_INT_ACTIVE */ nr_vecs = pci_msix_vec_count(pdev); + if (nr_vecs < 0) { + dev_err(DEV(ndev), "Error in getting vec count %d\n", nr_vecs); + return nr_vecs; + } /* Enable MSI-X */ ret = pci_alloc_irq_vectors(pdev, nr_vecs, nr_vecs, PCI_IRQ_MSIX); From d950cd1b1c204c4a72e08c7c13862451f9d7d902 Mon Sep 17 00:00:00 2001 From: Zhang Qilong Date: Tue, 1 Jun 2021 22:51:17 +0800 Subject: [PATCH 107/142] crypto: omap-des - using pm_runtime_resume_and_get instead of pm_runtime_get_sync Use pm_runtime_resume_and_get() to replace the pm_runtime_get_sync() and pm_runtime_put_noidle() pair. This change just simplifies the code; there are no functional changes.
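[Editor's note: for readers unfamiliar with the helper, a sketch of what pm_runtime_resume_and_get() folds together; this describes the behaviour the conversion relies on, not the kernel's exact implementation.]

static inline int example_resume_and_get(struct device *dev)
{
	int ret = pm_runtime_get_sync(dev);	/* bumps the usage count even on failure */

	if (ret < 0) {
		pm_runtime_put_noidle(dev);	/* rebalance the count on failure */
		return ret;
	}

	return 0;
}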
Signed-off-by: Zhang Qilong Signed-off-by: Herbert Xu --- drivers/crypto/omap-des.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/drivers/crypto/omap-des.c b/drivers/crypto/omap-des.c index c9d38bcfd1c77..bc8631363d725 100644 --- a/drivers/crypto/omap-des.c +++ b/drivers/crypto/omap-des.c @@ -229,9 +229,8 @@ static int omap_des_hw_init(struct omap_des_dev *dd) * It may be long delays between requests. * Device might go to off mode to save power. */ - err = pm_runtime_get_sync(dd->dev); + err = pm_runtime_resume_and_get(dd->dev); if (err < 0) { - pm_runtime_put_noidle(dd->dev); dev_err(dd->dev, "%s: failed to get_sync(%d)\n", __func__, err); return err; } @@ -994,9 +993,8 @@ static int omap_des_probe(struct platform_device *pdev) pm_runtime_set_autosuspend_delay(dev, DEFAULT_AUTOSUSPEND_DELAY); pm_runtime_enable(dev); - err = pm_runtime_get_sync(dev); + err = pm_runtime_resume_and_get(dev); if (err < 0) { - pm_runtime_put_noidle(dev); dev_err(dd->dev, "%s: failed to get_sync(%d)\n", __func__, err); goto err_get; } @@ -1124,9 +1122,8 @@ static int omap_des_resume(struct device *dev) { int err; - err = pm_runtime_get_sync(dev); + err = pm_runtime_resume_and_get(dev); if (err < 0) { - pm_runtime_put_noidle(dev); dev_err(dev, "%s: failed to get_sync(%d)\n", __func__, err); return err; } From ca323b2c61ec321eb9f2179a405b9c34cdb4f553 Mon Sep 17 00:00:00 2001 From: Zhang Qilong Date: Tue, 1 Jun 2021 22:51:18 +0800 Subject: [PATCH 108/142] crypto: omap-sham - Fix PM reference leak in omap sham ops pm_runtime_get_sync() will increment the PM usage counter even when it fails. Forgetting the matching put operation will result in a reference leak here. We fix it by replacing the call with pm_runtime_resume_and_get() to keep the usage counter balanced. Fixes: 604c31039dae4 ("crypto: omap-sham - Check for return value from pm_runtime_get_sync") Signed-off-by: Zhang Qilong Signed-off-by: Herbert Xu --- drivers/crypto/omap-sham.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/omap-sham.c b/drivers/crypto/omap-sham.c index ae0d320d3c60d..dd53ad9987b0d 100644 --- a/drivers/crypto/omap-sham.c +++ b/drivers/crypto/omap-sham.c @@ -372,7 +372,7 @@ static int omap_sham_hw_init(struct omap_sham_dev *dd) { int err; - err = pm_runtime_get_sync(dd->dev); + err = pm_runtime_resume_and_get(dd->dev); if (err < 0) { dev_err(dd->dev, "failed to get sync: %d\n", err); return err; @@ -2244,7 +2244,7 @@ static int omap_sham_suspend(struct device *dev) static int omap_sham_resume(struct device *dev) { - int err = pm_runtime_get_sync(dev); + int err = pm_runtime_resume_and_get(dev); if (err < 0) { dev_err(dev, "failed to get sync: %d\n", err); return err; From 124d77c22c6183c76aa4bb71c29ee0c842562a5f Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Tue, 1 Jun 2021 15:11:28 +0000 Subject: [PATCH 109/142] dt-bindings: crypto: Add documentation for sl3516-ce This patch adds documentation for Device-Tree bindings for the SL3516-ce cryptographic offloader driver.
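[Editor's note: for orientation, a minimal sketch of how a platform driver matches the compatible string this binding documents; the real table ships with the driver added in the next patch.]

static const struct of_device_id example_match[] = {
	{ .compatible = "cortina,sl3516-crypto" },
	{ /* sentinel */ }
};
MODULE_DEVICE_TABLE(of, example_match);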
Reviewed-by: Linus Walleij Reviewed-by: Rob Herring Signed-off-by: Corentin Labbe Signed-off-by: Herbert Xu --- .../crypto/cortina,sl3516-crypto.yaml | 50 +++++++++++++++++++ 1 file changed, 50 insertions(+) create mode 100644 Documentation/devicetree/bindings/crypto/cortina,sl3516-crypto.yaml diff --git a/Documentation/devicetree/bindings/crypto/cortina,sl3516-crypto.yaml b/Documentation/devicetree/bindings/crypto/cortina,sl3516-crypto.yaml new file mode 100644 index 0000000000000..b633b8d0e6f05 --- /dev/null +++ b/Documentation/devicetree/bindings/crypto/cortina,sl3516-crypto.yaml @@ -0,0 +1,50 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/crypto/cortina,sl3516-crypto.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: SL3516 cryptographic offloader driver + +maintainers: + - Corentin Labbe + +properties: + compatible: + enum: + - cortina,sl3516-crypto + + reg: + maxItems: 1 + + interrupts: + maxItems: 1 + + clocks: + maxItems: 1 + + resets: + maxItems: 1 + +required: + - compatible + - reg + - interrupts + - clocks + - resets + +additionalProperties: false + +examples: + - | + #include + #include + #include + + crypto@62000000 { + compatible = "cortina,sl3516-crypto"; + reg = <0x62000000 0x10000>; + interrupts = <7 IRQ_TYPE_EDGE_RISING>; + resets = <&syscon GEMINI_RESET_SECURITY>; + clocks = <&syscon GEMINI_CLK_GATE_SECURITY>; + }; From 46c5338db7bd45b2cf99570560f00389d60fd6b4 Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Tue, 1 Jun 2021 15:11:29 +0000 Subject: [PATCH 110/142] crypto: sl3516 - Add sl3516 crypto engine The cortina/gemini SL3516 SoC has a crypto IP, named either crypto engine or crypto acceleration engine in the datasheet. It supports many algorithms, like [AES|DES|3DES][ECB|CBC], SHA1, MD5 and some HMAC modes. This patch adds the core files and support for ecb(aes) and the RNG. Acked-by: Linus Walleij Signed-off-by: Corentin Labbe Signed-off-by: Herbert Xu --- drivers/crypto/Kconfig | 19 + drivers/crypto/Makefile | 1 + drivers/crypto/gemini/Makefile | 2 + drivers/crypto/gemini/sl3516-ce-cipher.c | 388 ++++++++++++++++ drivers/crypto/gemini/sl3516-ce-core.c | 535 +++++++++++++++++++++++ drivers/crypto/gemini/sl3516-ce-rng.c | 61 +++ drivers/crypto/gemini/sl3516-ce.h | 347 +++++++++++++++ 7 files changed, 1353 insertions(+) create mode 100644 drivers/crypto/gemini/Makefile create mode 100644 drivers/crypto/gemini/sl3516-ce-cipher.c create mode 100644 drivers/crypto/gemini/sl3516-ce-core.c create mode 100644 drivers/crypto/gemini/sl3516-ce-rng.c create mode 100644 drivers/crypto/gemini/sl3516-ce.h diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig index 1d5b342e6b424..99b0907901788 100644 --- a/drivers/crypto/Kconfig +++ b/drivers/crypto/Kconfig @@ -266,6 +266,25 @@ config CRYPTO_DEV_NIAGARA2 Group, which can perform encryption, decryption, hashing, checksumming, and raw copies. +config CRYPTO_DEV_SL3516 + tristate "Stormlink SL3516 crypto offloader" + select CRYPTO_SKCIPHER + select CRYPTO_ENGINE + select CRYPTO_ECB + select CRYPTO_AES + select HW_RANDOM + help + This option allows you to have support for the SL3516 crypto offloader. + +config CRYPTO_DEV_SL3516_DEBUG + bool "Enable SL3516 stats" + depends on CRYPTO_DEV_SL3516 + depends on DEBUG_FS + help + Say y to enable SL3516 debug stats. + This will create /sys/kernel/debug/sl3516/stats for displaying + the number of requests per algorithm and other internal stats.
+ config CRYPTO_DEV_HIFN_795X tristate "Driver HIFN 795x crypto accelerator chips" select CRYPTO_LIB_DES diff --git a/drivers/crypto/Makefile b/drivers/crypto/Makefile index fa22cb19e242a..1fe5120eb9663 100644 --- a/drivers/crypto/Makefile +++ b/drivers/crypto/Makefile @@ -38,6 +38,7 @@ obj-$(CONFIG_CRYPTO_DEV_ROCKCHIP) += rockchip/ obj-$(CONFIG_CRYPTO_DEV_S5P) += s5p-sss.o obj-$(CONFIG_CRYPTO_DEV_SA2UL) += sa2ul.o obj-$(CONFIG_CRYPTO_DEV_SAHARA) += sahara.o +obj-$(CONFIG_CRYPTO_DEV_SL3516) += gemini/ obj-$(CONFIG_ARCH_STM32) += stm32/ obj-$(CONFIG_CRYPTO_DEV_TALITOS) += talitos.o obj-$(CONFIG_CRYPTO_DEV_UX500) += ux500/ diff --git a/drivers/crypto/gemini/Makefile b/drivers/crypto/gemini/Makefile new file mode 100644 index 0000000000000..c73c8b69260de --- /dev/null +++ b/drivers/crypto/gemini/Makefile @@ -0,0 +1,2 @@ +obj-$(CONFIG_CRYPTO_DEV_SL3516) += sl3516-ce.o +sl3516-ce-y += sl3516-ce-core.o sl3516-ce-cipher.o sl3516-ce-rng.o diff --git a/drivers/crypto/gemini/sl3516-ce-cipher.c b/drivers/crypto/gemini/sl3516-ce-cipher.c new file mode 100644 index 0000000000000..0b34a4971e498 --- /dev/null +++ b/drivers/crypto/gemini/sl3516-ce-cipher.c @@ -0,0 +1,388 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * sl3516-ce-cipher.c - hardware cryptographic offloader for Stormlink SL3516 SoC + * + * Copyright (C) 2021 Corentin LABBE + * + * This file adds support for AES cipher with 128,192,256 bits keysize in + * ECB mode. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include "sl3516-ce.h" + +/* sl3516_ce_need_fallback - check if a request can be handled by the CE */ +static bool sl3516_ce_need_fallback(struct skcipher_request *areq) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(areq); + struct sl3516_ce_cipher_tfm_ctx *op = crypto_skcipher_ctx(tfm); + struct sl3516_ce_dev *ce = op->ce; + struct scatterlist *in_sg = areq->src; + struct scatterlist *out_sg = areq->dst; + struct scatterlist *sg; + + if (areq->cryptlen == 0 || areq->cryptlen % 16) { + ce->fallback_mod16++; + return true; + } + + /* + * check if we have enough descriptors for TX + * Note: TX need one control desc for each SG + */ + if (sg_nents(areq->src) > MAXDESC / 2) { + ce->fallback_sg_count_tx++; + return true; + } + /* check if we have enough descriptors for RX */ + if (sg_nents(areq->dst) > MAXDESC) { + ce->fallback_sg_count_rx++; + return true; + } + + sg = areq->src; + while (sg) { + if ((sg->length % 16) != 0) { + ce->fallback_mod16++; + return true; + } + if ((sg_dma_len(sg) % 16) != 0) { + ce->fallback_mod16++; + return true; + } + if (!IS_ALIGNED(sg->offset, 16)) { + ce->fallback_align16++; + return true; + } + sg = sg_next(sg); + } + sg = areq->dst; + while (sg) { + if ((sg->length % 16) != 0) { + ce->fallback_mod16++; + return true; + } + if ((sg_dma_len(sg) % 16) != 0) { + ce->fallback_mod16++; + return true; + } + if (!IS_ALIGNED(sg->offset, 16)) { + ce->fallback_align16++; + return true; + } + sg = sg_next(sg); + } + + /* need same numbers of SG (with same length) for source and destination */ + in_sg = areq->src; + out_sg = areq->dst; + while (in_sg && out_sg) { + if (in_sg->length != out_sg->length) { + ce->fallback_not_same_len++; + return true; + } + in_sg = sg_next(in_sg); + out_sg = sg_next(out_sg); + } + if (in_sg || out_sg) + return true; + + return false; +} + +static int sl3516_ce_cipher_fallback(struct skcipher_request *areq) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(areq); + struct sl3516_ce_cipher_tfm_ctx *op = crypto_skcipher_ctx(tfm); 
+ struct sl3516_ce_cipher_req_ctx *rctx = skcipher_request_ctx(areq); + struct skcipher_alg *alg = crypto_skcipher_alg(tfm); + struct sl3516_ce_alg_template *algt; + int err; + + algt = container_of(alg, struct sl3516_ce_alg_template, alg.skcipher); + algt->stat_fb++; + + skcipher_request_set_tfm(&rctx->fallback_req, op->fallback_tfm); + skcipher_request_set_callback(&rctx->fallback_req, areq->base.flags, + areq->base.complete, areq->base.data); + skcipher_request_set_crypt(&rctx->fallback_req, areq->src, areq->dst, + areq->cryptlen, areq->iv); + if (rctx->op_dir == CE_DECRYPTION) + err = crypto_skcipher_decrypt(&rctx->fallback_req); + else + err = crypto_skcipher_encrypt(&rctx->fallback_req); + return err; +} + +static int sl3516_ce_cipher(struct skcipher_request *areq) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(areq); + struct sl3516_ce_cipher_tfm_ctx *op = crypto_skcipher_ctx(tfm); + struct sl3516_ce_dev *ce = op->ce; + struct sl3516_ce_cipher_req_ctx *rctx = skcipher_request_ctx(areq); + struct skcipher_alg *alg = crypto_skcipher_alg(tfm); + struct sl3516_ce_alg_template *algt; + struct scatterlist *sg; + unsigned int todo, len; + struct pkt_control_ecb *ecb; + int nr_sgs = 0; + int nr_sgd = 0; + int err = 0; + int i; + + algt = container_of(alg, struct sl3516_ce_alg_template, alg.skcipher); + + dev_dbg(ce->dev, "%s %s %u %x IV(%p %u) key=%u\n", __func__, + crypto_tfm_alg_name(areq->base.tfm), + areq->cryptlen, + rctx->op_dir, areq->iv, crypto_skcipher_ivsize(tfm), + op->keylen); + + algt->stat_req++; + + if (areq->src == areq->dst) { + nr_sgs = dma_map_sg(ce->dev, areq->src, sg_nents(areq->src), + DMA_BIDIRECTIONAL); + if (nr_sgs <= 0 || nr_sgs > MAXDESC / 2) { + dev_err(ce->dev, "Invalid sg number %d\n", nr_sgs); + err = -EINVAL; + goto theend; + } + nr_sgd = nr_sgs; + } else { + nr_sgs = dma_map_sg(ce->dev, areq->src, sg_nents(areq->src), + DMA_TO_DEVICE); + if (nr_sgs <= 0 || nr_sgs > MAXDESC / 2) { + dev_err(ce->dev, "Invalid sg number %d\n", nr_sgs); + err = -EINVAL; + goto theend; + } + nr_sgd = dma_map_sg(ce->dev, areq->dst, sg_nents(areq->dst), + DMA_FROM_DEVICE); + if (nr_sgd <= 0 || nr_sgd > MAXDESC) { + dev_err(ce->dev, "Invalid sg number %d\n", nr_sgd); + err = -EINVAL; + goto theend_sgs; + } + } + + len = areq->cryptlen; + i = 0; + sg = areq->src; + while (i < nr_sgs && sg && len) { + if (sg_dma_len(sg) == 0) + goto sgs_next; + rctx->t_src[i].addr = sg_dma_address(sg); + todo = min(len, sg_dma_len(sg)); + rctx->t_src[i].len = todo; + dev_dbg(ce->dev, "%s total=%u SGS(%d %u off=%d) todo=%u\n", __func__, + areq->cryptlen, i, rctx->t_src[i].len, sg->offset, todo); + len -= todo; + i++; +sgs_next: + sg = sg_next(sg); + } + if (len > 0) { + dev_err(ce->dev, "remaining len %d/%u nr_sgs=%d\n", len, areq->cryptlen, nr_sgs); + err = -EINVAL; + goto theend_sgs; + } + + len = areq->cryptlen; + i = 0; + sg = areq->dst; + while (i < nr_sgd && sg && len) { + if (sg_dma_len(sg) == 0) + goto sgd_next; + rctx->t_dst[i].addr = sg_dma_address(sg); + todo = min(len, sg_dma_len(sg)); + rctx->t_dst[i].len = todo; + dev_dbg(ce->dev, "%s total=%u SGD(%d %u off=%d) todo=%u\n", __func__, + areq->cryptlen, i, rctx->t_dst[i].len, sg->offset, todo); + len -= todo; + i++; + +sgd_next: + sg = sg_next(sg); + } + if (len > 0) { + dev_err(ce->dev, "remaining len %d\n", len); + err = -EINVAL; + goto theend_sgs; + } + + switch (algt->mode) { + case ECB_AES: + rctx->pctrllen = sizeof(struct pkt_control_ecb); + ecb = (struct pkt_control_ecb *)ce->pctrl; + + rctx->tqflag = TQ0_TYPE_CTRL; + 
rctx->tqflag |= TQ1_CIPHER; + ecb->control.op_mode = rctx->op_dir; + ecb->control.cipher_algorithm = ECB_AES; + ecb->cipher.header_len = 0; + ecb->cipher.algorithm_len = areq->cryptlen; + cpu_to_be32_array((__be32 *)ecb->key, (u32 *)op->key, op->keylen / 4); + rctx->h = &ecb->cipher; + + rctx->tqflag |= TQ4_KEY0; + rctx->tqflag |= TQ5_KEY4; + rctx->tqflag |= TQ6_KEY6; + ecb->control.aesnk = op->keylen / 4; + break; + } + + rctx->nr_sgs = nr_sgs; + rctx->nr_sgd = nr_sgd; + err = sl3516_ce_run_task(ce, rctx, crypto_tfm_alg_name(areq->base.tfm)); + +theend_sgs: + if (areq->src == areq->dst) { + dma_unmap_sg(ce->dev, areq->src, sg_nents(areq->src), + DMA_BIDIRECTIONAL); + } else { + dma_unmap_sg(ce->dev, areq->src, sg_nents(areq->src), + DMA_TO_DEVICE); + dma_unmap_sg(ce->dev, areq->dst, sg_nents(areq->dst), + DMA_FROM_DEVICE); + } + +theend: + + return err; +} + +static int sl3516_ce_handle_cipher_request(struct crypto_engine *engine, void *areq) +{ + int err; + struct skcipher_request *breq = container_of(areq, struct skcipher_request, base); + + err = sl3516_ce_cipher(breq); + crypto_finalize_skcipher_request(engine, breq, err); + + return 0; +} + +int sl3516_ce_skdecrypt(struct skcipher_request *areq) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(areq); + struct sl3516_ce_cipher_tfm_ctx *op = crypto_skcipher_ctx(tfm); + struct sl3516_ce_cipher_req_ctx *rctx = skcipher_request_ctx(areq); + struct crypto_engine *engine; + + memset(rctx, 0, sizeof(struct sl3516_ce_cipher_req_ctx)); + rctx->op_dir = CE_DECRYPTION; + + if (sl3516_ce_need_fallback(areq)) + return sl3516_ce_cipher_fallback(areq); + + engine = op->ce->engine; + + return crypto_transfer_skcipher_request_to_engine(engine, areq); +} + +int sl3516_ce_skencrypt(struct skcipher_request *areq) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(areq); + struct sl3516_ce_cipher_tfm_ctx *op = crypto_skcipher_ctx(tfm); + struct sl3516_ce_cipher_req_ctx *rctx = skcipher_request_ctx(areq); + struct crypto_engine *engine; + + memset(rctx, 0, sizeof(struct sl3516_ce_cipher_req_ctx)); + rctx->op_dir = CE_ENCRYPTION; + + if (sl3516_ce_need_fallback(areq)) + return sl3516_ce_cipher_fallback(areq); + + engine = op->ce->engine; + + return crypto_transfer_skcipher_request_to_engine(engine, areq); +} + +int sl3516_ce_cipher_init(struct crypto_tfm *tfm) +{ + struct sl3516_ce_cipher_tfm_ctx *op = crypto_tfm_ctx(tfm); + struct sl3516_ce_alg_template *algt; + const char *name = crypto_tfm_alg_name(tfm); + struct crypto_skcipher *sktfm = __crypto_skcipher_cast(tfm); + struct skcipher_alg *alg = crypto_skcipher_alg(sktfm); + int err; + + memset(op, 0, sizeof(struct sl3516_ce_cipher_tfm_ctx)); + + algt = container_of(alg, struct sl3516_ce_alg_template, alg.skcipher); + op->ce = algt->ce; + + op->fallback_tfm = crypto_alloc_skcipher(name, 0, CRYPTO_ALG_NEED_FALLBACK); + if (IS_ERR(op->fallback_tfm)) { + dev_err(op->ce->dev, "ERROR: Cannot allocate fallback for %s %ld\n", + name, PTR_ERR(op->fallback_tfm)); + return PTR_ERR(op->fallback_tfm); + } + + sktfm->reqsize = sizeof(struct sl3516_ce_cipher_req_ctx) + + crypto_skcipher_reqsize(op->fallback_tfm); + + dev_info(op->ce->dev, "Fallback for %s is %s\n", + crypto_tfm_alg_driver_name(&sktfm->base), + crypto_tfm_alg_driver_name(crypto_skcipher_tfm(op->fallback_tfm))); + + op->enginectx.op.do_one_request = sl3516_ce_handle_cipher_request; + op->enginectx.op.prepare_request = NULL; + op->enginectx.op.unprepare_request = NULL; + + err = pm_runtime_get_sync(op->ce->dev); + if (err < 0) + goto 
error_pm; + + return 0; +error_pm: + pm_runtime_put_noidle(op->ce->dev); + crypto_free_skcipher(op->fallback_tfm); + return err; +} + +void sl3516_ce_cipher_exit(struct crypto_tfm *tfm) +{ + struct sl3516_ce_cipher_tfm_ctx *op = crypto_tfm_ctx(tfm); + + kfree_sensitive(op->key); + crypto_free_skcipher(op->fallback_tfm); + pm_runtime_put_sync_suspend(op->ce->dev); +} + +int sl3516_ce_aes_setkey(struct crypto_skcipher *tfm, const u8 *key, + unsigned int keylen) +{ + struct sl3516_ce_cipher_tfm_ctx *op = crypto_skcipher_ctx(tfm); + struct sl3516_ce_dev *ce = op->ce; + + switch (keylen) { + case 128 / 8: + break; + case 192 / 8: + break; + case 256 / 8: + break; + default: + dev_dbg(ce->dev, "ERROR: Invalid keylen %u\n", keylen); + return -EINVAL; + } + kfree_sensitive(op->key); + op->keylen = keylen; + op->key = kmemdup(key, keylen, GFP_KERNEL | GFP_DMA); + if (!op->key) + return -ENOMEM; + + crypto_skcipher_clear_flags(op->fallback_tfm, CRYPTO_TFM_REQ_MASK); + crypto_skcipher_set_flags(op->fallback_tfm, tfm->base.crt_flags & CRYPTO_TFM_REQ_MASK); + + return crypto_skcipher_setkey(op->fallback_tfm, key, keylen); +} diff --git a/drivers/crypto/gemini/sl3516-ce-core.c b/drivers/crypto/gemini/sl3516-ce-core.c new file mode 100644 index 0000000000000..da6cd529a6c01 --- /dev/null +++ b/drivers/crypto/gemini/sl3516-ce-core.c @@ -0,0 +1,535 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * sl3516-ce-core.c - hardware cryptographic offloader for Stormlink SL3516 SoC + * + * Copyright (C) 2021 Corentin Labbe + * + * Core file which registers crypto algorithms supported by the CryptoEngine + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "sl3516-ce.h" + +static int sl3516_ce_desc_init(struct sl3516_ce_dev *ce) +{ + const size_t sz = sizeof(struct descriptor) * MAXDESC; + int i; + + ce->tx = dma_alloc_coherent(ce->dev, sz, &ce->dtx, GFP_KERNEL); + if (!ce->tx) + return -ENOMEM; + ce->rx = dma_alloc_coherent(ce->dev, sz, &ce->drx, GFP_KERNEL); + if (!ce->rx) + goto err_rx; + + for (i = 0; i < MAXDESC; i++) { + ce->tx[i].frame_ctrl.bits.own = CE_CPU; + ce->tx[i].next_desc.next_descriptor = ce->dtx + (i + 1) * sizeof(struct descriptor); + } + ce->tx[MAXDESC - 1].next_desc.next_descriptor = ce->dtx; + + for (i = 0; i < MAXDESC; i++) { + ce->rx[i].frame_ctrl.bits.own = CE_CPU; + ce->rx[i].next_desc.next_descriptor = ce->drx + (i + 1) * sizeof(struct descriptor); + } + ce->rx[MAXDESC - 1].next_desc.next_descriptor = ce->drx; + + ce->pctrl = dma_alloc_coherent(ce->dev, sizeof(struct pkt_control_ecb), + &ce->dctrl, GFP_KERNEL); + if (!ce->pctrl) + goto err_pctrl; + + return 0; +err_pctrl: + dma_free_coherent(ce->dev, sz, ce->rx, ce->drx); +err_rx: + dma_free_coherent(ce->dev, sz, ce->tx, ce->dtx); + return -ENOMEM; +} + +static void sl3516_ce_free_descs(struct sl3516_ce_dev *ce) +{ + const size_t sz = sizeof(struct descriptor) * MAXDESC; + + dma_free_coherent(ce->dev, sz, ce->tx, ce->dtx); + dma_free_coherent(ce->dev, sz, ce->rx, ce->drx); + dma_free_coherent(ce->dev, sizeof(struct pkt_control_ecb), ce->pctrl, + ce->dctrl); +} + +static void start_dma_tx(struct sl3516_ce_dev *ce) +{ + u32 v; + + v = TXDMA_CTRL_START | TXDMA_CTRL_CHAIN_MODE | TXDMA_CTRL_CONTINUE | \ + TXDMA_CTRL_INT_FAIL | TXDMA_CTRL_INT_PERR | TXDMA_CTRL_BURST_UNK; + + writel(v, ce->base + IPSEC_TXDMA_CTRL); +} + +static void start_dma_rx(struct sl3516_ce_dev *ce) +{ + u32 v; + + v = RXDMA_CTRL_START | 
RXDMA_CTRL_CHAIN_MODE | RXDMA_CTRL_CONTINUE | \ + RXDMA_CTRL_BURST_UNK | RXDMA_CTRL_INT_FINISH | \ + RXDMA_CTRL_INT_FAIL | RXDMA_CTRL_INT_PERR | \ + RXDMA_CTRL_INT_EOD | RXDMA_CTRL_INT_EOF; + + writel(v, ce->base + IPSEC_RXDMA_CTRL); +} + +static struct descriptor *get_desc_tx(struct sl3516_ce_dev *ce) +{ + struct descriptor *dd; + + dd = &ce->tx[ce->ctx]; + ce->ctx++; + if (ce->ctx >= MAXDESC) + ce->ctx = 0; + return dd; +} + +static struct descriptor *get_desc_rx(struct sl3516_ce_dev *ce) +{ + struct descriptor *rdd; + + rdd = &ce->rx[ce->crx]; + ce->crx++; + if (ce->crx >= MAXDESC) + ce->crx = 0; + return rdd; +} + +int sl3516_ce_run_task(struct sl3516_ce_dev *ce, struct sl3516_ce_cipher_req_ctx *rctx, + const char *name) +{ + struct descriptor *dd, *rdd = NULL; + u32 v; + int i, err = 0; + + ce->stat_req++; + + reinit_completion(&ce->complete); + ce->status = 0; + + for (i = 0; i < rctx->nr_sgd; i++) { + dev_dbg(ce->dev, "%s handle DST SG %d/%d len=%d\n", __func__, + i, rctx->nr_sgd, rctx->t_dst[i].len); + rdd = get_desc_rx(ce); + rdd->buf_adr = rctx->t_dst[i].addr; + rdd->frame_ctrl.bits.buffer_size = rctx->t_dst[i].len; + rdd->frame_ctrl.bits.own = CE_DMA; + } + rdd->next_desc.bits.eofie = 1; + + for (i = 0; i < rctx->nr_sgs; i++) { + dev_dbg(ce->dev, "%s handle SRC SG %d/%d len=%d\n", __func__, + i, rctx->nr_sgs, rctx->t_src[i].len); + rctx->h->algorithm_len = rctx->t_src[i].len; + + dd = get_desc_tx(ce); + dd->frame_ctrl.raw = 0; + dd->flag_status.raw = 0; + dd->frame_ctrl.bits.buffer_size = rctx->pctrllen; + dd->buf_adr = ce->dctrl; + dd->flag_status.tx_flag.tqflag = rctx->tqflag; + dd->next_desc.bits.eofie = 0; + dd->next_desc.bits.dec = 0; + dd->next_desc.bits.sof_eof = DESC_FIRST | DESC_LAST; + dd->frame_ctrl.bits.own = CE_DMA; + + dd = get_desc_tx(ce); + dd->frame_ctrl.raw = 0; + dd->flag_status.raw = 0; + dd->frame_ctrl.bits.buffer_size = rctx->t_src[i].len; + dd->buf_adr = rctx->t_src[i].addr; + dd->flag_status.tx_flag.tqflag = 0; + dd->next_desc.bits.eofie = 0; + dd->next_desc.bits.dec = 0; + dd->next_desc.bits.sof_eof = DESC_FIRST | DESC_LAST; + dd->frame_ctrl.bits.own = CE_DMA; + start_dma_tx(ce); + start_dma_rx(ce); + } + wait_for_completion_interruptible_timeout(&ce->complete, + msecs_to_jiffies(5000)); + if (ce->status == 0) { + dev_err(ce->dev, "DMA timeout for %s\n", name); + err = -EFAULT; + } + v = readl(ce->base + IPSEC_STATUS_REG); + if (v & 0xFFF) { + dev_err(ce->dev, "IPSEC_STATUS_REG %x\n", v); + err = -EFAULT; + } + + return err; +} + +static irqreturn_t ce_irq_handler(int irq, void *data) +{ + struct sl3516_ce_dev *ce = (struct sl3516_ce_dev *)data; + u32 v; + + ce->stat_irq++; + + v = readl(ce->base + IPSEC_DMA_STATUS); + writel(v, ce->base + IPSEC_DMA_STATUS); + + if (v & DMA_STATUS_TS_DERR) + dev_err(ce->dev, "AHB bus Error While Tx !!!\n"); + if (v & DMA_STATUS_TS_PERR) + dev_err(ce->dev, "Tx Descriptor Protocol Error !!!\n"); + if (v & DMA_STATUS_RS_DERR) + dev_err(ce->dev, "AHB bus Error While Rx !!!\n"); + if (v & DMA_STATUS_RS_PERR) + dev_err(ce->dev, "Rx Descriptor Protocol Error !!!\n"); + + if (v & DMA_STATUS_TS_EOFI) + ce->stat_irq_tx++; + if (v & DMA_STATUS_RS_EOFI) { + ce->status = 1; + complete(&ce->complete); + ce->stat_irq_rx++; + return IRQ_HANDLED; + } + + return IRQ_HANDLED; +} + +static struct sl3516_ce_alg_template ce_algs[] = { +{ + .type = CRYPTO_ALG_TYPE_SKCIPHER, + .mode = ECB_AES, + .alg.skcipher = { + .base = { + .cra_name = "ecb(aes)", + .cra_driver_name = "ecb-aes-sl3516", + .cra_priority = 400, + .cra_blocksize = 
AES_BLOCK_SIZE, + .cra_flags = CRYPTO_ALG_TYPE_SKCIPHER | + CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK, + .cra_ctxsize = sizeof(struct sl3516_ce_cipher_tfm_ctx), + .cra_module = THIS_MODULE, + .cra_alignmask = 0xf, + .cra_init = sl3516_ce_cipher_init, + .cra_exit = sl3516_ce_cipher_exit, + }, + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .setkey = sl3516_ce_aes_setkey, + .encrypt = sl3516_ce_skencrypt, + .decrypt = sl3516_ce_skdecrypt, + } +}, +}; + +#ifdef CONFIG_CRYPTO_DEV_SL3516_DEBUG +static int sl3516_ce_debugfs_show(struct seq_file *seq, void *v) +{ + struct sl3516_ce_dev *ce = seq->private; + unsigned int i; + + seq_printf(seq, "HWRNG %lu %lu\n", + ce->hwrng_stat_req, ce->hwrng_stat_bytes); + seq_printf(seq, "IRQ %lu\n", ce->stat_irq); + seq_printf(seq, "IRQ TX %lu\n", ce->stat_irq_tx); + seq_printf(seq, "IRQ RX %lu\n", ce->stat_irq_rx); + seq_printf(seq, "nreq %lu\n", ce->stat_req); + seq_printf(seq, "fallback SG count TX %lu\n", ce->fallback_sg_count_tx); + seq_printf(seq, "fallback SG count RX %lu\n", ce->fallback_sg_count_rx); + seq_printf(seq, "fallback modulo16 %lu\n", ce->fallback_mod16); + seq_printf(seq, "fallback align16 %lu\n", ce->fallback_align16); + seq_printf(seq, "fallback not same len %lu\n", ce->fallback_not_same_len); + + for (i = 0; i < ARRAY_SIZE(ce_algs); i++) { + if (!ce_algs[i].ce) + continue; + switch (ce_algs[i].type) { + case CRYPTO_ALG_TYPE_SKCIPHER: + seq_printf(seq, "%s %s reqs=%lu fallback=%lu\n", + ce_algs[i].alg.skcipher.base.cra_driver_name, + ce_algs[i].alg.skcipher.base.cra_name, + ce_algs[i].stat_req, ce_algs[i].stat_fb); + break; + } + } + return 0; +} + +DEFINE_SHOW_ATTRIBUTE(sl3516_ce_debugfs); +#endif + +static int sl3516_ce_register_algs(struct sl3516_ce_dev *ce) +{ + int err; + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(ce_algs); i++) { + ce_algs[i].ce = ce; + switch (ce_algs[i].type) { + case CRYPTO_ALG_TYPE_SKCIPHER: + dev_info(ce->dev, "DEBUG: Register %s\n", + ce_algs[i].alg.skcipher.base.cra_name); + err = crypto_register_skcipher(&ce_algs[i].alg.skcipher); + if (err) { + dev_err(ce->dev, "Fail to register %s\n", + ce_algs[i].alg.skcipher.base.cra_name); + ce_algs[i].ce = NULL; + return err; + } + break; + default: + ce_algs[i].ce = NULL; + dev_err(ce->dev, "ERROR: tried to register an unknown algo\n"); + } + } + return 0; +} + +static void sl3516_ce_unregister_algs(struct sl3516_ce_dev *ce) +{ + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(ce_algs); i++) { + if (!ce_algs[i].ce) + continue; + switch (ce_algs[i].type) { + case CRYPTO_ALG_TYPE_SKCIPHER: + dev_info(ce->dev, "Unregister %d %s\n", i, + ce_algs[i].alg.skcipher.base.cra_name); + crypto_unregister_skcipher(&ce_algs[i].alg.skcipher); + break; + } + } +} + +static void sl3516_ce_start(struct sl3516_ce_dev *ce) +{ + ce->ctx = 0; + ce->crx = 0; + writel(ce->dtx, ce->base + IPSEC_TXDMA_CURR_DESC); + writel(ce->drx, ce->base + IPSEC_RXDMA_CURR_DESC); + writel(0, ce->base + IPSEC_DMA_STATUS); +} + +/* + * Power management strategy: The device is suspended unless a TFM exists for + * one of the algorithms proposed by this driver. 
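+ * (Editor's note, from the code in this file: sl3516_ce_cipher_init() takes the runtime PM reference with pm_runtime_get_sync() and sl3516_ce_cipher_exit() drops it with pm_runtime_put_sync_suspend(), so the engine autosuspends once the last tfm goes away.)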
+ */ +static int sl3516_ce_pm_suspend(struct device *dev) +{ + struct sl3516_ce_dev *ce = dev_get_drvdata(dev); + + reset_control_assert(ce->reset); + clk_disable_unprepare(ce->clks); + return 0; +} + +static int sl3516_ce_pm_resume(struct device *dev) +{ + struct sl3516_ce_dev *ce = dev_get_drvdata(dev); + int err; + + err = clk_prepare_enable(ce->clks); + if (err) { + dev_err(ce->dev, "Cannot prepare_enable\n"); + goto error; + } + err = reset_control_deassert(ce->reset); + if (err) { + dev_err(ce->dev, "Cannot deassert reset control\n"); + goto error; + } + + sl3516_ce_start(ce); + + return 0; +error: + sl3516_ce_pm_suspend(dev); + return err; +} + +static const struct dev_pm_ops sl3516_ce_pm_ops = { + SET_RUNTIME_PM_OPS(sl3516_ce_pm_suspend, sl3516_ce_pm_resume, NULL) +}; + +static int sl3516_ce_pm_init(struct sl3516_ce_dev *ce) +{ + int err; + + pm_runtime_use_autosuspend(ce->dev); + pm_runtime_set_autosuspend_delay(ce->dev, 2000); + + err = pm_runtime_set_suspended(ce->dev); + if (err) + return err; + pm_runtime_enable(ce->dev); + return err; +} + +static void sl3516_ce_pm_exit(struct sl3516_ce_dev *ce) +{ + pm_runtime_disable(ce->dev); +} + +static int sl3516_ce_probe(struct platform_device *pdev) +{ + struct sl3516_ce_dev *ce; + int err, irq; + u32 v; + + ce = devm_kzalloc(&pdev->dev, sizeof(*ce), GFP_KERNEL); + if (!ce) + return -ENOMEM; + + ce->dev = &pdev->dev; + platform_set_drvdata(pdev, ce); + + ce->base = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(ce->base)) + return PTR_ERR(ce->base); + + irq = platform_get_irq(pdev, 0); + if (irq < 0) + return irq; + + err = devm_request_irq(&pdev->dev, irq, ce_irq_handler, 0, "crypto", ce); + if (err) { + dev_err(ce->dev, "Cannot request Crypto Engine IRQ (err=%d)\n", err); + return err; + } + + ce->reset = devm_reset_control_get(&pdev->dev, NULL); + if (IS_ERR(ce->reset)) + return dev_err_probe(&pdev->dev, PTR_ERR(ce->reset), + "No reset control found\n"); + ce->clks = devm_clk_get(ce->dev, NULL); + if (IS_ERR(ce->clks)) { + err = PTR_ERR(ce->clks); + dev_err(ce->dev, "Cannot get clock err=%d\n", err); + return err; + } + + err = sl3516_ce_desc_init(ce); + if (err) + return err; + + err = sl3516_ce_pm_init(ce); + if (err) + goto error_pm; + + init_completion(&ce->complete); + + ce->engine = crypto_engine_alloc_init(ce->dev, true); + if (!ce->engine) { + dev_err(ce->dev, "Cannot allocate engine\n"); + err = -ENOMEM; + goto error_engine; + } + + err = crypto_engine_start(ce->engine); + if (err) { + dev_err(ce->dev, "Cannot start engine\n"); + goto error_engine; + } + + err = sl3516_ce_register_algs(ce); + if (err) + goto error_alg; + + err = sl3516_ce_rng_register(ce); + if (err) + goto error_rng; + + err = pm_runtime_resume_and_get(ce->dev); + if (err < 0) + goto error_pmuse; + + v = readl(ce->base + IPSEC_ID); + dev_info(ce->dev, "SL3516 dev %lx rev %lx\n", + v & GENMASK(31, 4), + v & GENMASK(3, 0)); + v = readl(ce->base + IPSEC_DMA_DEVICE_ID); + dev_info(ce->dev, "SL3516 DMA dev %lx rev %lx\n", + v & GENMASK(15, 4), + v & GENMASK(3, 0)); + + pm_runtime_put_sync(ce->dev); + +#ifdef CONFIG_CRYPTO_DEV_SL3516_DEBUG + /* Ignore error of debugfs */ + ce->dbgfs_dir = debugfs_create_dir("sl3516", NULL); + ce->dbgfs_stats = debugfs_create_file("stats", 0444, + ce->dbgfs_dir, ce, + &sl3516_ce_debugfs_fops); +#endif + + return 0; +error_pmuse: + sl3516_ce_rng_unregister(ce); +error_rng: + sl3516_ce_unregister_algs(ce); +error_alg: + crypto_engine_exit(ce->engine); +error_engine: + sl3516_ce_pm_exit(ce); +error_pm: + 
sl3516_ce_free_descs(ce); + return err; +} + +static int sl3516_ce_remove(struct platform_device *pdev) +{ + struct sl3516_ce_dev *ce = platform_get_drvdata(pdev); + + sl3516_ce_rng_unregister(ce); + sl3516_ce_unregister_algs(ce); + crypto_engine_exit(ce->engine); + sl3516_ce_pm_exit(ce); + sl3516_ce_free_descs(ce); + +#ifdef CONFIG_CRYPTO_DEV_SL3516_DEBUG + debugfs_remove_recursive(ce->dbgfs_dir); +#endif + + return 0; +} + +static const struct of_device_id sl3516_ce_crypto_of_match_table[] = { + { .compatible = "cortina,sl3516-crypto"}, + {} +}; +MODULE_DEVICE_TABLE(of, sl3516_ce_crypto_of_match_table); + +static struct platform_driver sl3516_ce_driver = { + .probe = sl3516_ce_probe, + .remove = sl3516_ce_remove, + .driver = { + .name = "sl3516-crypto", + .pm = &sl3516_ce_pm_ops, + .of_match_table = sl3516_ce_crypto_of_match_table, + }, +}; + +module_platform_driver(sl3516_ce_driver); + +MODULE_DESCRIPTION("SL3516 cryptographic offloader"); +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Corentin Labbe "); diff --git a/drivers/crypto/gemini/sl3516-ce-rng.c b/drivers/crypto/gemini/sl3516-ce-rng.c new file mode 100644 index 0000000000000..76931ec1cec55 --- /dev/null +++ b/drivers/crypto/gemini/sl3516-ce-rng.c @@ -0,0 +1,61 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * sl3516-ce-rng.c - hardware cryptographic offloader for SL3516 SoC. + * + * Copyright (C) 2021 Corentin Labbe + * + * This file handle the RNG found in the SL3516 crypto engine + */ +#include "sl3516-ce.h" +#include +#include + +static int sl3516_ce_rng_read(struct hwrng *rng, void *buf, size_t max, bool wait) +{ + struct sl3516_ce_dev *ce; + u32 *data = buf; + size_t read = 0; + int err; + + ce = container_of(rng, struct sl3516_ce_dev, trng); + +#ifdef CONFIG_CRYPTO_DEV_SL3516_DEBUG + ce->hwrng_stat_req++; + ce->hwrng_stat_bytes += max; +#endif + + err = pm_runtime_get_sync(ce->dev); + if (err < 0) { + pm_runtime_put_noidle(ce->dev); + return err; + } + + while (read < max) { + *data = readl(ce->base + IPSEC_RAND_NUM_REG); + data++; + read += 4; + } + + pm_runtime_put(ce->dev); + + return read; +} + +int sl3516_ce_rng_register(struct sl3516_ce_dev *ce) +{ + int ret; + + ce->trng.name = "SL3516 Crypto Engine RNG"; + ce->trng.read = sl3516_ce_rng_read; + ce->trng.quality = 700; + + ret = hwrng_register(&ce->trng); + if (ret) + dev_err(ce->dev, "Fail to register the RNG\n"); + return ret; +} + +void sl3516_ce_rng_unregister(struct sl3516_ce_dev *ce) +{ + hwrng_unregister(&ce->trng); +} diff --git a/drivers/crypto/gemini/sl3516-ce.h b/drivers/crypto/gemini/sl3516-ce.h new file mode 100644 index 0000000000000..4c0ec6c920d1d --- /dev/null +++ b/drivers/crypto/gemini/sl3516-ce.h @@ -0,0 +1,347 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * sl3516-ce.h - hardware cryptographic offloader for cortina/gemini SoC + * + * Copyright (C) 2021 Corentin LABBE + * + * General notes on this driver: + * Called either Crypto Acceleration Engine Module, Security Acceleration Engine + * or IPSEC module in the datasheet, it will be called Crypto Engine for short + * in this driver. + * The CE was designed to handle IPSEC and wifi(TKIP WEP) protocol. + * It can handle AES, DES, 3DES, MD5, WEP, TKIP, SHA1, HMAC(MD5), HMAC(SHA1), + * Michael cipher/digest suites. + * It acts the same as a network hw, with both RX and TX chained descriptors. 
+ */ +#include +#include +#include +#include +#include +#include +#include + +#define TQ0_TYPE_DATA 0 +#define TQ0_TYPE_CTRL BIT(0) +#define TQ1_CIPHER BIT(1) +#define TQ2_AUTH BIT(2) +#define TQ3_IV BIT(3) +#define TQ4_KEY0 BIT(4) +#define TQ5_KEY4 BIT(5) +#define TQ6_KEY6 BIT(6) +#define TQ7_AKEY0 BIT(7) +#define TQ8_AKEY2 BIT(8) +#define TQ9_AKEY2 BIT(9) + +#define ECB_AES 0x2 + +#define DESC_LAST 0x01 +#define DESC_FIRST 0x02 + +#define IPSEC_ID 0x0000 +#define IPSEC_STATUS_REG 0x00a8 +#define IPSEC_RAND_NUM_REG 0x00ac +#define IPSEC_DMA_DEVICE_ID 0xff00 +#define IPSEC_DMA_STATUS 0xff04 +#define IPSEC_TXDMA_CTRL 0xff08 +#define IPSEC_TXDMA_FIRST_DESC 0xff0c +#define IPSEC_TXDMA_CURR_DESC 0xff10 +#define IPSEC_RXDMA_CTRL 0xff14 +#define IPSEC_RXDMA_FIRST_DESC 0xff18 +#define IPSEC_RXDMA_CURR_DESC 0xff1c +#define IPSEC_TXDMA_BUF_ADDR 0xff28 +#define IPSEC_RXDMA_BUF_ADDR 0xff38 +#define IPSEC_RXDMA_BUF_SIZE 0xff30 + +#define CE_ENCRYPTION 0x01 +#define CE_DECRYPTION 0x03 + +#define MAXDESC 6 + +#define DMA_STATUS_RS_EOFI BIT(22) +#define DMA_STATUS_RS_PERR BIT(24) +#define DMA_STATUS_RS_DERR BIT(25) +#define DMA_STATUS_TS_EOFI BIT(27) +#define DMA_STATUS_TS_PERR BIT(29) +#define DMA_STATUS_TS_DERR BIT(30) + +#define TXDMA_CTRL_START BIT(31) +#define TXDMA_CTRL_CONTINUE BIT(30) +#define TXDMA_CTRL_CHAIN_MODE BIT(29) +/* the burst value is not documented in the datasheet */ +#define TXDMA_CTRL_BURST_UNK BIT(22) +#define TXDMA_CTRL_INT_FAIL BIT(17) +#define TXDMA_CTRL_INT_PERR BIT(16) + +#define RXDMA_CTRL_START BIT(31) +#define RXDMA_CTRL_CONTINUE BIT(30) +#define RXDMA_CTRL_CHAIN_MODE BIT(29) +/* the burst value is not documented in the datasheet */ +#define RXDMA_CTRL_BURST_UNK BIT(22) +#define RXDMA_CTRL_INT_FINISH BIT(18) +#define RXDMA_CTRL_INT_FAIL BIT(17) +#define RXDMA_CTRL_INT_PERR BIT(16) +#define RXDMA_CTRL_INT_EOD BIT(15) +#define RXDMA_CTRL_INT_EOF BIT(14) + +#define CE_CPU 0 +#define CE_DMA 1 + +/* + * struct sl3516_ce_descriptor - descriptor for CE operations + * @frame_ctrl: Information for the current descriptor + * @flag_status: For send packet, describe flag of operations. + * @buf_adr: pointer to a send/recv buffer for data packet + * @next_desc: control linking to other descriptors + */ +struct descriptor { + union { + u32 raw; + /* + * struct desc_frame_ctrl - Information for the current descriptor + * @buffer_size: the size of buffer at buf_adr + * @desc_count: Upon completion of a DMA operation, DMA + * write the number of descriptors used + * for the current frame + * @checksum: unknown + * @authcomp: unknown + * @perr: Protocol error during processing this descriptor + * @derr: Data error during processing this descriptor + * @own: 0 if owned by CPU, 1 for DMA + */ + struct desc_frame_ctrl { + u32 buffer_size :16; + u32 desc_count :6; + u32 checksum :6; + u32 authcomp :1; + u32 perr :1; + u32 derr :1; + u32 own :1; + } bits; + } frame_ctrl; + + union { + u32 raw; + /* + * struct desc_flag_status - flag for this descriptor + * @tqflag: list of flag describing the type of operation + * to be performed. 
+ */ + struct desc_tx_flag_status { + u32 tqflag :10; + u32 unused :22; + } tx_flag; + } flag_status; + + u32 buf_adr; + + union { + u32 next_descriptor; + /* + * struct desc_next - describe chaining of descriptors + * @sof_eof: does the descriptor is first (0x11), + * the last (0x01), middle of a chan (0x00) + * or the only one (0x11) + * @dec: AHB bus address increase (0), decrease (1) + * @eofie: End of frame interrupt enable + * @ndar: Next descriptor address + */ + struct desc_next { + u32 sof_eof :2; + u32 dec :1; + u32 eofie :1; + u32 ndar :28; + } bits; + } next_desc; +}; + +/* + * struct control - The value of this register is used to set the + * operation mode of the IPSec Module. + * @process_id: Used to identify the process. The number will be copied + * to the descriptor status of the received packet. + * @auth_check_len: Number of 32-bit words to be checked or appended by the + * authentication module + * @auth_algorithm: + * @auth_mode: 0:append 1:Check Authentication Result + * @fcs_stream_copy: 0:enable 1:disable authentication stream copy + * @mix_key_sel: 0:use rCipherKey0-3 1:use Key Mixer + * @aesnk: AES Key Size + * @cipher_algorithm: choice of CBC/ECE and AES/DES/3DES + * @op_mode: Operation Mode for the IPSec Module + */ +struct pkt_control_header { + u32 process_id :8; + u32 auth_check_len :3; + u32 un1 :1; + u32 auth_algorithm :3; + u32 auth_mode :1; + u32 fcs_stream_copy :1; + u32 un2 :2; + u32 mix_key_sel :1; + u32 aesnk :4; + u32 cipher_algorithm :3; + u32 un3 :1; + u32 op_mode :4; +}; + +struct pkt_control_cipher { + u32 algorithm_len :16; + u32 header_len :16; +}; + +/* + * struct pkt_control_ecb - control packet for ECB + */ +struct pkt_control_ecb { + struct pkt_control_header control; + struct pkt_control_cipher cipher; + unsigned char key[AES_MAX_KEY_SIZE]; +}; + +/* + * struct sl3516_ce_dev - main container for all this driver information + * @base: base address + * @clks: clocks used + * @reset: pointer to reset controller + * @dev: the platform device + * @engine: ptr to the crypto/crypto_engine + * @complete: completion for the current task on this flow + * @status: set to 1 by interrupt if task is done + * @dtx: base DMA address for TX descriptors + * @tx base address of TX descriptors + * @drx: base DMA address for RX descriptors + * @rx base address of RX descriptors + * @ctx current used TX descriptor + * @crx current used RX descriptor + * @trng hw_random structure for RNG + * @hwrng_stat_req number of HWRNG requests + * @hwrng_stat_bytes total number of bytes generated by RNG + * @stat_irq number of IRQ handled by CE + * @stat_irq_tx number of TX IRQ handled by CE + * @stat_irq_rx number of RX IRQ handled by CE + * @stat_req number of requests handled by CE + * @fallbak_sg_count_tx number of fallback due to destination SG count + * @fallbak_sg_count_rx number of fallback due to source SG count + * @fallbak_not_same_len number of fallback due to difference in SG length + * @dbgfs_dir: Debugfs dentry for statistic directory + * @dbgfs_stats: Debugfs dentry for statistic counters + */ +struct sl3516_ce_dev { + void __iomem *base; + struct clk *clks; + struct reset_control *reset; + struct device *dev; + struct crypto_engine *engine; + struct completion complete; + int status; + dma_addr_t dtx; + struct descriptor *tx; + dma_addr_t drx; + struct descriptor *rx; + int ctx; + int crx; + struct hwrng trng; + unsigned long hwrng_stat_req; + unsigned long hwrng_stat_bytes; + unsigned long stat_irq; + unsigned long stat_irq_tx; + unsigned long 
stat_irq_rx; + unsigned long stat_req; + unsigned long fallback_sg_count_tx; + unsigned long fallback_sg_count_rx; + unsigned long fallback_not_same_len; + unsigned long fallback_mod16; + unsigned long fallback_align16; +#ifdef CONFIG_CRYPTO_DEV_SL3516_DEBUG + struct dentry *dbgfs_dir; + struct dentry *dbgfs_stats; +#endif + void *pctrl; + dma_addr_t dctrl; +}; + +struct sginfo { + u32 addr; + u32 len; +}; + +/* + * struct sl3516_ce_cipher_req_ctx - context for a skcipher request + * @t_src: list of mapped SGs with their size + * @t_dst: list of mapped SGs with their size + * @op_dir: direction (encrypt vs decrypt) for this request + * @pctrllen: the length of the ctrl packet + * @tqflag: the TQflag to set in data packet + * @h pointer to the pkt_control_cipher header + * @nr_sgs: number of source SG + * @nr_sgd: number of destination SG + * @fallback_req: request struct for invoking the fallback skcipher TFM + */ +struct sl3516_ce_cipher_req_ctx { + struct sginfo t_src[MAXDESC]; + struct sginfo t_dst[MAXDESC]; + u32 op_dir; + unsigned int pctrllen; + u32 tqflag; + struct pkt_control_cipher *h; + int nr_sgs; + int nr_sgd; + struct skcipher_request fallback_req; // keep at the end +}; + +/* + * struct sl3516_ce_cipher_tfm_ctx - context for a skcipher TFM + * @enginectx: crypto_engine used by this TFM + * @key: pointer to key data + * @keylen: len of the key + * @ce: pointer to the private data of driver handling this TFM + * @fallback_tfm: pointer to the fallback TFM + * + * enginectx must be the first element + */ +struct sl3516_ce_cipher_tfm_ctx { + struct crypto_engine_ctx enginectx; + u32 *key; + u32 keylen; + struct sl3516_ce_dev *ce; + struct crypto_skcipher *fallback_tfm; +}; + +/* + * struct sl3516_ce_alg_template - crypto_alg template + * @type: the CRYPTO_ALG_TYPE for this template + * @mode: value to be used in control packet for this algorithm + * @ce: pointer to the sl3516_ce_dev structure associated with + * this template + * @alg: one of sub struct must be used + * @stat_req: number of request done on this template + * @stat_fb: number of request which has fallbacked + * @stat_bytes: total data size done by this template + */ +struct sl3516_ce_alg_template { + u32 type; + u32 mode; + struct sl3516_ce_dev *ce; + union { + struct skcipher_alg skcipher; + } alg; + unsigned long stat_req; + unsigned long stat_fb; + unsigned long stat_bytes; +}; + +int sl3516_ce_enqueue(struct crypto_async_request *areq, u32 type); + +int sl3516_ce_aes_setkey(struct crypto_skcipher *tfm, const u8 *key, + unsigned int keylen); +int sl3516_ce_cipher_init(struct crypto_tfm *tfm); +void sl3516_ce_cipher_exit(struct crypto_tfm *tfm); +int sl3516_ce_skdecrypt(struct skcipher_request *areq); +int sl3516_ce_skencrypt(struct skcipher_request *areq); + +int sl3516_ce_run_task(struct sl3516_ce_dev *ce, + struct sl3516_ce_cipher_req_ctx *rctx, const char *name); + +int sl3516_ce_rng_register(struct sl3516_ce_dev *ce); +void sl3516_ce_rng_unregister(struct sl3516_ce_dev *ce); From 2dcf45622481a22ffe108e2f381a929c9132c605 Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Tue, 1 Jun 2021 15:11:32 +0000 Subject: [PATCH 111/142] MAINTAINERS: add gemini crypto sl3516-ce Add myself as maintainer of gemini sl3516-ce crypto driver. Add also the driver to the list of files for gemini SoC. 
Reviewed-by: Linus Walleij Signed-off-by: Corentin Labbe Signed-off-by: Herbert Xu --- MAINTAINERS | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index d478f44be7ce6..388924c2d23ac 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1811,6 +1811,7 @@ F: Documentation/devicetree/bindings/net/cortina,gemini-ethernet.txt F: Documentation/devicetree/bindings/pinctrl/cortina,gemini-pinctrl.txt F: Documentation/devicetree/bindings/rtc/faraday,ftrtc010.txt F: arch/arm/mach-gemini/ +F: drivers/crypto/gemini/ F: drivers/net/ethernet/cortina/ F: drivers/pinctrl/pinctrl-gemini.c F: drivers/rtc/rtc-ftrtc010.c @@ -7549,6 +7550,12 @@ M: Kieran Bingham S: Supported F: scripts/gdb/ +GEMINI CRYPTO DRIVER +M: Corentin Labbe +L: linux-crypto@vger.kernel.org +S: Maintained +F: drivers/crypto/gemini/ + GEMTEK FM RADIO RECEIVER DRIVER M: Hans Verkuil L: linux-media@vger.kernel.org From a8bc4f5e7a72e4067f5afd7e98b61624231713ca Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Wed, 2 Jun 2021 11:36:45 +0000 Subject: [PATCH 112/142] crypto: qce - fix error return code in qce_skcipher_async_req_handle() Fix to return a negative error code from the error handling case instead of 0, as done elsewhere in this function. Fixes: 1339a7c3ba05 ("crypto: qce: skcipher: Fix incorrect sg count for dma transfers") Reported-by: Hulk Robot Signed-off-by: Wei Yongjun Reviewed-by: Thara Gopinath Signed-off-by: Herbert Xu --- drivers/crypto/qce/skcipher.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/qce/skcipher.c b/drivers/crypto/qce/skcipher.c index 2594184792272..8ff10928f581d 100644 --- a/drivers/crypto/qce/skcipher.c +++ b/drivers/crypto/qce/skcipher.c @@ -124,13 +124,17 @@ qce_skcipher_async_req_handle(struct crypto_async_request *async_req) rctx->dst_sg = rctx->dst_tbl.sgl; dst_nents = dma_map_sg(qce->dev, rctx->dst_sg, rctx->dst_nents, dir_dst); - if (dst_nents < 0) + if (dst_nents < 0) { + ret = dst_nents; goto error_free; + } if (diff_dst) { src_nents = dma_map_sg(qce->dev, req->src, rctx->src_nents, dir_src); - if (src_nents < 0) + if (src_nents < 0) { + ret = src_nents; goto error_unmap_dst; + } rctx->src_sg = req->src; } else { rctx->src_sg = rctx->dst_sg; From 1b82435d17774f3eaab35dce239d354548aa9da2 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Thu, 3 Jun 2021 01:53:40 -0400 Subject: [PATCH 113/142] crypto: x86/curve25519 - fix cpu feature checking logic in mod_exit In curve25519_mod_init() the curve25519_alg will be registered only when (X86_FEATURE_BMI2 && X86_FEATURE_ADX). But in curve25519_mod_exit() it still checks (X86_FEATURE_BMI2 || X86_FEATURE_ADX) when do crypto unregister. This will trigger a BUG_ON in crypto_unregister_alg() as alg->cra_refcnt is 0 if the cpu only supports one of X86_FEATURE_BMI2 and X86_FEATURE_ADX. Fixes: 07b586fe0662 ("crypto: x86/curve25519 - replace with formally verified implementation") Signed-off-by: Hangbin Liu Reviewed-by: Jason A. 
Donenfeld Signed-off-by: Herbert Xu --- arch/x86/crypto/curve25519-x86_64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/crypto/curve25519-x86_64.c b/arch/x86/crypto/curve25519-x86_64.c index 6706b6cb1d0fc..38caf61cd5b7d 100644 --- a/arch/x86/crypto/curve25519-x86_64.c +++ b/arch/x86/crypto/curve25519-x86_64.c @@ -1500,7 +1500,7 @@ static int __init curve25519_mod_init(void) static void __exit curve25519_mod_exit(void) { if (IS_REACHABLE(CONFIG_CRYPTO_KPP) && - (boot_cpu_has(X86_FEATURE_BMI2) || boot_cpu_has(X86_FEATURE_ADX))) + static_branch_likely(&curve25519_use_bmi2_adx)) crypto_unregister_kpp(&curve25519_alg); } From c16a70c1f253e70f5d49b8e1054769bc8dbc3848 Mon Sep 17 00:00:00 2001 From: Kai Ye Date: Fri, 4 Jun 2021 09:31:26 +0800 Subject: [PATCH 114/142] crypto: hisilicon/sec - add new algorithm mode for AEAD Add new algorithm mode for AEAD: CCM(AES), GCM(AES), CCM(SM4), GCM(SM4). Signed-off-by: Kai Ye Signed-off-by: Longfang Liu Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/sec2/sec.h | 4 + drivers/crypto/hisilicon/sec2/sec_crypto.c | 367 +++++++++++++++++++-- drivers/crypto/hisilicon/sec2/sec_crypto.h | 8 + 3 files changed, 345 insertions(+), 34 deletions(-) diff --git a/drivers/crypto/hisilicon/sec2/sec.h b/drivers/crypto/hisilicon/sec2/sec.h index 935d8d95dcb91..2960faeea1b3a 100644 --- a/drivers/crypto/hisilicon/sec2/sec.h +++ b/drivers/crypto/hisilicon/sec2/sec.h @@ -13,6 +13,8 @@ struct sec_alg_res { dma_addr_t pbuf_dma; u8 *c_ivin; dma_addr_t c_ivin_dma; + u8 *a_ivin; + dma_addr_t a_ivin_dma; u8 *out_mac; dma_addr_t out_mac_dma; }; @@ -33,6 +35,8 @@ struct sec_cipher_req { struct sec_aead_req { u8 *out_mac; dma_addr_t out_mac_dma; + u8 *a_ivin; + dma_addr_t a_ivin_dma; struct aead_request *aead_req; }; diff --git a/drivers/crypto/hisilicon/sec2/sec_crypto.c b/drivers/crypto/hisilicon/sec2/sec_crypto.c index 5926b64d0d989..f2ab9ffa8f0ee 100644 --- a/drivers/crypto/hisilicon/sec2/sec_crypto.c +++ b/drivers/crypto/hisilicon/sec2/sec_crypto.c @@ -79,10 +79,24 @@ #define SEC_SQE_CFLAG 2 #define SEC_SQE_AEAD_FLAG 3 #define SEC_SQE_DONE 0x1 +#define MIN_MAC_LEN 4 +#define MAC_LEN_MASK 0x1U #define MAX_INPUT_DATA_LEN 0xFFFE00 #define BITS_MASK 0xFF #define BYTE_BITS 0x8 #define SEC_XTS_NAME_SZ 0x3 +#define IV_CM_CAL_NUM 2 +#define IV_CL_MASK 0x7 +#define IV_CL_MIN 2 +#define IV_CL_MID 4 +#define IV_CL_MAX 8 +#define IV_FLAGS_OFFSET 0x6 +#define IV_CM_OFFSET 0x3 +#define IV_LAST_BYTE1 1 +#define IV_LAST_BYTE2 2 +#define IV_LAST_BYTE_MASK 0xFF +#define IV_CTR_INIT 0x1 +#define IV_BYTE_OFFSET 0x8 /* Get an en/de-cipher queue cyclically to balance load over queues of TFM */ static inline int sec_alloc_queue_id(struct sec_ctx *ctx, struct sec_req *req) @@ -316,6 +330,30 @@ static void sec_free_civ_resource(struct device *dev, struct sec_alg_res *res) res->c_ivin, res->c_ivin_dma); } +static int sec_alloc_aiv_resource(struct device *dev, struct sec_alg_res *res) +{ + int i; + + res->a_ivin = dma_alloc_coherent(dev, SEC_TOTAL_IV_SZ, + &res->a_ivin_dma, GFP_KERNEL); + if (!res->a_ivin) + return -ENOMEM; + + for (i = 1; i < QM_Q_DEPTH; i++) { + res[i].a_ivin_dma = res->a_ivin_dma + i * SEC_IV_SIZE; + res[i].a_ivin = res->a_ivin + i * SEC_IV_SIZE; + } + + return 0; +} + +static void sec_free_aiv_resource(struct device *dev, struct sec_alg_res *res) +{ + if (res->a_ivin) + dma_free_coherent(dev, SEC_TOTAL_IV_SZ, + res->a_ivin, res->a_ivin_dma); +} + static int sec_alloc_mac_resource(struct device *dev, struct sec_alg_res *res) { int i; @@ -398,9 +436,13 @@ 
static int sec_alg_resource_alloc(struct sec_ctx *ctx, return ret; if (ctx->alg_type == SEC_AEAD) { + ret = sec_alloc_aiv_resource(dev, res); + if (ret) + goto alloc_aiv_fail; + ret = sec_alloc_mac_resource(dev, res); if (ret) - goto alloc_fail; + goto alloc_mac_fail; } if (ctx->pbuf_supported) { ret = sec_alloc_pbuf_resource(dev, res); @@ -415,7 +457,10 @@ static int sec_alg_resource_alloc(struct sec_ctx *ctx, alloc_pbuf_fail: if (ctx->alg_type == SEC_AEAD) sec_free_mac_resource(dev, qp_ctx->res); -alloc_fail: +alloc_mac_fail: + if (ctx->alg_type == SEC_AEAD) + sec_free_aiv_resource(dev, res); +alloc_aiv_fail: sec_free_civ_resource(dev, res); return ret; } @@ -871,6 +916,8 @@ static int sec_cipher_map(struct sec_ctx *ctx, struct sec_req *req, c_req->c_ivin = res->pbuf + SEC_PBUF_IV_OFFSET; c_req->c_ivin_dma = res->pbuf_dma + SEC_PBUF_IV_OFFSET; if (ctx->alg_type == SEC_AEAD) { + a_req->a_ivin = res->a_ivin; + a_req->a_ivin_dma = res->a_ivin_dma; a_req->out_mac = res->pbuf + SEC_PBUF_MAC_OFFSET; a_req->out_mac_dma = res->pbuf_dma + SEC_PBUF_MAC_OFFSET; @@ -881,6 +928,8 @@ static int sec_cipher_map(struct sec_ctx *ctx, struct sec_req *req, c_req->c_ivin = res->c_ivin; c_req->c_ivin_dma = res->c_ivin_dma; if (ctx->alg_type == SEC_AEAD) { + a_req->a_ivin = res->a_ivin; + a_req->a_ivin_dma = res->a_ivin_dma; a_req->out_mac = res->out_mac; a_req->out_mac_dma = res->out_mac_dma; } @@ -1012,6 +1061,17 @@ static int sec_aead_setkey(struct crypto_aead *tfm, const u8 *key, ctx->a_ctx.mac_len = mac_len; c_ctx->c_mode = c_mode; + if (c_mode == SEC_CMODE_CCM || c_mode == SEC_CMODE_GCM) { + ret = sec_skcipher_aes_sm4_setkey(c_ctx, keylen, c_mode); + if (ret) { + dev_err(dev, "set sec aes ccm cipher key err!\n"); + return ret; + } + memcpy(c_ctx->c_key, key, keylen); + + return 0; + } + if (crypto_authenc_extractkeys(&keys, key, keylen)) goto bad_key; @@ -1054,6 +1114,14 @@ GEN_SEC_AEAD_SETKEY_FUNC(aes_cbc_sha256, SEC_A_HMAC_SHA256, SEC_CALG_AES, SEC_HMAC_SHA256_MAC, SEC_CMODE_CBC) GEN_SEC_AEAD_SETKEY_FUNC(aes_cbc_sha512, SEC_A_HMAC_SHA512, SEC_CALG_AES, SEC_HMAC_SHA512_MAC, SEC_CMODE_CBC) +GEN_SEC_AEAD_SETKEY_FUNC(aes_ccm, 0, SEC_CALG_AES, + SEC_HMAC_CCM_MAC, SEC_CMODE_CCM) +GEN_SEC_AEAD_SETKEY_FUNC(aes_gcm, 0, SEC_CALG_AES, + SEC_HMAC_GCM_MAC, SEC_CMODE_GCM) +GEN_SEC_AEAD_SETKEY_FUNC(sm4_ccm, 0, SEC_CALG_SM4, + SEC_HMAC_CCM_MAC, SEC_CMODE_CCM) +GEN_SEC_AEAD_SETKEY_FUNC(sm4_gcm, 0, SEC_CALG_SM4, + SEC_HMAC_GCM_MAC, SEC_CMODE_GCM) static int sec_aead_sgl_map(struct sec_ctx *ctx, struct sec_req *req) { @@ -1295,12 +1363,125 @@ static void sec_skcipher_callback(struct sec_ctx *ctx, struct sec_req *req, sk_req->base.complete(&sk_req->base, err); } -static void sec_aead_copy_iv(struct sec_ctx *ctx, struct sec_req *req) +static void set_aead_auth_iv(struct sec_ctx *ctx, struct sec_req *req) { struct aead_request *aead_req = req->aead_req.aead_req; struct sec_cipher_req *c_req = &req->c_req; + struct sec_aead_req *a_req = &req->aead_req; + size_t authsize = ctx->a_ctx.mac_len; + u32 data_size = aead_req->cryptlen; + u8 flage = 0; + u8 cm, cl; + + /* the specification has been checked in aead_iv_demension_check() */ + cl = c_req->c_ivin[0] + 1; + c_req->c_ivin[ctx->c_ctx.ivsize - cl] = 0x00; + memset(&c_req->c_ivin[ctx->c_ctx.ivsize - cl], 0, cl); + c_req->c_ivin[ctx->c_ctx.ivsize - IV_LAST_BYTE1] = IV_CTR_INIT; + + /* the last 3bit is L' */ + flage |= c_req->c_ivin[0] & IV_CL_MASK; + + /* the M' is bit3~bit5, the Flags is bit6 */ + cm = (authsize - IV_CM_CAL_NUM) / IV_CM_CAL_NUM; + flage |= cm << IV_CM_OFFSET; 
+ if (aead_req->assoclen) + flage |= 0x01 << IV_FLAGS_OFFSET; + + memcpy(a_req->a_ivin, c_req->c_ivin, ctx->c_ctx.ivsize); + a_req->a_ivin[0] = flage; + + /* + * the last 32bit is counter's initial number, + * but the nonce uses the first 16bit + * the tail 16bit fill with the cipher length + */ + if (!c_req->encrypt) + data_size = aead_req->cryptlen - authsize; + + a_req->a_ivin[ctx->c_ctx.ivsize - IV_LAST_BYTE1] = + data_size & IV_LAST_BYTE_MASK; + data_size >>= IV_BYTE_OFFSET; + a_req->a_ivin[ctx->c_ctx.ivsize - IV_LAST_BYTE2] = + data_size & IV_LAST_BYTE_MASK; +} + +static void sec_aead_set_iv(struct sec_ctx *ctx, struct sec_req *req) +{ + struct aead_request *aead_req = req->aead_req.aead_req; + struct crypto_aead *tfm = crypto_aead_reqtfm(aead_req); + size_t authsize = crypto_aead_authsize(tfm); + struct sec_cipher_req *c_req = &req->c_req; + struct sec_aead_req *a_req = &req->aead_req; memcpy(c_req->c_ivin, aead_req->iv, ctx->c_ctx.ivsize); + + if (ctx->c_ctx.c_mode == SEC_CMODE_CCM) { + /* + * CCM 16Byte Cipher_IV: {1B_Flage,13B_IV,2B_counter}, + * the counter must set to 0x01 + */ + ctx->a_ctx.mac_len = authsize; + /* CCM 16Byte Auth_IV: {1B_AFlage,13B_IV,2B_Ptext_length} */ + set_aead_auth_iv(ctx, req); + } + + /* GCM 12Byte Cipher_IV == Auth_IV */ + if (ctx->c_ctx.c_mode == SEC_CMODE_GCM) { + ctx->a_ctx.mac_len = authsize; + memcpy(a_req->a_ivin, c_req->c_ivin, SEC_AIV_SIZE); + } +} + +static void sec_auth_bd_fill_xcm(struct sec_auth_ctx *ctx, int dir, + struct sec_req *req, struct sec_sqe *sec_sqe) +{ + struct sec_aead_req *a_req = &req->aead_req; + struct aead_request *aq = a_req->aead_req; + + /* C_ICV_Len is MAC size, 0x4 ~ 0x10 */ + sec_sqe->type2.icvw_kmode |= cpu_to_le16((u16)ctx->mac_len); + + /* mode set to CCM/GCM, don't set {A_Alg, AKey_Len, MAC_Len} */ + sec_sqe->type2.a_key_addr = sec_sqe->type2.c_key_addr; + sec_sqe->type2.a_ivin_addr = cpu_to_le64(a_req->a_ivin_dma); + sec_sqe->type_cipher_auth |= SEC_NO_AUTH << SEC_AUTH_OFFSET; + + if (dir) + sec_sqe->sds_sa_type &= SEC_CIPHER_AUTH; + else + sec_sqe->sds_sa_type |= SEC_AUTH_CIPHER; + + sec_sqe->type2.alen_ivllen = cpu_to_le32(aq->assoclen); + sec_sqe->type2.auth_src_offset = cpu_to_le16(0x0); + sec_sqe->type2.cipher_src_offset = cpu_to_le16((u16)aq->assoclen); + + sec_sqe->type2.mac_addr = cpu_to_le64(a_req->out_mac_dma); +} + +static void sec_auth_bd_fill_xcm_v3(struct sec_auth_ctx *ctx, int dir, + struct sec_req *req, struct sec_sqe3 *sqe3) +{ + struct sec_aead_req *a_req = &req->aead_req; + struct aead_request *aq = a_req->aead_req; + + /* C_ICV_Len is MAC size, 0x4 ~ 0x10 */ + sqe3->c_icv_key |= cpu_to_le16((u16)ctx->mac_len << SEC_MAC_OFFSET_V3); + + /* mode set to CCM/GCM, don't set {A_Alg, AKey_Len, MAC_Len} */ + sqe3->a_key_addr = sqe3->c_key_addr; + sqe3->auth_ivin.a_ivin_addr = cpu_to_le64(a_req->a_ivin_dma); + sqe3->auth_mac_key |= SEC_NO_AUTH; + + if (dir) + sqe3->huk_iv_seq &= SEC_CIPHER_AUTH_V3; + else + sqe3->huk_iv_seq |= SEC_AUTH_CIPHER_V3; + + sqe3->a_len_key = cpu_to_le32(aq->assoclen); + sqe3->auth_src_offset = cpu_to_le16(0x0); + sqe3->cipher_src_offset = cpu_to_le16((u16)aq->assoclen); + sqe3->mac_addr = cpu_to_le64(a_req->out_mac_dma); } static void sec_auth_bd_fill_ex(struct sec_auth_ctx *ctx, int dir, @@ -1348,7 +1529,11 @@ static int sec_aead_bd_fill(struct sec_ctx *ctx, struct sec_req *req) return ret; } - sec_auth_bd_fill_ex(auth_ctx, req->c_req.encrypt, req, sec_sqe); + if (ctx->c_ctx.c_mode == SEC_CMODE_CCM || + ctx->c_ctx.c_mode == SEC_CMODE_GCM) + sec_auth_bd_fill_xcm(auth_ctx, 
req->c_req.encrypt, req, sec_sqe); + else + sec_auth_bd_fill_ex(auth_ctx, req->c_req.encrypt, req, sec_sqe); return 0; } @@ -1399,7 +1584,13 @@ static int sec_aead_bd_fill_v3(struct sec_ctx *ctx, struct sec_req *req) return ret; } - sec_auth_bd_fill_ex_v3(auth_ctx, req->c_req.encrypt, req, sec_sqe3); + if (ctx->c_ctx.c_mode == SEC_CMODE_CCM || + ctx->c_ctx.c_mode == SEC_CMODE_GCM) + sec_auth_bd_fill_xcm_v3(auth_ctx, req->c_req.encrypt, + req, sec_sqe3); + else + sec_auth_bd_fill_ex_v3(auth_ctx, req->c_req.encrypt, + req, sec_sqe3); return 0; } @@ -1531,7 +1722,7 @@ static const struct sec_req_op sec_skcipher_req_ops = { static const struct sec_req_op sec_aead_req_ops = { .buf_map = sec_aead_sgl_map, .buf_unmap = sec_aead_sgl_unmap, - .do_transfer = sec_aead_copy_iv, + .do_transfer = sec_aead_set_iv, .bd_fill = sec_aead_bd_fill, .bd_send = sec_bd_send, .callback = sec_aead_callback, @@ -1551,7 +1742,7 @@ static const struct sec_req_op sec_skcipher_req_ops_v3 = { static const struct sec_req_op sec_aead_req_ops_v3 = { .buf_map = sec_aead_sgl_map, .buf_unmap = sec_aead_sgl_unmap, - .do_transfer = sec_aead_copy_iv, + .do_transfer = sec_aead_set_iv, .bd_fill = sec_aead_bd_fill_v3, .bd_send = sec_bd_send, .callback = sec_aead_callback, @@ -1591,8 +1782,9 @@ static int sec_aead_init(struct crypto_aead *tfm) crypto_aead_set_reqsize(tfm, sizeof(struct sec_req)); ctx->alg_type = SEC_AEAD; ctx->c_ctx.ivsize = crypto_aead_ivsize(tfm); - if (ctx->c_ctx.ivsize > SEC_IV_SIZE) { - dev_err(ctx->dev, "get error aead iv size!\n"); + if (ctx->c_ctx.ivsize < SEC_AIV_SIZE || + ctx->c_ctx.ivsize > SEC_IV_SIZE) { + pr_err("get error aead iv size!\n"); return -EINVAL; } @@ -1663,6 +1855,25 @@ static void sec_aead_ctx_exit(struct crypto_aead *tfm) sec_aead_exit(tfm); } +static int sec_aead_xcm_ctx_init(struct crypto_aead *tfm) +{ + struct sec_ctx *ctx = crypto_aead_ctx(tfm); + int ret; + + ret = sec_aead_init(tfm); + if (ret) { + dev_err(ctx->dev, "hisi_sec2: aead xcm init error!\n"); + return ret; + } + + return 0; +} + +static void sec_aead_xcm_ctx_exit(struct crypto_aead *tfm) +{ + sec_aead_exit(tfm); +} + static int sec_aead_sha1_ctx_init(struct crypto_aead *tfm) { return sec_aead_ctx_init(tfm, "sha1"); @@ -1903,41 +2114,100 @@ static struct skcipher_alg sec_skciphers_v3[] = { SEC_MIN_BLOCK_SZ, AES_BLOCK_SIZE) }; -static int sec_aead_param_check(struct sec_ctx *ctx, struct sec_req *sreq) +static int aead_iv_demension_check(struct aead_request *aead_req) +{ + u8 cl; + + cl = aead_req->iv[0] + 1; + if (cl < IV_CL_MIN || cl > IV_CL_MAX) + return -EINVAL; + + if (cl < IV_CL_MID && aead_req->cryptlen >> (BYTE_BITS * cl)) + return -EOVERFLOW; + + return 0; +} + +static int sec_aead_spec_check(struct sec_ctx *ctx, struct sec_req *sreq) { struct aead_request *req = sreq->aead_req.aead_req; struct crypto_aead *tfm = crypto_aead_reqtfm(req); size_t authsize = crypto_aead_authsize(tfm); + u8 c_mode = ctx->c_ctx.c_mode; struct device *dev = ctx->dev; - u8 c_alg = ctx->c_ctx.c_alg; + int ret; - if (unlikely(!req->src || !req->dst || !req->cryptlen || - req->assoclen > SEC_MAX_AAD_LEN)) { - dev_err(dev, "aead input param error!\n"); + if (unlikely(req->cryptlen + req->assoclen > MAX_INPUT_DATA_LEN || + req->assoclen > SEC_MAX_AAD_LEN)) { + dev_err(dev, "aead input spec error!\n"); return -EINVAL; } - if (ctx->pbuf_supported && (req->cryptlen + req->assoclen) <= - SEC_PBUF_SZ) - sreq->use_pbuf = true; - else - sreq->use_pbuf = false; - - /* Support AES only */ - if (unlikely(c_alg != SEC_CALG_AES)) { - dev_err(dev, "aead 
crypto alg error!\n"); + if (unlikely((c_mode == SEC_CMODE_GCM && authsize < DES_BLOCK_SIZE) || + (c_mode == SEC_CMODE_CCM && (authsize < MIN_MAC_LEN || + authsize & MAC_LEN_MASK)))) { + dev_err(dev, "aead input mac length error!\n"); return -EINVAL; } + + if (c_mode == SEC_CMODE_CCM) { + ret = aead_iv_demension_check(req); + if (ret) { + dev_err(dev, "aead input iv param error!\n"); + return ret; + } + } + if (sreq->c_req.encrypt) sreq->c_req.c_len = req->cryptlen; else sreq->c_req.c_len = req->cryptlen - authsize; + if (c_mode == SEC_CMODE_CBC) { + if (unlikely(sreq->c_req.c_len & (AES_BLOCK_SIZE - 1))) { + dev_err(dev, "aead crypto length error!\n"); + return -EINVAL; + } + } + + return 0; +} - if (unlikely(sreq->c_req.c_len & (AES_BLOCK_SIZE - 1))) { - dev_err(dev, "aead crypto length error!\n"); +static int sec_aead_param_check(struct sec_ctx *ctx, struct sec_req *sreq) +{ + struct aead_request *req = sreq->aead_req.aead_req; + struct crypto_aead *tfm = crypto_aead_reqtfm(req); + size_t authsize = crypto_aead_authsize(tfm); + struct device *dev = ctx->dev; + u8 c_alg = ctx->c_ctx.c_alg; + + if (unlikely(!req->src || !req->dst)) { + dev_err(dev, "aead input param error!\n"); return -EINVAL; } + if (ctx->sec->qm.ver == QM_HW_V2) { + if (unlikely(!req->cryptlen || (!sreq->c_req.encrypt && + req->cryptlen <= authsize))) { + dev_err(dev, "Kunpeng920 not support 0 length!\n"); + return -EINVAL; + } + } + + /* Support AES or SM4 */ + if (unlikely(c_alg != SEC_CALG_AES && c_alg != SEC_CALG_SM4)) { + dev_err(dev, "aead crypto alg error!\n"); + return -EINVAL; + } + + if (unlikely(sec_aead_spec_check(ctx, sreq))) + return -EINVAL; + + if (ctx->pbuf_supported && (req->cryptlen + req->assoclen) <= + SEC_PBUF_SZ) + sreq->use_pbuf = true; + else + sreq->use_pbuf = false; + return 0; } @@ -1970,7 +2240,7 @@ static int sec_aead_decrypt(struct aead_request *a_req) return sec_aead_crypto(a_req, false); } -#define SEC_AEAD_GEN_ALG(sec_cra_name, sec_set_key, ctx_init,\ +#define SEC_AEAD_ALG(sec_cra_name, sec_set_key, ctx_init,\ ctx_exit, blk_size, iv_size, max_authsize)\ {\ .base = {\ @@ -1991,22 +2261,39 @@ static int sec_aead_decrypt(struct aead_request *a_req) .maxauthsize = max_authsize,\ } -#define SEC_AEAD_ALG(algname, keyfunc, aead_init, blksize, ivsize, authsize)\ - SEC_AEAD_GEN_ALG(algname, keyfunc, aead_init,\ - sec_aead_ctx_exit, blksize, ivsize, authsize) - static struct aead_alg sec_aeads[] = { SEC_AEAD_ALG("authenc(hmac(sha1),cbc(aes))", sec_setkey_aes_cbc_sha1, sec_aead_sha1_ctx_init, - AES_BLOCK_SIZE, AES_BLOCK_SIZE, SHA1_DIGEST_SIZE), + sec_aead_ctx_exit, AES_BLOCK_SIZE, + AES_BLOCK_SIZE, SHA1_DIGEST_SIZE), SEC_AEAD_ALG("authenc(hmac(sha256),cbc(aes))", sec_setkey_aes_cbc_sha256, sec_aead_sha256_ctx_init, - AES_BLOCK_SIZE, AES_BLOCK_SIZE, SHA256_DIGEST_SIZE), + sec_aead_ctx_exit, AES_BLOCK_SIZE, + AES_BLOCK_SIZE, SHA256_DIGEST_SIZE), SEC_AEAD_ALG("authenc(hmac(sha512),cbc(aes))", sec_setkey_aes_cbc_sha512, sec_aead_sha512_ctx_init, - AES_BLOCK_SIZE, AES_BLOCK_SIZE, SHA512_DIGEST_SIZE), + sec_aead_ctx_exit, AES_BLOCK_SIZE, + AES_BLOCK_SIZE, SHA512_DIGEST_SIZE), + + SEC_AEAD_ALG("ccm(aes)", sec_setkey_aes_ccm, sec_aead_xcm_ctx_init, + sec_aead_xcm_ctx_exit, SEC_MIN_BLOCK_SZ, + AES_BLOCK_SIZE, AES_BLOCK_SIZE), + + SEC_AEAD_ALG("gcm(aes)", sec_setkey_aes_gcm, sec_aead_xcm_ctx_init, + sec_aead_xcm_ctx_exit, SEC_MIN_BLOCK_SZ, + SEC_AIV_SIZE, AES_BLOCK_SIZE) +}; + +static struct aead_alg sec_aeads_v3[] = { + SEC_AEAD_ALG("ccm(sm4)", sec_setkey_sm4_ccm, sec_aead_xcm_ctx_init, + 
sec_aead_xcm_ctx_exit, SEC_MIN_BLOCK_SZ, + AES_BLOCK_SIZE, AES_BLOCK_SIZE), + + SEC_AEAD_ALG("gcm(sm4)", sec_setkey_sm4_gcm, sec_aead_xcm_ctx_init, + sec_aead_xcm_ctx_exit, SEC_MIN_BLOCK_SZ, + SEC_AIV_SIZE, AES_BLOCK_SIZE) }; int sec_register_to_crypto(struct hisi_qm *qm) @@ -2025,11 +2312,19 @@ int sec_register_to_crypto(struct hisi_qm *qm) if (ret) goto reg_skcipher_fail; } + ret = crypto_register_aeads(sec_aeads, ARRAY_SIZE(sec_aeads)); if (ret) goto reg_aead_fail; + if (qm->ver > QM_HW_V2) { + ret = crypto_register_aeads(sec_aeads_v3, ARRAY_SIZE(sec_aeads_v3)); + if (ret) + goto reg_aead_v3_fail; + } return ret; +reg_aead_v3_fail: + crypto_unregister_aeads(sec_aeads, ARRAY_SIZE(sec_aeads)); reg_aead_fail: if (qm->ver > QM_HW_V2) crypto_unregister_skciphers(sec_skciphers_v3, @@ -2042,10 +2337,14 @@ int sec_register_to_crypto(struct hisi_qm *qm) void sec_unregister_from_crypto(struct hisi_qm *qm) { + if (qm->ver > QM_HW_V2) + crypto_unregister_aeads(sec_aeads_v3, + ARRAY_SIZE(sec_aeads_v3)); + crypto_unregister_aeads(sec_aeads, ARRAY_SIZE(sec_aeads)); + if (qm->ver > QM_HW_V2) crypto_unregister_skciphers(sec_skciphers_v3, ARRAY_SIZE(sec_skciphers_v3)); crypto_unregister_skciphers(sec_skciphers, ARRAY_SIZE(sec_skciphers)); - crypto_unregister_aeads(sec_aeads, ARRAY_SIZE(sec_aeads)); } diff --git a/drivers/crypto/hisilicon/sec2/sec_crypto.h b/drivers/crypto/hisilicon/sec2/sec_crypto.h index c9bfe75d32e36..a7bcd3e2affd7 100644 --- a/drivers/crypto/hisilicon/sec2/sec_crypto.h +++ b/drivers/crypto/hisilicon/sec2/sec_crypto.h @@ -4,6 +4,7 @@ #ifndef __HISI_SEC_V2_CRYPTO_H #define __HISI_SEC_V2_CRYPTO_H +#define SEC_AIV_SIZE 12 #define SEC_IV_SIZE 24 #define SEC_MAX_KEY_SIZE 64 #define SEC_COMM_SCENE 0 @@ -22,6 +23,11 @@ enum sec_hash_alg { }; enum sec_mac_len { + SEC_HMAC_CCM_MAC = 16, + SEC_HMAC_GCM_MAC = 16, + SEC_SM3_MAC = 32, + SEC_HMAC_SM3_MAC = 32, + SEC_HMAC_MD5_MAC = 16, SEC_HMAC_SHA1_MAC = 20, SEC_HMAC_SHA256_MAC = 32, SEC_HMAC_SHA512_MAC = 64, @@ -33,6 +39,8 @@ enum sec_cmode { SEC_CMODE_CFB = 0x2, SEC_CMODE_OFB = 0x3, SEC_CMODE_CTR = 0x4, + SEC_CMODE_CCM = 0x5, + SEC_CMODE_GCM = 0x6, SEC_CMODE_XTS = 0x7, }; From 6c46a3297beae4ae2d22b26da5e091f058381c7c Mon Sep 17 00:00:00 2001 From: Kai Ye Date: Fri, 4 Jun 2021 09:31:27 +0800 Subject: [PATCH 115/142] crypto: hisilicon/sec - add fallback tfm support for aeads Add fallback tfm support to the hisi_sec driver. The Kunpeng920's CCM/GCM algorithm does not support a 0 byte src length, so the driver needs to set up a soft fallback aead tfm. Signed-off-by: Kai Ye Signed-off-by: Longfang Liu Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/sec2/sec.h | 2 + drivers/crypto/hisilicon/sec2/sec_crypto.c | 97 ++++++++++++++++++++-- 2 files changed, 94 insertions(+), 5 deletions(-) diff --git a/drivers/crypto/hisilicon/sec2/sec.h b/drivers/crypto/hisilicon/sec2/sec.h index 2960faeea1b3a..3fe78754fba2e 100644 --- a/drivers/crypto/hisilicon/sec2/sec.h +++ b/drivers/crypto/hisilicon/sec2/sec.h @@ -88,7 +88,9 @@ struct sec_auth_ctx { u8 a_key_len; u8 mac_len; u8 a_alg; + bool fallback; struct crypto_shash *hash_tfm; + struct crypto_aead *fallback_aead_tfm; }; /* SEC cipher context which cipher's relatives */ diff --git a/drivers/crypto/hisilicon/sec2/sec_crypto.c b/drivers/crypto/hisilicon/sec2/sec_crypto.c index f2ab9ffa8f0ee..194a9bca9c5e0 100644 --- a/drivers/crypto/hisilicon/sec2/sec_crypto.c +++ b/drivers/crypto/hisilicon/sec2/sec_crypto.c @@ -2,6 +2,7 @@ /* Copyright (c) 2019 HiSilicon Limited.
*/ #include +#include #include #include #include @@ -853,12 +854,16 @@ GEN_SEC_SETKEY_FUNC(sm4_ctr, SEC_CALG_SM4, SEC_CMODE_CTR) static int sec_cipher_pbuf_map(struct sec_ctx *ctx, struct sec_req *req, struct scatterlist *src) { - struct aead_request *aead_req = req->aead_req.aead_req; + struct sec_aead_req *a_req = &req->aead_req; + struct aead_request *aead_req = a_req->aead_req; struct sec_cipher_req *c_req = &req->c_req; struct sec_qp_ctx *qp_ctx = req->qp_ctx; struct device *dev = ctx->dev; int copy_size, pbuf_length; int req_id = req->req_id; + struct crypto_aead *tfm; + size_t authsize; + u8 *mac_offset; if (ctx->alg_type == SEC_AEAD) copy_size = aead_req->cryptlen + aead_req->assoclen; @@ -866,12 +871,17 @@ static int sec_cipher_pbuf_map(struct sec_ctx *ctx, struct sec_req *req, copy_size = c_req->c_len; pbuf_length = sg_copy_to_buffer(src, sg_nents(src), - qp_ctx->res[req_id].pbuf, - copy_size); + qp_ctx->res[req_id].pbuf, copy_size); if (unlikely(pbuf_length != copy_size)) { dev_err(dev, "copy src data to pbuf error!\n"); return -EINVAL; } + if (!c_req->encrypt && ctx->alg_type == SEC_AEAD) { + tfm = crypto_aead_reqtfm(aead_req); + authsize = crypto_aead_authsize(tfm); + mac_offset = qp_ctx->res[req_id].pbuf + copy_size - authsize; + memcpy(a_req->out_mac, mac_offset, authsize); + } c_req->c_in_dma = qp_ctx->res[req_id].pbuf_dma; c_req->c_out_dma = c_req->c_in_dma; @@ -1044,6 +1054,28 @@ static int sec_aead_auth_set_key(struct sec_auth_ctx *ctx, return 0; } +static int sec_aead_setauthsize(struct crypto_aead *aead, unsigned int authsize) +{ + struct crypto_tfm *tfm = crypto_aead_tfm(aead); + struct sec_ctx *ctx = crypto_tfm_ctx(tfm); + struct sec_auth_ctx *a_ctx = &ctx->a_ctx; + + if (unlikely(a_ctx->fallback_aead_tfm)) + return crypto_aead_setauthsize(a_ctx->fallback_aead_tfm, authsize); + + return 0; +} + +static int sec_aead_fallback_setkey(struct sec_auth_ctx *a_ctx, + struct crypto_aead *tfm, const u8 *key, + unsigned int keylen) +{ + crypto_aead_clear_flags(a_ctx->fallback_aead_tfm, CRYPTO_TFM_REQ_MASK); + crypto_aead_set_flags(a_ctx->fallback_aead_tfm, + crypto_aead_get_flags(tfm) & CRYPTO_TFM_REQ_MASK); + return crypto_aead_setkey(a_ctx->fallback_aead_tfm, key, keylen); +} + static int sec_aead_setkey(struct crypto_aead *tfm, const u8 *key, const u32 keylen, const enum sec_hash_alg a_alg, const enum sec_calg c_alg, @@ -1052,6 +1084,7 @@ static int sec_aead_setkey(struct crypto_aead *tfm, const u8 *key, { struct sec_ctx *ctx = crypto_aead_ctx(tfm); struct sec_cipher_ctx *c_ctx = &ctx->c_ctx; + struct sec_auth_ctx *a_ctx = &ctx->a_ctx; struct device *dev = ctx->dev; struct crypto_authenc_keys keys; int ret; @@ -1069,6 +1102,12 @@ static int sec_aead_setkey(struct crypto_aead *tfm, const u8 *key, } memcpy(c_ctx->c_key, key, keylen); + if (unlikely(a_ctx->fallback_aead_tfm)) { + ret = sec_aead_fallback_setkey(a_ctx, tfm, key, keylen); + if (ret) + return ret; + } + return 0; } @@ -1857,7 +1896,10 @@ static void sec_aead_ctx_exit(struct crypto_aead *tfm) static int sec_aead_xcm_ctx_init(struct crypto_aead *tfm) { + struct aead_alg *alg = crypto_aead_alg(tfm); struct sec_ctx *ctx = crypto_aead_ctx(tfm); + struct sec_auth_ctx *a_ctx = &ctx->a_ctx; + const char *aead_name = alg->base.cra_name; int ret; ret = sec_aead_init(tfm); @@ -1866,11 +1908,24 @@ static int sec_aead_xcm_ctx_init(struct crypto_aead *tfm) return ret; } + a_ctx->fallback_aead_tfm = crypto_alloc_aead(aead_name, 0, + CRYPTO_ALG_NEED_FALLBACK | + CRYPTO_ALG_ASYNC); + if (IS_ERR(a_ctx->fallback_aead_tfm)) { + 
dev_err(ctx->dev, "aead driver alloc fallback tfm error!\n"); + sec_aead_exit(tfm); + return PTR_ERR(a_ctx->fallback_aead_tfm); + } + a_ctx->fallback = false; + return 0; } static void sec_aead_xcm_ctx_exit(struct crypto_aead *tfm) { + struct sec_ctx *ctx = crypto_aead_ctx(tfm); + + crypto_free_aead(ctx->a_ctx.fallback_aead_tfm); sec_aead_exit(tfm); } @@ -2189,6 +2244,7 @@ static int sec_aead_param_check(struct sec_ctx *ctx, struct sec_req *sreq) if (unlikely(!req->cryptlen || (!sreq->c_req.encrypt && req->cryptlen <= authsize))) { dev_err(dev, "Kunpeng920 not support 0 length!\n"); + ctx->a_ctx.fallback = true; return -EINVAL; } } @@ -2211,6 +2267,31 @@ static int sec_aead_param_check(struct sec_ctx *ctx, struct sec_req *sreq) return 0; } +static int sec_aead_soft_crypto(struct sec_ctx *ctx, + struct aead_request *aead_req, + bool encrypt) +{ + struct aead_request *subreq = aead_request_ctx(aead_req); + struct sec_auth_ctx *a_ctx = &ctx->a_ctx; + struct device *dev = ctx->dev; + + /* Kunpeng920 aead mode not support input 0 size */ + if (!a_ctx->fallback_aead_tfm) { + dev_err(dev, "aead fallbcak tfm is NULL!\n"); + return -EINVAL; + } + + aead_request_set_tfm(subreq, a_ctx->fallback_aead_tfm); + aead_request_set_callback(subreq, aead_req->base.flags, + aead_req->base.complete, aead_req->base.data); + aead_request_set_crypt(subreq, aead_req->src, aead_req->dst, + aead_req->cryptlen, aead_req->iv); + aead_request_set_ad(subreq, aead_req->assoclen); + + return encrypt ? crypto_aead_encrypt(subreq) : + crypto_aead_decrypt(subreq); +} + static int sec_aead_crypto(struct aead_request *a_req, bool encrypt) { struct crypto_aead *tfm = crypto_aead_reqtfm(a_req); @@ -2224,8 +2305,11 @@ static int sec_aead_crypto(struct aead_request *a_req, bool encrypt) req->ctx = ctx; ret = sec_aead_param_check(ctx, req); - if (unlikely(ret)) + if (unlikely(ret)) { + if (ctx->a_ctx.fallback) + return sec_aead_soft_crypto(ctx, a_req, encrypt); return -EINVAL; + } return ctx->req_op->process(ctx, req); } @@ -2247,7 +2331,9 @@ static int sec_aead_decrypt(struct aead_request *a_req) .cra_name = sec_cra_name,\ .cra_driver_name = "hisi_sec_"sec_cra_name,\ .cra_priority = SEC_PRIORITY,\ - .cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY,\ + .cra_flags = CRYPTO_ALG_ASYNC |\ + CRYPTO_ALG_ALLOCATES_MEMORY |\ + CRYPTO_ALG_NEED_FALLBACK,\ .cra_blocksize = blk_size,\ .cra_ctxsize = sizeof(struct sec_ctx),\ .cra_module = THIS_MODULE,\ @@ -2255,6 +2341,7 @@ static int sec_aead_decrypt(struct aead_request *a_req) .init = ctx_init,\ .exit = ctx_exit,\ .setkey = sec_set_key,\ + .setauthsize = sec_aead_setauthsize,\ .decrypt = sec_aead_decrypt,\ .encrypt = sec_aead_encrypt,\ .ivsize = iv_size,\ From 668f1ab70378d836a9df0cc01abf21c40c4d9348 Mon Sep 17 00:00:00 2001 From: Kai Ye Date: Fri, 4 Jun 2021 09:31:28 +0800 Subject: [PATCH 116/142] crypto: hisilicon/sec - add hardware integrity check value process Use hardware integrity check value process instead of soft verify process when doing aead decryption. 
Signed-off-by: Kai Ye Signed-off-by: Longfang Liu Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/sec2/sec_crypto.c | 81 +++++++++++++--------- drivers/crypto/hisilicon/sec2/sec_crypto.h | 1 + 2 files changed, 48 insertions(+), 34 deletions(-) diff --git a/drivers/crypto/hisilicon/sec2/sec_crypto.c b/drivers/crypto/hisilicon/sec2/sec_crypto.c index 194a9bca9c5e0..75122f020642f 100644 --- a/drivers/crypto/hisilicon/sec2/sec_crypto.c +++ b/drivers/crypto/hisilicon/sec2/sec_crypto.c @@ -54,6 +54,7 @@ #define SEC_FLAG_MASK 0x0780 #define SEC_TYPE_MASK 0x0F #define SEC_DONE_MASK 0x0001 +#define SEC_ICV_MASK 0x000E #define SEC_SQE_LEN_RATE_MASK 0x3 #define SEC_TOTAL_IV_SZ (SEC_IV_SIZE * QM_Q_DEPTH) @@ -80,6 +81,7 @@ #define SEC_SQE_CFLAG 2 #define SEC_SQE_AEAD_FLAG 3 #define SEC_SQE_DONE 0x1 +#define SEC_ICV_ERR 0x2 #define MIN_MAC_LEN 4 #define MAC_LEN_MASK 0x1U #define MAX_INPUT_DATA_LEN 0xFFFE00 @@ -156,32 +158,12 @@ static void sec_free_req_id(struct sec_req *req) mutex_unlock(&qp_ctx->req_lock); } -static int sec_aead_verify(struct sec_req *req) -{ - struct aead_request *aead_req = req->aead_req.aead_req; - struct crypto_aead *tfm = crypto_aead_reqtfm(aead_req); - size_t authsize = crypto_aead_authsize(tfm); - u8 *mac_out = req->aead_req.out_mac; - u8 *mac = mac_out + SEC_MAX_MAC_LEN; - struct scatterlist *sgl = aead_req->src; - size_t sz; - - sz = sg_pcopy_to_buffer(sgl, sg_nents(sgl), mac, authsize, - aead_req->cryptlen + aead_req->assoclen - - authsize); - if (unlikely(sz != authsize || memcmp(mac_out, mac, sz))) { - dev_err(req->ctx->dev, "aead verify failure!\n"); - return -EBADMSG; - } - - return 0; -} - static u8 pre_parse_finished_bd(struct bd_status *status, void *resp) { struct sec_sqe *bd = resp; status->done = le16_to_cpu(bd->type2.done_flag) & SEC_DONE_MASK; + status->icv = (le16_to_cpu(bd->type2.done_flag) & SEC_ICV_MASK) >> 1; status->flag = (le16_to_cpu(bd->type2.done_flag) & SEC_FLAG_MASK) >> SEC_FLAG_OFFSET; status->tag = le16_to_cpu(bd->type2.tag); @@ -195,6 +177,7 @@ static u8 pre_parse_finished_bd3(struct bd_status *status, void *resp) struct sec_sqe3 *bd3 = resp; status->done = le16_to_cpu(bd3->done_flag) & SEC_DONE_MASK; + status->icv = (le16_to_cpu(bd3->done_flag) & SEC_ICV_MASK) >> 1; status->flag = (le16_to_cpu(bd3->done_flag) & SEC_FLAG_MASK) >> SEC_FLAG_OFFSET; status->tag = le64_to_cpu(bd3->tag); @@ -220,6 +203,14 @@ static int sec_cb_status_check(struct sec_req *req, status->flag); return -EIO; } + } else if (unlikely(ctx->alg_type == SEC_AEAD)) { + if (unlikely(status->flag != SEC_SQE_AEAD_FLAG || + status->icv == SEC_ICV_ERR)) { + dev_err_ratelimited(ctx->dev, + "flag[%u], icv[%u]\n", + status->flag, status->icv); + return -EBADMSG; + } } return 0; @@ -262,9 +253,6 @@ static void sec_req_cb(struct hisi_qp *qp, void *resp) if (err) atomic64_inc(&dfx->done_flag_cnt); - if (ctx->alg_type == SEC_AEAD && !req->c_req.encrypt) - err = sec_aead_verify(req); - atomic64_inc(&dfx->recv_cnt); ctx->req_op->buf_unmap(ctx, req); @@ -895,7 +883,6 @@ static void sec_cipher_pbuf_unmap(struct sec_ctx *ctx, struct sec_req *req, struct aead_request *aead_req = req->aead_req.aead_req; struct sec_cipher_req *c_req = &req->c_req; struct sec_qp_ctx *qp_ctx = req->qp_ctx; - struct device *dev = ctx->dev; int copy_size, pbuf_length; int req_id = req->req_id; @@ -905,10 +892,29 @@ static void sec_cipher_pbuf_unmap(struct sec_ctx *ctx, struct sec_req *req, copy_size = c_req->c_len; pbuf_length = sg_copy_from_buffer(dst, sg_nents(dst), - qp_ctx->res[req_id].pbuf, - copy_size); + 
qp_ctx->res[req_id].pbuf, copy_size); if (unlikely(pbuf_length != copy_size)) - dev_err(dev, "copy pbuf data to dst error!\n"); + dev_err(ctx->dev, "copy pbuf data to dst error!\n"); +} + +static int sec_aead_mac_init(struct sec_aead_req *req) +{ + struct aead_request *aead_req = req->aead_req; + struct crypto_aead *tfm = crypto_aead_reqtfm(aead_req); + size_t authsize = crypto_aead_authsize(tfm); + u8 *mac_out = req->out_mac; + struct scatterlist *sgl = aead_req->src; + size_t copy_size; + off_t skip_size; + + /* Copy input mac */ + skip_size = aead_req->assoclen + aead_req->cryptlen - authsize; + copy_size = sg_pcopy_to_buffer(sgl, sg_nents(sgl), mac_out, + authsize, skip_size); + if (unlikely(copy_size != authsize)) + return -EINVAL; + + return 0; } static int sec_cipher_map(struct sec_ctx *ctx, struct sec_req *req, @@ -922,7 +928,6 @@ static int sec_cipher_map(struct sec_ctx *ctx, struct sec_req *req, int ret; if (req->use_pbuf) { - ret = sec_cipher_pbuf_map(ctx, req, src); c_req->c_ivin = res->pbuf + SEC_PBUF_IV_OFFSET; c_req->c_ivin_dma = res->pbuf_dma + SEC_PBUF_IV_OFFSET; if (ctx->alg_type == SEC_AEAD) { @@ -932,6 +937,7 @@ static int sec_cipher_map(struct sec_ctx *ctx, struct sec_req *req, a_req->out_mac_dma = res->pbuf_dma + SEC_PBUF_MAC_OFFSET; } + ret = sec_cipher_pbuf_map(ctx, req, src); return ret; } @@ -954,6 +960,13 @@ static int sec_cipher_map(struct sec_ctx *ctx, struct sec_req *req, return PTR_ERR(c_req->c_in); } + if (!c_req->encrypt && ctx->alg_type == SEC_AEAD) { + ret = sec_aead_mac_init(a_req); + if (unlikely(ret)) { + dev_err(dev, "fail to init mac data for ICV!\n"); + return ret; + } + } if (dst == src) { c_req->c_out = c_req->c_in; c_req->c_out_dma = c_req->c_in_dma; @@ -1542,13 +1555,13 @@ static void sec_auth_bd_fill_ex(struct sec_auth_ctx *ctx, int dir, sec_sqe->type2.mac_key_alg |= cpu_to_le32((u32)(ctx->a_alg) << SEC_AEAD_ALG_OFFSET); - sec_sqe->type_cipher_auth |= SEC_AUTH_TYPE1 << SEC_AUTH_OFFSET; - - if (dir) + if (dir) { + sec_sqe->type_cipher_auth |= SEC_AUTH_TYPE1 << SEC_AUTH_OFFSET; sec_sqe->sds_sa_type &= SEC_CIPHER_AUTH; - else + } else { + sec_sqe->type_cipher_auth |= SEC_AUTH_TYPE2 << SEC_AUTH_OFFSET; sec_sqe->sds_sa_type |= SEC_AUTH_CIPHER; - + } sec_sqe->type2.alen_ivllen = cpu_to_le32(c_req->c_len + aq->assoclen); sec_sqe->type2.cipher_src_offset = cpu_to_le16((u16)aq->assoclen); diff --git a/drivers/crypto/hisilicon/sec2/sec_crypto.h b/drivers/crypto/hisilicon/sec2/sec_crypto.h index a7bcd3e2affd7..9f71c358a6d35 100644 --- a/drivers/crypto/hisilicon/sec2/sec_crypto.h +++ b/drivers/crypto/hisilicon/sec2/sec_crypto.h @@ -80,6 +80,7 @@ struct bd_status { u8 done; u8 err_type; u16 flag; + u16 icv; }; enum { From 9039878ade5d7ec6ac8db299ab8e7d0d563e3447 Mon Sep 17 00:00:00 2001 From: Kai Ye Date: Fri, 4 Jun 2021 09:31:29 +0800 Subject: [PATCH 117/142] crypto: hisilicon/sec - modify the SEC request structure Modify the SEC request structure, combines two common parameters of the SEC request into one parameter. 
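
Schematically, the input-buffer pair is hoisted out of the cipher-specific request into the common struct sec_req, where both the cipher and AEAD paths can reach it (condensed from the sec.h hunk below):

	/* before: the input SGL was tracked per cipher request */
	struct sec_cipher_req {
		struct hisi_acc_hw_sgl *c_in;
		dma_addr_t c_in_dma;
		/* ... */
	};

	/* after: tracked once, in the common request */
	struct sec_req {
		struct hisi_acc_hw_sgl *in;
		dma_addr_t in_dma;
		struct sec_cipher_req c_req;
		struct sec_aead_req aead_req;
		/* ... */
	};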
Signed-off-by: Kai Ye Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/sec2/sec.h | 7 +++-- drivers/crypto/hisilicon/sec2/sec_crypto.c | 34 +++++++++++----------- 2 files changed, 22 insertions(+), 19 deletions(-) diff --git a/drivers/crypto/hisilicon/sec2/sec.h b/drivers/crypto/hisilicon/sec2/sec.h index 3fe78754fba2e..018415b9840a9 100644 --- a/drivers/crypto/hisilicon/sec2/sec.h +++ b/drivers/crypto/hisilicon/sec2/sec.h @@ -21,8 +21,6 @@ struct sec_alg_res { /* Cipher request of SEC private */ struct sec_cipher_req { - struct hisi_acc_hw_sgl *c_in; - dma_addr_t c_in_dma; struct hisi_acc_hw_sgl *c_out; dma_addr_t c_out_dma; u8 *c_ivin; @@ -49,6 +47,11 @@ struct sec_req { struct sec_ctx *ctx; struct sec_qp_ctx *qp_ctx; + /** + * Common parameter of the SEC request. + */ + struct hisi_acc_hw_sgl *in; + dma_addr_t in_dma; struct sec_cipher_req c_req; struct sec_aead_req aead_req; struct list_head backlog_head; diff --git a/drivers/crypto/hisilicon/sec2/sec_crypto.c b/drivers/crypto/hisilicon/sec2/sec_crypto.c index 75122f020642f..f23af61661dea 100644 --- a/drivers/crypto/hisilicon/sec2/sec_crypto.c +++ b/drivers/crypto/hisilicon/sec2/sec_crypto.c @@ -871,8 +871,8 @@ static int sec_cipher_pbuf_map(struct sec_ctx *ctx, struct sec_req *req, memcpy(a_req->out_mac, mac_offset, authsize); } - c_req->c_in_dma = qp_ctx->res[req_id].pbuf_dma; - c_req->c_out_dma = c_req->c_in_dma; + req->in_dma = qp_ctx->res[req_id].pbuf_dma; + c_req->c_out_dma = req->in_dma; return 0; } @@ -950,14 +950,13 @@ static int sec_cipher_map(struct sec_ctx *ctx, struct sec_req *req, a_req->out_mac_dma = res->out_mac_dma; } - c_req->c_in = hisi_acc_sg_buf_map_to_hw_sgl(dev, src, - qp_ctx->c_in_pool, - req->req_id, - &c_req->c_in_dma); - - if (IS_ERR(c_req->c_in)) { + req->in = hisi_acc_sg_buf_map_to_hw_sgl(dev, src, + qp_ctx->c_in_pool, + req->req_id, + &req->in_dma); + if (IS_ERR(req->in)) { dev_err(dev, "fail to dma map input sgl buffers!\n"); - return PTR_ERR(c_req->c_in); + return PTR_ERR(req->in); } if (!c_req->encrypt && ctx->alg_type == SEC_AEAD) { @@ -967,9 +966,10 @@ static int sec_cipher_map(struct sec_ctx *ctx, struct sec_req *req, return ret; } } + if (dst == src) { - c_req->c_out = c_req->c_in; - c_req->c_out_dma = c_req->c_in_dma; + c_req->c_out = req->in; + c_req->c_out_dma = req->in_dma; } else { c_req->c_out = hisi_acc_sg_buf_map_to_hw_sgl(dev, dst, qp_ctx->c_out_pool, @@ -978,7 +978,7 @@ static int sec_cipher_map(struct sec_ctx *ctx, struct sec_req *req, if (IS_ERR(c_req->c_out)) { dev_err(dev, "fail to dma map output sgl buffers!\n"); - hisi_acc_sg_buf_unmap(dev, src, c_req->c_in); + hisi_acc_sg_buf_unmap(dev, src, req->in); return PTR_ERR(c_req->c_out); } } @@ -996,7 +996,7 @@ static void sec_cipher_unmap(struct sec_ctx *ctx, struct sec_req *req, sec_cipher_pbuf_unmap(ctx, req, dst); } else { if (dst != src) - hisi_acc_sg_buf_unmap(dev, src, c_req->c_in); + hisi_acc_sg_buf_unmap(dev, src, req->in); hisi_acc_sg_buf_unmap(dev, dst, c_req->c_out); } @@ -1236,7 +1236,7 @@ static int sec_skcipher_bd_fill(struct sec_ctx *ctx, struct sec_req *req) sec_sqe->type2.c_key_addr = cpu_to_le64(c_ctx->c_key_dma); sec_sqe->type2.c_ivin_addr = cpu_to_le64(c_req->c_ivin_dma); - sec_sqe->type2.data_src_addr = cpu_to_le64(c_req->c_in_dma); + sec_sqe->type2.data_src_addr = cpu_to_le64(req->in_dma); sec_sqe->type2.data_dst_addr = cpu_to_le64(c_req->c_out_dma); sec_sqe->type2.icvw_kmode |= cpu_to_le16(((u16)c_ctx->c_mode) << @@ -1263,7 +1263,7 @@ static int sec_skcipher_bd_fill(struct sec_ctx *ctx, struct sec_req *req) 
sec_sqe->sdm_addr_type |= da_type; scene = SEC_COMM_SCENE << SEC_SCENE_OFFSET; - if (c_req->c_in_dma != c_req->c_out_dma) + if (req->in_dma != c_req->c_out_dma) de = 0x1 << SEC_DE_OFFSET; sec_sqe->sds_sa_type = (de | scene | sa_type); @@ -1286,7 +1286,7 @@ static int sec_skcipher_bd_fill_v3(struct sec_ctx *ctx, struct sec_req *req) sec_sqe3->c_key_addr = cpu_to_le64(c_ctx->c_key_dma); sec_sqe3->no_scene.c_ivin_addr = cpu_to_le64(c_req->c_ivin_dma); - sec_sqe3->data_src_addr = cpu_to_le64(c_req->c_in_dma); + sec_sqe3->data_src_addr = cpu_to_le64(req->in_dma); sec_sqe3->data_dst_addr = cpu_to_le64(c_req->c_out_dma); sec_sqe3->c_mode_alg = ((u8)c_ctx->c_alg << SEC_CALG_OFFSET_V3) | @@ -1309,7 +1309,7 @@ static int sec_skcipher_bd_fill_v3(struct sec_ctx *ctx, struct sec_req *req) } bd_param |= SEC_COMM_SCENE << SEC_SCENE_OFFSET_V3; - if (c_req->c_in_dma != c_req->c_out_dma) + if (req->in_dma != c_req->c_out_dma) bd_param |= 0x1 << SEC_DE_OFFSET_V3; bd_param |= SEC_BD_TYPE3; From 5cd259ca5d466f65ffd21e2e2fa00fb648a8c555 Mon Sep 17 00:00:00 2001 From: Hongbo Li Date: Fri, 4 Jun 2021 14:30:35 +0800 Subject: [PATCH 118/142] crypto: sm2 - fix a memory leak in sm2 The SM2 module allocates ec->Q in sm2_set_pub_key(). When doing an alg test in test_akcipher_one(), the public key is set for every test vector without freeing ec->Q, which causes a memory leak. This patch allocates ec->Q in sm2_ec_ctx_init() instead. Fixes: ea7ecb66440b ("crypto: sm2 - introduce OSCCA SM2 asymmetric cipher algorithm") Signed-off-by: Hongbo Li Reviewed-by: Tianjia Zhang Signed-off-by: Herbert Xu --- crypto/sm2.c | 24 ++++++++++-------------- 1 file changed, 10 insertions(+), 14 deletions(-) diff --git a/crypto/sm2.c b/crypto/sm2.c index b21addc3ac06a..db8a4a265669d 100644 --- a/crypto/sm2.c +++ b/crypto/sm2.c @@ -79,10 +79,17 @@ static int sm2_ec_ctx_init(struct mpi_ec_ctx *ec) goto free; rc = -ENOMEM; + + ec->Q = mpi_point_new(0); + if (!ec->Q) + goto free; + /* mpi_ec_setup_elliptic_curve */ ec->G = mpi_point_new(0); - if (!ec->G) + if (!ec->G) { + mpi_point_release(ec->Q); goto free; + } mpi_set(ec->G->x, x); mpi_set(ec->G->y, y); @@ -91,6 +98,7 @@ static int sm2_ec_ctx_init(struct mpi_ec_ctx *ec) rc = -EINVAL; ec->n = mpi_scanval(ecp->n); if (!ec->n) { + mpi_point_release(ec->Q); mpi_point_release(ec->G); goto free; } @@ -386,27 +394,15 @@ static int sm2_set_pub_key(struct crypto_akcipher *tfm, MPI a; int rc; - ec->Q = mpi_point_new(0); - if (!ec->Q) - return -ENOMEM; - /* include the uncompressed flag '0x04' */ - rc = -ENOMEM; a = mpi_read_raw_data(key, keylen); if (!a) - goto error; + return -ENOMEM; mpi_normalize(a); rc = sm2_ecc_os2ec(ec->Q, a); mpi_free(a); - if (rc) - goto error; - - return 0; -error: - mpi_point_release(ec->Q); - ec->Q = NULL; return rc; } From 0dc64297c8ac98503a7c7621b3c78e151deb75b6 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sat, 5 Jun 2021 14:55:56 +0200 Subject: [PATCH 119/142] crypto: cavium/nitrox - Fix an error handling path in 'nitrox_probe()' If an error occurs after a successful 'ioremap()' call, it must be undone by a corresponding 'iounmap()' call, as already done in the remove function. Add a 'pf_sw_fail' label in the error handling path and add the missing 'iounmap()'. While at it, also add a 'flr_fail' label in the error handling path and use it to avoid some code duplication.
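
The fix brings nitrox_probe() in line with the usual probe() unwind ladder, where every failure jumps to a label that releases exactly what was acquired before it. A generic sketch of the idiom (acquire_*/release_* are placeholder names, not nitrox functions):

	err = acquire_a(dev);
	if (err)
		return err;

	err = acquire_b(dev);
	if (err)
		goto err_release_a;

	err = acquire_c(dev);
	if (err)
		goto err_release_b;

	return 0;

err_release_b:
	release_b(dev);
err_release_a:
	release_a(dev);
	return err;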
Fixes: 14fa93cdcd9b ("crypto: cavium - Add support for CNN55XX adapters.") Signed-off-by: Christophe JAILLET Signed-off-by: Herbert Xu --- drivers/crypto/cavium/nitrox/nitrox_main.c | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/drivers/crypto/cavium/nitrox/nitrox_main.c b/drivers/crypto/cavium/nitrox/nitrox_main.c index 6af05df281a98..96bc7b5c6532d 100644 --- a/drivers/crypto/cavium/nitrox/nitrox_main.c +++ b/drivers/crypto/cavium/nitrox/nitrox_main.c @@ -424,8 +424,7 @@ static int nitrox_probe(struct pci_dev *pdev, err = nitrox_device_flr(pdev); if (err) { dev_err(&pdev->dev, "FLR failed\n"); - pci_disable_device(pdev); - return err; + goto flr_fail; } if (!dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64))) { @@ -434,17 +433,13 @@ static int nitrox_probe(struct pci_dev *pdev, err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); if (err) { dev_err(&pdev->dev, "DMA configuration failed\n"); - pci_disable_device(pdev); - return err; + goto flr_fail; } } err = pci_request_mem_regions(pdev, nitrox_driver_name); - if (err) { - pci_disable_device(pdev); - dev_err(&pdev->dev, "Failed to request mem regions!\n"); - return err; - } + if (err) + goto flr_fail; pci_set_master(pdev); ndev = kzalloc(sizeof(*ndev), GFP_KERNEL); @@ -480,7 +475,7 @@ static int nitrox_probe(struct pci_dev *pdev, err = nitrox_pf_sw_init(ndev); if (err) - goto ioremap_err; + goto pf_sw_fail; err = nitrox_pf_hw_init(ndev); if (err) @@ -510,12 +505,15 @@ static int nitrox_probe(struct pci_dev *pdev, smp_mb__after_atomic(); pf_hw_fail: nitrox_pf_sw_cleanup(ndev); +pf_sw_fail: + iounmap(ndev->bar_addr); ioremap_err: nitrox_remove_from_devlist(ndev); kfree(ndev); pci_set_drvdata(pdev, NULL); ndev_fail: pci_release_mem_regions(pdev); +flr_fail: pci_disable_device(pdev); return err; } From 3f52c9aef27b0427ff4091f3d08095219e1046af Mon Sep 17 00:00:00 2001 From: Tom Rix Date: Tue, 8 Jun 2021 14:23:43 -0700 Subject: [PATCH 120/142] crypto: marvell/cesa - change FPGA indirect article to an Change use of 'a fpga' to 'an fpga' Signed-off-by: Tom Rix Signed-off-by: Herbert Xu --- drivers/crypto/marvell/cesa/cesa.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/crypto/marvell/cesa/cesa.h b/drivers/crypto/marvell/cesa/cesa.h index c1007f2ba79c8..d215a6bed6bc7 100644 --- a/drivers/crypto/marvell/cesa/cesa.h +++ b/drivers/crypto/marvell/cesa/cesa.h @@ -66,7 +66,7 @@ #define CESA_SA_ST_ACT_1 BIT(1) /* - * CESA_SA_FPGA_INT_STATUS looks like a FPGA leftover and is documented only + * CESA_SA_FPGA_INT_STATUS looks like an FPGA leftover and is documented only * in Errata 4.12. It looks like that it was part of an IRQ-controller in FPGA * and someone forgot to remove it while switching to the core and moving to * CESA_SA_INT_STATUS. From 87c8ba5cd7f99b1c05589c455703f54e92f43ed0 Mon Sep 17 00:00:00 2001 From: Baokun Li Date: Wed, 9 Jun 2021 15:15:26 +0800 Subject: [PATCH 121/142] crypto: ccp - Use list_move_tail instead of list_del/list_add_tail in ccp-dmaengine.c Using list_move_tail() instead of list_del() + list_add_tail() in ccp-dmaengine.c. 
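For reference, list_move_tail() is defined in include/linux/list.h essentially as the two calls it replaces, so the change is behavior-preserving; the version below is a close paraphrase of that helper, not new code:

static inline void list_move_tail(struct list_head *list,
				  struct list_head *head)
{
	__list_del_entry(list);		/* unlink from the current list */
	list_add_tail(list, head);	/* append at the tail of @head */
}

Using the dedicated primitive is shorter and states the intent (move, not delete-then-insert) directly.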
Reported-by: Hulk Robot Signed-off-by: Baokun Li Signed-off-by: Herbert Xu --- drivers/crypto/ccp/ccp-dmaengine.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/crypto/ccp/ccp-dmaengine.c b/drivers/crypto/ccp/ccp-dmaengine.c index 0770a83bf1a57..d718db224be42 100644 --- a/drivers/crypto/ccp/ccp-dmaengine.c +++ b/drivers/crypto/ccp/ccp-dmaengine.c @@ -307,8 +307,7 @@ static dma_cookie_t ccp_tx_submit(struct dma_async_tx_descriptor *tx_desc) spin_lock_irqsave(&chan->lock, flags); cookie = dma_cookie_assign(tx_desc); - list_del(&desc->entry); - list_add_tail(&desc->entry, &chan->pending); + list_move_tail(&desc->entry, &chan->pending); spin_unlock_irqrestore(&chan->lock, flags); From 22ca9f4aaf431a9413dcc115dd590123307f274f Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 10 Jun 2021 08:21:50 +0200 Subject: [PATCH 122/142] crypto: shash - avoid comparing pointers to exported functions under CFI crypto_shash_alg_has_setkey() is implemented by testing whether the .setkey() member of a struct shash_alg points to the default version, called shash_no_setkey(). As crypto_shash_alg_has_setkey() is a static inline, this requires shash_no_setkey() to be exported to modules. Unfortunately, when building with CFI, function pointers are routed via CFI stubs which are private to each module (or to the kernel proper) and so this function pointer comparison may fail spuriously. Let's fix this by turning crypto_shash_alg_has_setkey() into an out of line function. Cc: Sami Tolvanen Cc: Eric Biggers Signed-off-by: Ard Biesheuvel Reviewed-by: Eric Biggers Reviewed-by: Sami Tolvanen Signed-off-by: Herbert Xu --- crypto/shash.c | 18 +++++++++++++++--- include/crypto/internal/hash.h | 8 +------- 2 files changed, 16 insertions(+), 10 deletions(-) diff --git a/crypto/shash.c b/crypto/shash.c index 2e3433ad97629..0a0a50cb694f0 100644 --- a/crypto/shash.c +++ b/crypto/shash.c @@ -20,12 +20,24 @@ static const struct crypto_type crypto_shash_type; -int shash_no_setkey(struct crypto_shash *tfm, const u8 *key, - unsigned int keylen) +static int shash_no_setkey(struct crypto_shash *tfm, const u8 *key, + unsigned int keylen) { return -ENOSYS; } -EXPORT_SYMBOL_GPL(shash_no_setkey); + +/* + * Check whether an shash algorithm has a setkey function. + * + * For CFI compatibility, this must not be an inline function. This is because + * when CFI is enabled, modules won't get the same address for shash_no_setkey + * (if it were exported, which inlining would require) as the core kernel will. 
+ */ +bool crypto_shash_alg_has_setkey(struct shash_alg *alg) +{ + return alg->setkey != shash_no_setkey; +} +EXPORT_SYMBOL_GPL(crypto_shash_alg_has_setkey); static int shash_setkey_unaligned(struct crypto_shash *tfm, const u8 *key, unsigned int keylen) diff --git a/include/crypto/internal/hash.h b/include/crypto/internal/hash.h index 0a288dddcf5be..25806141db591 100644 --- a/include/crypto/internal/hash.h +++ b/include/crypto/internal/hash.h @@ -75,13 +75,7 @@ void crypto_unregister_ahashes(struct ahash_alg *algs, int count); int ahash_register_instance(struct crypto_template *tmpl, struct ahash_instance *inst); -int shash_no_setkey(struct crypto_shash *tfm, const u8 *key, - unsigned int keylen); - -static inline bool crypto_shash_alg_has_setkey(struct shash_alg *alg) -{ - return alg->setkey != shash_no_setkey; -} +bool crypto_shash_alg_has_setkey(struct shash_alg *alg); static inline bool crypto_shash_alg_needs_key(struct shash_alg *alg) { From 10ff9976d06fc6a11f512755d500ab2860cbe650 Mon Sep 17 00:00:00 2001 From: Liu Shixin Date: Fri, 11 Jun 2021 10:01:00 +0800 Subject: [PATCH 123/142] crypto: api - remove CRYPTOA_U32 and related functions According to the advice of Eric and Herbert, type CRYPTOA_U32 has been unused for over a decade, so remove the code related to CRYPTOA_U32. After removing CRYPTOA_U32, the type of the variable attrs can be changed from union to struct. Signed-off-by: Liu Shixin Reviewed-by: Eric Biggers Signed-off-by: Herbert Xu --- crypto/algapi.c | 18 ------------------ crypto/algboss.c | 31 ++++++------------------------- include/crypto/algapi.h | 1 - include/linux/crypto.h | 5 ----- 4 files changed, 6 insertions(+), 49 deletions(-) diff --git a/crypto/algapi.c b/crypto/algapi.c index fdabf2675b63f..43f999dba4dc0 100644 --- a/crypto/algapi.c +++ b/crypto/algapi.c @@ -868,24 +868,6 @@ const char *crypto_attr_alg_name(struct rtattr *rta) } EXPORT_SYMBOL_GPL(crypto_attr_alg_name); -int crypto_attr_u32(struct rtattr *rta, u32 *num) -{ - struct crypto_attr_u32 *nu32; - - if (!rta) - return -ENOENT; - if (RTA_PAYLOAD(rta) < sizeof(*nu32)) - return -EINVAL; - if (rta->rta_type != CRYPTOA_U32) - return -EINVAL; - - nu32 = RTA_DATA(rta); - *num = nu32->num; - - return 0; -} -EXPORT_SYMBOL_GPL(crypto_attr_u32); - int crypto_inst_setname(struct crypto_instance *inst, const char *name, struct crypto_alg *alg) { diff --git a/crypto/algboss.c b/crypto/algboss.c index 5ebccbd6b74ed..1814d2c5188a3 100644 --- a/crypto/algboss.c +++ b/crypto/algboss.c @@ -28,16 +28,9 @@ struct cryptomgr_param { struct crypto_attr_type data; } type; - union { + struct { struct rtattr attr; - struct { - struct rtattr attr; - struct crypto_attr_alg data; - } alg; - struct { - struct rtattr attr; - struct crypto_attr_u32 data; - } nu32; + struct crypto_attr_alg data; } attrs[CRYPTO_MAX_ATTRS]; char template[CRYPTO_MAX_ALG_NAME]; @@ -104,12 +97,10 @@ static int cryptomgr_schedule_probe(struct crypto_larval *larval) i = 0; for (;;) { - int notnum = 0; - name = ++p; for (; isalnum(*p) || *p == '-' || *p == '_'; p++) - notnum |= !isdigit(*p); + ; if (*p == '(') { int recursion = 0; @@ -123,7 +114,6 @@ static int cryptomgr_schedule_probe(struct crypto_larval *larval) break; } - notnum = 1; p++; } @@ -131,18 +121,9 @@ static int cryptomgr_schedule_probe(struct crypto_larval *larval) if (!len) goto err_free_param; - if (notnum) { - param->attrs[i].alg.attr.rta_len = - sizeof(param->attrs[i].alg); - param->attrs[i].alg.attr.rta_type = CRYPTOA_ALG; - memcpy(param->attrs[i].alg.data.name, name, len); - } else { 
- param->attrs[i].nu32.attr.rta_len = - sizeof(param->attrs[i].nu32); - param->attrs[i].nu32.attr.rta_type = CRYPTOA_U32; - param->attrs[i].nu32.data.num = - simple_strtol(name, NULL, 0); - } + param->attrs[i].attr.rta_len = sizeof(param->attrs[i]); + param->attrs[i].attr.rta_type = CRYPTOA_ALG; + memcpy(param->attrs[i].data.name, name, len); param->tb[i + 1] = &param->attrs[i].attr; i++; diff --git a/include/crypto/algapi.h b/include/crypto/algapi.h index 86f0748009af3..41d42e649da4f 100644 --- a/include/crypto/algapi.h +++ b/include/crypto/algapi.h @@ -118,7 +118,6 @@ void *crypto_spawn_tfm2(struct crypto_spawn *spawn); struct crypto_attr_type *crypto_get_attr_type(struct rtattr **tb); int crypto_check_attr_type(struct rtattr **tb, u32 type, u32 *mask_ret); const char *crypto_attr_alg_name(struct rtattr *rta); -int crypto_attr_u32(struct rtattr *rta, u32 *num); int crypto_inst_setname(struct crypto_instance *inst, const char *name, struct crypto_alg *alg); diff --git a/include/linux/crypto.h b/include/linux/crypto.h index da5e0d74bb2f4..3b9263d6122fd 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -647,7 +647,6 @@ enum { CRYPTOA_UNSPEC, CRYPTOA_ALG, CRYPTOA_TYPE, - CRYPTOA_U32, __CRYPTOA_MAX, }; @@ -665,10 +664,6 @@ struct crypto_attr_type { u32 mask; }; -struct crypto_attr_u32 { - u32 num; -}; - /* * Transform user interface. */ From 72b010dc33b9598883bc84d40b0a9d07c16f5e39 Mon Sep 17 00:00:00 2001 From: Kai Ye Date: Fri, 11 Jun 2021 17:06:43 +0800 Subject: [PATCH 124/142] crypto: hisilicon/qm - supports writing QoS in the host Based on the token bucket algorithm, the HAC driver supports configuring each function's QoS in the host. The driver supports writing the QoS through the debugfs node named "alg_qos". The valid QoS value is 1~1000.
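The shaper parameters this patch programs are found by brute force over (cir_b, cir_u, cir_s). The standalone sketch below mirrors that search using the constants from the diff (QM_QOS_TICK = 0x300, QM_QOS_DIVISOR_CLK = 0x1f40, and the qos * QM_QOS_RATE scaling); the target value and the main() scaffolding are illustrative assumptions, not driver code.

#include <stdio.h>

#define QM_QOS_TICK		0x300U	/* 768 */
#define QM_QOS_DIVISOR_CLK	0x1f40U	/* 8000 */
#define QM_QOS_EXPAND_RATE	1000U
#define QM_QOS_MIN_ERROR_RATE	5U

/* IR = cir_b * 2^cir_u * DIVISOR_CLK / (TICK * 2^cir_s), as in the patch */
static unsigned int ir_calc(unsigned int b, unsigned int u, unsigned int s)
{
	return (b * QM_QOS_DIVISOR_CLK * (1U << u)) / (QM_QOS_TICK * (1U << s));
}

int main(void)
{
	unsigned int target = 500U * 100U;	/* qos 500, scaled by QM_QOS_RATE */
	unsigned int b, u, s;

	for (b = 100; b <= 200; b++)		/* QM_QOS_MIN/MAX_CIR_B */
		for (u = 0; u <= 6; u++)	/* QM_QOS_MAX_CIR_U */
			for (s = 0; s <= 11; s++) {	/* QM_QOS_MAX_CIR_S */
				unsigned int ir = ir_calc(b, u, s);
				unsigned int diff = ir > target ? ir - target
								: target - ir;

				if (QM_QOS_EXPAND_RATE * diff / target <=
				    QM_QOS_MIN_ERROR_RATE) {
					printf("cir_b=%u cir_u=%u cir_s=%u -> ir=%u\n",
					       b, u, s, ir);
					return 0;
				}
			}
	printf("no parameter set within tolerance\n");
	return 1;
}

For qos = 500 this lands on cir_b = 150, cir_u = 5, cir_s = 0, which reproduces the target rate of 50000 exactly.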
Signed-off-by: Kai Ye Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/qm.c | 298 +++++++++++++++++++++++++++++++++- drivers/crypto/hisilicon/qm.h | 14 ++ 2 files changed, 310 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c index efa14c9ee9763..2dd450fdc01a4 100644 --- a/drivers/crypto/hisilicon/qm.c +++ b/drivers/crypto/hisilicon/qm.c @@ -240,6 +240,32 @@ #define QM_DRIVER_REMOVING 0 #define QM_RST_SCHED 1 #define QM_RESETTING 2 +#define QM_QOS_PARAM_NUM 2 +#define QM_QOS_VAL_NUM 1 +#define QM_QOS_BDF_PARAM_NUM 4 +#define QM_QOS_MAX_VAL 1000 +#define QM_QOS_RATE 100 +#define QM_QOS_EXPAND_RATE 1000 +#define QM_SHAPER_CIR_B_MASK GENMASK(7, 0) +#define QM_SHAPER_CIR_U_MASK GENMASK(10, 8) +#define QM_SHAPER_CIR_S_MASK GENMASK(14, 11) +#define QM_SHAPER_FACTOR_CIR_U_SHIFT 8 +#define QM_SHAPER_FACTOR_CIR_S_SHIFT 11 +#define QM_SHAPER_FACTOR_CBS_B_SHIFT 15 +#define QM_SHAPER_FACTOR_CBS_S_SHIFT 19 +#define QM_SHAPER_CBS_B 1 +#define QM_SHAPER_CBS_S 16 +#define QM_SHAPER_VFT_OFFSET 6 +#define QM_QOS_MIN_ERROR_RATE 5 +#define QM_QOS_TYPICAL_NUM 8 +#define QM_SHAPER_MIN_CBS_S 8 +#define QM_QOS_TICK 0x300U +#define QM_QOS_DIVISOR_CLK 0x1f40U +#define QM_QOS_MAX_CIR_B 200 +#define QM_QOS_MIN_CIR_B 100 +#define QM_QOS_MAX_CIR_U 6 +#define QM_QOS_MAX_CIR_S 11 +#define QM_QOS_VAL_MAX_LEN 32 #define QM_MK_CQC_DW3_V1(hop_num, pg_sz, buf_sz, cqe_sz) \ (((hop_num) << QM_CQ_HOP_NUM_SHIFT) | \ @@ -280,6 +306,7 @@ enum vft_type { SQC_VFT = 0, CQC_VFT, + SHAPER_VFT, }; enum acc_err_result { @@ -288,6 +315,11 @@ enum acc_err_result { ACC_ERR_RECOVERED, }; +enum qm_alg_type { + ALG_TYPE_0, + ALG_TYPE_1, +}; + enum qm_mb_cmd { QM_PF_FLR_PREPARE = 0x01, QM_PF_SRST_PREPARE, @@ -460,6 +492,11 @@ static const char * const qp_s[] = { "none", "init", "start", "stop", "close", }; +static const u32 typical_qos_val[QM_QOS_TYPICAL_NUM] = {100, 250, 500, 1000, + 10000, 25000, 50000, 100000}; +static const u32 typical_qos_cbs_s[QM_QOS_TYPICAL_NUM] = {9, 10, 11, 12, 16, + 17, 18, 19}; + static bool qm_avail_state(struct hisi_qm *qm, enum qm_state new) { enum qm_state curr = atomic_read(&qm->status.flags); @@ -899,8 +936,69 @@ static void qm_init_prefetch(struct hisi_qm *qm) writel(page_type, qm->io_base + QM_PAGE_SIZE); } +/* + * the formula: + * IR = X Mbps if ir = 1 means IR = 100 Mbps, if ir = 10000 means = 10Gbps + * + * IR_b * (2 ^ IR_u) * 8 + * IR(Mbps) * 10 ^ -3 = ------------------------- + * Tick * (2 ^ IR_s) + */ +static u32 acc_shaper_para_calc(u64 cir_b, u64 cir_u, u64 cir_s) +{ + return ((cir_b * QM_QOS_DIVISOR_CLK) * (1 << cir_u)) / + (QM_QOS_TICK * (1 << cir_s)); +} + +static u32 acc_shaper_calc_cbs_s(u32 ir) +{ + int i; + + if (ir < typical_qos_val[0]) + return QM_SHAPER_MIN_CBS_S; + + for (i = 1; i < QM_QOS_TYPICAL_NUM; i++) { + if (ir >= typical_qos_val[i - 1] && ir < typical_qos_val[i]) + return typical_qos_cbs_s[i - 1]; + } + + return typical_qos_cbs_s[QM_QOS_TYPICAL_NUM - 1]; +} + +static int qm_get_shaper_para(u32 ir, struct qm_shaper_factor *factor) +{ + u32 cir_b, cir_u, cir_s, ir_calc; + u32 error_rate; + + factor->cbs_s = acc_shaper_calc_cbs_s(ir); + + for (cir_b = QM_QOS_MIN_CIR_B; cir_b <= QM_QOS_MAX_CIR_B; cir_b++) { + for (cir_u = 0; cir_u <= QM_QOS_MAX_CIR_U; cir_u++) { + for (cir_s = 0; cir_s <= QM_QOS_MAX_CIR_S; cir_s++) { + /** the formula is changed to: + * IR_b * (2 ^ IR_u) * DIVISOR_CLK + * IR(Mbps) = ------------------------- + * 768 * (2 ^ IR_s) + */ + ir_calc = acc_shaper_para_calc(cir_b, cir_u, + cir_s); + error_rate = 
QM_QOS_EXPAND_RATE * (u32)abs(ir_calc - ir) / ir; + if (error_rate <= QM_QOS_MIN_ERROR_RATE) { + factor->cir_b = cir_b; + factor->cir_u = cir_u; + factor->cir_s = cir_s; + + return 0; + } + } + } + } + + return -EINVAL; +} + static void qm_vft_data_cfg(struct hisi_qm *qm, enum vft_type type, u32 base, - u32 number) + u32 number, struct qm_shaper_factor *factor) { u64 tmp = 0; @@ -929,6 +1027,15 @@ static void qm_vft_data_cfg(struct hisi_qm *qm, enum vft_type type, u32 base, tmp = QM_CQC_VFT_VALID; } break; + case SHAPER_VFT: + if (qm->ver >= QM_HW_V3) { + tmp = factor->cir_b | + (factor->cir_u << QM_SHAPER_FACTOR_CIR_U_SHIFT) | + (factor->cir_s << QM_SHAPER_FACTOR_CIR_S_SHIFT) | + (QM_SHAPER_CBS_B << QM_SHAPER_FACTOR_CBS_B_SHIFT) | + (factor->cbs_s << QM_SHAPER_FACTOR_CBS_S_SHIFT); + } + break; } } @@ -939,6 +1046,7 @@ static void qm_vft_data_cfg(struct hisi_qm *qm, enum vft_type type, u32 base, static int qm_set_vft_common(struct hisi_qm *qm, enum vft_type type, u32 fun_num, u32 base, u32 number) { + struct qm_shaper_factor *factor = &qm->factor[fun_num]; unsigned int val; int ret; @@ -950,9 +1058,12 @@ static int qm_set_vft_common(struct hisi_qm *qm, enum vft_type type, writel(0x0, qm->io_base + QM_VFT_CFG_OP_WR); writel(type, qm->io_base + QM_VFT_CFG_TYPE); + if (type == SHAPER_VFT) + fun_num |= base << QM_SHAPER_VFT_OFFSET; + writel(fun_num, qm->io_base + QM_VFT_CFG); - qm_vft_data_cfg(qm, type, base, number); + qm_vft_data_cfg(qm, type, base, number, factor); writel(0x0, qm->io_base + QM_VFT_CFG_RDY); writel(0x1, qm->io_base + QM_VFT_CFG_OP_ENABLE); @@ -962,6 +1073,27 @@ static int qm_set_vft_common(struct hisi_qm *qm, enum vft_type type, POLL_TIMEOUT); } +static int qm_shaper_init_vft(struct hisi_qm *qm, u32 fun_num) +{ + int ret, i; + + qm->factor[fun_num].func_qos = QM_QOS_MAX_VAL; + ret = qm_get_shaper_para(QM_QOS_MAX_VAL * QM_QOS_RATE, &qm->factor[fun_num]); + if (ret) { + dev_err(&qm->pdev->dev, "failed to calculate shaper parameter!\n"); + return ret; + } + writel(qm->type_rate, qm->io_base + QM_SHAPER_CFG); + for (i = ALG_TYPE_0; i <= ALG_TYPE_1; i++) { + /* The base number of queue reuse for different alg type */ + ret = qm_set_vft_common(qm, SHAPER_VFT, fun_num, i, 1); + if (ret) + return ret; + } + + return 0; +} + /* The config should be conducted after qm_dev_mem_reset() */ static int qm_set_sqc_cqc_vft(struct hisi_qm *qm, u32 fun_num, u32 base, u32 number) @@ -974,7 +1106,21 @@ static int qm_set_sqc_cqc_vft(struct hisi_qm *qm, u32 fun_num, u32 base, return ret; } + /* init default shaper qos val */ + if (qm->ver >= QM_HW_V3) { + ret = qm_shaper_init_vft(qm, fun_num); + if (ret) + goto back_sqc_cqc; + } + return 0; +back_sqc_cqc: + for (i = SQC_VFT; i <= CQC_VFT; i++) { + ret = qm_set_vft_common(qm, i, fun_num, 0, 0); + if (ret) + return ret; + } + return ret; } static int qm_get_vft_v2(struct hisi_qm *qm, u32 *base, u32 *number) @@ -3113,6 +3259,7 @@ void hisi_qm_uninit(struct hisi_qm *qm) struct device *dev = &pdev->dev; qm_cmd_uninit(qm); + kfree(qm->factor); down_write(&qm->qps_lock); if (!qm_avail_state(qm, QM_CLOSE)) { @@ -3842,6 +3989,149 @@ static int qm_clear_vft_config(struct hisi_qm *qm) return 0; } +static int qm_func_shaper_enable(struct hisi_qm *qm, u32 fun_index, u32 qos) +{ + struct device *dev = &qm->pdev->dev; + u32 ir = qos * QM_QOS_RATE; + int ret, total_vfs, i; + + total_vfs = pci_sriov_get_totalvfs(qm->pdev); + if (fun_index > total_vfs) + return -EINVAL; + + qm->factor[fun_index].func_qos = qos; + + ret = qm_get_shaper_para(ir, 
&qm->factor[fun_index]); + if (ret) { + dev_err(dev, "failed to calculate shaper parameter!\n"); + return -EINVAL; + } + + for (i = ALG_TYPE_0; i <= ALG_TYPE_1; i++) { + /* The base number of queue reuse for different alg type */ + ret = qm_set_vft_common(qm, SHAPER_VFT, fun_index, i, 1); + if (ret) { + dev_err(dev, "type: %d, failed to set shaper vft!\n", i); + return -EINVAL; + } + } + + return 0; +} + +static ssize_t qm_qos_value_init(const char *buf, unsigned long *val) +{ + int buflen = strlen(buf); + int ret, i; + + for (i = 0; i < buflen; i++) { + if (!isdigit(buf[i])) + return -EINVAL; + } + + ret = sscanf(buf, "%ld", val); + if (ret != QM_QOS_VAL_NUM) + return -EINVAL; + + return 0; +} + +static ssize_t qm_algqos_write(struct file *filp, const char __user *buf, + size_t count, loff_t *pos) +{ + struct hisi_qm *qm = filp->private_data; + char tbuf[QM_DBG_READ_LEN]; + int tmp1, bus, device, function; + char tbuf_bdf[QM_DBG_READ_LEN] = {0}; + char val_buf[QM_QOS_VAL_MAX_LEN] = {0}; + unsigned int fun_index; + unsigned long val = 0; + int len, ret; + + if (qm->fun_type == QM_HW_VF) + return -EINVAL; + + /* Mailbox and reset cannot be operated at the same time */ + if (test_and_set_bit(QM_RESETTING, &qm->misc_ctl)) { + pci_err(qm->pdev, "dev resetting, write alg qos failed!\n"); + return -EAGAIN; + } + + if (*pos != 0) { + ret = 0; + goto err_get_status; + } + + if (count >= QM_DBG_READ_LEN) { + ret = -ENOSPC; + goto err_get_status; + } + + len = simple_write_to_buffer(tbuf, QM_DBG_READ_LEN - 1, pos, buf, count); + if (len < 0) { + ret = len; + goto err_get_status; + } + + tbuf[len] = '\0'; + ret = sscanf(tbuf, "%s %s", tbuf_bdf, val_buf); + if (ret != QM_QOS_PARAM_NUM) { + ret = -EINVAL; + goto err_get_status; + } + + ret = qm_qos_value_init(val_buf, &val); + if (val == 0 || val > QM_QOS_MAX_VAL || ret) { + pci_err(qm->pdev, "input qos value is error, please set 1~1000!\n"); + ret = -EINVAL; + goto err_get_status; + } + + ret = sscanf(tbuf_bdf, "%d:%x:%d.%d", &tmp1, &bus, &device, &function); + if (ret != QM_QOS_BDF_PARAM_NUM) { + pci_err(qm->pdev, "input pci bdf value is error!\n"); + ret = -EINVAL; + goto err_get_status; + } + + fun_index = device * 8 + function; + + ret = qm_func_shaper_enable(qm, fun_index, val); + if (ret) { + pci_err(qm->pdev, "failed to enable function shaper!\n"); + ret = -EINVAL; + goto err_get_status; + } + + ret = count; + +err_get_status: + clear_bit(QM_RESETTING, &qm->misc_ctl); + return ret; +} + +static const struct file_operations qm_algqos_fops = { + .owner = THIS_MODULE, + .open = simple_open, + .write = qm_algqos_write, +}; + +/** + * hisi_qm_set_algqos_init() - Initialize function qos debugfs files. + * @qm: The qm for which we want to add debugfs files. + * + * Create function qos debugfs files. 
+ */ +static void hisi_qm_set_algqos_init(struct hisi_qm *qm) +{ + if (qm->fun_type == QM_HW_PF) + debugfs_create_file("alg_qos", 0644, qm->debug.debug_root, + qm, &qm_algqos_fops); + else + debugfs_create_file("alg_qos", 0444, qm->debug.debug_root, + qm, &qm_algqos_fops); +} + /** * hisi_qm_sriov_enable() - enable virtual functions * @pdev: the PCIe device @@ -3896,6 +4186,7 @@ EXPORT_SYMBOL_GPL(hisi_qm_sriov_enable); int hisi_qm_sriov_disable(struct pci_dev *pdev, bool is_frozen) { struct hisi_qm *qm = pci_get_drvdata(pdev); + int total_vfs = pci_sriov_get_totalvfs(qm->pdev); if (pci_vfs_assigned(pdev)) { pci_err(pdev, "Failed to disable VFs as VFs are assigned!\n"); @@ -3909,6 +4200,9 @@ int hisi_qm_sriov_disable(struct pci_dev *pdev, bool is_frozen) } pci_disable_sriov(pdev); + /* clear vf function shaper configure array */ + memset(qm->factor + 1, 0, sizeof(struct qm_shaper_factor) * total_vfs); + return qm_clear_vft_config(qm); } EXPORT_SYMBOL_GPL(hisi_qm_sriov_disable); diff --git a/drivers/crypto/hisilicon/qm.h b/drivers/crypto/hisilicon/qm.h index 8a36bade103d8..035eaf8c442dd 100644 --- a/drivers/crypto/hisilicon/qm.h +++ b/drivers/crypto/hisilicon/qm.h @@ -76,6 +76,9 @@ #define QM_Q_DEPTH 1024 #define QM_MIN_QNUM 2 #define HISI_ACC_SGL_SGE_NR_MAX 255 +#define QM_SHAPER_CFG 0x100164 +#define QM_SHAPER_ENABLE BIT(30) +#define QM_SHAPER_TYPE1_OFFSET 10 /* page number for queue file region */ #define QM_DOORBELL_PAGE_NR 1 @@ -148,6 +151,14 @@ struct qm_debug { struct debugfs_file files[DEBUG_FILE_NUM]; }; +struct qm_shaper_factor { + u32 func_qos; + u64 cir_b; + u64 cir_u; + u64 cir_s; + u64 cbs_s; +}; + struct qm_dma { void *va; dma_addr_t dma; @@ -262,6 +273,9 @@ struct hisi_qm { resource_size_t db_phys_base; struct uacce_device *uacce; int mode; + struct qm_shaper_factor *factor; + u32 mb_qos; + u32 type_rate; }; struct hisi_qp_status { From cc0c40c613d2c7a00f3bce4770a925dc56672f01 Mon Sep 17 00:00:00 2001 From: Kai Ye Date: Fri, 11 Jun 2021 17:06:44 +0800 Subject: [PATCH 125/142] crypto: hisilicon/qm - add the "alg_qos" file node 1. Just move the code as needed. 2. Add the "alg_qos" file node in the qm debug sysfs. 
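The "alg_qos" node follows the standard debugfs file pattern: a file_operations using simple_open() plus text-based read/write handlers. Below is a minimal, self-contained sketch of that pattern under assumed example_* names; it is not the driver's code.

#include <linux/debugfs.h>
#include <linux/fs.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/types.h>

static u32 example_qos = 1000;
static struct dentry *example_dir;

static ssize_t example_qos_read(struct file *filp, char __user *buf,
				size_t count, loff_t *pos)
{
	char tbuf[16];
	int len = scnprintf(tbuf, sizeof(tbuf), "%u\n", example_qos);

	return simple_read_from_buffer(buf, count, pos, tbuf, len);
}

static const struct file_operations example_qos_fops = {
	.owner = THIS_MODULE,
	.open = simple_open,
	.read = example_qos_read,
};

static int __init example_init(void)
{
	example_dir = debugfs_create_dir("example_qm", NULL);
	debugfs_create_file("alg_qos", 0444, example_dir, NULL,
			    &example_qos_fops);
	return 0;
}

static void __exit example_exit(void)
{
	debugfs_remove_recursive(example_dir);
}

module_init(example_init);
module_exit(example_exit);
MODULE_LICENSE("GPL");

From userspace the real node is driven with plain reads and writes: per the sscanf format "%d:%x:%d.%d" in qm_algqos_write(), something like echo "0000:35:00.0 500" > .../alg_qos (assuming the accelerator sits at that BDF), and cat .../alg_qos to read the value back.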
Signed-off-by: Kai Ye Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/qm.c | 284 +++++++++++++++++++--------------- 1 file changed, 155 insertions(+), 129 deletions(-) diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c index 2dd450fdc01a4..4350b67968217 100644 --- a/drivers/crypto/hisilicon/qm.c +++ b/drivers/crypto/hisilicon/qm.c @@ -3125,62 +3125,6 @@ static int hisi_qp_memory_init(struct hisi_qm *qm, size_t dma_size, int id) return 0; } -static int hisi_qm_memory_init(struct hisi_qm *qm) -{ - struct device *dev = &qm->pdev->dev; - size_t qp_dma_size, off = 0; - int i, ret = 0; - -#define QM_INIT_BUF(qm, type, num) do { \ - (qm)->type = ((qm)->qdma.va + (off)); \ - (qm)->type##_dma = (qm)->qdma.dma + (off); \ - off += QMC_ALIGN(sizeof(struct qm_##type) * (num)); \ -} while (0) - - idr_init(&qm->qp_idr); - qm->qdma.size = QMC_ALIGN(sizeof(struct qm_eqe) * QM_EQ_DEPTH) + - QMC_ALIGN(sizeof(struct qm_aeqe) * QM_Q_DEPTH) + - QMC_ALIGN(sizeof(struct qm_sqc) * qm->qp_num) + - QMC_ALIGN(sizeof(struct qm_cqc) * qm->qp_num); - qm->qdma.va = dma_alloc_coherent(dev, qm->qdma.size, &qm->qdma.dma, - GFP_ATOMIC); - dev_dbg(dev, "allocate qm dma buf size=%zx)\n", qm->qdma.size); - if (!qm->qdma.va) - return -ENOMEM; - - QM_INIT_BUF(qm, eqe, QM_EQ_DEPTH); - QM_INIT_BUF(qm, aeqe, QM_Q_DEPTH); - QM_INIT_BUF(qm, sqc, qm->qp_num); - QM_INIT_BUF(qm, cqc, qm->qp_num); - - qm->qp_array = kcalloc(qm->qp_num, sizeof(struct hisi_qp), GFP_KERNEL); - if (!qm->qp_array) { - ret = -ENOMEM; - goto err_alloc_qp_array; - } - - /* one more page for device or qp statuses */ - qp_dma_size = qm->sqe_size * QM_Q_DEPTH + - sizeof(struct qm_cqe) * QM_Q_DEPTH; - qp_dma_size = PAGE_ALIGN(qp_dma_size); - for (i = 0; i < qm->qp_num; i++) { - ret = hisi_qp_memory_init(qm, qp_dma_size, i); - if (ret) - goto err_init_qp_mem; - - dev_dbg(dev, "allocate qp dma buf size=%zx)\n", qp_dma_size); - } - - return ret; - -err_init_qp_mem: - hisi_qp_memory_uninit(qm, i); -err_alloc_qp_array: - dma_free_coherent(dev, qm->qdma.size, qm->qdma.va, qm->qdma.dma); - - return ret; -} - static void hisi_qm_pre_init(struct hisi_qm *qm) { struct pci_dev *pdev = qm->pdev; @@ -3661,79 +3605,6 @@ static int qm_debugfs_atomic64_get(void *data, u64 *val) DEFINE_DEBUGFS_ATTRIBUTE(qm_atomic64_ops, qm_debugfs_atomic64_get, qm_debugfs_atomic64_set, "%llu\n"); -/** - * hisi_qm_debug_init() - Initialize qm related debugfs files. - * @qm: The qm for which we want to add debugfs files. - * - * Create qm related debugfs files. - */ -void hisi_qm_debug_init(struct hisi_qm *qm) -{ - struct qm_dfx *dfx = &qm->debug.dfx; - struct dentry *qm_d; - void *data; - int i; - - qm_d = debugfs_create_dir("qm", qm->debug.debug_root); - qm->debug.qm_d = qm_d; - - /* only show this in PF */ - if (qm->fun_type == QM_HW_PF) { - qm_create_debugfs_file(qm, qm->debug.debug_root, CURRENT_QM); - for (i = CURRENT_Q; i < DEBUG_FILE_NUM; i++) - qm_create_debugfs_file(qm, qm_d, i); - } - - debugfs_create_file("regs", 0444, qm->debug.qm_d, qm, &qm_regs_fops); - - debugfs_create_file("cmd", 0444, qm->debug.qm_d, qm, &qm_cmd_fops); - - debugfs_create_file("status", 0444, qm->debug.qm_d, qm, - &qm_status_fops); - for (i = 0; i < ARRAY_SIZE(qm_dfx_files); i++) { - data = (atomic64_t *)((uintptr_t)dfx + qm_dfx_files[i].offset); - debugfs_create_file(qm_dfx_files[i].name, - 0644, - qm_d, - data, - &qm_atomic64_ops); - } -} -EXPORT_SYMBOL_GPL(hisi_qm_debug_init); - -/** - * hisi_qm_debug_regs_clear() - clear qm debug related registers. 
- * @qm: The qm for which we want to clear its debug registers. - */ -void hisi_qm_debug_regs_clear(struct hisi_qm *qm) -{ - struct qm_dfx_registers *regs; - int i; - - /* clear current_qm */ - writel(0x0, qm->io_base + QM_DFX_MB_CNT_VF); - writel(0x0, qm->io_base + QM_DFX_DB_CNT_VF); - - /* clear current_q */ - writel(0x0, qm->io_base + QM_DFX_SQE_CNT_VF_SQN); - writel(0x0, qm->io_base + QM_DFX_CQE_CNT_VF_CQN); - - /* - * these registers are reading and clearing, so clear them after - * reading them. - */ - writel(0x1, qm->io_base + QM_DFX_CNT_CLR_CE); - - regs = qm_dfx_regs; - for (i = 0; i < CNT_CYC_REGS_NUM; i++) { - readl(qm->io_base + regs->reg_offset); - regs++; - } - - writel(0x0, qm->io_base + QM_DFX_CNT_CLR_CE); -} -EXPORT_SYMBOL_GPL(hisi_qm_debug_regs_clear); - static void qm_hw_error_init(struct hisi_qm *qm) { struct hisi_qm_err_info *err_info = &qm->err_info; @@ -4132,6 +4003,83 @@ static void hisi_qm_set_algqos_init(struct hisi_qm *qm) qm, &qm_algqos_fops); } +/** + * hisi_qm_debug_init() - Initialize qm related debugfs files. + * @qm: The qm for which we want to add debugfs files. + * + * Create qm related debugfs files. + */ +void hisi_qm_debug_init(struct hisi_qm *qm) +{ + struct qm_dfx *dfx = &qm->debug.dfx; + struct dentry *qm_d; + void *data; + int i; + + qm_d = debugfs_create_dir("qm", qm->debug.debug_root); + qm->debug.qm_d = qm_d; + + /* only show this in PF */ + if (qm->fun_type == QM_HW_PF) { + qm_create_debugfs_file(qm, qm->debug.debug_root, CURRENT_QM); + for (i = CURRENT_Q; i < DEBUG_FILE_NUM; i++) + qm_create_debugfs_file(qm, qm->debug.qm_d, i); + } + + debugfs_create_file("regs", 0444, qm->debug.qm_d, qm, &qm_regs_fops); + + debugfs_create_file("cmd", 0600, qm->debug.qm_d, qm, &qm_cmd_fops); + + debugfs_create_file("status", 0444, qm->debug.qm_d, qm, + &qm_status_fops); + for (i = 0; i < ARRAY_SIZE(qm_dfx_files); i++) { + data = (atomic64_t *)((uintptr_t)dfx + qm_dfx_files[i].offset); + debugfs_create_file(qm_dfx_files[i].name, + 0644, + qm_d, + data, + &qm_atomic64_ops); + } + + if (qm->ver >= QM_HW_V3) + hisi_qm_set_algqos_init(qm); +} +EXPORT_SYMBOL_GPL(hisi_qm_debug_init); + +/** + * hisi_qm_debug_regs_clear() - clear qm debug related registers. + * @qm: The qm for which we want to clear its debug registers. + */ +void hisi_qm_debug_regs_clear(struct hisi_qm *qm) +{ + struct qm_dfx_registers *regs; + int i; + + /* clear current_qm */ + writel(0x0, qm->io_base + QM_DFX_MB_CNT_VF); + writel(0x0, qm->io_base + QM_DFX_DB_CNT_VF); + + /* clear current_q */ + writel(0x0, qm->io_base + QM_DFX_SQE_CNT_VF_SQN); + writel(0x0, qm->io_base + QM_DFX_CQE_CNT_VF_CQN); + + /* + * these registers are reading and clearing, so clear them after + * reading them. 
+ */ + writel(0x1, qm->io_base + QM_DFX_CNT_CLR_CE); + + regs = qm_dfx_regs; + for (i = 0; i < CNT_CYC_REGS_NUM; i++) { + readl(qm->io_base + regs->reg_offset); + regs++; + } + + /* clear clear_enable */ + writel(0x0, qm->io_base + QM_DFX_CNT_CLR_CE); +} +EXPORT_SYMBOL_GPL(hisi_qm_debug_regs_clear); + /** * hisi_qm_sriov_enable() - enable virtual functions * @pdev: the PCIe device @@ -5369,6 +5317,84 @@ static int hisi_qm_pci_init(struct hisi_qm *qm) return ret; } +static int hisi_qp_alloc_memory(struct hisi_qm *qm) +{ + struct device *dev = &qm->pdev->dev; + size_t qp_dma_size; + int i, ret; + + qm->qp_array = kcalloc(qm->qp_num, sizeof(struct hisi_qp), GFP_KERNEL); + if (!qm->qp_array) + return -ENOMEM; + + /* one more page for device or qp statuses */ + qp_dma_size = qm->sqe_size * QM_Q_DEPTH + + sizeof(struct qm_cqe) * QM_Q_DEPTH; + qp_dma_size = PAGE_ALIGN(qp_dma_size) + PAGE_SIZE; + for (i = 0; i < qm->qp_num; i++) { + ret = hisi_qp_memory_init(qm, qp_dma_size, i); + if (ret) + goto err_init_qp_mem; + + dev_dbg(dev, "allocate qp dma buf size=%zx)\n", qp_dma_size); + } + + return 0; +err_init_qp_mem: + hisi_qp_memory_uninit(qm, i); + + return ret; +} + +static int hisi_qm_memory_init(struct hisi_qm *qm) +{ + struct device *dev = &qm->pdev->dev; + int ret, total_vfs; + size_t off = 0; + + total_vfs = pci_sriov_get_totalvfs(qm->pdev); + qm->factor = kcalloc(total_vfs + 1, sizeof(struct qm_shaper_factor), GFP_KERNEL); + if (!qm->factor) + return -ENOMEM; + +#define QM_INIT_BUF(qm, type, num) do { \ + (qm)->type = ((qm)->qdma.va + (off)); \ + (qm)->type##_dma = (qm)->qdma.dma + (off); \ + off += QMC_ALIGN(sizeof(struct qm_##type) * (num)); \ +} while (0) + + idr_init(&qm->qp_idr); + qm->qdma.size = QMC_ALIGN(sizeof(struct qm_eqe) * QM_EQ_DEPTH) + + QMC_ALIGN(sizeof(struct qm_aeqe) * QM_Q_DEPTH) + + QMC_ALIGN(sizeof(struct qm_sqc) * qm->qp_num) + + QMC_ALIGN(sizeof(struct qm_cqc) * qm->qp_num); + qm->qdma.va = dma_alloc_coherent(dev, qm->qdma.size, &qm->qdma.dma, + GFP_ATOMIC); + dev_dbg(dev, "allocate qm dma buf size=%zx)\n", qm->qdma.size); + if (!qm->qdma.va) { + ret = -ENOMEM; + goto err_alloc_qdma; + } + + QM_INIT_BUF(qm, eqe, QM_EQ_DEPTH); + QM_INIT_BUF(qm, aeqe, QM_Q_DEPTH); + QM_INIT_BUF(qm, sqc, qm->qp_num); + QM_INIT_BUF(qm, cqc, qm->qp_num); + + ret = hisi_qp_alloc_memory(qm); + if (ret) + goto err_alloc_qp_array; + + return 0; + +err_alloc_qp_array: + dma_free_coherent(dev, qm->qdma.size, qm->qdma.va, qm->qdma.dma); +err_alloc_qdma: + kfree(qm->factor); + + return ret; +} + /** * hisi_qm_init() - Initialize configures about qm. * @qm: The qm needing init. From 362c50bad3a792969f8142372a0813aadee89a61 Mon Sep 17 00:00:00 2001 From: Kai Ye Date: Fri, 11 Jun 2021 17:06:45 +0800 Subject: [PATCH 126/142] crypto: hisilicon/qm - merges the work initialization process into a single function Merges the work initialization process into a single function from qm initialization. 
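The consolidated helper is plain INIT_WORK() bookkeeping. As a reminder of the pattern (a sketch with placeholder example_* names, not the driver's code), each work_struct is bound to its handler once at init time, and the handler recovers its containing object with container_of():

#include <linux/kernel.h>
#include <linux/printk.h>
#include <linux/workqueue.h>

struct example_dev {
	struct work_struct irq_work;
	struct work_struct reset_work;
	bool is_pf;
};

static void example_irq_handler(struct work_struct *work)
{
	struct example_dev *edev =
		container_of(work, struct example_dev, irq_work);

	pr_debug("handling deferred work for %p\n", edev);
}

static void example_reset_handler(struct work_struct *work)
{
	/* recover the device with container_of() and reset it here */
}

/* one init helper, mirroring the shape of hisi_qm_init_work() */
static void example_init_work(struct example_dev *edev)
{
	INIT_WORK(&edev->irq_work, example_irq_handler);
	if (edev->is_pf)
		INIT_WORK(&edev->reset_work, example_reset_handler);
}

Later, schedule_work(&edev->irq_work) queues the handler; nothing about the handlers changes when the INIT_WORK() calls move, which is why this is a pure refactor. Note the moved condition is spelled qm->ver > QM_HW_V2, which is equivalent to the original qm->ver >= QM_HW_V3 for the defined hardware versions.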
Signed-off-by: Kai Ye Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/qm.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c index 4350b67968217..754ddb5dec219 100644 --- a/drivers/crypto/hisilicon/qm.c +++ b/drivers/crypto/hisilicon/qm.c @@ -5317,6 +5317,16 @@ static int hisi_qm_pci_init(struct hisi_qm *qm) return ret; } +static void hisi_qm_init_work(struct hisi_qm *qm) +{ + INIT_WORK(&qm->work, qm_work_process); + if (qm->fun_type == QM_HW_PF) + INIT_WORK(&qm->rst_work, hisi_qm_controller_reset); + + if (qm->ver > QM_HW_V2) + INIT_WORK(&qm->cmd_process, qm_cmd_process); +} + static int hisi_qp_alloc_memory(struct hisi_qm *qm) { struct device *dev = &qm->pdev->dev; @@ -5432,13 +5442,7 @@ int hisi_qm_init(struct hisi_qm *qm) if (ret) goto err_alloc_uacce; - INIT_WORK(&qm->work, qm_work_process); - if (qm->fun_type == QM_HW_PF) - INIT_WORK(&qm->rst_work, hisi_qm_controller_reset); - - if (qm->ver >= QM_HW_V3) - INIT_WORK(&qm->cmd_process, qm_cmd_process); - + hisi_qm_init_work(qm); qm_cmd_init(qm); atomic_set(&qm->status.flags, QM_INIT); From 2966d9d3078c623f48054ef1bfe9a975e5d1fe0c Mon Sep 17 00:00:00 2001 From: Kai Ye Date: Fri, 11 Jun 2021 17:06:46 +0800 Subject: [PATCH 127/142] crypto: hisilicon/qm - add pf ping single vf function As part of the inter-function communication, add a PF-pings-single-VF function, which is used when a VF reads its QoS. Signed-off-by: Kai Ye Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/qm.c | 36 +++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c index 754ddb5dec219..735c8b07b1e9e 100644 --- a/drivers/crypto/hisilicon/qm.c +++ b/drivers/crypto/hisilicon/qm.c @@ -2142,6 +2142,42 @@ static void qm_trigger_pf_interrupt(struct hisi_qm *qm) writel(val, qm->io_base + QM_IFC_INT_SET_V); } +static int qm_ping_single_vf(struct hisi_qm *qm, u64 cmd, u32 fun_num) +{ + struct device *dev = &qm->pdev->dev; + struct qm_mailbox mailbox; + int cnt = 0; + u64 val; + int ret; + + qm_mb_pre_init(&mailbox, QM_MB_CMD_SRC, cmd, fun_num, 0); + mutex_lock(&qm->mailbox_lock); + ret = qm_mb_nolock(qm, &mailbox); + if (ret) { + dev_err(dev, "failed to send command to vf(%u)!\n", fun_num); + goto err_unlock; + } + + qm_trigger_vf_interrupt(qm, fun_num); + while (true) { + msleep(QM_WAIT_DST_ACK); + val = readq(qm->io_base + QM_IFC_READY_STATUS); + /* if VF respond, PF notifies VF successfully. */ + if (!(val & BIT(fun_num))) + goto err_unlock; + + if (++cnt > QM_MAX_PF_WAIT_COUNT) { + dev_err(dev, "failed to get response from VF(%u)!\n", fun_num); + ret = -ETIMEDOUT; + break; + } + } + +err_unlock: + mutex_unlock(&qm->mailbox_lock); + return ret; +} + static int qm_ping_all_vfs(struct hisi_qm *qm, u64 cmd) { struct device *dev = &qm->pdev->dev; From 3bbf0783636be8fd672907df25904288f14566f2 Mon Sep 17 00:00:00 2001 From: Kai Ye Date: Fri, 11 Jun 2021 17:06:47 +0800 Subject: [PATCH 128/142] crypto: hisilicon/qm - supports inquiring each function's QoS 1. The ACC driver supports inquiring each function's QoS in the host and VM. The QoS can be read through the device debugfs attribute file "alg_qos", e.g. "cat alg_qos". 2. Modify the communication process between the PF and VFs as needed.
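Both the PF-to-VF ping above and the VF-side wait added here share one idiom: send a message, then poll a condition with a sleep and a bounded retry count. A distilled sketch of that loop shape follows (placeholder names; only the overall structure mirrors qm_ping_single_vf() and qm_vf_read_qos()):

#include <linux/bits.h>
#include <linux/delay.h>
#include <linux/errno.h>
#include <linux/io.h>
#include <linux/types.h>

/* Poll @ready_reg until the bit for @fun_num clears (the target function
 * acknowledges by clearing its bit), or give up after @max_waits sleeps.
 */
static int example_wait_for_ack(void __iomem *ready_reg, u32 fun_num,
				unsigned int max_waits)
{
	unsigned int cnt = 0;

	while (true) {
		msleep(10);	/* stand-in for QM_WAIT_DST_ACK */
		if (!(readq(ready_reg) & BIT(fun_num)))
			return 0;
		if (++cnt > max_waits)
			return -ETIMEDOUT;
	}
}

On the QoS path the reply itself is packed into the mailbox word: the PF sends QM_PF_SET_QOS | qos << QM_MB_CMD_DATA_SHIFT, the VF unpacks it into qm->mb_qos, and qm_algqos_read() divides by QM_QOS_RATE before printing.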
Signed-off-by: Kai Ye Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/qm.c | 181 ++++++++++++++++++++++++++++++++-- 1 file changed, 174 insertions(+), 7 deletions(-) diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c index 735c8b07b1e9e..580709408cfc7 100644 --- a/drivers/crypto/hisilicon/qm.c +++ b/drivers/crypto/hisilicon/qm.c @@ -51,6 +51,7 @@ #define QM_MB_CMD_DATA_ADDR_L 0x304 #define QM_MB_CMD_DATA_ADDR_H 0x308 #define QM_MB_PING_ALL_VFS 0xffff +#define QM_MB_CMD_DATA_SHIFT 32 #define QM_MB_CMD_DATA_MASK GENMASK(31, 0) /* sqc shift */ @@ -185,6 +186,7 @@ /* interfunction communication */ #define QM_IFC_READY_STATUS 0x100128 +#define QM_IFC_C_STS_M 0x10012C #define QM_IFC_INT_SET_P 0x100130 #define QM_IFC_INT_CFG 0x100134 #define QM_IFC_INT_SOURCE_P 0x100138 @@ -256,6 +258,7 @@ #define QM_SHAPER_CBS_B 1 #define QM_SHAPER_CBS_S 16 #define QM_SHAPER_VFT_OFFSET 6 +#define WAIT_FOR_QOS_VF 100 #define QM_QOS_MIN_ERROR_RATE 5 #define QM_QOS_TYPICAL_NUM 8 #define QM_SHAPER_MIN_CBS_S 8 @@ -328,6 +331,8 @@ enum qm_mb_cmd { QM_VF_PREPARE_FAIL, QM_VF_START_DONE, QM_VF_START_FAIL, + QM_PF_SET_QOS, + QM_VF_GET_QOS, }; struct qm_cqe { @@ -2124,7 +2129,7 @@ static void qm_trigger_vf_interrupt(struct hisi_qm *qm, u32 fun_num) u32 val; val = readl(qm->io_base + QM_IFC_INT_CFG); - val |= ~QM_IFC_SEND_ALL_VFS; + val &= ~QM_IFC_SEND_ALL_VFS; val |= fun_num; writel(val, qm->io_base + QM_IFC_INT_CFG); @@ -3926,6 +3931,139 @@ static int qm_func_shaper_enable(struct hisi_qm *qm, u32 fun_index, u32 qos) return 0; } +static u32 qm_get_shaper_vft_qos(struct hisi_qm *qm, u32 fun_index) +{ + u64 cir_u = 0, cir_b = 0, cir_s = 0; + u64 shaper_vft, ir_calc, ir; + unsigned int val; + u32 error_rate; + int ret; + + ret = readl_relaxed_poll_timeout(qm->io_base + QM_VFT_CFG_RDY, val, + val & BIT(0), POLL_PERIOD, + POLL_TIMEOUT); + if (ret) + return 0; + + writel(0x1, qm->io_base + QM_VFT_CFG_OP_WR); + writel(SHAPER_VFT, qm->io_base + QM_VFT_CFG_TYPE); + writel(fun_index, qm->io_base + QM_VFT_CFG); + + writel(0x0, qm->io_base + QM_VFT_CFG_RDY); + writel(0x1, qm->io_base + QM_VFT_CFG_OP_ENABLE); + + ret = readl_relaxed_poll_timeout(qm->io_base + QM_VFT_CFG_RDY, val, + val & BIT(0), POLL_PERIOD, + POLL_TIMEOUT); + if (ret) + return 0; + + shaper_vft = readl(qm->io_base + QM_VFT_CFG_DATA_L) | + ((u64)readl(qm->io_base + QM_VFT_CFG_DATA_H) << 32); + + cir_b = shaper_vft & QM_SHAPER_CIR_B_MASK; + cir_u = shaper_vft & QM_SHAPER_CIR_U_MASK; + cir_u = cir_u >> QM_SHAPER_FACTOR_CIR_U_SHIFT; + + cir_s = shaper_vft & QM_SHAPER_CIR_S_MASK; + cir_s = cir_s >> QM_SHAPER_FACTOR_CIR_S_SHIFT; + + ir_calc = acc_shaper_para_calc(cir_b, cir_u, cir_s); + + ir = qm->factor[fun_index].func_qos * QM_QOS_RATE; + + error_rate = QM_QOS_EXPAND_RATE * (u32)abs(ir_calc - ir) / ir; + if (error_rate > QM_QOS_MIN_ERROR_RATE) { + pci_err(qm->pdev, "error_rate: %u, get function qos is error!\n", error_rate); + return 0; + } + + return ir; +} + +static void qm_vf_get_qos(struct hisi_qm *qm, u32 fun_num) +{ + struct device *dev = &qm->pdev->dev; + u64 mb_cmd; + u32 qos; + int ret; + + qos = qm_get_shaper_vft_qos(qm, fun_num); + if (!qos) { + dev_err(dev, "function(%u) failed to get qos by PF!\n", fun_num); + return; + } + + mb_cmd = QM_PF_SET_QOS | (u64)qos << QM_MB_CMD_DATA_SHIFT; + ret = qm_ping_single_vf(qm, mb_cmd, fun_num); + if (ret) + dev_err(dev, "failed to send cmd to VF(%u)!\n", fun_num); +} + +static int qm_vf_read_qos(struct hisi_qm *qm) +{ + int cnt = 0; + int ret; + + /* reset mailbox qos val */ + qm->mb_qos = 0; 
+ + /* vf ping pf to get function qos */ + if (qm->ops->ping_pf) { + ret = qm->ops->ping_pf(qm, QM_VF_GET_QOS); + if (ret) { + pci_err(qm->pdev, "failed to send cmd to PF to get qos!\n"); + return ret; + } + } + + while (true) { + msleep(QM_WAIT_DST_ACK); + if (qm->mb_qos) + break; + + if (++cnt > QM_MAX_VF_WAIT_COUNT) { + pci_err(qm->pdev, "PF ping VF timeout!\n"); + return -ETIMEDOUT; + } + } + + return ret; +} + +static ssize_t qm_algqos_read(struct file *filp, char __user *buf, + size_t count, loff_t *pos) +{ + struct hisi_qm *qm = filp->private_data; + char tbuf[QM_DBG_READ_LEN]; + u32 qos_val, ir; + int ret; + + /* Mailbox and reset cannot be operated at the same time */ + if (test_and_set_bit(QM_RESETTING, &qm->misc_ctl)) { + pci_err(qm->pdev, "dev resetting, read alg qos failed!\n"); + return -EAGAIN; + } + + if (qm->fun_type == QM_HW_PF) { + ir = qm_get_shaper_vft_qos(qm, 0); + } else { + ret = qm_vf_read_qos(qm); + if (ret) + goto err_get_status; + ir = qm->mb_qos; + } + + qos_val = ir / QM_QOS_RATE; + ret = scnprintf(tbuf, QM_DBG_READ_LEN, "%u\n", qos_val); + + ret = simple_read_from_buffer(buf, count, pos, tbuf, ret); + +err_get_status: + clear_bit(QM_RESETTING, &qm->misc_ctl); + return ret; +} + static ssize_t qm_qos_value_init(const char *buf, unsigned long *val) { int buflen = strlen(buf); @@ -4020,6 +4158,7 @@ static ssize_t qm_algqos_write(struct file *filp, const char __user *buf, static const struct file_operations qm_algqos_fops = { .owner = THIS_MODULE, .open = simple_open, + .read = qm_algqos_read, .write = qm_algqos_write, }; @@ -5129,10 +5268,8 @@ static void qm_pf_reset_vf_process(struct hisi_qm *qm, qm_reset_bit_clear(qm); } -static void qm_cmd_process(struct work_struct *cmd_process) +static void qm_handle_cmd_msg(struct hisi_qm *qm, u32 fun_num) { - struct hisi_qm *qm = container_of(cmd_process, - struct hisi_qm, cmd_process); struct device *dev = &qm->pdev->dev; u64 msg; u32 cmd; @@ -5142,8 +5279,8 @@ static void qm_cmd_process(struct work_struct *cmd_process) * Get the msg from source by sending mailbox. Whether message is got * successfully, destination needs to ack source by clearing the interrupt. */ - ret = qm_get_mb_cmd(qm, &msg, 0); - qm_clear_cmd_interrupt(qm, 0); + ret = qm_get_mb_cmd(qm, &msg, fun_num); + qm_clear_cmd_interrupt(qm, BIT(fun_num)); if (ret) { dev_err(dev, "failed to get msg from source!\n"); return; @@ -5157,12 +5294,42 @@ static void qm_cmd_process(struct work_struct *cmd_process) case QM_PF_SRST_PREPARE: qm_pf_reset_vf_process(qm, QM_SOFT_RESET); break; + case QM_VF_GET_QOS: + qm_vf_get_qos(qm, fun_num); + break; + case QM_PF_SET_QOS: + qm->mb_qos = msg >> QM_MB_CMD_DATA_SHIFT; + break; default: - dev_err(dev, "unsupported cmd %u sent by PF!\n", cmd); + dev_err(dev, "unsupported cmd %u sent by function(%u)!\n", cmd, fun_num); break; } } +static void qm_cmd_process(struct work_struct *cmd_process) +{ + struct hisi_qm *qm = container_of(cmd_process, + struct hisi_qm, cmd_process); + u32 vfs_num = qm->vfs_num; + u64 val; + u32 i; + + if (qm->fun_type == QM_HW_PF) { + val = readq(qm->io_base + QM_IFC_INT_SOURCE_P); + if (!val) + return; + + for (i = 1; i <= vfs_num; i++) { + if (val & BIT(i)) + qm_handle_cmd_msg(qm, i); + } + + return; + } + + qm_handle_cmd_msg(qm, 0); +} + /** * hisi_qm_alg_register() - Register alg to crypto and add qm to qm_list. * @qm: The qm needs add. 
From 3d2a429271bb622da48983631625c20de3b5f1e5 Mon Sep 17 00:00:00 2001 From: Kai Ye Date: Fri, 11 Jun 2021 17:06:48 +0800 Subject: [PATCH 129/142] crypto: hisilicon/sec - adds the max shaper type rate The SEC driver supports configuring each function's QoS in the host for Kunpeng930. The SEC driver needs to configure the maximum shaper type rate. Signed-off-by: Kai Ye Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/sec2/sec_main.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/crypto/hisilicon/sec2/sec_main.c b/drivers/crypto/hisilicon/sec2/sec_main.c index 8ab4e67b8a417..d120ce3e34eda 100644 --- a/drivers/crypto/hisilicon/sec2/sec_main.c +++ b/drivers/crypto/hisilicon/sec2/sec_main.c @@ -98,6 +98,7 @@ #define SEC_SQE_MASK_OFFSET 64 #define SEC_SQE_MASK_LEN 48 +#define SEC_SHAPER_TYPE_RATE 128 struct sec_hw_error { u32 int_msk; @@ -874,6 +875,7 @@ static void sec_qm_uninit(struct hisi_qm *qm) static int sec_probe_init(struct sec_dev *sec) { + u32 type_rate = SEC_SHAPER_TYPE_RATE; struct hisi_qm *qm = &sec->qm; int ret; @@ -881,6 +883,11 @@ static int sec_probe_init(struct sec_dev *sec) ret = sec_pf_probe_init(sec); if (ret) return ret; + /* enable shaper type 0 */ + if (qm->ver >= QM_HW_V3) { + type_rate |= QM_SHAPER_ENABLE; + qm->type_rate = type_rate; + } } return 0; From c02f5302e46a2505cb0a6170470759a7db929979 Mon Sep 17 00:00:00 2001 From: Kai Ye Date: Fri, 11 Jun 2021 17:06:49 +0800 Subject: [PATCH 130/142] crypto: hisilicon/hpre - adds the max shaper type rate The HPRE driver supports configuring each function's QoS in the host for Kunpeng930. The HPRE driver needs to configure the maximum shaper type rate. Signed-off-by: Kai Ye Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/hpre/hpre_main.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/crypto/hisilicon/hpre/hpre_main.c b/drivers/crypto/hisilicon/hpre/hpre_main.c index 37c5296008474..8b0640fb04be6 100644 --- a/drivers/crypto/hisilicon/hpre/hpre_main.c +++ b/drivers/crypto/hisilicon/hpre/hpre_main.c @@ -92,6 +92,7 @@ #define HPRE_QM_PM_FLR BIT(11) #define HPRE_QM_SRIOV_FLR BIT(12) +#define HPRE_SHAPER_TYPE_RATE 128 #define HPRE_VIA_MSI_DSM 1 #define HPRE_SQE_MASK_OFFSET 8 #define HPRE_SQE_MASK_LEN 24 @@ -947,6 +948,7 @@ static int hpre_pf_probe_init(struct hpre *hpre) static int hpre_probe_init(struct hpre *hpre) { + u32 type_rate = HPRE_SHAPER_TYPE_RATE; struct hisi_qm *qm = &hpre->qm; int ret; @@ -954,6 +956,11 @@ static int hpre_probe_init(struct hpre *hpre) ret = hpre_pf_probe_init(hpre); if (ret) return ret; + /* Enable shaper type 0 */ + if (qm->ver >= QM_HW_V3) { + type_rate |= QM_SHAPER_ENABLE; + qm->type_rate = type_rate; + } } return 0; From 38a9eb8182a24c7ef2dbe82ab46cafe8f8e9b271 Mon Sep 17 00:00:00 2001 From: Kai Ye Date: Fri, 11 Jun 2021 17:06:50 +0800 Subject: [PATCH 131/142] crypto: hisilicon/zip - adds the max shaper type rate The ZIP driver supports configuring each function's QoS in the host for Kunpeng930. The ZIP driver needs to configure the maximum shaper type rate.
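All three drivers program the same per-engine QM_SHAPER_CFG word through qm->type_rate; ZIP is the only one that also sets a type-1 rate. The standalone sketch below just reproduces the bit composition from these diffs (the constants come from the patches; the exact register semantics are the hardware's):

#include <stdio.h>

#define QM_SHAPER_ENABLE		(1U << 30)
#define QM_SHAPER_TYPE1_OFFSET		10
#define HZIP_SHAPER_RATE_COMPRESS	252U
#define HZIP_SHAPER_RATE_DECOMPRESS	229U

int main(void)
{
	unsigned int type_rate = HZIP_SHAPER_RATE_COMPRESS;	/* shaper type 0 */

	type_rate |= QM_SHAPER_ENABLE;	/* bit 30 enables the shaper */
	type_rate |= HZIP_SHAPER_RATE_DECOMPRESS << QM_SHAPER_TYPE1_OFFSET;

	printf("qm->type_rate = 0x%08x\n", type_rate);	/* prints 0x400394fc */
	return 0;
}

SEC and HPRE follow the same shape with a single type-0 rate of 128 and no type-1 field.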
Signed-off-by: Kai Ye Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/zip/zip_main.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/crypto/hisilicon/zip/zip_main.c b/drivers/crypto/hisilicon/zip/zip_main.c index 9e4c49cd6f3ab..f8482ceebf2ab 100644 --- a/drivers/crypto/hisilicon/zip/zip_main.c +++ b/drivers/crypto/hisilicon/zip/zip_main.c @@ -102,6 +102,8 @@ #define HZIP_PREFETCH_ENABLE (~(BIT(26) | BIT(17) | BIT(0))) #define HZIP_SVA_PREFETCH_DISABLE BIT(26) #define HZIP_SVA_DISABLE_READY (BIT(26) | BIT(30)) +#define HZIP_SHAPER_RATE_COMPRESS 252 +#define HZIP_SHAPER_RATE_DECOMPRESS 229 #define HZIP_DELAY_1_US 1 #define HZIP_POLL_TIMEOUT_US 1000 @@ -823,6 +825,7 @@ static void hisi_zip_qm_uninit(struct hisi_qm *qm) static int hisi_zip_probe_init(struct hisi_zip *hisi_zip) { + u32 type_rate = HZIP_SHAPER_RATE_COMPRESS; struct hisi_qm *qm = &hisi_zip->qm; int ret; @@ -830,6 +833,14 @@ static int hisi_zip_probe_init(struct hisi_zip *hisi_zip) ret = hisi_zip_pf_probe_init(hisi_zip); if (ret) return ret; + /* enable shaper type 0 */ + if (qm->ver >= QM_HW_V3) { + type_rate |= QM_SHAPER_ENABLE; + + /* ZIP need to enable shaper type 1 */ + type_rate |= HZIP_SHAPER_RATE_DECOMPRESS << QM_SHAPER_TYPE1_OFFSET; + qm->type_rate = type_rate; + } } return 0; From 533d87fbb82583d37e4af7bbab26d070523b48ee Mon Sep 17 00:00:00 2001 From: kernel test robot Date: Tue, 15 Jun 2021 09:31:03 +0800 Subject: [PATCH 132/142] crypto: sl3516 - fix duplicated inclusion drivers/crypto/gemini/sl3516-ce-cipher.c: linux/io.h is included more than once. Generated by: scripts/checkincludes.pl Reported-by: kernel test robot Signed-off-by: kernel test robot Signed-off-by: Herbert Xu --- drivers/crypto/gemini/sl3516-ce-cipher.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/crypto/gemini/sl3516-ce-cipher.c b/drivers/crypto/gemini/sl3516-ce-cipher.c index 0b34a4971e498..b41c2f5fc495a 100644 --- a/drivers/crypto/gemini/sl3516-ce-cipher.c +++ b/drivers/crypto/gemini/sl3516-ce-cipher.c @@ -12,7 +12,6 @@ #include #include #include -#include #include #include #include From d886d55f4c7345ea1628ecc49eaea3f496f8d3cb Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 15 Jun 2021 14:14:52 +0800 Subject: [PATCH 133/142] crypto: sa2ul - Remove unused auth_len variable This patch removes the unused auth_len variable from sa_aead_dma_in_callback. Reported-by: kernel test robot Signed-off-by: Herbert Xu --- drivers/crypto/sa2ul.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/crypto/sa2ul.c b/drivers/crypto/sa2ul.c index 51bb69bc573c3..544d7040cfc58 100644 --- a/drivers/crypto/sa2ul.c +++ b/drivers/crypto/sa2ul.c @@ -1698,7 +1698,6 @@ static void sa_aead_dma_in_callback(void *data) size_t pl, ml; int i; int err = 0; - u16 auth_len; u32 *mdptr; sa_sync_from_device(rxd); @@ -1711,13 +1710,10 @@ static void sa_aead_dma_in_callback(void *data) for (i = 0; i < (authsize / 4); i++) mdptr[i + 4] = swab32(mdptr[i + 4]); - auth_len = req->assoclen + req->cryptlen; - if (rxd->enc) { scatterwalk_map_and_copy(&mdptr[4], req->dst, start, authsize, 1); } else { - auth_len -= authsize; start -= authsize; scatterwalk_map_and_copy(auth_tag, req->src, start, authsize, 0); From 84c2c729eabda52a2f6caa087d51f0d7420bca0c Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 15 Jun 2021 11:11:53 +0100 Subject: [PATCH 134/142] crypto: hisilicon/sec - Fix spelling mistake "fallbcak" -> "fallback" There is a spelling mistake in a dev_err message. Fix it. 
Signed-off-by: Colin Ian King Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/sec2/sec_crypto.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/crypto/hisilicon/sec2/sec_crypto.c b/drivers/crypto/hisilicon/sec2/sec_crypto.c index f23af61661dea..6a45bd23b3635 100644 --- a/drivers/crypto/hisilicon/sec2/sec_crypto.c +++ b/drivers/crypto/hisilicon/sec2/sec_crypto.c @@ -2290,7 +2290,7 @@ static int sec_aead_soft_crypto(struct sec_ctx *ctx, /* Kunpeng920 aead mode not support input 0 size */ if (!a_ctx->fallback_aead_tfm) { - dev_err(dev, "aead fallbcak tfm is NULL!\n"); + dev_err(dev, "aead fallback tfm is NULL!\n"); return -EINVAL; } From 74c66120fda6596ad57f41e1607b3a5d51ca143d Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 16 Jun 2021 13:34:59 -0700 Subject: [PATCH 135/142] crypto: nx - Fix memcpy() over-reading in nonce Fix typo in memcpy() where size should be CTR_RFC3686_NONCE_SIZE. Fixes: 030f4e968741 ("crypto: nx - Fix reentrancy bugs") Cc: stable@vger.kernel.org Signed-off-by: Kees Cook Signed-off-by: Herbert Xu --- drivers/crypto/nx/nx-aes-ctr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/crypto/nx/nx-aes-ctr.c b/drivers/crypto/nx/nx-aes-ctr.c index 13f518802343d..6120e350ff71d 100644 --- a/drivers/crypto/nx/nx-aes-ctr.c +++ b/drivers/crypto/nx/nx-aes-ctr.c @@ -118,7 +118,7 @@ static int ctr3686_aes_nx_crypt(struct skcipher_request *req) struct nx_crypto_ctx *nx_ctx = crypto_skcipher_ctx(tfm); u8 iv[16]; - memcpy(iv, nx_ctx->priv.ctr.nonce, CTR_RFC3686_IV_SIZE); + memcpy(iv, nx_ctx->priv.ctr.nonce, CTR_RFC3686_NONCE_SIZE); memcpy(iv + CTR_RFC3686_NONCE_SIZE, req->iv, CTR_RFC3686_IV_SIZE); iv[12] = iv[13] = iv[14] = 0; iv[15] = 1; From 5163ab505e489400b4738b2a5547ec83d2dff7bb Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 17 Jun 2021 15:28:10 +0800 Subject: [PATCH 136/142] crypto: api - Move crypto attr definitions out of crypto.h The definitions for crypto_attr-related types and enums are not needed by most Crypto API users. This patch moves them out of crypto.h and into algapi.h/internal.h depending on the extent of their use. Signed-off-by: Herbert Xu --- crypto/internal.h | 12 ++++++++++++ include/crypto/algapi.h | 9 +++++++++ include/linux/crypto.h | 21 --------------------- 3 files changed, 21 insertions(+), 21 deletions(-) diff --git a/crypto/internal.h b/crypto/internal.h index 976ec9dfc76db..f00869af689f5 100644 --- a/crypto/internal.h +++ b/crypto/internal.h @@ -29,6 +29,18 @@ struct crypto_larval { u32 mask; }; +enum { + CRYPTOA_UNSPEC, + CRYPTOA_ALG, + CRYPTOA_TYPE, + __CRYPTOA_MAX, +}; + +#define CRYPTOA_MAX (__CRYPTOA_MAX - 1) + +/* Maximum number of (rtattr) parameters for each template. 
*/ +#define CRYPTO_MAX_ATTRS 32 + extern struct list_head crypto_alg_list; extern struct rw_semaphore crypto_alg_sem; extern struct blocking_notifier_head crypto_chain; diff --git a/include/crypto/algapi.h b/include/crypto/algapi.h index 41d42e649da4f..5f6841c73e5a7 100644 --- a/include/crypto/algapi.h +++ b/include/crypto/algapi.h @@ -96,6 +96,15 @@ struct scatter_walk { unsigned int offset; }; +struct crypto_attr_alg { + char name[CRYPTO_MAX_ALG_NAME]; +}; + +struct crypto_attr_type { + u32 type; + u32 mask; +}; + void crypto_mod_put(struct crypto_alg *alg); int crypto_register_template(struct crypto_template *tmpl); diff --git a/include/linux/crypto.h b/include/linux/crypto.h index 3b9263d6122fd..855869e1fd327 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -643,27 +643,6 @@ struct crypto_comp { struct crypto_tfm base; }; -enum { - CRYPTOA_UNSPEC, - CRYPTOA_ALG, - CRYPTOA_TYPE, - __CRYPTOA_MAX, -}; - -#define CRYPTOA_MAX (__CRYPTOA_MAX - 1) - -/* Maximum number of (rtattr) parameters for each template. */ -#define CRYPTO_MAX_ATTRS 32 - -struct crypto_attr_alg { - char name[CRYPTO_MAX_ALG_NAME]; -}; - -struct crypto_attr_type { - u32 type; - u32 mask; -}; - /* * Transform user interface. */ From 2a96726bd0ccde4f12b9b9a9f61f7b1ac5af7e10 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 17 Jun 2021 15:57:12 +0800 Subject: [PATCH 137/142] crypto: nx - Fix RCU warning in nx842_OF_upd_status The function nx842_OF_upd_status triggers a sparse RCU warning when it directly dereferences the RCU-protected devdata. This appears to be an accident as there was another variable of the same name that was passed in from the caller. After it was removed (because the main purpose of using it, to update the status member, was itself removed) the global variable unintentionally stood in as its replacement. This patch restores the devdata parameter. Fixes: 90fd73f912f0 ("crypto: nx - remove pSeries NX 'status' field") Signed-off-by: Herbert Xu --- drivers/crypto/nx/nx-842-pseries.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/nx/nx-842-pseries.c b/drivers/crypto/nx/nx-842-pseries.c index 67caff73f058f..1491cbfbc071c 100644 --- a/drivers/crypto/nx/nx-842-pseries.c +++ b/drivers/crypto/nx/nx-842-pseries.c @@ -538,13 +538,15 @@ static int nx842_OF_set_defaults(struct nx842_devdata *devdata) * The status field indicates if the device is enabled when the status * is 'okay'. Otherwise the device driver will be disabled. * + * @devdata: struct nx842_devdata to use for dev_info * @prop: struct property point containing the maxsyncop for the update * * Returns: * 0 - Device is available * -ENODEV - Device is not available */ -static int nx842_OF_upd_status(struct property *prop) +static int nx842_OF_upd_status(struct nx842_devdata *devdata, + struct property *prop) { const char *status = (const char *)prop->value; @@ -757,7 +759,7 @@ static int nx842_OF_upd(struct property *new_prop) goto out; /* Perform property updates */ - ret = nx842_OF_upd_status(new_devdata, status); + ret = nx842_OF_upd_status(new_devdata, status); if (ret) goto error_out; From b20d9a73a3b2a859d32ae569588557bc47c87a1e Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 17 Jun 2021 16:00:12 +0800 Subject: [PATCH 138/142] crypto: nx - Fix numerous sparse byte-order warnings The nx driver started out its life as a BE-only driver. However, somewhere along the way LE support was partially added. This never seems to have been extended all the way, but it does trigger numerous warnings during build.
This patch fixes all those warnings, but it doesn't mean that the driver will work on LE. Signed-off-by: Herbert Xu --- drivers/crypto/nx/nx-aes-cbc.c | 2 +- drivers/crypto/nx/nx-aes-ccm.c | 4 ++-- drivers/crypto/nx/nx-aes-ctr.c | 2 +- drivers/crypto/nx/nx-aes-ecb.c | 2 +- drivers/crypto/nx/nx-aes-gcm.c | 2 +- drivers/crypto/nx/nx-sha256.c | 19 ++++++++++++------- drivers/crypto/nx/nx-sha512.c | 19 ++++++++++++------- drivers/crypto/nx/nx_csbcpb.h | 4 ++-- 8 files changed, 32 insertions(+), 22 deletions(-) diff --git a/drivers/crypto/nx/nx-aes-cbc.c b/drivers/crypto/nx/nx-aes-cbc.c index d6314ea9ae896..0e440f704a8f9 100644 --- a/drivers/crypto/nx/nx-aes-cbc.c +++ b/drivers/crypto/nx/nx-aes-cbc.c @@ -88,7 +88,7 @@ static int cbc_aes_nx_crypt(struct skcipher_request *req, memcpy(req->iv, csbcpb->cpb.aes_cbc.cv, AES_BLOCK_SIZE); atomic_inc(&(nx_ctx->stats->aes_ops)); - atomic64_add(csbcpb->csb.processed_byte_count, + atomic64_add(be32_to_cpu(csbcpb->csb.processed_byte_count), &(nx_ctx->stats->aes_bytes)); processed += to_process; diff --git a/drivers/crypto/nx/nx-aes-ccm.c b/drivers/crypto/nx/nx-aes-ccm.c index e7384d1075739..3793885f928dd 100644 --- a/drivers/crypto/nx/nx-aes-ccm.c +++ b/drivers/crypto/nx/nx-aes-ccm.c @@ -391,7 +391,7 @@ static int ccm_nx_decrypt(struct aead_request *req, /* update stats */ atomic_inc(&(nx_ctx->stats->aes_ops)); - atomic64_add(csbcpb->csb.processed_byte_count, + atomic64_add(be32_to_cpu(csbcpb->csb.processed_byte_count), &(nx_ctx->stats->aes_bytes)); processed += to_process; @@ -460,7 +460,7 @@ static int ccm_nx_encrypt(struct aead_request *req, /* update stats */ atomic_inc(&(nx_ctx->stats->aes_ops)); - atomic64_add(csbcpb->csb.processed_byte_count, + atomic64_add(be32_to_cpu(csbcpb->csb.processed_byte_count), &(nx_ctx->stats->aes_bytes)); processed += to_process; diff --git a/drivers/crypto/nx/nx-aes-ctr.c b/drivers/crypto/nx/nx-aes-ctr.c index 6120e350ff71d..dfa3ad1a12f28 100644 --- a/drivers/crypto/nx/nx-aes-ctr.c +++ b/drivers/crypto/nx/nx-aes-ctr.c @@ -102,7 +102,7 @@ static int ctr_aes_nx_crypt(struct skcipher_request *req, u8 *iv) memcpy(iv, csbcpb->cpb.aes_cbc.cv, AES_BLOCK_SIZE); atomic_inc(&(nx_ctx->stats->aes_ops)); - atomic64_add(csbcpb->csb.processed_byte_count, + atomic64_add(be32_to_cpu(csbcpb->csb.processed_byte_count), &(nx_ctx->stats->aes_bytes)); processed += to_process; diff --git a/drivers/crypto/nx/nx-aes-ecb.c b/drivers/crypto/nx/nx-aes-ecb.c index 7a729dc2bc17a..502a565074e98 100644 --- a/drivers/crypto/nx/nx-aes-ecb.c +++ b/drivers/crypto/nx/nx-aes-ecb.c @@ -86,7 +86,7 @@ static int ecb_aes_nx_crypt(struct skcipher_request *req, goto out; atomic_inc(&(nx_ctx->stats->aes_ops)); - atomic64_add(csbcpb->csb.processed_byte_count, + atomic64_add(be32_to_cpu(csbcpb->csb.processed_byte_count), &(nx_ctx->stats->aes_bytes)); processed += to_process; diff --git a/drivers/crypto/nx/nx-aes-gcm.c b/drivers/crypto/nx/nx-aes-gcm.c index fc9baca13920c..4a796318b4306 100644 --- a/drivers/crypto/nx/nx-aes-gcm.c +++ b/drivers/crypto/nx/nx-aes-gcm.c @@ -382,7 +382,7 @@ static int gcm_aes_nx_crypt(struct aead_request *req, int enc, NX_CPB_FDM(csbcpb) |= NX_FDM_CONTINUATION; atomic_inc(&(nx_ctx->stats->aes_ops)); - atomic64_add(csbcpb->csb.processed_byte_count, + atomic64_add(be32_to_cpu(csbcpb->csb.processed_byte_count), &(nx_ctx->stats->aes_bytes)); processed += to_process; diff --git a/drivers/crypto/nx/nx-sha256.c b/drivers/crypto/nx/nx-sha256.c index b0ad665e4bda8..c3bebf0feabe1 100644 --- a/drivers/crypto/nx/nx-sha256.c +++ 
@@ -16,6 +16,11 @@
 #include "nx_csbcpb.h"
 #include "nx.h"
+struct sha256_state_be {
+ __be32 state[SHA256_DIGEST_SIZE / 4];
+ u64 count;
+ u8 buf[SHA256_BLOCK_SIZE];
+};
 static int nx_crypto_ctx_sha256_init(struct crypto_tfm *tfm)
 {
@@ -36,7 +41,7 @@ static int nx_crypto_ctx_sha256_init(struct crypto_tfm *tfm)
 }
 static int nx_sha256_init(struct shash_desc *desc)
 {
- struct sha256_state *sctx = shash_desc_ctx(desc);
+ struct sha256_state_be *sctx = shash_desc_ctx(desc);
 memset(sctx, 0, sizeof *sctx);
@@ -56,7 +61,7 @@ static int nx_sha256_init(struct shash_desc *desc) {
 static int nx_sha256_update(struct shash_desc *desc, const u8 *data,
 unsigned int len)
 {
- struct sha256_state *sctx = shash_desc_ctx(desc);
+ struct sha256_state_be *sctx = shash_desc_ctx(desc);
 struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(&desc->tfm->base);
 struct nx_csbcpb *csbcpb = (struct nx_csbcpb *)nx_ctx->csbcpb;
 struct nx_sg *out_sg;
@@ -175,7 +180,7 @@ static int nx_sha256_update(struct shash_desc *desc, const u8 *data,
 static int nx_sha256_final(struct shash_desc *desc, u8 *out)
 {
- struct sha256_state *sctx = shash_desc_ctx(desc);
+ struct sha256_state_be *sctx = shash_desc_ctx(desc);
 struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(&desc->tfm->base);
 struct nx_csbcpb *csbcpb = (struct nx_csbcpb *)nx_ctx->csbcpb;
 struct nx_sg *in_sg, *out_sg;
@@ -245,7 +250,7 @@ static int nx_sha256_final(struct shash_desc *desc, u8 *out)
 static int nx_sha256_export(struct shash_desc *desc, void *out)
 {
- struct sha256_state *sctx = shash_desc_ctx(desc);
+ struct sha256_state_be *sctx = shash_desc_ctx(desc);
 memcpy(out, sctx, sizeof(*sctx));
@@ -254,7 +259,7 @@ static int nx_sha256_export(struct shash_desc *desc, void *out)
 static int nx_sha256_import(struct shash_desc *desc, const void *in)
 {
- struct sha256_state *sctx = shash_desc_ctx(desc);
+ struct sha256_state_be *sctx = shash_desc_ctx(desc);
 memcpy(sctx, in, sizeof(*sctx));
@@ -268,8 +273,8 @@ struct shash_alg nx_shash_sha256_alg = {
 .final = nx_sha256_final,
 .export = nx_sha256_export,
 .import = nx_sha256_import,
- .descsize = sizeof(struct sha256_state),
- .statesize = sizeof(struct sha256_state),
+ .descsize = sizeof(struct sha256_state_be),
+ .statesize = sizeof(struct sha256_state_be),
 .base = {
 .cra_name = "sha256",
 .cra_driver_name = "sha256-nx",
diff --git a/drivers/crypto/nx/nx-sha512.c b/drivers/crypto/nx/nx-sha512.c
index c29103a1a0b6c..1ffb40d2c3245 100644
--- a/drivers/crypto/nx/nx-sha512.c
+++ b/drivers/crypto/nx/nx-sha512.c
@@ -15,6 +15,11 @@
 #include "nx_csbcpb.h"
 #include "nx.h"
+struct sha512_state_be {
+ __be64 state[SHA512_DIGEST_SIZE / 8];
+ u64 count[2];
+ u8 buf[SHA512_BLOCK_SIZE];
+};
 static int nx_crypto_ctx_sha512_init(struct crypto_tfm *tfm)
 {
@@ -36,7 +41,7 @@ static int nx_crypto_ctx_sha512_init(struct crypto_tfm *tfm)
 static int nx_sha512_init(struct shash_desc *desc)
 {
- struct sha512_state *sctx = shash_desc_ctx(desc);
+ struct sha512_state_be *sctx = shash_desc_ctx(desc);
 memset(sctx, 0, sizeof *sctx);
@@ -56,7 +61,7 @@ static int nx_sha512_init(struct shash_desc *desc)
 static int nx_sha512_update(struct shash_desc *desc, const u8 *data,
 unsigned int len)
 {
- struct sha512_state *sctx = shash_desc_ctx(desc);
+ struct sha512_state_be *sctx = shash_desc_ctx(desc);
 struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(&desc->tfm->base);
 struct nx_csbcpb *csbcpb = (struct nx_csbcpb *)nx_ctx->csbcpb;
 struct nx_sg *out_sg;
@@ -178,7 +183,7 @@ static int nx_sha512_update(struct shash_desc *desc, const u8 *data,
 static int nx_sha512_final(struct shash_desc *desc, u8 *out)
 {
- struct sha512_state *sctx = shash_desc_ctx(desc);
+ struct sha512_state_be *sctx = shash_desc_ctx(desc);
 struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(&desc->tfm->base);
 struct nx_csbcpb *csbcpb = (struct nx_csbcpb *)nx_ctx->csbcpb;
 struct nx_sg *in_sg, *out_sg;
@@ -251,7 +256,7 @@ static int nx_sha512_final(struct shash_desc *desc, u8 *out)
 static int nx_sha512_export(struct shash_desc *desc, void *out)
 {
- struct sha512_state *sctx = shash_desc_ctx(desc);
+ struct sha512_state_be *sctx = shash_desc_ctx(desc);
 memcpy(out, sctx, sizeof(*sctx));
@@ -260,7 +265,7 @@ static int nx_sha512_export(struct shash_desc *desc, void *out)
 static int nx_sha512_import(struct shash_desc *desc, const void *in)
 {
- struct sha512_state *sctx = shash_desc_ctx(desc);
+ struct sha512_state_be *sctx = shash_desc_ctx(desc);
 memcpy(sctx, in, sizeof(*sctx));
@@ -274,8 +279,8 @@ struct shash_alg nx_shash_sha512_alg = {
 .final = nx_sha512_final,
 .export = nx_sha512_export,
 .import = nx_sha512_import,
- .descsize = sizeof(struct sha512_state),
- .statesize = sizeof(struct sha512_state),
+ .descsize = sizeof(struct sha512_state_be),
+ .statesize = sizeof(struct sha512_state_be),
 .base = {
 .cra_name = "sha512",
 .cra_driver_name = "sha512-nx",
diff --git a/drivers/crypto/nx/nx_csbcpb.h b/drivers/crypto/nx/nx_csbcpb.h
index 493f8490ff942..e64f7e36fb929 100644
--- a/drivers/crypto/nx/nx_csbcpb.h
+++ b/drivers/crypto/nx/nx_csbcpb.h
@@ -140,8 +140,8 @@ struct cop_status_block {
 u8 crb_seq_number;
 u8 completion_code;
 u8 completion_extension;
- u32 processed_byte_count;
- u64 address;
+ __be32 processed_byte_count;
+ __be64 address;
 } __packed;
 /* Nest accelerator workbook section 4.4 */

From f873a4d650399ba5af20460f650fa7ea530cbf9e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Horia=20Geant=C4=83?=
Date: Thu, 17 Jun 2021 12:19:26 +0300
Subject: [PATCH 139/142] MAINTAINERS: update caam crypto driver maintainers list
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Aymen steps down as caam maintainer, being replaced by Pankaj.

Signed-off-by: Horia Geantă
Signed-off-by: Herbert Xu
---
 MAINTAINERS | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 388924c2d23ac..690e54bf7e236 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -7159,7 +7159,7 @@ F: include/video/
 FREESCALE CAAM (Cryptographic Acceleration and Assurance Module) DRIVER
 M: Horia Geantă
-M: Aymen Sghaier
+M: Pankaj Gupta
 L: linux-crypto@vger.kernel.org
 S: Maintained
 F: Documentation/devicetree/bindings/crypto/fsl-sec4.txt

From cac6f1b87b1f7feafb7db349a2b1ca86634bc950 Mon Sep 17 00:00:00 2001
From: YueHaibing
Date: Fri, 18 Jun 2021 17:35:51 +0800
Subject: [PATCH 140/142] crypto: sl3516 - Fix build warning without CONFIG_PM
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

drivers/crypto/gemini/sl3516-ce-core.c:345:12: warning: ‘sl3516_ce_pm_resume’ defined but not used [-Wunused-function]
 static int sl3516_ce_pm_resume(struct device *dev)
            ^~~~~~~~~~~~~~~~~~~

The driver needs PM; otherwise its clocks and resets are never set.
So make it depend on PM to fix this warning.
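(A common alternative for this kind of warning, sketched here only for
comparison and not what this patch does, is to mark the callback
__maybe_unused so the driver still builds without CONFIG_PM; the
function body below is assumed, not taken from the sl3516 source:

	static int __maybe_unused sl3516_ce_pm_resume(struct device *dev)
	{
		/* re-enable the clock and deassert the resets here */
		return 0;
	}

That would only silence the compiler: without PM the device would still
never be clocked, so the hard dependency is the correct fix here.)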
Signed-off-by: YueHaibing
Suggested-by: LABBE Corentin
Signed-off-by: Herbert Xu
---
 drivers/crypto/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig
index 99b0907901788..6f14f39d32e3e 100644
--- a/drivers/crypto/Kconfig
+++ b/drivers/crypto/Kconfig
@@ -273,6 +273,7 @@ config CRYPTO_DEV_SL3516
 select CRYPTO_ECB
 select CRYPTO_AES
 select HW_RANDOM
+ depends on PM
 help
 This option allows you to have support for SL3516 crypto offloader.

From d18344c0d095df544bd7174b8fae2cba523dd4a4 Mon Sep 17 00:00:00 2001
From: Wenkai Lin
Date: Fri, 18 Jun 2021 17:36:06 +0800
Subject: [PATCH 141/142] crypto: hisilicon/qm - implement querying of hardware task status

This patch adds a function, hisi_qm_is_q_updated, to check whether a
task is ready in the hardware queue when a user polls an UACCE queue.
This prevents users from repeatedly querying whether the accelerator
has completed tasks, which wastes CPU resources.

Signed-off-by: Wenkai Lin
Signed-off-by: Herbert Xu
---
 drivers/crypto/hisilicon/qm.c | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c
index 580709408cfc7..1d67f94a1d568 100644
--- a/drivers/crypto/hisilicon/qm.c
+++ b/drivers/crypto/hisilicon/qm.c
@@ -2926,6 +2926,23 @@ static void hisi_qm_uacce_stop_queue(struct uacce_queue *q)
 hisi_qm_stop_qp(q->priv);
 }
+static int hisi_qm_is_q_updated(struct uacce_queue *q)
+{
+ struct hisi_qp *qp = q->priv;
+ struct qm_cqe *cqe = qp->cqe + qp->qp_status.cq_head;
+ int updated = 0;
+
+ while (QM_CQE_PHASE(cqe) == qp->qp_status.cqc_phase) {
+ /* make sure to read data from memory */
+ dma_rmb();
+ qm_cq_head_update(qp);
+ cqe = qp->cqe + qp->qp_status.cq_head;
+ updated = 1;
+ }
+
+ return updated;
+}
+
 static void qm_set_sqctype(struct uacce_queue *q, u16 type)
 {
 struct hisi_qm *qm = q->uacce->priv;
@@ -2971,6 +2988,7 @@ static const struct uacce_ops uacce_qm_ops = {
 .stop_queue = hisi_qm_uacce_stop_queue,
 .mmap = hisi_qm_uacce_mmap,
 .ioctl = hisi_qm_uacce_ioctl,
+ .is_q_updated = hisi_qm_is_q_updated,
 };

 static int qm_alloc_uacce(struct hisi_qm *qm)

From 9f38b678ffc4e2ccf167a1131c0403dc4f5e1bb7 Mon Sep 17 00:00:00 2001
From: Corentin Labbe
Date: Mon, 21 Jun 2021 18:59:26 +0000
Subject: [PATCH 142/142] crypto: sl3516 - depends on HAS_IOMEM

The sl3516 driver needs to depend on HAS_IOMEM. This fixes a build
error:

ERROR: modpost: "devm_platform_ioremap_resource" [drivers/crypto/gemini/sl3516-ce.ko] undefined!

Reported-by: kernel test robot
Signed-off-by: Corentin Labbe
Signed-off-by: Herbert Xu
---
 drivers/crypto/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig
index 6f14f39d32e3e..ebcec460c0457 100644
--- a/drivers/crypto/Kconfig
+++ b/drivers/crypto/Kconfig
@@ -268,6 +268,7 @@ config CRYPTO_DEV_NIAGARA2
 config CRYPTO_DEV_SL3516
 tristate "Stormlink SL3516 crypto offloader"
+ depends on HAS_IOMEM
 select CRYPTO_SKCIPHER
 select CRYPTO_ENGINE
 select CRYPTO_ECB
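For context on this last fix: devm_platform_ioremap_resource() is only
provided when CONFIG_HAS_IOMEM is set, so any driver that maps its MMIO
registers through it must declare the dependency. A minimal sketch of
the kind of probe path that pulls the symbol in (function and variable
names here are illustrative, not taken from the sl3516 source):

	#include <linux/err.h>
	#include <linux/io.h>
	#include <linux/platform_device.h>

	static int example_probe(struct platform_device *pdev)
	{
		/* map the device's first MMIO resource */
		void __iomem *base = devm_platform_ioremap_resource(pdev, 0);

		if (IS_ERR(base))
			return PTR_ERR(base);

		/* ... program the crypto engine through base ... */
		return 0;
	}

Without HAS_IOMEM (e.g. on UML builds, as the kernel test robot found),
the symbol does not exist and modpost fails as shown above.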