Skip to content

Commit

Permalink
crypto: x86/aesni-xts - deduplicate aesni_xts_enc() and aesni_xts_dec()
Browse files Browse the repository at this point in the history
Since aesni_xts_enc() and aesni_xts_dec() are very similar, generate
them from a macro that's passed an argument enc=1 or enc=0.  This
reduces the length of aesni-intel_asm.S by 112 lines while still
producing the exact same object file in both 32-bit and 64-bit mode.

Signed-off-by: Eric Biggers <ebiggers@google.com>
Reviewed-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
  • Loading branch information
ebiggers authored and herbertx committed Apr 19, 2024
1 parent 1d27e1f commit ea9459e
Showing 1 changed file with 79 additions and 191 deletions.
270 changes: 79 additions & 191 deletions arch/x86/crypto/aesni-intel_asm.S
Original file line number Diff line number Diff line change
Expand Up @@ -2825,28 +2825,24 @@ SYM_FUNC_END(aesni_ctr_enc)
.previous

/*
 * _aesni_gf128mul_x_ble: Multiply in GF(2^128) for XTS IVs (internal ABI)
 * input:
 *	IV:	current IV
 *	GF128MUL_MASK == mask with 0x87 and 0x01
 * output:
 *	IV:	next IV
 * changed:
 *	KEY:	== temporary value
 *
 * How it works: IV is doubled as two independent 64-bit lanes, which drops
 * the bit shifted out of each lane (bits 63 and 127).  Those two bits are
 * captured beforehand in KEY as all-ones/all-zero dwords, masked down with
 * GF128MUL_MASK and XORed back into IV.
 * NOTE(review): the layout of GF128MUL_MASK is not visible here; the
 * comments below assume 0x87 sits in dword 0 and 0x01 in dword 2 -- confirm
 * against .Lgf128mul_x_ble_mask.
 */
/* cpp function-like form: same five instructions as the .macro form below. */
#define _aesni_gf128mul_x_ble() \
pshufd $0x13, IV, KEY; \
paddq IV, IV; \
psrad $31, KEY; \
pand GF128MUL_MASK, KEY; \
pxor KEY, IV;
.macro _aesni_gf128mul_x_ble
/* KEY dwords 0..3 = IV dwords 3,0,1,0 (selector 0x13, two bits per dword) */
pshufd $0x13, IV, KEY
/* IV += IV per 64-bit lane, i.e. each half shifted left by one bit */
paddq IV, IV
/* replicate each dword's sign bit: all-ones if the top bit was set, else 0 */
psrad $31, KEY
/* keep only the correction bits selected by the mask */
pand GF128MUL_MASK, KEY
/* fold the shifted-out bits back into the doubled IV */
pxor KEY, IV
.endm

/*
* void aesni_xts_enc(const struct crypto_aes_ctx *ctx, u8 *dst,
* const u8 *src, unsigned int len, le128 *iv)
*/
SYM_FUNC_START(aesni_xts_enc)
.macro _aesni_xts_crypt enc
FRAME_BEGIN
#ifndef __x86_64__
pushl IVP
Expand All @@ -2865,35 +2861,46 @@ SYM_FUNC_START(aesni_xts_enc)
movups (IVP), IV

mov 480(KEYP), KLEN
.if !\enc
add $240, KEYP

test $15, LEN
jz .Lxts_loop4\@
sub $16, LEN
.endif

.Lxts_enc_loop4:
.Lxts_loop4\@:
sub $64, LEN
jl .Lxts_enc_1x
jl .Lxts_1x\@

movdqa IV, STATE1
movdqu 0x00(INP), IN
pxor IN, STATE1
movdqu IV, 0x00(OUTP)

_aesni_gf128mul_x_ble()
_aesni_gf128mul_x_ble
movdqa IV, STATE2
movdqu 0x10(INP), IN
pxor IN, STATE2
movdqu IV, 0x10(OUTP)

_aesni_gf128mul_x_ble()
_aesni_gf128mul_x_ble
movdqa IV, STATE3
movdqu 0x20(INP), IN
pxor IN, STATE3
movdqu IV, 0x20(OUTP)

_aesni_gf128mul_x_ble()
_aesni_gf128mul_x_ble
movdqa IV, STATE4
movdqu 0x30(INP), IN
pxor IN, STATE4
movdqu IV, 0x30(OUTP)

.if \enc
call _aesni_enc4
.else
call _aesni_dec4
.endif

movdqu 0x00(OUTP), IN
pxor IN, STATE1
Expand All @@ -2911,17 +2918,17 @@ SYM_FUNC_START(aesni_xts_enc)
pxor IN, STATE4
movdqu STATE4, 0x30(OUTP)

_aesni_gf128mul_x_ble()
_aesni_gf128mul_x_ble

add $64, INP
add $64, OUTP
test LEN, LEN
jnz .Lxts_enc_loop4
jnz .Lxts_loop4\@

.Lxts_enc_ret_iv:
.Lxts_ret_iv\@:
movups IV, (IVP)

.Lxts_enc_ret:
.Lxts_ret\@:
#ifndef __x86_64__
popl KLEN
popl KEYP
Expand All @@ -2931,39 +2938,60 @@ SYM_FUNC_START(aesni_xts_enc)
FRAME_END
RET

.Lxts_enc_1x:
.Lxts_1x\@:
add $64, LEN
jz .Lxts_enc_ret_iv
jz .Lxts_ret_iv\@
.if \enc
sub $16, LEN
jl .Lxts_enc_cts4
jl .Lxts_cts4\@
.endif

.Lxts_enc_loop1:
.Lxts_loop1\@:
movdqu (INP), STATE
.if \enc
pxor IV, STATE
call _aesni_enc1
.else
add $16, INP
sub $16, LEN
jl .Lxts_cts1\@
pxor IV, STATE
_aesni_gf128mul_x_ble()
call _aesni_dec1
.endif
pxor IV, STATE
_aesni_gf128mul_x_ble

test LEN, LEN
jz .Lxts_enc_out
jz .Lxts_out\@

.if \enc
add $16, INP
sub $16, LEN
jl .Lxts_enc_cts1
jl .Lxts_cts1\@
.endif

movdqu STATE, (OUTP)
add $16, OUTP
jmp .Lxts_enc_loop1
jmp .Lxts_loop1\@

.Lxts_enc_out:
.Lxts_out\@:
movdqu STATE, (OUTP)
jmp .Lxts_enc_ret_iv
jmp .Lxts_ret_iv\@

.Lxts_enc_cts4:
.if \enc
.Lxts_cts4\@:
movdqa STATE4, STATE
sub $16, OUTP
.Lxts_cts1\@:
.else
.Lxts_cts1\@:
movdqa IV, STATE4
_aesni_gf128mul_x_ble

.Lxts_enc_cts1:
pxor IV, STATE
call _aesni_dec1
pxor IV, STATE
.endif
#ifndef __x86_64__
lea .Lcts_permute_table, T1
#else
Expand All @@ -2989,172 +3017,32 @@ SYM_FUNC_START(aesni_xts_enc)
pblendvb IN2, IN1
movaps IN1, STATE

.if \enc
pxor IV, STATE
call _aesni_enc1
pxor IV, STATE
.else
pxor STATE4, STATE
call _aesni_dec1
pxor STATE4, STATE
.endif

movups STATE, (OUTP)
jmp .Lxts_enc_ret
jmp .Lxts_ret\@
.endm

/*
 * void aesni_xts_enc(const struct crypto_aes_ctx *ctx, u8 *dst,
 *		       const u8 *src, unsigned int len, le128 *iv)
 *
 * XTS encryption entry point.  The whole body is generated by expanding
 * _aesni_xts_crypt with enc=1, which selects the _aesni_enc4/_aesni_enc1
 * calls and the encryption-specific branches of that macro.
 *
 * The tweak is read from *iv on entry.  It is written back to *iv on the
 * paths that leave through the macro's .Lxts_ret_iv label; the
 * ciphertext-stealing tail leaves through .Lxts_ret and does not store it.
 */
SYM_FUNC_START(aesni_xts_enc)
/* enc=1: instantiate the encryption variant of the shared XTS body */
_aesni_xts_crypt 1
SYM_FUNC_END(aesni_xts_enc)

/*
 * void aesni_xts_dec(const struct crypto_aes_ctx *ctx, u8 *dst,
 *		       const u8 *src, unsigned int len, le128 *iv)
 *
 * XTS decryption of len bytes from src to dst.
 *
 * Structure of the open-coded body:
 *   - bulk loop handling 64 bytes (four blocks) per iteration,
 *   - single-block loop for what is left,
 *   - ciphertext-stealing tail when len is not a multiple of 16.
 *
 * The tweak is read from *iv on entry and stored back on the paths that
 * leave through .Lxts_dec_ret_iv.  The ciphertext-stealing tail reuses IVP
 * as a scratch pointer and leaves through .Lxts_dec_ret without storing it.
 */
SYM_FUNC_START(aesni_xts_dec)
FRAME_BEGIN
#ifndef __x86_64__
/* 32-bit: save the registers used below, then fetch the stack arguments */
pushl IVP
pushl LEN
pushl KEYP
pushl KLEN
movl (FRAME_OFFSET+20)(%esp), KEYP # ctx
movl (FRAME_OFFSET+24)(%esp), OUTP # dst
movl (FRAME_OFFSET+28)(%esp), INP # src
movl (FRAME_OFFSET+32)(%esp), LEN # len
movl (FRAME_OFFSET+36)(%esp), IVP # iv
movdqa .Lgf128mul_x_ble_mask, GF128MUL_MASK
#else
movdqa .Lgf128mul_x_ble_mask(%rip), GF128MUL_MASK
#endif
/* IV = current tweak */
movups (IVP), IV

/* KLEN is loaded from byte offset 480 of ctx */
mov 480(KEYP), KLEN
/*
 * Advance KEYP by 240 bytes; presumably this is where the decryption
 * round keys live in struct crypto_aes_ctx -- confirm against its layout.
 */
add $240, KEYP

/*
 * If len is not a multiple of 16, hold one full block back from the loops
 * below so that the ciphertext-stealing tail gets to process it.
 */
test $15, LEN
jz .Lxts_dec_loop4
sub $16, LEN

.Lxts_dec_loop4:
/* fewer than 64 bytes left -> single-block path (LEN is restored there) */
sub $64, LEN
jl .Lxts_dec_1x

/*
 * For each of the four blocks: STATEn = input block ^ tweak.  The tweak is
 * parked in the matching output slot (after the input has been loaded) and
 * read back after the call for the second XOR.
 */
movdqa IV, STATE1
movdqu 0x00(INP), IN
pxor IN, STATE1
movdqu IV, 0x00(OUTP)

_aesni_gf128mul_x_ble()
movdqa IV, STATE2
movdqu 0x10(INP), IN
pxor IN, STATE2
movdqu IV, 0x10(OUTP)

_aesni_gf128mul_x_ble()
movdqa IV, STATE3
movdqu 0x20(INP), IN
pxor IN, STATE3
movdqu IV, 0x20(OUTP)

_aesni_gf128mul_x_ble()
movdqa IV, STATE4
movdqu 0x30(INP), IN
pxor IN, STATE4
movdqu IV, 0x30(OUTP)

/* presumably decrypts STATE1..STATE4 in place; helper is defined elsewhere */
call _aesni_dec4

/* second XOR with the parked tweaks, then overwrite them with the results */
movdqu 0x00(OUTP), IN
pxor IN, STATE1
movdqu STATE1, 0x00(OUTP)

movdqu 0x10(OUTP), IN
pxor IN, STATE2
movdqu STATE2, 0x10(OUTP)

movdqu 0x20(OUTP), IN
pxor IN, STATE3
movdqu STATE3, 0x20(OUTP)

movdqu 0x30(OUTP), IN
pxor IN, STATE4
movdqu STATE4, 0x30(OUTP)

/* step the tweak once more so IV is ready for the next block */
_aesni_gf128mul_x_ble()

add $64, INP
add $64, OUTP
test LEN, LEN
jnz .Lxts_dec_loop4

.Lxts_dec_ret_iv:
/* hand the updated tweak back to the caller */
movups IV, (IVP)

.Lxts_dec_ret:
#ifndef __x86_64__
popl KLEN
popl KEYP
popl LEN
popl IVP
#endif
FRAME_END
RET

.Lxts_dec_1x:
/* undo the speculative "sub $64"; nothing left -> store IV and return */
add $64, LEN
jz .Lxts_dec_ret_iv

.Lxts_dec_loop1:
movdqu (INP), STATE

add $16, INP
sub $16, LEN
/*
 * LEN went negative: STATE holds the block that was held back at entry and
 * a partial tail follows it -> ciphertext stealing.
 */
jl .Lxts_dec_cts1

pxor IV, STATE
call _aesni_dec1
pxor IV, STATE
_aesni_gf128mul_x_ble()

test LEN, LEN
jz .Lxts_dec_out

movdqu STATE, (OUTP)
add $16, OUTP
jmp .Lxts_dec_loop1

.Lxts_dec_out:
/* final whole block: store it and leave through the IV write-back path */
movdqu STATE, (OUTP)
jmp .Lxts_dec_ret_iv

.Lxts_dec_cts1:
/*
 * Keep the current tweak in STATE4 for the very last decryption below and
 * process the held-back block with the *next* tweak first.
 */
movdqa IV, STATE4
_aesni_gf128mul_x_ble()

pxor IV, STATE
call _aesni_dec1
pxor IV, STATE

#ifndef __x86_64__
lea .Lcts_permute_table, T1
#else
lea .Lcts_permute_table(%rip), T1
#endif
add LEN, INP /* rewind input pointer */
add $16, LEN /* # bytes in final block */
/* IN1 = the last 16 bytes of the input */
movups (INP), IN1

/*
 * With n = bytes in the final block: T1 = table + n, IVP = table + 32 - n
 * (IVP is only a scratch pointer from here on), LEN = OUTP + n.
 * NOTE(review): the table contents are not visible here; the two windows
 * presumably yield the shift and select masks for an n-byte tail -- confirm.
 */
mov T1, IVP
add $32, IVP
add LEN, T1
sub LEN, IVP
add OUTP, LEN

/* permute the decrypted block and store 16 bytes at OUTP + n */
movups (T1), %xmm4
movaps STATE, IN2
pshufb %xmm4, STATE
movups STATE, (LEN)

/* pblendvb takes its byte-select mask implicitly from %xmm0 */
movups (IVP), %xmm0
pshufb %xmm0, IN1
pblendvb IN2, IN1
movaps IN1, STATE

/* decrypt the merged block with the tweak saved in STATE4 */
pxor STATE4, STATE
call _aesni_dec1
pxor STATE4, STATE

movups STATE, (OUTP)
/* IVP no longer points at the caller's iv, so skip the IV write-back */
jmp .Lxts_dec_ret
/*
 * enc=0: instantiate the decryption variant of the shared XTS body.
 * NOTE(review): the open-coded body above ends in an unconditional jmp, so
 * nothing falls through into this expansion as the text stands.  The commit
 * description says the macro invocation replaces the open-coded body --
 * confirm that only one of the two remains in the final file.
 */
_aesni_xts_crypt 0
SYM_FUNC_END(aesni_xts_dec)

0 comments on commit ea9459e

Please sign in to comment.