Skip to content

Commit

Permalink
Optimized asm code for Bulldozer
Browse files Browse the repository at this point in the history
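Small tweak, about 0.1% faster: xmm6 is now built with movq + pinsrq instead of movq/movq/punpcklqdq, and the high qword of xmm5 is read with pextrq instead of movdqa/psrldq/movq. Requires SSE 4.1 support (for pinsrq/pextrq).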
  • Loading branch information
SChernykh committed Oct 20, 2018
1 parent f4dfc2b commit 20ba4c9
Showing 1 changed file with 4 additions and 6 deletions.
10 changes: 4 additions & 6 deletions crypto/asm/cnv2_main_loop_bulldozer.inc
Original file line number Diff line number Diff line change
Expand Up @@ -48,9 +48,8 @@
ALIGN 64
$main_loop_bulldozer:
movdqa xmm5, XMMWORD PTR [r10+rbx]
movq xmm0, r11
movq xmm6, r8
punpcklqdq xmm6, xmm0
movq xmm6, r8
pinsrq xmm6, r11, 1
lea rdx, QWORD PTR [r10+rbx]
lea r9, QWORD PTR [rdi+rdi]
shl rdi, 32
Expand Down Expand Up @@ -79,10 +78,9 @@ $main_loop_bulldozer:
shl rdi, 52

movq r14, xmm5
pextrq rax, xmm5, 1

movdqa xmm0, xmm5
movdqa xmm1, xmm5
psrldq xmm1, 8
movq rax, xmm1
pxor xmm0, xmm3
mov r10, r14
and r10d, 2097136
Expand Down

0 comments on commit 20ba4c9

Please sign in to comment.