Skip to content

Commit

Permalink
Asm optimized variant 1
Browse files Browse the repository at this point in the history
  • Loading branch information
SChernykh committed Sep 25, 2018
1 parent 30b8c61 commit 6eb5c37
Show file tree
Hide file tree
Showing 8 changed files with 143 additions and 24 deletions.
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,7 @@ target_link_libraries(xmr-stak-c ${LIBS})

add_library(xmr-stak-asm
STATIC
"crypto/asm/cnv2_main_loop.S"
"crypto/asm/cn_main_loop.S"
)
set_property(TARGET xmr-stak-asm PROPERTY LINKER_LANGUAGE C)

Expand Down
9 changes: 9 additions & 0 deletions crypto/asm/cnv2_main_loop.S → crypto/asm/cn_main_loop.S
Original file line number Diff line number Diff line change
@@ -1,10 +1,19 @@
#define ALIGN .align
.intel_syntax noprefix
.section .text
.global cnv1_mainloop_sandybridge_asm
.global cnv2_mainloop_ivybridge_asm
.global cnv2_mainloop_ryzen_asm
.global cnv2_double_mainloop_sandybridge_asm

/*
 * cnv1_mainloop_sandybridge_asm: GAS entry point that wraps the shared
 * variant 1 main-loop body from cnv1_mainloop_sandybridge.inc.
 * Declared on the C++ side as taking a single cryptonight_ctx* argument.
 */
ALIGN 64
cnv1_mainloop_sandybridge_asm:
/*
 * Reserve 48 bytes of stack: the included body saves rbx/rbp/rsi/rdi at
 * [rsp+8]..[rsp+32] on entry, so those stores must land in space owned by
 * this function.
 */
sub rsp, 48
/*
 * The included body addresses the context through rcx. Copy the pointer
 * from rdi (presumably the first argument register of the System V AMD64
 * calling convention used by the caller of this .S build; confirm).
 */
mov rcx, rdi
#include "cnv1_mainloop_sandybridge.inc"
/* Release the 48 bytes reserved above and return. */
add rsp, 48
ret 0

ALIGN 64
cnv2_mainloop_ivybridge_asm:
sub rsp, 48
Expand Down
7 changes: 7 additions & 0 deletions crypto/asm/cnv2_main_loop.asm → crypto/asm/cn_main_loop.asm
Original file line number Diff line number Diff line change
@@ -1,8 +1,15 @@
_TEXT_CNV2_MAINLOOP SEGMENT PAGE READ EXECUTE
PUBLIC cnv1_mainloop_sandybridge_asm
PUBLIC cnv2_mainloop_ivybridge_asm
PUBLIC cnv2_mainloop_ryzen_asm
PUBLIC cnv2_double_mainloop_sandybridge_asm

; cnv1_mainloop_sandybridge_asm: MASM entry point that wraps the shared
; variant 1 main-loop body from cnv1_mainloop_sandybridge.inc.
; No prologue is needed here: the included body reads the context pointer
; directly from rcx and saves rbx/rbp/rsi/rdi at [rsp+8]..[rsp+32], i.e. just
; above the return address (presumably the caller-provided shadow space of the
; Microsoft x64 calling convention; confirm).
ALIGN 64
cnv1_mainloop_sandybridge_asm PROC
INCLUDE cnv1_mainloop_sandybridge.inc
; The included body has already restored the saved registers and popped
; r15/r14, so rsp is back at the return address.
ret 0
cnv1_mainloop_sandybridge_asm ENDP

ALIGN 64
cnv2_mainloop_ivybridge_asm PROC
INCLUDE cnv2_main_loop_ivybridge.inc
Expand Down
70 changes: 70 additions & 0 deletions crypto/asm/cnv1_mainloop_sandybridge.inc
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
mov QWORD PTR [rsp+8], rbx
mov QWORD PTR [rsp+16], rbp
mov QWORD PTR [rsp+24], rsi
mov QWORD PTR [rsp+32], rdi
push r14
push r15
mov rax, QWORD PTR [rcx+48]
mov ebp, 524288
xor rax, QWORD PTR [rcx+16]
mov rdx, QWORD PTR [rcx+56]
xor rdx, QWORD PTR [rcx+24]
mov r8, QWORD PTR [rcx+32]
xor r8, QWORD PTR [rcx]
movq xmm3, rax
mov rax, QWORD PTR [rcx+256]
mov rdi, QWORD PTR [rcx+40]
movq xmm0, rdx
xor rdi, QWORD PTR [rcx+8]
mov rdx, r8
mov r15, QWORD PTR [rcx+264]
and edx, 2097136
mov r14, QWORD PTR [rax+35]
xor r14, QWORD PTR [rcx+192]
mov rsi, QWORD PTR [rcx+224]
punpcklqdq xmm3, xmm0
movdqu xmm2, XMMWORD PTR [rdx+rsi]

ALIGN 64
cnv1_mainloop_sandybridge:
movq xmm0, rdi
movq xmm1, r8
punpcklqdq xmm1, xmm0
aesenc xmm2, xmm1
movq r10, xmm2
mov r9d, r10d
and r9d, 2097136
add r9, rsi
movdqa xmm0, xmm2
pxor xmm0, xmm3
movdqa xmm3, xmm2
movdqu XMMWORD PTR [rdx+rsi], xmm0
psrldq xmm0, 11
movq rax, xmm0
movzx eax, al
movzx eax, BYTE PTR [rax+r15]
mov BYTE PTR [rsi+rdx+11], al
mov rbx, QWORD PTR [r9]
mov r11, QWORD PTR [r9+8]
mov rax, rbx
mul r10
add r8, rdx
mov QWORD PTR [r9], r8
add rdi, rax
mov rax, r14
xor rax, rdi
mov QWORD PTR [r9+8], rax
xor r8, rbx
mov rdx, r8
and edx, 2097136
movdqu xmm2, XMMWORD PTR [rdx+rsi]
xor rdi, r11
dec ebp
jne cnv1_mainloop_sandybridge

mov rbx, QWORD PTR [rsp+24]
mov rbp, QWORD PTR [rsp+32]
mov rsi, QWORD PTR [rsp+40]
mov rdi, QWORD PTR [rsp+48]
pop r15
pop r14
2 changes: 2 additions & 0 deletions crypto/cryptonight.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ typedef struct {
uint8_t hash_state[224]; // Need only 200, explicit align
uint8_t* long_state;
uint8_t ctx_info[24]; //Use some of the extra memory for flags
const void* input;
uint8_t* variant1_table;
} cryptonight_ctx;

typedef struct {
Expand Down
65 changes: 48 additions & 17 deletions minethd.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -344,12 +344,12 @@ bool minethd::self_test()
}
}

if (jconf::inst()->HaveHardwareAes() && (i == 2))
if (jconf::inst()->HaveHardwareAes() && (i > 0))
{
for (int j = 1; j <= 2; ++j)
{
char hash[32];
cn_hash_fun hash_fun = func_selector(true, 2, j);
cn_hash_fun hash_fun = func_selector(true, i, j);
cn_hash_fun_dbl hash_fun_dbl = func_dbl_selector(jconf::inst()->HaveHardwareAes(), i, j);

hash_fun(input.c_str(), input.length(), hash, ctx0);
Expand All @@ -361,7 +361,7 @@ bool minethd::self_test()
if (memcmp(hash, reference_hash[i], HASH_SIZE) != 0)
{
print_hash(input.c_str(), hash);
printer::inst()->print_msg(L0, "Cryptonight hash self-test (variant 2, asm version %d) failed.", j);
printer::inst()->print_msg(L0, "Cryptonight hash self-test (variant %d, asm version %d) failed.", i, j);
return false;
}
if (!prev_input.empty())
Expand All @@ -370,7 +370,7 @@ bool minethd::self_test()
{
print_hash(prev_input.c_str(), hash_dbl);
print_hash(input.c_str(), hash_dbl + HASH_SIZE);
printer::inst()->print_msg(L0, "Cryptonight double hash self-test (variant 2, asm version %d) failed.", j);
printer::inst()->print_msg(L0, "Cryptonight double hash self-test (variant %d, asm version %d) failed.", i, j);
return false;
}
}
Expand Down Expand Up @@ -413,12 +413,15 @@ int minethd::pgo_instrument()
}
}

for (int i = 1; i <= 2; ++i)
for (int variant = 0; variant <= 2; ++variant)
{
hash_fun = func_selector(true, 2, i);
hash_fun_dbl = func_dbl_selector(true, 2, i);
hash_fun(input, sizeof(input), hash, ctx0);
hash_fun_dbl(input, sizeof(input), hash, input, sizeof(input), hash + 32, ctx0, ctx1);
for (int i = 1; i <= 2; ++i)
{
hash_fun = func_selector(true, variant, i);
hash_fun_dbl = func_dbl_selector(true, variant, i);
hash_fun(input, sizeof(input), hash, ctx0);
hash_fun_dbl(input, sizeof(input), hash, input, sizeof(input), hash + 32, ctx0, ctx1);
}
}

cryptonight_free_ctx(ctx0);
Expand Down Expand Up @@ -479,6 +482,7 @@ void minethd::consume_work()
iConsumeCnt++;
}

extern "C" void cnv1_mainloop_sandybridge_asm(cryptonight_ctx* ctx0);
extern "C" void cnv2_mainloop_ivybridge_asm(cryptonight_ctx* ctx0);
extern "C" void cnv2_mainloop_ryzen_asm(cryptonight_ctx* ctx0);
extern "C" void cnv2_double_mainloop_sandybridge_asm(cryptonight_ctx* ctx0, cryptonight_ctx* ctx1);
Expand All @@ -490,6 +494,26 @@ uint64_t min_cycles = uint64_t(-1);

ALIGN(64) uint8_t variant1_table[256];

// Computes a CryptoNight variant 1 hash of `input`, running the main loop in
// hand-written assembly (cnv1_mainloop_sandybridge_asm).
//
//   input  - data to hash. The assembly loop loads 8 bytes starting at
//            input + 35, so at least 43 bytes must be readable.
//   len    - length of `input` in bytes.
//   output - receives the 32-byte hash; zero-filled when len < 43.
//   ctx0   - working context. hash_state, input and variant1_table are
//            overwritten, and long_state is handed to
//            cn_explode_scratchpad / cn_implode_scratchpad.
void cryptonight_hash_v1_asm(const void* input, size_t len, void* output, cryptonight_ctx* ctx0)
{
	// Variant 1 takes its tweak from input bytes 35..42. Refuse shorter
	// inputs instead of letting the assembly read past the end of the buffer.
	if (len < 43)
	{
		uint8_t* out = static_cast<uint8_t*>(output);
		for (size_t k = 0; k < 32; ++k)
			out[k] = 0;
		return;
	}

	keccak((const uint8_t *)input, len, ctx0->hash_state, 200);
	cn_explode_scratchpad<MEMORY, false>((__m128i*)ctx0->hash_state, (__m128i*)ctx0->long_state);

#ifdef PERFORMANCE_TUNING
	t1 = __rdtsc();
#endif
	// The assembly loop fetches both pointers from the context
	// (offsets 256 and 264 in cnv1_mainloop_sandybridge.inc).
	ctx0->input = input;
	ctx0->variant1_table = variant1_table;
	cnv1_mainloop_sandybridge_asm(ctx0);
#ifdef PERFORMANCE_TUNING
	t2 = __rdtsc();
#endif

	cn_implode_scratchpad<MEMORY, false>((__m128i*)ctx0->long_state, (__m128i*)ctx0->hash_state);
	keccakf((uint64_t*)ctx0->hash_state, 24);
	// The low two bits of the first state byte pick the final hash function.
	extra_hashes[ctx0->hash_state[0] & 3](ctx0->hash_state, 200, (char*)output);
}

template<int asm_version>
void cryptonight_hash_v2_asm(const void* input, size_t len, void* output, cryptonight_ctx* ctx0)
{
Expand Down Expand Up @@ -548,16 +572,23 @@ minethd::cn_hash_fun minethd::func_selector(bool bHaveAes, int variant, int asm_
// function as a two digit binary
// Digit order SOFT_AES, NO_PREFETCH, SHUFFLE, INT_MATH

if (bHaveAes && (variant == 2) && (asm_version > 0))
if (bHaveAes && (asm_version > 0))
{
switch (asm_version)
if (variant == 1)
{
return cryptonight_hash_v1_asm;
}
else if (variant == 2)
{
case 1:
// Intel Ivy Bridge (Xeon v2, Core i7/i5/i3 3xxx, Pentium G2xxx, Celeron G1xxx)
return cryptonight_hash_v2_asm<1>;
case 2:
// AMD Ryzen (1xxx and 2xxx series)
return cryptonight_hash_v2_asm<2>;
switch (asm_version)
{
case 1:
// Intel Ivy Bridge (Xeon v2, Core i7/i5/i3 3xxx, Pentium G2xxx, Celeron G1xxx)
return cryptonight_hash_v2_asm<1>;
case 2:
// AMD Ryzen (1xxx and 2xxx series)
return cryptonight_hash_v2_asm<2>;
}
}
}

Expand Down
2 changes: 1 addition & 1 deletion xmr-stak-cpu.vcxproj
Original file line number Diff line number Diff line change
Expand Up @@ -211,7 +211,7 @@
<ClInclude Include="webdesign.h" />
</ItemGroup>
<ItemGroup>
<MASM Include="crypto\asm\cnv2_main_loop.asm" />
<MASM Include="crypto\asm\cn_main_loop.asm" />
</ItemGroup>
<ItemGroup>
<None Include="crypto\asm\cnv2_double_main_loop_sandybridge.inc" />
Expand Down
10 changes: 5 additions & 5 deletions xmr-stak-cpu.vcxproj.filters
Original file line number Diff line number Diff line change
Expand Up @@ -254,11 +254,6 @@
<Filter>Header Files</Filter>
</ClInclude>
</ItemGroup>
<ItemGroup>
<MASM Include="crypto\asm\cnv2_main_loop.asm">
<Filter>Source Files\asm</Filter>
</MASM>
</ItemGroup>
<ItemGroup>
<None Include="crypto\asm\cnv2_main_loop_ivybridge.inc">
<Filter>Source Files\asm</Filter>
Expand All @@ -270,4 +265,9 @@
<Filter>Source Files\asm</Filter>
</None>
</ItemGroup>
<ItemGroup>
<MASM Include="crypto\asm\cn_main_loop.asm">
<Filter>Source Files\asm</Filter>
</MASM>
</ItemGroup>
</Project>

0 comments on commit 6eb5c37

Please sign in to comment.