Skip to content

Commit

Permalink
assembly patching
Browse files Browse the repository at this point in the history
- add cryptonight_r assembly
- fix cryptonight_v8 assembly
  • Loading branch information
psychocrypt committed Mar 6, 2019
1 parent dc0d5e1 commit 03a0ca2
Show file tree
Hide file tree
Showing 26 changed files with 6,760 additions and 175 deletions.
5 changes: 5 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -508,18 +508,23 @@ if(CMAKE_C_COMPILER_ID MATCHES "MSVC")
# asm optimized monero v8 code
enable_language(ASM_MASM)
set_property(SOURCE "xmrstak/backend/cpu/crypto/asm/cryptonight_v8_main_loop.asm" PROPERTY ASM_MASM)
set_property(SOURCE "xmrstak/backend/cpu/crypto/asm/cnR/CryptonightR_template.asm" PROPERTY ASM_MASM)
add_library(xmr-stak-asm
STATIC
"xmrstak/backend/cpu/crypto/asm/cryptonight_v8_main_loop.asm"
"xmrstak/backend/cpu/crypto/asm/cnR/CryptonightR_template.asm"
)
else()
# asm optimized monero v8 code
enable_language(ASM)
set_property(SOURCE "xmrstak/backend/cpu/crypto/asm/cryptonight_v8_main_loop.S" PROPERTY CPP)
set_property(SOURCE "xmrstak/backend/cpu/crypto/asm/cnR/CryptonightR_template.S" PROPERTY CPP)
set_source_files_properties("xmrstak/backend/cpu/crypto/asm/cryptonight_v8_main_loop.S" PROPERTIES COMPILE_FLAGS "-x assembler-with-cpp")
set_source_files_properties("xmrstak/backend/cpu/crypto/asm/cnR/CryptonightR_template.S" PROPERTIES COMPILE_FLAGS "-x assembler-with-cpp")
add_library(xmr-stak-asm
STATIC
"xmrstak/backend/cpu/crypto/asm/cryptonight_v8_main_loop.S"
"xmrstak/backend/cpu/crypto/asm/cnR/CryptonightR_template.S"
)
endif()

Expand Down
2 changes: 1 addition & 1 deletion xmrstak/backend/amd/OclCryptonightR_gen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -282,7 +282,7 @@ cl_program CryptonightR_get_program(GpuContext* ctx, xmrstak_algo algo, uint64_t
code_size = v4_random_math_init<cryptonight_r>(code, height);
break;
default:
printer::inst()->print_msg(LDEBUG, "CryptonightR_get_program: invalid algo %d", algo);
printer::inst()->print_msg(L0, "CryptonightR_get_program: invalid algo %d", algo);
return nullptr;
}

Expand Down
9 changes: 4 additions & 5 deletions xmrstak/backend/amd/minethd.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -186,8 +186,7 @@ void minethd::work_main()

cpu::minethd::cn_on_new_job set_job;

cn_hash_fun hash_fun;
cpu::minethd::func_multi_selector<1>(hash_fun, set_job, ::jconf::inst()->HaveHardwareAes(), true /*bNoPrefetch*/, miner_algo);
cpu::minethd::func_multi_selector<1>(&cpu_ctx, set_job, ::jconf::inst()->HaveHardwareAes(), true /*bNoPrefetch*/, miner_algo);

uint8_t version = 0;
size_t lastPoolId = 0;
Expand Down Expand Up @@ -228,12 +227,12 @@ void minethd::work_main()
if(new_version >= coinDesc.GetMiningForkVersion())
{
miner_algo = coinDesc.GetMiningAlgo();
cpu::minethd::func_multi_selector<1>(hash_fun, set_job, ::jconf::inst()->HaveHardwareAes(), true /*bNoPrefetch*/, miner_algo);
cpu::minethd::func_multi_selector<1>(&cpu_ctx, set_job, ::jconf::inst()->HaveHardwareAes(), true /*bNoPrefetch*/, miner_algo);
}
else
{
miner_algo = coinDesc.GetMiningAlgoRoot();
cpu::minethd::func_multi_selector<1>(hash_fun, set_job, ::jconf::inst()->HaveHardwareAes(), true /*bNoPrefetch*/, miner_algo);
cpu::minethd::func_multi_selector<1>(&cpu_ctx, set_job, ::jconf::inst()->HaveHardwareAes(), true /*bNoPrefetch*/, miner_algo);
}
lastPoolId = oWork.iPoolId;
version = new_version;
Expand Down Expand Up @@ -282,7 +281,7 @@ void minethd::work_main()

*(uint32_t*)(bWorkBlob + 39) = results[i];

hash_fun(bWorkBlob, oWork.iWorkSize, bResult, &cpu_ctx, miner_algo);
cpu_ctx->hash_fn(bWorkBlob, oWork.iWorkSize, bResult, &cpu_ctx, miner_algo);
if ( (*((uint64_t*)(bResult + 24))) < oWork.iTarget)
executor::inst()->push_event(ex_event(job_result(oWork.sJobID, results[i], bResult, iThreadNo, miner_algo), oWork.iPoolId));
else
Expand Down
107 changes: 107 additions & 0 deletions xmrstak/backend/cpu/crypto/CryptonightR_gen.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
#include <cstring>

typedef void(*void_func)();

#include "xmrstak/backend/cpu/crypto/asm/cnR/CryptonightR_template.h"
#include "cryptonight_aesni.h"
#include "cryptonight.h"
#include "xmrstak/misc/console.hpp"

// Appends the bytes located in the address range [p1, p2) to the output
// cursor `p` and advances `p` past the copied bytes.
//
// p  - in/out write cursor into the destination buffer
// p1 - address of the first byte to copy
// p2 - address one past the last byte to copy
//
// Nothing is copied and `p` is left untouched when p2 does not lie after p1.
// The caller is responsible for the destination having enough room.
static inline void add_code(uint8_t* &p, void (*p1)(), void (*p2)())
{
    const uint8_t* const first = reinterpret_cast<const uint8_t*>(p1);
    const uint8_t* const last = reinterpret_cast<const uint8_t*>(p2);
    const ptrdiff_t length = last - first;

    // Empty or reversed range: nothing to emit.
    if (length <= 0) {
        return;
    }

    memcpy(p, first, length);
    p += length;
}

// Emits machine code for a random-math instruction sequence by copying
// pre-assembled snippets into the output buffer at `p`.
//
// p                - in/out write cursor; advanced past everything emitted
// code             - instruction array, read until an entry with opcode RET
// code_size        - not read by this function; the loop is terminated only
//                    by the RET entry (NOTE(review): `code` must therefore
//                    always contain a RET)
// instructions     - snippet table; entry c spans [instructions[c], instructions[c + 1])
// instructions_mov - snippet table used for the extra code emitted before ROR/ROL
// is_64_bit        - selects where the ADD constant is patched and disables
//                    the 32-bit MUL rewrite
// selected_asm     - assembly variant; the value 2 enables the AMD MUL rewrite
static inline void add_random_math(uint8_t* &p, const V4_Instruction* code, int code_size, const void_func* instructions, const void_func* instructions_mov, bool is_64_bit, int selected_asm)
{
// Source index for which the ROR/ROL helper snippet was last emitted;
// (uint32_t)(-1) means "none".
uint32_t prev_rot_src = (uint32_t)(-1);

for (int i = 0;; ++i) {
const V4_Instruction inst = code[i];
if (inst.opcode == RET) {
break;
}

// MUL keeps its opcode value, every other opcode is shifted up by 2
// (presumably to match the layout of the snippet tables - TODO confirm).
uint8_t opcode = (inst.opcode == MUL) ? inst.opcode : (inst.opcode + 2);
uint8_t dst_index = inst.dst_index;
uint8_t src_index = inst.src_index;

// a/b hold the same destination/source indices as above, widened to 32 bit.
const uint32_t a = inst.dst_index;
const uint32_t b = inst.src_index;
// Table index: opcode in the low bits, then the destination index, then the
// source index. A source index of 8 is replaced by the destination index.
const uint8_t c = opcode | (dst_index << V4_OPCODE_BITS) | (((src_index == 8) ? dst_index : src_index) << (V4_OPCODE_BITS + V4_DST_INDEX_BITS));

switch (inst.opcode) {
case ROR:
case ROL:
// Emit the helper snippet only when the rotate source differs from the
// one the previous helper snippet was emitted for.
if (b != prev_rot_src) {
prev_rot_src = b;
add_code(p, instructions_mov[c], instructions_mov[c + 1]);
}
break;
}

// The instruction writes to the remembered source index, so forget it and
// force the helper snippet to be emitted again for the next rotate.
if (a == prev_rot_src) {
prev_rot_src = (uint32_t)(-1);
}

void_func begin = instructions[c];

// AMD == 2
if ((selected_asm == 2) && (inst.opcode == MUL && !is_64_bit)) {
// AMD Bulldozer has latency 4 for 32-bit IMUL and 6 for 64-bit IMUL
// Always use 32-bit IMUL for AMD Bulldozer in 32-bit mode - skip prefix 0x48 and change 0x49 to 0x41
uint8_t* prefix = reinterpret_cast<uint8_t*>(begin);

// A leading 0x49 byte is replaced by 0x41 in the output; any other leading
// byte is dropped without replacement.
if (*prefix == 0x49) {
*(p++) = 0x41;
}

// Copy the snippet starting after its first byte.
begin = reinterpret_cast<void_func>(prefix + 1);
}

add_code(p, begin, instructions[c + 1]);

if (inst.opcode == ADD) {
// Overwrite 4 bytes of the snippet just emitted with the instruction's
// constant: the last 4 bytes, or the 4 bytes ending 3 bytes before the
// end when is_64_bit is set.
*(uint32_t*)(p - sizeof(uint32_t) - (is_64_bit ? 3 : 0)) = inst.C;
if (is_64_bit) {
prev_rot_src = (uint32_t)(-1);
}
}
}
}

// Rebuilds the CryptonightR main loop for `ctx` in its executable buffer and
// points ctx->loop_fn at the result.
//
// ctx       - context holding the generated instruction sequence
//             (ctx->cn_r_ctx.code), the executable buffer (ctx->fun_data),
//             the assembly variant (ctx->asm_version) and the resulting
//             entry point (ctx->loop_fn)
// code_size - forwarded to add_random_math
//
// The buffer is allocated on first use and made writable again on later
// calls. If no buffer is available an error is printed and ctx->loop_fn is
// left unchanged.
void v4_compile_code(cryptonight_ctx* ctx, int code_size)
{
    printer::inst()->print_msg(LDEBUG, "CryptonightR update ASM code");
    const int allocation_size = 65536;

    // Reuse the existing buffer when there is one, otherwise allocate it.
    if(ctx->fun_data != nullptr)
        unprotectExecutableMemory(ctx->fun_data, allocation_size);
    else
        ctx->fun_data = static_cast<uint8_t*>(allocateExecutableMemory(allocation_size));

    if(ctx->fun_data == nullptr)
    {
        printer::inst()->print_msg(L0, "Error: CPU CryptonightR update ASM code ctx->fun_data is a nullptr");
        return;
    }

    uint8_t* const base = ctx->fun_data;
    uint8_t* cursor = base;

    // Template prologue, then the generated random math, then the loop tail.
    add_code(cursor, CryptonightR_template_part1, CryptonightR_template_part2);
    add_random_math(cursor, ctx->cn_r_ctx.code, code_size, instructions, instructions_mov, false, ctx->asm_version);
    add_code(cursor, CryptonightR_template_part2, CryptonightR_template_part3);

    // Patch the last 4 bytes emitted so far with the distance from the current
    // position to the main-loop label, measured in the generated buffer.
    const ptrdiff_t mainloop_offset = ((const uint8_t*)CryptonightR_template_mainloop) - ((const uint8_t*)CryptonightR_template_part1);
    *(int*)(cursor - 4) = static_cast<int>(mainloop_offset - (cursor - base));

    add_code(cursor, CryptonightR_template_part3, CryptonightR_template_end);

    ctx->loop_fn = reinterpret_cast<cn_mainloop_fun>(ctx->fun_data);
    protectExecutableMemory(ctx->fun_data, allocation_size);
    flushInstructionCache(ctx->fun_data, cursor - base);
}
Loading

0 comments on commit 03a0ca2

Please sign in to comment.