Skip to content

Commit

Permalink
streebog: apply skunk improvements to veltor
Browse files Browse the repository at this point in the history
  • Loading branch information
tpruvot committed Aug 16, 2017
1 parent 6055af1 commit 1e71dc5
Show file tree
Hide file tree
Showing 5 changed files with 33 additions and 17 deletions.
2 changes: 1 addition & 1 deletion configure.ac
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
AC_INIT([ccminer], [2.2], [], [ccminer], [http://github.com/tpruvot/ccminer])
AC_INIT([ccminer], [2.2.1], [], [ccminer], [http://github.com/tpruvot/ccminer])

AC_PREREQ([2.59c])
AC_CANONICAL_SYSTEM
Expand Down
2 changes: 1 addition & 1 deletion skunk/cuda_skunk_streebog.cu
Original file line number Diff line number Diff line change
Expand Up @@ -204,7 +204,7 @@ static void GOST_E12(const uint2 shared[8][256],uint2 *const __restrict__ K, uin
__constant__ uint64_t target64[4];

__host__
void skunk_set_target(uint32_t* ptarget)
void skunk_streebog_set_target(uint32_t* ptarget)
{
cudaMemcpyToSymbol(target64, ptarget, 4*sizeof(uint64_t), 0, cudaMemcpyHostToDevice);
}
Expand Down
12 changes: 6 additions & 6 deletions skunk/skunk.cu
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,12 @@ extern void x11_cubehash512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t s
extern void x13_fugue512_cpu_init(int thr_id, uint32_t threads);
extern void x13_fugue512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
extern void x13_fugue512_cpu_free(int thr_id);
extern void streebog_cpu_hash_64_final(int thr_id, uint32_t threads, uint32_t *d_hash, uint32_t* d_resNonce);
extern void streebog_set_target(const uint32_t* ptarget);
extern void streebog_sm3_set_target(uint32_t* ptarget);
extern void streebog_sm3_hash_64_final(int thr_id, uint32_t threads, uint32_t *d_hash, uint32_t* d_resNonce);

// krnlx merged kernel (for high-end cards only)
extern void skunk_cpu_init(int thr_id, uint32_t threads);
extern void skunk_set_target(uint32_t* ptarget);
extern void skunk_streebog_set_target(uint32_t* ptarget);
extern void skunk_setBlock_80(int thr_id, void *pdata);
extern void skunk_cuda_hash_80(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_hash);
extern void skunk_cuda_streebog(int thr_id, uint32_t threads, uint32_t *d_hash, uint32_t* d_resNonce);
Expand Down Expand Up @@ -117,10 +117,10 @@ extern "C" int scanhash_skunk(int thr_id, struct work* work, uint32_t max_nonce,
cudaMemset(d_resNonce[thr_id], 0xff, NBN*sizeof(uint32_t));
if (use_compat_kernels[thr_id]) {
skein512_cpu_setBlock_80(endiandata);
streebog_set_target(ptarget);
streebog_sm3_set_target(ptarget);
} else {
skunk_setBlock_80(thr_id, endiandata);
skunk_set_target(ptarget);
skunk_streebog_set_target(ptarget);
}

do {
Expand All @@ -129,7 +129,7 @@ extern "C" int scanhash_skunk(int thr_id, struct work* work, uint32_t max_nonce,
skein512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
x11_cubehash512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
x13_fugue512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
streebog_cpu_hash_64_final(thr_id, throughput, d_hash[thr_id], d_resNonce[thr_id]);
streebog_sm3_hash_64_final(thr_id, throughput, d_hash[thr_id], d_resNonce[thr_id]);
} else {
skunk_cuda_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id]);
skunk_cuda_streebog(thr_id, throughput, d_hash[thr_id], d_resNonce[thr_id]);
Expand Down
5 changes: 3 additions & 2 deletions x11/cuda_streebog.cu
Original file line number Diff line number Diff line change
Expand Up @@ -806,10 +806,11 @@ void streebog_cpu_hash_64(int thr_id, uint32_t threads, uint32_t *d_hash)
#define T6(x) shared[6][x]
#define T7(x) shared[7][x]

// Streebog final for Veltor and skunk on SM 3.x
__constant__ uint64_t target64[4];

__host__
void streebog_set_target(const uint32_t* ptarget)
void streebog_sm3_set_target(uint32_t* ptarget)
{
cudaMemcpyToSymbol(target64,ptarget,4*sizeof(uint64_t),0,cudaMemcpyHostToDevice);
}
Expand Down Expand Up @@ -995,7 +996,7 @@ void streebog_gpu_hash_64_final(uint64_t *g_hash, uint32_t* resNonce)
}

__host__
void streebog_cpu_hash_64_final(int thr_id, uint32_t threads, uint32_t *d_hash,uint32_t* d_resNonce)
void streebog_sm3_hash_64_final(int thr_id, uint32_t threads, uint32_t *d_hash,uint32_t* d_resNonce)
{
dim3 grid((threads + TPB-1) / TPB);
dim3 block(TPB);
Expand Down
29 changes: 22 additions & 7 deletions x11/veltor.cu
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,17 @@ extern "C" {
#include "cuda_x11.h"

extern void skein512_cpu_setBlock_80(void *pdata);
extern void quark_skein512_cpu_init(int thr_id, uint32_t threads);
extern void skein512_cpu_hash_80(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_hash, int swap);
extern void x14_shabal512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
extern void streebog_cpu_hash_64_final(int thr_id, uint32_t threads, uint32_t *d_hash, uint32_t* d_resNonce);
extern void streebog_set_target(const uint32_t* ptarget);

// for SM3.x
extern void streebog_sm3_set_target(uint32_t* ptarget);
extern void streebog_sm3_hash_64_final(int thr_id, uint32_t threads, uint32_t *d_hash, uint32_t* d_resNonce);

// for latest cards only
extern void skunk_cpu_init(int thr_id, uint32_t threads);
extern void skunk_streebog_set_target(uint32_t* ptarget);
extern void skunk_cuda_streebog(int thr_id, uint32_t threads, uint32_t *d_hash, uint32_t* d_resNonce);

#include <stdio.h>
#include <memory.h>
Expand All @@ -23,7 +29,7 @@ extern void streebog_set_target(const uint32_t* ptarget);
static uint32_t *d_hash[MAX_GPUS];
static uint32_t *d_resNonce[MAX_GPUS];

// veltorcoin CPU Hash
// veltor CPU Hash
extern "C" void veltorhash(void *output, const void *input)
{
unsigned char _ALIGN(128) hash[128] = { 0 };
Expand Down Expand Up @@ -53,6 +59,7 @@ extern "C" void veltorhash(void *output, const void *input)
}

static bool init[MAX_GPUS] = { 0 };
static bool use_compat_kernels[MAX_GPUS] = { 0 };

extern "C" int scanhash_veltor(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done)
{
Expand Down Expand Up @@ -80,7 +87,9 @@ extern "C" int scanhash_veltor(int thr_id, struct work* work, uint32_t max_nonce
}
gpulog(LOG_INFO, thr_id, "Intensity set to %g, %u cuda threads", throughput2intensity(throughput), throughput);

quark_skein512_cpu_init(thr_id, throughput);
skunk_cpu_init(thr_id, throughput);
use_compat_kernels[thr_id] = (cuda_arch[dev_id] < 500);

x11_shavite512_cpu_init(thr_id, throughput);

CUDA_CALL_OR_RET_X(cudaMalloc(&d_hash[thr_id], (size_t) 64 * throughput), 0);
Expand All @@ -97,14 +106,20 @@ extern "C" int scanhash_veltor(int thr_id, struct work* work, uint32_t max_nonce
skein512_cpu_setBlock_80(endiandata);

cudaMemset(d_resNonce[thr_id], 0xff, NBN*sizeof(uint32_t));
streebog_set_target(ptarget);
if(use_compat_kernels[thr_id])
streebog_sm3_set_target(ptarget);
else
skunk_streebog_set_target(ptarget);

do {
int order = 0;
skein512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], 1); order++;
x11_shavite512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
x14_shabal512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
streebog_cpu_hash_64_final(thr_id, throughput, d_hash[thr_id], d_resNonce[thr_id]);
if(use_compat_kernels[thr_id])
streebog_sm3_hash_64_final(thr_id, throughput, d_hash[thr_id], d_resNonce[thr_id]);
else
skunk_cuda_streebog(thr_id, throughput, d_hash[thr_id], d_resNonce[thr_id]);

cudaMemcpy(h_resNonce, d_resNonce[thr_id], NBN*sizeof(uint32_t), cudaMemcpyDeviceToHost);

Expand Down

0 comments on commit 1e71dc5

Please sign in to comment.