From 7a051638949a01c4cc22fae3850176c4e9bfdcd3 Mon Sep 17 00:00:00 2001
From: Tanguy Pruvot <tanguy.pruvot@gmail.com>
Date: Mon, 9 Apr 2018 18:22:07 +0200
Subject: [PATCH] TITAN V fix + code style + spaces (to be continued)

revert some useless changes, to continue...
---
 ccminer.cpp                 |  66 ++++-----
 crypto/cn_blake.cuh         | 269 ++++++++++++++++++------------------
 crypto/cn_keccak.cuh        |  14 +-
 crypto/cryptonight-core.cu  |  33 +++--
 crypto/cryptonight-extra.cu |  93 +++++++------
 crypto/cryptonight.cu       |   6 +-
 crypto/xmr-rpc.cpp          |   9 +-
 7 files changed, 249 insertions(+), 241 deletions(-)

diff --git a/ccminer.cpp b/ccminer.cpp
index 344b364f1c..89a4f2f502 100644
--- a/ccminer.cpp
+++ b/ccminer.cpp
@@ -269,7 +269,7 @@ Options:\n\
 			lyra2v2     VertCoin\n\
 			lyra2z      ZeroCoin (3rd impl)\n\
 			myr-gr      Myriad-Groestl\n\
-			monero      XMR cryptonight v7 (new)\n\
+			monero      XMR cryptonight (v7)\n\
 			neoscrypt   FeatherCoin, Phoenix, UFO...\n\
 			nist5       NIST5 (TalkCoin)\n\
 			penta       Pentablake hash (5x Blake 512)\n\
@@ -577,13 +577,10 @@ static bool get_blocktemplate(CURL *curl, struct work *work);
 
 void get_currentalgo(char* buf, int sz)
 {
-  int algo = opt_algo;
-
-  if (algo == ALGO_CRYPTONIGHT) {
-    algo = get_cryptonight_algo(cryptonight_fork);
-  }
-
-  snprintf(buf, sz, "%s", algo_names[algo]);
+	int algo = opt_algo;
+	if (algo == ALGO_CRYPTONIGHT)
+		algo = get_cryptonight_algo(cryptonight_fork);
+	snprintf(buf, sz, "%s", algo_names[algo]);
 }
 
 void format_hashrate(double hashrate, char *output)
@@ -1828,7 +1825,6 @@ static void *miner_thread(void *userdata)
 	bool extrajob = false;
 	char s[16];
 	int rc = 0;
-  int variant;
 
 	memset(&work, 0, sizeof(work)); // prevent work from being used uninitialized
 
@@ -2366,14 +2362,13 @@ static void *miner_thread(void *userdata)
 			rc = scanhash_cryptolight(thr_id, &work, max_nonce, &hashes_done);
 			break;
 		case ALGO_CRYPTONIGHT:
-      variant = 0;
-
-      if (cryptonight_fork > 1) {
-        variant = ((((unsigned char*)work.data)[0] >= cryptonight_fork) ? ((unsigned char*)work.data)[0] - cryptonight_fork + 1 : 0 );
-      }
-
-			rc = scanhash_cryptonight(thr_id, &work, max_nonce, &hashes_done, variant);
+		{
+			int cn_variant = 0;
+			if (cryptonight_fork > 1 && ((unsigned char*)work.data)[0] >= cryptonight_fork)
+				cn_variant = ((unsigned char*)work.data)[0] - cryptonight_fork + 1;
+			rc = scanhash_cryptonight(thr_id, &work, max_nonce, &hashes_done, cn_variant);
 			break;
+		}
 		case ALGO_DECRED:
 			rc = scanhash_decred(thr_id, &work, max_nonce, &hashes_done);
 			break;
@@ -3136,27 +3131,24 @@ void parse_arg(int key, char *arg)
 			}
 		}
 
-    //fix cryptonight
-    switch (opt_algo) {
-      case ALGO_MONERO:
-        opt_algo = ALGO_CRYPTONIGHT;
-        cryptonight_fork = 7;
-        break;
-
-      case ALGO_GRAFT:
-        opt_algo = ALGO_CRYPTONIGHT;
-        cryptonight_fork = 8;
-        break;
-
-      case ALGO_STELLITE:
-        opt_algo = ALGO_CRYPTONIGHT;
-        cryptonight_fork = 3;
-        break;
-
-      case ALGO_CRYPTONIGHT:
-        cryptonight_fork = 1;
-        break;
-    }
+		// cryptonight variants
+		switch (opt_algo) {
+		case ALGO_MONERO:
+			opt_algo = ALGO_CRYPTONIGHT;
+			cryptonight_fork = 7;
+			break;
+		case ALGO_GRAFT:
+			opt_algo = ALGO_CRYPTONIGHT;
+			cryptonight_fork = 8;
+			break;
+		case ALGO_STELLITE:
+			opt_algo = ALGO_CRYPTONIGHT;
+			cryptonight_fork = 3;
+			break;
+		case ALGO_CRYPTONIGHT:
+			cryptonight_fork = 1;
+			break;
+		}
 
 		break;
 	case 'b':
diff --git a/crypto/cn_blake.cuh b/crypto/cn_blake.cuh
index 3f410f73d6..5c0d09f27a 100644
--- a/crypto/cn_blake.cuh
+++ b/crypto/cn_blake.cuh
@@ -1,3 +1,4 @@
+//#include <cuda_helper.h>
 
 typedef struct {
   uint32_t h[8], s[4], t[2];
@@ -6,24 +7,25 @@ typedef struct {
 } blake_state;
 
 #define U8TO32(p) \
-    (((uint32_t)((p)[0]) << 24) | ((uint32_t)((p)[1]) << 16) |    \
-     ((uint32_t)((p)[2]) <<  8) | ((uint32_t)((p)[3])      ))
+	(((uint32_t)((p)[0]) << 24) | ((uint32_t)((p)[1]) << 16) | \
+	 ((uint32_t)((p)[2]) <<  8) | ((uint32_t)((p)[3])      ))
+
 #define U32TO8(p, v) \
-    (p)[0] = (uint8_t)((v) >> 24); (p)[1] = (uint8_t)((v) >> 16); \
-    (p)[2] = (uint8_t)((v) >>  8); (p)[3] = (uint8_t)((v)      );
+	(p)[0] = (uint8_t)((v) >> 24); (p)[1] = (uint8_t)((v) >> 16); \
+	(p)[2] = (uint8_t)((v) >>  8); (p)[3] = (uint8_t)((v)      );
+
 #define BLAKE_ROT(x,n) ROTR32(x, n)
 #define BLAKE_G(a,b,c,d,e)                                      \
-    v[a] += (m[d_blake_sigma[i][e]] ^ d_blake_cst[d_blake_sigma[i][e+1]]) + v[b]; \
-    v[d] = BLAKE_ROT(v[d] ^ v[a],16);                           \
-    v[c] += v[d];                                         \
-    v[b] = BLAKE_ROT(v[b] ^ v[c],12);                           \
-    v[a] += (m[d_blake_sigma[i][e+1]] ^ d_blake_cst[d_blake_sigma[i][e]])+v[b];   \
-    v[d] = BLAKE_ROT(v[d] ^ v[a], 8);                           \
-    v[c] += v[d];                                         \
-    v[b] = BLAKE_ROT(v[b] ^ v[c], 7);
-
-__constant__ uint8_t d_blake_sigma[14][16] =
-{
+	v[a] += (m[d_blake_sigma[i][e]] ^ d_blake_cst[d_blake_sigma[i][e+1]]) + v[b]; \
+	v[d] = BLAKE_ROT(v[d] ^ v[a],16); \
+	v[c] += v[d]; \
+	v[b] = BLAKE_ROT(v[b] ^ v[c],12); \
+	v[a] += (m[d_blake_sigma[i][e+1]] ^ d_blake_cst[d_blake_sigma[i][e]]) + v[b]; \
+	v[d] = BLAKE_ROT(v[d] ^ v[a], 8); \
+	v[c] += v[d]; \
+	v[b] = BLAKE_ROT(v[b] ^ v[c], 7);
+
+__constant__ uint8_t d_blake_sigma[14][16] = {
 	{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
 	{14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3},
 	{11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4},
@@ -39,137 +41,140 @@ __constant__ uint8_t d_blake_sigma[14][16] =
 	{11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4},
 	{7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8}
 };
-__constant__ uint32_t d_blake_cst[16]
-= {
+
+__constant__ uint32_t d_blake_cst[16] = {
 	0x243F6A88, 0x85A308D3, 0x13198A2E, 0x03707344,
 	0xA4093822, 0x299F31D0, 0x082EFA98, 0xEC4E6C89,
 	0x452821E6, 0x38D01377, 0xBE5466CF, 0x34E90C6C,
 	0xC0AC29B7, 0xC97C50DD, 0x3F84D5B5, 0xB5470917
 };
 
-__device__ void cn_blake_compress(blake_state * __restrict__ S, const uint8_t * __restrict__ block)
+__device__
+void cn_blake_compress(blake_state * __restrict__ S, const uint8_t * __restrict__ block)
 {
-    uint32_t v[16], m[16], i;
-
-    for (i = 0; i < 16; ++i) m[i] = U8TO32(block + i * 4);
-    for (i = 0; i < 8;  ++i) v[i] = S->h[i];
-    v[ 8] = S->s[0] ^ 0x243F6A88;
-    v[ 9] = S->s[1] ^ 0x85A308D3;
-    v[10] = S->s[2] ^ 0x13198A2E;
-    v[11] = S->s[3] ^ 0x03707344;
-    v[12] = 0xA4093822;
-    v[13] = 0x299F31D0;
-    v[14] = 0x082EFA98;
-    v[15] = 0xEC4E6C89;
-
-    if (S->nullt == 0) {
-        v[12] ^= S->t[0];
-        v[13] ^= S->t[0];
-        v[14] ^= S->t[1];
-        v[15] ^= S->t[1];
-    }
-
-    for (i = 0; i < 14; ++i) {
-        BLAKE_G(0, 4,  8, 12,  0);
-        BLAKE_G(1, 5,  9, 13,  2);
-        BLAKE_G(2, 6, 10, 14,  4);
-        BLAKE_G(3, 7, 11, 15,  6);
-        BLAKE_G(3, 4,  9, 14, 14);
-        BLAKE_G(2, 7,  8, 13, 12);
-        BLAKE_G(0, 5, 10, 15,  8);
-        BLAKE_G(1, 6, 11, 12, 10);
-    }
-
-    for (i = 0; i < 16; ++i) S->h[i % 8] ^= v[i];
-    for (i = 0; i < 8;  ++i) S->h[i] ^= S->s[i % 4];
+	uint32_t v[16], m[16], i;
+
+	for (i = 0; i < 16; ++i) m[i] = U8TO32(block + i * 4);
+	for (i = 0; i < 8;  ++i) v[i] = S->h[i];
+	v[ 8] = S->s[0] ^ 0x243F6A88;
+	v[ 9] = S->s[1] ^ 0x85A308D3;
+	v[10] = S->s[2] ^ 0x13198A2E;
+	v[11] = S->s[3] ^ 0x03707344;
+	v[12] = 0xA4093822;
+	v[13] = 0x299F31D0;
+	v[14] = 0x082EFA98;
+	v[15] = 0xEC4E6C89;
+
+	if (S->nullt == 0) {
+		v[12] ^= S->t[0];
+		v[13] ^= S->t[0];
+		v[14] ^= S->t[1];
+		v[15] ^= S->t[1];
+	}
+
+	for (i = 0; i < 14; ++i) {
+		BLAKE_G(0, 4,  8, 12,  0);
+		BLAKE_G(1, 5,  9, 13,  2);
+		BLAKE_G(2, 6, 10, 14,  4);
+		BLAKE_G(3, 7, 11, 15,  6);
+		BLAKE_G(3, 4,  9, 14, 14);
+		BLAKE_G(2, 7,  8, 13, 12);
+		BLAKE_G(0, 5, 10, 15,  8);
+		BLAKE_G(1, 6, 11, 12, 10);
+	}
+
+	for (i = 0; i < 16; ++i) S->h[i % 8] ^= v[i];
+	for (i = 0; i < 8;  ++i) S->h[i] ^= S->s[i % 4];
 }
 
 __device__ void cn_blake_update(blake_state * __restrict__ S, const uint8_t * __restrict__ data, uint64_t datalen)
 {
-    int left = S->buflen >> 3;
-    int fill = 64 - left;
-
-    if (left && (((datalen >> 3) & 0x3F) >= (unsigned) fill)) {
-        memcpy((void *) (S->buf + left), (void *) data, fill);
-        S->t[0] += 512;
-        if (S->t[0] == 0) S->t[1]++;
-        cn_blake_compress(S, S->buf);
-        data += fill;
-        datalen -= (fill << 3);
-        left = 0;
-    }
-
-    while (datalen >= 512) {
-        S->t[0] += 512;
-        if (S->t[0] == 0) S->t[1]++;
-        cn_blake_compress(S, data);
-        data += 64;
-        datalen -= 512;
-    }
-
-    if (datalen > 0) {
-        memcpy((void *) (S->buf + left), (void *) data, datalen >> 3);
-        S->buflen = (left << 3) + datalen;
-    } else {
-        S->buflen = 0;
-    }
+	int left = S->buflen >> 3;
+	int fill = 64 - left;
+
+	if (left && (((datalen >> 3) & 0x3F) >= (unsigned) fill)) {
+		memcpy((void *) (S->buf + left), (void *) data, fill);
+		S->t[0] += 512;
+		if (S->t[0] == 0) S->t[1]++;
+		cn_blake_compress(S, S->buf);
+		data += fill;
+		datalen -= (fill << 3);
+		left = 0;
+	}
+
+	while (datalen >= 512) {
+		S->t[0] += 512;
+		if (S->t[0] == 0) S->t[1]++;
+		cn_blake_compress(S, data);
+		data += 64;
+		datalen -= 512;
+	}
+
+	if (datalen > 0) {
+		memcpy((void *) (S->buf + left), (void *) data, datalen >> 3);
+		S->buflen = (left << 3) + datalen;
+	} else {
+		S->buflen = 0;
+	}
 }
 
-__device__ void cn_blake_final(blake_state * __restrict__ S, uint8_t * __restrict__ digest)
+__device__
+void cn_blake_final(blake_state * __restrict__ S, uint8_t * __restrict__ digest)
 {
-    const uint8_t padding[] = {
-        0x80,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-        0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
-    };
-    uint8_t pa = 0x81, pb = 0x01;
-    uint8_t msglen[8];
-    uint32_t lo = S->t[0] + S->buflen, hi = S->t[1];
-    if (lo < (unsigned) S->buflen) hi++;
-    U32TO8(msglen + 0, hi);
-    U32TO8(msglen + 4, lo);
-
-    if (S->buflen == 440) {
-        S->t[0] -= 8;
-        cn_blake_update(S, &pa, 8);
-    } else {
-        if (S->buflen < 440) {
-            if (S->buflen == 0) S->nullt = 1;
-            S->t[0] -= 440 - S->buflen;
-            cn_blake_update(S, padding, 440 - S->buflen);
-        } else {
-            S->t[0] -= 512 - S->buflen;
-            cn_blake_update(S, padding, 512 - S->buflen);
-            S->t[0] -= 440;
-            cn_blake_update(S, padding + 1, 440);
-            S->nullt = 1;
-        }
-        cn_blake_update(S, &pb, 8);
-        S->t[0] -= 8;
-    }
-    S->t[0] -= 64;
-    cn_blake_update(S, msglen, 64);
-
-    U32TO8(digest +  0, S->h[0]);
-    U32TO8(digest +  4, S->h[1]);
-    U32TO8(digest +  8, S->h[2]);
-    U32TO8(digest + 12, S->h[3]);
-    U32TO8(digest + 16, S->h[4]);
-    U32TO8(digest + 20, S->h[5]);
-    U32TO8(digest + 24, S->h[6]);
-    U32TO8(digest + 28, S->h[7]);
+	const uint8_t padding[] = {
+		0x80,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+		0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
+	};
+	uint8_t pa = 0x81, pb = 0x01;
+	uint8_t msglen[8];
+	uint32_t lo = S->t[0] + S->buflen, hi = S->t[1];
+	if (lo < (unsigned) S->buflen) hi++;
+	U32TO8(msglen + 0, hi);
+	U32TO8(msglen + 4, lo);
+
+	if (S->buflen == 440) {
+		S->t[0] -= 8;
+		cn_blake_update(S, &pa, 8);
+	} else {
+		if (S->buflen < 440) {
+			if (S->buflen == 0) S->nullt = 1;
+			S->t[0] -= 440 - S->buflen;
+			cn_blake_update(S, padding, 440 - S->buflen);
+		} else {
+			S->t[0] -= 512 - S->buflen;
+			cn_blake_update(S, padding, 512 - S->buflen);
+			S->t[0] -= 440;
+			cn_blake_update(S, padding + 1, 440);
+			S->nullt = 1;
+		}
+		cn_blake_update(S, &pb, 8);
+		S->t[0] -= 8;
+	}
+	S->t[0] -= 64;
+	cn_blake_update(S, msglen, 64);
+
+	U32TO8(digest +  0, S->h[0]);
+	U32TO8(digest +  4, S->h[1]);
+	U32TO8(digest +  8, S->h[2]);
+	U32TO8(digest + 12, S->h[3]);
+	U32TO8(digest + 16, S->h[4]);
+	U32TO8(digest + 20, S->h[5]);
+	U32TO8(digest + 24, S->h[6]);
+	U32TO8(digest + 28, S->h[7]);
 }
 
-__device__ void cn_blake(const uint8_t * __restrict__ in, uint64_t inlen, uint8_t * __restrict__ out)
+__device__
+void cn_blake(const uint8_t * __restrict__ in, uint64_t inlen, uint8_t * __restrict__ out)
 {
-    blake_state bs;
-    blake_state *S = (blake_state *)&bs;
-
-    S->h[0] = 0x6A09E667; S->h[1] = 0xBB67AE85; S->h[2] = 0x3C6EF372;
-    S->h[3] = 0xA54FF53A; S->h[4] = 0x510E527F; S->h[5] = 0x9B05688C;
-    S->h[6] = 0x1F83D9AB; S->h[7] = 0x5BE0CD19;
-    S->t[0] = S->t[1] = S->buflen = S->nullt = 0;
-    S->s[0] = S->s[1] = S->s[2] = S->s[3] = 0;
-    
-    cn_blake_update(S, (uint8_t *)in, inlen * 8);
-    cn_blake_final(S, (uint8_t *)out);
-}
\ No newline at end of file
+	blake_state bs;
+	blake_state *S = (blake_state *)&bs;
+
+	S->h[0] = 0x6A09E667; S->h[1] = 0xBB67AE85; S->h[2] = 0x3C6EF372;
+	S->h[3] = 0xA54FF53A; S->h[4] = 0x510E527F; S->h[5] = 0x9B05688C;
+	S->h[6] = 0x1F83D9AB; S->h[7] = 0x5BE0CD19;
+	S->t[0] = S->t[1] = S->buflen = S->nullt = 0;
+	S->s[0] = S->s[1] = S->s[2] = S->s[3] = 0;
+
+	cn_blake_update(S, (uint8_t *)in, inlen * 8);
+	cn_blake_final(S, (uint8_t *)out);
+}
diff --git a/crypto/cn_keccak.cuh b/crypto/cn_keccak.cuh
index 985f798d60..c6f5908f61 100644
--- a/crypto/cn_keccak.cuh
+++ b/crypto/cn_keccak.cuh
@@ -1,5 +1,4 @@
-__constant__ uint64_t keccakf_rndc[24] =
-{
+__constant__ uint64_t keccakf_rndc[24] = {
 	0x0000000000000001, 0x0000000000008082, 0x800000000000808a,
 	0x8000000080008000, 0x000000000000808b, 0x0000000080000001,
 	0x8000000080008081, 0x8000000000008009, 0x000000000000008a,
@@ -34,7 +33,8 @@ __constant__ uint64_t keccakf_rndc[24] =
 #define rotl64_2(x, y) rotl64_1(((x) >> 32) | ((x) << 32), (y))
 #define bitselect(a, b, c) ((a) ^ ((c) & ((b) ^ (a))))
 
-__device__ __forceinline__ void cn_keccakf2(uint64_t *s)
+__device__ __forceinline__
+void cn_keccakf2(uint64_t *s)
 {
 	uint8_t i;
 
@@ -91,7 +91,8 @@ __device__ __forceinline__ void cn_keccakf2(uint64_t *s)
 	}
 }
 
-__device__ __forceinline__ void cn_keccakf(uint64_t *s)
+__device__ __forceinline__
+void cn_keccakf(uint64_t *s)
 {
 	uint64_t bc[5], tmpxor[5], tmp1, tmp2;
 
@@ -193,7 +194,8 @@ __device__ __forceinline__ void cn_keccakf(uint64_t *s)
 	}
 }
 
-__device__ __forceinline__ void cn_keccak(const uint32_t * __restrict__ in, uint64_t * __restrict__ md)
+__device__ __forceinline__
+void cn_keccak(const uint32_t * __restrict__ in, uint64_t * __restrict__ md)
 {
 	uint64_t st[25];
 
@@ -206,4 +208,4 @@ __device__ __forceinline__ void cn_keccak(const uint32_t * __restrict__ in, uint
 
 	MEMCPY8(md, st, 25);
 	return;
-}
\ No newline at end of file
+}
diff --git a/crypto/cryptonight-core.cu b/crypto/cryptonight-core.cu
index 83a2098086..cce3e1422f 100644
--- a/crypto/cryptonight-core.cu
+++ b/crypto/cryptonight-core.cu
@@ -145,10 +145,11 @@ void cryptonight_core_gpu_phase2(uint32_t threads, int bfactor, int partidx, uin
 
 	a = ctx_a[sub];
 	d[1] = ctx_b[sub];
-#pragma unroll 2
+
+	#pragma unroll 2
 	for (i = start; i < end; ++i)
 	{
-#pragma unroll 2
+		#pragma unroll
 		for (int x = 0; x < 2; ++x)
 		{
 			j = ((__shfl(a, 0, 4) & 0x1FFFF0) >> 2) + sub;
@@ -180,7 +181,7 @@ void cryptonight_core_gpu_phase2(uint32_t threads, int bfactor, int partidx, uin
 			zz[1] = __shfl(yy[1], 0, 4);
 
 			t1[1] = __shfl(d[x], 1, 4);
-#pragma unroll
+			#pragma unroll
 			for (k = 0; k < 2; k++)
 				t2[k] = __shfl(a, k + sub2, 4);
 
@@ -214,14 +215,14 @@ void cryptonight_core_gpu_phase3(int threads, const uint32_t * __restrict__ long
 	{
 		const int sub = (threadIdx.x & 7) << 2;
 		const uint32_t *longstate = &long_state[(thread << 19) + sub];
-		uint32_t key[40], text[4], i, j;
+		uint32_t key[40], text[4];
 		MEMCPY8(key, d_ctx_key2 + thread * 40, 20);
 		MEMCPY8(text, d_ctx_state + thread * 50 + sub + 16, 2);
 
-		for(i = 0; i < 0x80000; i += 32)
+		for(int i = 0; i < 0x80000; i += 32)
 		{
-#pragma unroll
-			for(j = 0; j < 4; ++j)
+			#pragma unroll
+			for(int j = 0; j < 4; ++j)
 				text[j] ^= longstate[i + j];
 
 			cn_aes_pseudo_round_mut(sharedMemory, text, key);
@@ -239,26 +240,24 @@ void cryptonight_core_cpu_hash(int thr_id, int blocks, int threads, uint32_t *d_
 	dim3 block4(threads << 2);
 	dim3 block8(threads << 3);
 
-	uint32_t i;
 	const uint32_t bfactor = (uint32_t) device_bfactor[thr_id];
 	const uint32_t partcount = 1 << bfactor;
 	const uint32_t throughput = (uint32_t) (blocks*threads);
-
 	const int bsleep = bfactor ? 100 : 0;
-	//const int dev_id = device_map[thr_id];
+	uint32_t i;
 
-	cryptonight_core_gpu_phase1 <<< grid, block8 >>>(throughput, d_long_state, d_ctx_state, d_ctx_key1);
-	exit_if_cudaerror(thr_id, __FILE__, __LINE__);
+	cryptonight_core_gpu_phase1 <<<grid, block8>>> (throughput, d_long_state, d_ctx_state, d_ctx_key1);
+	exit_if_cudaerror(thr_id, __FUNCTION__, __LINE__);
 	if(partcount > 1) usleep(bsleep);
 
 	for(i = 0; i < partcount; i++)
 	{
-		cryptonight_core_gpu_phase2 <<< grid, block4 >>>(throughput, bfactor, i, d_long_state, d_ctx_a, d_ctx_b, variant, d_ctx_tweak1_2);
-		exit_if_cudaerror(thr_id, __FILE__, __LINE__);
+		cryptonight_core_gpu_phase2 <<<grid, block4>>> (throughput, bfactor, i, d_long_state, d_ctx_a, d_ctx_b, variant, d_ctx_tweak1_2);
+		exit_if_cudaerror(thr_id, __FUNCTION__, __LINE__);
 		if(partcount > 1) usleep(bsleep);
 	}
 	cudaDeviceSynchronize();
-	exit_if_cudaerror(thr_id, __FILE__, __LINE__);
-	cryptonight_core_gpu_phase3 <<< grid, block8 >>>(throughput, d_long_state, d_ctx_state, d_ctx_key2);
-	exit_if_cudaerror(thr_id, __FILE__, __LINE__);
+	exit_if_cudaerror(thr_id, __FUNCTION__, __LINE__);
+	cryptonight_core_gpu_phase3 <<<grid, block8>>> (throughput, d_long_state, d_ctx_state, d_ctx_key2);
+	exit_if_cudaerror(thr_id, __FUNCTION__, __LINE__);
 }
diff --git a/crypto/cryptonight-extra.cu b/crypto/cryptonight-extra.cu
index 31eaf4ed83..97c1c3991a 100644
--- a/crypto/cryptonight-extra.cu
+++ b/crypto/cryptonight-extra.cu
@@ -9,12 +9,12 @@
 #include <cuda_helper.h>
 #include "cryptonight.h"
 
-typedef unsigned char BitSequence;
-typedef unsigned long long DataLength;
+typedef uint8_t BitSequence;
+typedef uint64_t DataLength;
 
-static uint32_t *d_input[MAX_GPUS];
+static uint32_t *d_input[MAX_GPUS] = { 0 };
 static uint32_t *d_target[MAX_GPUS];
-static uint32_t *d_resultNonce[MAX_GPUS];
+static uint32_t *d_result[MAX_GPUS];
 
 #include "cn_keccak.cuh"
 #include "cn_blake.cuh"
@@ -22,8 +22,7 @@ static uint32_t *d_resultNonce[MAX_GPUS];
 #include "cn_jh.cuh"
 #include "cn_skein.cuh"
 
-__constant__ uint8_t d_sub_byte[16][16] =
-{
+__constant__ uint8_t d_sub_byte[16][16] = {
 	{0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76},
 	{0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0},
 	{0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc, 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15},
@@ -42,37 +41,40 @@ __constant__ uint8_t d_sub_byte[16][16] =
 	{0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16}
 };
 
-__device__ __forceinline__ void cryptonight_aes_set_key(uint32_t * __restrict__ key, const uint32_t * __restrict__ data)
+__device__ __forceinline__
+void cryptonight_aes_set_key(uint32_t * __restrict__ key, const uint32_t * __restrict__ data)
 {
-	int i, j;
-	uint8_t temp[4];
-	const uint32_t aes_gf[10] =
-	{
+	const uint32_t aes_gf[10] = {
 		0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36
 	};
 
 	MEMCPY4(key, data, 8);
-#pragma unroll
-	for(i = 8; i < 40; i++)
+	#pragma unroll
+	for(int i = 8; i < 40; i++)
 	{
+		uint8_t temp[4];
 		*(uint32_t *)temp = key[i - 1];
-		if(i % 8 == 0)
-		{
+
+		if(i % 8 == 0) {
 			*(uint32_t *)temp = ROTR32(*(uint32_t *)temp, 8);
-			for(j = 0; j < 4; j++)
+			for(int j = 0; j < 4; j++)
 				temp[j] = d_sub_byte[(temp[j] >> 4) & 0x0f][temp[j] & 0x0f];
 			*(uint32_t *)temp ^= aes_gf[i / 8 - 1];
 		}
-		else
-			if(i % 8 == 4)
-#pragma unroll
-				for(j = 0; j < 4; j++)
-					temp[j] = d_sub_byte[(temp[j] >> 4) & 0x0f][temp[j] & 0x0f];
+		else if(i % 8 == 4) {
+			#pragma unroll
+			for(int j = 0; j < 4; j++)
+				temp[j] = d_sub_byte[(temp[j] >> 4) & 0x0f][temp[j] & 0x0f];
+		}
+
 		key[i] = key[(i - 8)] ^ *(uint32_t *)temp;
 	}
 }
 
-__global__ void cryptonight_extra_gpu_prepare(int threads, const uint32_t * __restrict__ d_input, uint32_t startNonce, uint32_t * __restrict__ d_ctx_state, uint32_t * __restrict__ d_ctx_a, uint32_t * __restrict__ d_ctx_b, uint32_t * __restrict__ d_ctx_key1, uint32_t * __restrict__ d_ctx_key2, int variant, uint32_t * d_ctx_tweak1_2)
+__global__
+void cryptonight_extra_gpu_prepare(int threads, const uint32_t * __restrict__ d_input, uint32_t startNonce,
+	uint32_t * __restrict__ d_ctx_state, uint32_t * __restrict__ d_ctx_a, uint32_t * __restrict__ d_ctx_b,
+	uint32_t * __restrict__ d_ctx_key1, uint32_t * __restrict__ d_ctx_key2, int variant, uint32_t * d_ctx_tweak1_2)
 {
 	int thread = (blockDim.x * blockIdx.x + threadIdx.x);
 
@@ -117,10 +119,12 @@ __global__ void cryptonight_extra_gpu_prepare(int threads, const uint32_t * __re
 	}
 }
 
-__global__ void cryptonight_extra_gpu_final(int threads, uint32_t startNonce, const uint32_t * __restrict__ d_target, uint32_t * __restrict__ resNonce, uint32_t * __restrict__ d_ctx_state)
+__global__
+void cryptonight_extra_gpu_final(int threads, uint32_t startNonce, const uint32_t * __restrict__ d_target,
+	uint32_t * __restrict__ resNonce, uint32_t * __restrict__ d_ctx_state)
 {
 	const int thread = blockDim.x * blockIdx.x + threadIdx.x;
-	
+
 	if(thread < threads)
 	{
 		int i;
@@ -172,59 +176,64 @@ __global__ void cryptonight_extra_gpu_final(int threads, uint32_t startNonce, co
 		if(rc == true)
 		{
 			uint32_t tmp = atomicExch(resNonce, nonce);
-			if(tmp != 0xffffffff)
+			if(tmp != UINT32_MAX)
 				resNonce[1] = tmp;
 		}
 	}
 }
 
-__host__ void cryptonight_extra_cpu_setData(int thr_id, const void *data, const void *pTargetIn)
+__host__
+void cryptonight_extra_cpu_setData(int thr_id, const void *data, const void *pTargetIn)
 {
 	cudaMemcpy(d_input[thr_id], data, 19 * sizeof(uint32_t), cudaMemcpyHostToDevice);
 	cudaMemcpy(d_target[thr_id], pTargetIn, 8 * sizeof(uint32_t), cudaMemcpyHostToDevice);
-	cudaMemset(d_resultNonce[thr_id], 0xFF, 2 * sizeof(uint32_t));
-	exit_if_cudaerror(thr_id, __FILE__, __LINE__);
+	cudaMemset(d_result[thr_id], 0xFF, 2 * sizeof(uint32_t));
+	exit_if_cudaerror(thr_id, __FUNCTION__, __LINE__);
 }
 
-__host__ void cryptonight_extra_cpu_init(int thr_id)
+__host__
+void cryptonight_extra_cpu_init(int thr_id)
 {
 	cudaMalloc(&d_input[thr_id], 19 * sizeof(uint32_t));
 	cudaMalloc(&d_target[thr_id], 8 * sizeof(uint32_t));
-	cudaMalloc(&d_resultNonce[thr_id], 2*sizeof(uint32_t));
-	exit_if_cudaerror(thr_id, __FILE__, __LINE__);
+	cudaMalloc(&d_result[thr_id], 2 * sizeof(uint32_t));
+	exit_if_cudaerror(thr_id, __FUNCTION__, __LINE__);
 }
 
-__host__ void cryptonight_extra_cpu_prepare(int thr_id, int threads, uint32_t startNonce, uint32_t *d_ctx_state, uint32_t *d_ctx_a, uint32_t *d_ctx_b, uint32_t *d_ctx_key1, uint32_t *d_ctx_key2, int variant, uint32_t *d_ctx_tweak1_2)
+__host__
+void cryptonight_extra_cpu_prepare(int thr_id, int threads, uint32_t startNonce, uint32_t *d_ctx_state, uint32_t *d_ctx_a, uint32_t *d_ctx_b, uint32_t *d_ctx_key1, uint32_t *d_ctx_key2, int variant, uint32_t *d_ctx_tweak1_2)
 {
 	int threadsperblock = 128;
 
 	dim3 grid((threads + threadsperblock - 1) / threadsperblock);
 	dim3 block(threadsperblock);
 
-	cryptonight_extra_gpu_prepare << <grid, block >> >(threads, d_input[thr_id], startNonce, d_ctx_state, d_ctx_a, d_ctx_b, d_ctx_key1, d_ctx_key2, variant, d_ctx_tweak1_2);
-	exit_if_cudaerror(thr_id, __FILE__, __LINE__);
+	cryptonight_extra_gpu_prepare <<<grid, block>>> (threads, d_input[thr_id], startNonce, d_ctx_state, d_ctx_a, d_ctx_b, d_ctx_key1, d_ctx_key2, variant, d_ctx_tweak1_2);
+	exit_if_cudaerror(thr_id, __FUNCTION__, __LINE__);
 }
 
-__host__ void cryptonight_extra_cpu_final(int thr_id, int threads, uint32_t startNonce, uint32_t *resnonce, uint32_t *d_ctx_state)
+__host__
+void cryptonight_extra_cpu_final(int thr_id, int threads, uint32_t startNonce, uint32_t *resnonce, uint32_t *d_ctx_state)
 {
 	int threadsperblock = 128;
 
 	dim3 grid((threads + threadsperblock - 1) / threadsperblock);
 	dim3 block(threadsperblock);
 
-	exit_if_cudaerror(thr_id, __FILE__, __LINE__);
-	cryptonight_extra_gpu_final << <grid, block >> >(threads, startNonce, d_target[thr_id], d_resultNonce[thr_id], d_ctx_state);
-	exit_if_cudaerror(thr_id, __FILE__, __LINE__);
-	cudaMemcpy(resnonce, d_resultNonce[thr_id], 2 * sizeof(uint32_t), cudaMemcpyDeviceToHost);
-	exit_if_cudaerror(thr_id, __FILE__, __LINE__);
+	exit_if_cudaerror(thr_id, __FUNCTION__, __LINE__);
+	cryptonight_extra_gpu_final <<<grid, block>>> (threads, startNonce, d_target[thr_id], d_result[thr_id], d_ctx_state);
+	exit_if_cudaerror(thr_id, __FUNCTION__, __LINE__);
+	cudaMemcpy(resnonce, d_result[thr_id], 2 * sizeof(uint32_t), cudaMemcpyDeviceToHost);
+	exit_if_cudaerror(thr_id, __FUNCTION__, __LINE__);
 }
 
-__host__ void cryptonight_extra_cpu_free(int thr_id)
+__host__
+void cryptonight_extra_cpu_free(int thr_id)
 {
 	if (d_input[thr_id]) {
 		cudaFree(d_input[thr_id]);
 		cudaFree(d_target[thr_id]);
-		cudaFree(d_resultNonce[thr_id]);
+		cudaFree(d_result[thr_id]);
 		d_input[thr_id] = NULL;
 	}
 }
diff --git a/crypto/cryptonight.cu b/crypto/cryptonight.cu
index 2b82468d03..739321bf25 100644
--- a/crypto/cryptonight.cu
+++ b/crypto/cryptonight.cu
@@ -50,6 +50,10 @@ extern "C" int scanhash_cryptonight(int thr_id, struct work* work, uint32_t max_
 			gpulog_init(LOG_INFO, thr_id, "%s, %d MB available, %hd SMX", device_name[dev_id],
 				mem, device_mpcount[dev_id]);
 
+		if (!device_config[thr_id] && strcmp(device_name[dev_id], "TITAN V") == 0) {
+			device_config[thr_id] = strdup("80x24");
+		}
+
 		if (device_config[thr_id]) {
 			int res = sscanf(device_config[thr_id], "%ux%u", &cn_blocks, &cn_threads);
 			throughput = cuda_default_throughput(thr_id, cn_blocks*cn_threads);
@@ -71,7 +75,7 @@ extern "C" int scanhash_cryptonight(int thr_id, struct work* work, uint32_t max_
 			exit(1);
 		}
 
-		cudaSetDevice(device_map[thr_id]);
+		cudaSetDevice(dev_id);
 		if (opt_cudaschedule == -1 && gpu_threads == 1) {
 			cudaDeviceReset();
 			cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
diff --git a/crypto/xmr-rpc.cpp b/crypto/xmr-rpc.cpp
index 3c6118c44b..6d5f53b281 100644
--- a/crypto/xmr-rpc.cpp
+++ b/crypto/xmr-rpc.cpp
@@ -559,14 +559,11 @@ bool rpc2_stratum_submit(struct pool_infos *pool, struct work *work)
 
 	else if (opt_algo == ALGO_CRYPTONIGHT) {
 		uint32_t nonce = work->nonces[idnonce];
+		int variant = 0;
 		noncestr = bin2hex((unsigned char*) &nonce, 4);
 		last_found_nonce = nonce;
-		int variant = 0;
-
-		if (cryptonight_fork > 1) {
-			variant = ((((unsigned char*)work->data)[0] >= cryptonight_fork) ? ((unsigned char*)work->data)[0] - cryptonight_fork + 1 : 0 );
-		}
-
+		if (cryptonight_fork > 1 && ((unsigned char*)work->data)[0] >= cryptonight_fork)
+			variant = ((unsigned char*)work->data)[0] - cryptonight_fork + 1;
 		cryptonight_hash(hash, data, 76, variant);
 		work_set_target_ratio(work, (uint32_t*) hash);
 	}