Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Mining verus #77

Open
wants to merge 76 commits into
base: windows
Choose a base branch
from
Open
Changes from 1 commit
Commits
Show all changes
76 commits
Select commit Hold shift + click to select a range
3e5b739
Add files via upload
monkins1010 Jun 29, 2018
8088127
ADded verus hash
monkins1010 Sep 8, 2018
849be5e
updated for linux build
monkins1010 Sep 8, 2018
c9afd74
Add files via upload
monkins1010 Sep 8, 2018
2e09238
updated for verus
monkins1010 Sep 8, 2018
448ce4d
Add files via upload
monkins1010 Sep 8, 2018
1a1b196
updated for verus linux build
Sep 9, 2018
5dc622f
new
monkins1010 Sep 12, 2018
b26016f
cuda 8v821 s
monkins1010 Oct 9, 2018
27837f3
Add files via upload
monkins1010 Oct 9, 2018
6b90d23
Update verusscan.cpp
monkins1010 Oct 9, 2018
c984f96
Add files via upload
monkins1010 Oct 13, 2018
3f7b327
Add files via upload
monkins1010 Oct 13, 2018
758b191
Add files via upload
monkins1010 Oct 13, 2018
c707c9e
removed cuda
monkins1010 Oct 13, 2018
1fed8aa
Add files via upload
monkins1010 Oct 13, 2018
3aaa57b
Add files via upload
monkins1010 Oct 13, 2018
f27fab5
Add files via upload
monkins1010 Oct 13, 2018
ea52914
Update ccminer.cpp
monkins1010 Oct 13, 2018
5b00b72
Update miner.h
monkins1010 Oct 13, 2018
6367278
Update verusscan.cpp
monkins1010 Oct 13, 2018
52617d6
nonce changed
monkins1010 Oct 14, 2018
eb5b707
Update verusscan.cpp
monkins1010 Oct 14, 2018
72e3a4a
updated to max 60 threads
monkins1010 Oct 14, 2018
2b66124
Add files via upload
monkins1010 Oct 16, 2018
502c9f4
updated to -marchnative compiler
monkins1010 Oct 28, 2018
62771eb
Update Makefile.am
monkins1010 Oct 28, 2018
89cf75e
re ordered for slight speed
monkins1010 Dec 9, 2018
736084d
update for v3.1
monkins1010 Dec 9, 2018
6dae1ca
Updated for verus2
monkins1010 Feb 3, 2019
bbf54c4
updated for verushashv2
monkins1010 Feb 3, 2019
1c31b30
updated to v2
monkins1010 Feb 3, 2019
f5e0be8
v2
monkins1010 Feb 3, 2019
bbcd94a
fixup key cache adjust
monkins1010 Feb 8, 2019
876c20e
v2 now with smaller fix key
monkins1010 Feb 8, 2019
d55c617
Add files via upload
monkins1010 Feb 27, 2019
f3d5365
Add files via upload
monkins1010 Feb 27, 2019
3e67c4d
fixed immintrin bug
monkins1010 Apr 9, 2019
07079c6
Tidy up and fix leak
monkins1010 Nov 5, 2019
2c97d7e
v2.1
Dec 11, 2019
0285c62
Add files via upload
monkins1010 Dec 12, 2019
ce8ad0e
Add files via upload
monkins1010 Dec 12, 2019
cc57d5b
added auto switch to verushash 2.1
monkins1010 Dec 14, 2019
b3e7a42
updated hash
monkins1010 Dec 14, 2019
8c515b0
Verus 2.1 only
monkins1010 Dec 16, 2019
a2ae360
prefetch
monkins1010 Dec 17, 2019
70c35f8
fixed boos
monkins1010 Dec 18, 2019
1b9ccbb
Added verus2.2
monkins1010 Jun 10, 2020
690a003
Bug fix for autoswitch
monkins1010 Jun 11, 2020
3909afe
Update ccminer-config.h
monkins1010 Jun 11, 2020
c9f500e
Update ccminer-config.h
monkins1010 Jun 11, 2020
5fa9aca
Update configure.ac
monkins1010 Jun 11, 2020
4ad7176
Delete verus.cu
monkins1010 Jun 14, 2020
b0f7f6f
Use all available cores to build
BloodyNora Aug 13, 2020
c4d7c74
Merge pull request #4 from BloodyNora/Verus2.2
monkins1010 Aug 17, 2020
44e8ea9
add support for native building on freebsd (#5)
BloodyNora Aug 30, 2020
7bc126b
3.7.1
monkins1010 Dec 1, 2020
de0e712
Update configure.ac
monkins1010 Dec 1, 2020
115a524
Update ccminer.cpp
monkins1010 Dec 1, 2020
bffb815
Update run
monkins1010 Dec 5, 2020
8e3c4fa
redundant code removed
monkins1010 Nov 29, 2021
8e98718
change to make API show global hashrate
monkins1010 Jul 12, 2022
7a911a0
Merge branch 'Verus2.2' of https://github.com/monkins1010/ccminer int…
monkins1010 Jul 12, 2022
1bd9340
updates for correct solution parsing
monkins1010 May 19, 2023
374d327
Add PBaas/Merged Mining Support (#12)
hellcatz May 20, 2023
7c739a0
Update version
monkins1010 May 20, 2023
1d50a3c
Merge branch 'Verus2.2' of https://github.com/monkins1010/ccminer int…
monkins1010 May 20, 2023
3b78957
Updates for pbaas Activation
monkins1010 May 20, 2023
ac12b21
fix verus difficulty (#10)
avaziman May 20, 2023
9704c1c
Better organize last 15 bytes of solution (#15)
hellcatz May 26, 2023
7af01b3
MACOS UPDATES
monkins1010 May 26, 2023
a09e143
updates for libcurl
monkins1010 May 26, 2023
6e925a0
better nonce randomness
monkins1010 May 26, 2023
7c48189
Allow 3 byte extranonce
monkins1010 May 27, 2023
1997eda
restart on new notify
monkins1010 May 27, 2023
1ba0bf3
optimizations speed increase
monkins1010 Nov 11, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
updated hash
  • Loading branch information
monkins1010 committed Dec 14, 2019
commit b3e7a4239fc05982bcc4fbd2c4fc861f9751b674
296 changes: 275 additions & 21 deletions verus/verus_clhash_portable.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -88,11 +88,9 @@ uint64_t precompReduction64_port(__m128i A) {

// verus intermediate hash extra
__inline __m128i __verusclmulwithoutreduction64alignedrepeat_port(__m128i *randomsource, const __m128i buf[4], uint64_t keyMask,
uint16_t * __restrict fixrand, uint16_t * __restrict fixrandex, uchar version)
uint16_t * __restrict fixrand, uint16_t * __restrict fixrandex)
{
const __m128i *pbuf;

const __m128i pbuf_copy[4] = { _mm_xor_si128(buf[0], buf[2]), _mm_xor_si128(buf[1], buf[3]), buf[2], buf[3] };

// divide key mask by 16 from bytes to __m128i
keyMask >>= 4;
Expand All @@ -115,9 +113,7 @@ __inline __m128i __verusclmulwithoutreduction64alignedrepeat_port(__m128i *rand


// select random start and order of pbuf processing
if (version)
pbuf = pbuf_copy + (selector & 3);
else

pbuf = buf + (selector & 3);
uint32_t prand_idx = (selector >> 5) & keyMask;
uint32_t prandex_idx = (selector >> 32) & keyMask;
Expand Down Expand Up @@ -284,13 +280,8 @@ __inline __m128i __verusclmulwithoutreduction64alignedrepeat_port(__m128i *rand

do
{
uint64_t temp_v;
if(version == 0x30)
temp_v = selector & ((uint64_t)0x10000000) << rounds;
else
temp_v = selector & (0x10000000) << rounds;

if (temp_v)

if (selector & (0x10000000) << rounds)
{
onekey = _mm_load_si128(rc++);
const __m128i temp2 = _mm_load_si128(rounds & 1 ? pbuf : buftmp);
Expand Down Expand Up @@ -321,8 +312,7 @@ __inline __m128i __verusclmulwithoutreduction64alignedrepeat_port(__m128i *rand
}
case 0x18:
{
if (version == 0)
{

const __m128i temp1 = _mm_load_si128(pbuf - (((selector & 1) << 1) - 1));
const __m128i temp2 = _mm_load_si128(prand);
const __m128i add1 = _mm_xor_si128(temp1, temp2);
Expand All @@ -337,9 +327,271 @@ __inline __m128i __verusclmulwithoutreduction64alignedrepeat_port(__m128i *rand
_mm_store_si128(prand, tempb3);

break;

}
case 0x1c:
{
const __m128i temp1 = _mm_load_si128(pbuf);
const __m128i temp2 = _mm_load_si128(prandex);
const __m128i add1 = _mm_xor_si128(temp1, temp2);
const __m128i clprod1 = _mm_clmulepi64_si128(add1, add1, 0x10);
acc = _mm_xor_si128(clprod1, acc);

const __m128i tempa1 = _mm_mulhrs_epi16(acc, temp2);
const __m128i tempa2 = _mm_xor_si128(tempa1, temp2);

const __m128i tempa3 = _mm_load_si128(prand);
_mm_store_si128(prand, tempa2);

acc = _mm_xor_si128(tempa3, acc);

const __m128i tempb1 = _mm_mulhrs_epi16(acc, tempa3);
const __m128i tempb2 = _mm_xor_si128(tempb1, tempa3);
_mm_store_si128(prandex, tempb2);
break;
}
}
fixrand[i] = prand_idx;
fixrandex[i] = prandex_idx;

}
// printf("acc = %08x\n", _mm_cvtsi128_si64(acc));

// exit(0);
return acc;
}


__inline __m128i __verusclmulwithoutreduction64alignedrepeat_port2(__m128i *randomsource, const __m128i buf[4], uint64_t keyMask,
uint16_t * __restrict fixrand, uint16_t * __restrict fixrandex)
{
const __m128i *pbuf;

const __m128i pbuf_copy[4] = { _mm_xor_si128(buf[0], buf[2]), _mm_xor_si128(buf[1], buf[3]), buf[2], buf[3] };

// divide key mask by 16 from bytes to __m128i
keyMask >>= 4;

// the random buffer must have at least 32 16 byte dwords after the keymask to work with this
// algorithm. we take the value from the last element inside the keyMask + 2, as that will never
// be used to xor into the accumulator before it is hashed with other values first
__m128i acc = _mm_load_si128(randomsource + (keyMask + 2));

for (int64_t i = 0; i < 32; i++)
{
//std::cout << "LOOP " << i << " acc: " << LEToHex(acc) << std::endl;

const uint64_t selector = _mm_cvtsi128_si64(acc);

// get two random locations in the key, which will be mutated and swapped
__m128i *prand = randomsource + ((selector >> 5) & keyMask);
__m128i *prandex = randomsource + ((selector >> 32) & keyMask);



// select random start and order of pbuf processing

pbuf = pbuf_copy + (selector & 3);

uint32_t prand_idx = (selector >> 5) & keyMask;
uint32_t prandex_idx = (selector >> 32) & keyMask;

// printf("[i]=%d \t acc = %08x, prand_idx = %d\t, prandex_idx = %d\t selector = %d prand %08x, prandex %08x\n", i, _mm_cvtsi128_si64(acc), prand_idx, prandex_idx, (selector & 0x1c)>>2, _mm_cvtsi128_si64(prand[0]), _mm_cvtsi128_si64(prandex[0]));
//printf("pbuf %08x%08x%08x%08x\n", _mm_cvtsi128_si64(buf[0]), _mm_cvtsi128_si64(buf[1]), _mm_cvtsi128_si64(buf[2]), _mm_cvtsi128_si64(buf[3]));
switch (selector & 0x1c)
{
case 0:
{
const __m128i temp1 = _mm_load_si128(prandex);
const __m128i temp2 = _mm_load_si128(pbuf - (((selector & 1) << 1) - 1));
const __m128i add1 = _mm_xor_si128(temp1, temp2);
const __m128i clprod1 = _mm_clmulepi64_si128(add1, add1, 0x10);
acc = _mm_xor_si128(clprod1, acc);

const __m128i tempa1 = _mm_mulhrs_epi16(acc, temp1);
const __m128i tempa2 = _mm_xor_si128(tempa1, temp1);

const __m128i temp12 = _mm_load_si128(prand);
_mm_store_si128(prand, tempa2);

const __m128i temp22 = _mm_load_si128(pbuf);
const __m128i add12 = _mm_xor_si128(temp12, temp22);
const __m128i clprod12 = _mm_clmulepi64_si128(add12, add12, 0x10);
acc = _mm_xor_si128(clprod12, acc);

const __m128i tempb1 = _mm_mulhrs_epi16(acc, temp12);
const __m128i tempb2 = _mm_xor_si128(tempb1, temp12);
_mm_store_si128(prandex, tempb2);
break;
}
case 4:
{
const __m128i temp1 = _mm_load_si128(prand);
const __m128i temp2 = _mm_load_si128(pbuf);
const __m128i add1 = _mm_xor_si128(temp1, temp2);
const __m128i clprod1 = _mm_clmulepi64_si128(add1, add1, 0x10);
acc = _mm_xor_si128(clprod1, acc);
const __m128i clprod2 = _mm_clmulepi64_si128(temp2, temp2, 0x10);
acc = _mm_xor_si128(clprod2, acc);

const __m128i tempa1 = _mm_mulhrs_epi16(acc, temp1);
const __m128i tempa2 = _mm_xor_si128(tempa1, temp1);

const __m128i temp12 = _mm_load_si128(prandex);
_mm_store_si128(prandex, tempa2);

const __m128i temp22 = _mm_load_si128(pbuf - (((selector & 1) << 1) - 1));
const __m128i add12 = _mm_xor_si128(temp12, temp22);
acc = _mm_xor_si128(add12, acc);

const __m128i tempb1 = _mm_mulhrs_epi16(acc, temp12);
const __m128i tempb2 = _mm_xor_si128(tempb1, temp12);
_mm_store_si128(prand, tempb2);
break;
}
case 8:
{
const __m128i temp1 = _mm_load_si128(prandex);
const __m128i temp2 = _mm_load_si128(pbuf);
const __m128i add1 = _mm_xor_si128(temp1, temp2);
acc = _mm_xor_si128(add1, acc);

const __m128i tempa1 = _mm_mulhrs_epi16(acc, temp1);
const __m128i tempa2 = _mm_xor_si128(tempa1, temp1);

const __m128i temp12 = _mm_load_si128(prand);
_mm_store_si128(prand, tempa2);

const __m128i temp22 = _mm_load_si128(pbuf - (((selector & 1) << 1) - 1));
const __m128i add12 = _mm_xor_si128(temp12, temp22);
const __m128i clprod12 = _mm_clmulepi64_si128(add12, add12, 0x10);
acc = _mm_xor_si128(clprod12, acc);
const __m128i clprod22 = _mm_clmulepi64_si128(temp22, temp22, 0x10);
acc = _mm_xor_si128(clprod22, acc);

const __m128i tempb1 = _mm_mulhrs_epi16(acc, temp12);
const __m128i tempb2 = _mm_xor_si128(tempb1, temp12);
_mm_store_si128(prandex, tempb2);
break;
}
case 0xc:
{
const __m128i temp1 = _mm_load_si128(prand);
const __m128i temp2 = _mm_load_si128(pbuf - (((selector & 1) << 1) - 1));
const __m128i add1 = _mm_xor_si128(temp1, temp2);

// cannot be zero here
const int32_t divisor = (uint32_t)selector;

acc = _mm_xor_si128(add1, acc);

const int64_t dividend = _mm_cvtsi128_si64(acc);
const __m128i modulo = _mm_cvtsi32_si128(dividend % divisor);
acc = _mm_xor_si128(modulo, acc);

const __m128i tempa1 = _mm_mulhrs_epi16(acc, temp1);
const __m128i tempa2 = _mm_xor_si128(tempa1, temp1);

if (dividend & 1)
{
const __m128i temp12 = _mm_load_si128(prandex);
_mm_store_si128(prandex, tempa2);

const __m128i temp22 = _mm_load_si128(pbuf);
const __m128i add12 = _mm_xor_si128(temp12, temp22);
const __m128i clprod12 = _mm_clmulepi64_si128(add12, add12, 0x10);
acc = _mm_xor_si128(clprod12, acc);
const __m128i clprod22 = _mm_clmulepi64_si128(temp22, temp22, 0x10);
acc = _mm_xor_si128(clprod22, acc);

const __m128i tempb1 = _mm_mulhrs_epi16(acc, temp12);
const __m128i tempb2 = _mm_xor_si128(tempb1, temp12);
_mm_store_si128(prand, tempb2);
}
else
{
const __m128i tempb3 = _mm_load_si128(prandex);
_mm_store_si128(prandex, tempa2);
_mm_store_si128(prand, tempb3);
}
break;
}
case 0x10:
{
// a few AES operations
const __m128i *rc = prand;
__m128i tmp;

__m128i temp1 = _mm_load_si128(pbuf - (((selector & 1) << 1) - 1));
__m128i temp2 = _mm_load_si128(pbuf);

AES2(temp1, temp2, 0);
MIX2_EMU(temp1, temp2);

AES2(temp1, temp2, 4);
MIX2_EMU(temp1, temp2);

AES2(temp1, temp2, 8);
MIX2_EMU(temp1, temp2);

acc = _mm_xor_si128(temp2, _mm_xor_si128(temp1, acc));

const __m128i tempa1 = _mm_load_si128(prand);
const __m128i tempa2 = _mm_mulhrs_epi16(acc, tempa1);
const __m128i tempa3 = _mm_xor_si128(tempa1, tempa2);

const __m128i tempa4 = _mm_load_si128(prandex);
_mm_store_si128(prandex, tempa3);
_mm_store_si128(prand, tempa4);
break;
}
case 0x14:
{
// we'll just call this one the monkins loop, inspired by Chris - modified to cast to uint64_t on shift for more variability in the loop
const __m128i *buftmp = pbuf - (((selector & 1) << 1) - 1);
__m128i tmp; // used by MIX2

uint64_t rounds = selector >> 61; // loop randomly between 1 and 8 times
__m128i *rc = prand;
uint64_t aesroundoffset = 0;
__m128i onekey;

do
{


if (selector & ((uint64_t)0x10000000) << rounds)
{
onekey = _mm_load_si128(rc++);
const __m128i temp2 = _mm_load_si128(rounds & 1 ? pbuf : buftmp);
const __m128i add1 = _mm_xor_si128(onekey, temp2);
const __m128i clprod1 = _mm_clmulepi64_si128(add1, add1, 0x10);
acc = _mm_xor_si128(clprod1, acc);
}
else
{
onekey = _mm_load_si128(rc++);
__m128i temp2 = _mm_load_si128(rounds & 1 ? buftmp : pbuf);
AES2(onekey, temp2, aesroundoffset);
aesroundoffset += 4;
MIX2_EMU(onekey, temp2);
acc = _mm_xor_si128(onekey, acc);
acc = _mm_xor_si128(temp2, acc);
}
} while (rounds--);

const __m128i tempa1 = _mm_load_si128(prand);
const __m128i tempa2 = _mm_mulhrs_epi16(acc, tempa1);
const __m128i tempa3 = _mm_xor_si128(tempa1, tempa2);

const __m128i tempa4 = _mm_load_si128(prandex);
_mm_store_si128(prandex, tempa3);
_mm_store_si128(prand, tempa4);
break;
}
case 0x18:
{

const __m128i *buftmp = pbuf - (((selector & 1) << 1) - 1);
__m128i tmp; // used by MIX2

Expand Down Expand Up @@ -377,8 +629,8 @@ __inline __m128i __verusclmulwithoutreduction64alignedrepeat_port(__m128i *rand
_mm_store_si128(prandex, tempa4);
_mm_store_si128(prand, onekey);
break;

}
}
case 0x1c:
{
const __m128i temp1 = _mm_load_si128(pbuf);
Expand All @@ -405,20 +657,22 @@ __inline __m128i __verusclmulwithoutreduction64alignedrepeat_port(__m128i *rand
fixrandex[i] = prandex_idx;

}
// printf("acc = %08x\n", _mm_cvtsi128_si64(acc));
// printf("acc = %08x\n", _mm_cvtsi128_si64(acc));

// exit(0);
// exit(0);
return acc;
}

// hashes 64 bytes only by doing a carryless multiplication and reduction of the repeated 64 byte sequence 16 times,
// returning a 64 bit hash value
//
// Parameters:
//   random    - key buffer; reinterpreted below as an array of __m128i and handed to the inner routine
//   buf       - the 64 input bytes; reinterpreted below as an array of const __m128i
//   keyMask   - forwarded unchanged to the inner routine
//   fixrand, fixrandex - forwarded unchanged; the inner routines store prand_idx / prandex_idx
//               into fixrand[i] / fixrandex[i] on each loop iteration
//   version   - zero selects ..._port, non-zero selects ..._port2
//
// Returns precompReduction64_port() applied to the accumulator after it has been XORed with
// lazyLengthHash_port(1024, 64).
uint64_t verusclhash_port(void * random, const unsigned char buf[64], uint64_t keyMask, uint16_t * __restrict fixrand, uint16_t * __restrict fixrandex, uchar version) {
// NOTE(review): m is not referenced anywhere in the visible body.
const unsigned int m = 128;// we process the data in chunks of 16 cache lines
// view the caller's buffers as 128-bit lanes for the SSE routines
__m128i * rs64 = (__m128i *)random;
const __m128i * string = (const __m128i *) buf;

// NOTE(review): this line and the declaration just below both define `acc`, and this call passes
// `version` as an argument. This looks like the diff's removed (pre-change) line shown next to
// its replacement - confirm only one of the two exists in the actual file.
__m128i acc = __verusclmulwithoutreduction64alignedrepeat_port(rs64, string, keyMask, fixrand, fixrandex, version);
__m128i acc;
// dispatch on version: non-zero uses the _port2 variant, zero uses the original _port variant
if (version)
acc = __verusclmulwithoutreduction64alignedrepeat_port2(rs64, string, keyMask, fixrand, fixrandex);
else
acc = __verusclmulwithoutreduction64alignedrepeat_port(rs64, string, keyMask, fixrand, fixrandex);
// fold the length term into the accumulator, then reduce the 128-bit value to the 64-bit result
acc = _mm_xor_si128(acc, lazyLengthHash_port(1024, 64));
return precompReduction64_port(acc);
}