Skip to content

Commit

Permalink
Merge pull request fireice-uk#2326 from psychocrypt/topic-supportNonA…
Browse files Browse the repository at this point in the history
…mdBfe

cryptonight_r without mediaops
  • Loading branch information
fireice-uk authored Mar 11, 2019
2 parents 2276470 + bd96c36 commit f8aa3b2
Show file tree
Hide file tree
Showing 2 changed files with 66 additions and 67 deletions.
69 changes: 2 additions & 67 deletions xmrstak/backend/amd/amd_gpu/opencl/cryptonight.cl
Original file line number Diff line number Diff line change
Expand Up @@ -32,69 +32,6 @@ R"===(
#define cryptonight_conceal 14
#define cryptonight_v8_reversewaltz 17

/* For Mesa clover support */
#ifdef cl_clang_storage_class_specifiers
# pragma OPENCL EXTENSION cl_clang_storage_class_specifiers : enable
#endif

#ifdef cl_amd_media_ops
#pragma OPENCL EXTENSION cl_amd_media_ops : enable
#else
/* taken from https://www.khronos.org/registry/OpenCL/extensions/amd/cl_amd_media_ops.txt
* Build-in Function
* uintn amd_bitalign (uintn src0, uintn src1, uintn src2)
* Description
* dst.s0 = (uint) (((((ulong)src0.s0) << 32) | (ulong)src1.s0) >> (src2.s0 & 31))
* similar operation applied to other components of the vectors.
*
* The implemented function is modified because the last is in our case always a scalar.
* We can ignore the bitwise AND operation.
*/
inline uint2 amd_bitalign( const uint2 src0, const uint2 src1, const uint src2)
{
uint2 result;
result.s0 = (uint) (((((ulong)src0.s0) << 32) | (ulong)src1.s0) >> (src2));
result.s1 = (uint) (((((ulong)src0.s1) << 32) | (ulong)src1.s1) >> (src2));
return result;
}
#endif

#ifdef cl_amd_media_ops2
#pragma OPENCL EXTENSION cl_amd_media_ops2 : enable
#else
/* taken from: https://www.khronos.org/registry/OpenCL/extensions/amd/cl_amd_media_ops2.txt
* Built-in Function:
* uintn amd_bfe (uintn src0, uintn src1, uintn src2)
* Description
* NOTE: operator >> below represent logical right shift
* offset = src1.s0 & 31;
* width = src2.s0 & 31;
* if width = 0
* dst.s0 = 0;
* else if (offset + width) < 32
* dst.s0 = (src0.s0 << (32 - offset - width)) >> (32 - width);
* else
* dst.s0 = src0.s0 >> offset;
* similar operation applied to other components of the vectors
*/
inline int amd_bfe(const uint src0, const uint offset, const uint width)
{
/* casts are removed because we can implement everything as uint
* int offset = src1;
* int width = src2;
* remove check for edge case, this function is always called with
* `width==8`
* @code
* if ( width == 0 )
* return 0;
* @endcode
*/
if ( (offset + width) < 32u )
return (src0 << (32u - offset - width)) >> (32u - width);

return src0 >> offset;
}
#endif

static const __constant ulong keccakf_rndc[24] =
{
Expand Down Expand Up @@ -128,6 +65,8 @@ static const __constant uchar sbox[256] =
0x8C, 0xA1, 0x89, 0x0D, 0xBF, 0xE6, 0x42, 0x68, 0x41, 0x99, 0x2D, 0x0F, 0xB0, 0x54, 0xBB, 0x16
};

//#include "opencl/wolf-aes.cl"
XMRSTAK_INCLUDE_WOLF_AES

void keccakf1600(ulong *s)
{
Expand Down Expand Up @@ -355,8 +294,6 @@ inline uint getIdx()
XMRSTAK_INCLUDE_FAST_INT_MATH_V2
//#include "fast_div_heavy.cl"
XMRSTAK_INCLUDE_FAST_DIV_HEAVY
//#include "opencl/wolf-aes.cl"
XMRSTAK_INCLUDE_WOLF_AES
//#include "opencl/wolf-skein.cl"
XMRSTAK_INCLUDE_WOLF_SKEIN
//#include "opencl/jh.cl"
Expand Down Expand Up @@ -461,8 +398,6 @@ void CNKeccak(ulong *output, ulong *input)

static const __constant uchar rcon[8] = { 0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40 };

#define BYTE(x, y) (amd_bfe((x), (y) << 3U, 8U))

#define SubWord(inw) ((sbox[BYTE(inw, 3)] << 24) | (sbox[BYTE(inw, 2)] << 16) | (sbox[BYTE(inw, 1)] << 8) | sbox[BYTE(inw, 0)])

void AESExpandKey256(uint *keybuf)
Expand Down
64 changes: 64 additions & 0 deletions xmrstak/backend/amd/amd_gpu/opencl/wolf-aes.cl
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,70 @@ R"===(
#ifndef WOLF_AES_CL
#define WOLF_AES_CL

/* For Mesa clover support */
#ifdef cl_clang_storage_class_specifiers
# pragma OPENCL EXTENSION cl_clang_storage_class_specifiers : enable
#endif

#ifdef cl_amd_media_ops
#pragma OPENCL EXTENSION cl_amd_media_ops : enable
#else
/* taken from https://www.khronos.org/registry/OpenCL/extensions/amd/cl_amd_media_ops.txt
* Build-in Function
* uintn amd_bitalign (uintn src0, uintn src1, uintn src2)
* Description
* dst.s0 = (uint) (((((ulong)src0.s0) << 32) | (ulong)src1.s0) >> (src2.s0 & 31))
* similar operation applied to other components of the vectors.
*
* The implemented function is modified because the last is in our case always a scalar.
* We can ignore the bitwise AND operation.
*/
inline uint2 amd_bitalign( const uint2 src0, const uint2 src1, const uint src2)
{
uint2 result;
result.s0 = (uint) (((((ulong)src0.s0) << 32) | (ulong)src1.s0) >> (src2));
result.s1 = (uint) (((((ulong)src0.s1) << 32) | (ulong)src1.s1) >> (src2));
return result;
}
#endif

#ifdef cl_amd_media_ops2
#pragma OPENCL EXTENSION cl_amd_media_ops2 : enable
#else
/* taken from: https://www.khronos.org/registry/OpenCL/extensions/amd/cl_amd_media_ops2.txt
* Built-in Function:
* uintn amd_bfe (uintn src0, uintn src1, uintn src2)
* Description
* NOTE: operator >> below represent logical right shift
* offset = src1.s0 & 31;
* width = src2.s0 & 31;
* if width = 0
* dst.s0 = 0;
* else if (offset + width) < 32
* dst.s0 = (src0.s0 << (32 - offset - width)) >> (32 - width);
* else
* dst.s0 = src0.s0 >> offset;
* similar operation applied to other components of the vectors
*/
inline int amd_bfe(const uint src0, const uint offset, const uint width)
{
/* casts are removed because we can implement everything as uint
* int offset = src1;
* int width = src2;
* remove check for edge case, this function is always called with
* `width==8`
* @code
* if ( width == 0 )
* return 0;
* @endcode
*/
if ( (offset + width) < 32u )
return (src0 << (32u - offset - width)) >> (32u - width);

return src0 >> offset;
}
#endif

// AES table - the other three are generated on the fly

static const __constant uint AES0_C[256] =
Expand Down

0 comments on commit f8aa3b2

Please sign in to comment.