From e8fe8b0de9f2a1bc2a14bb469129697070f9077a Mon Sep 17 00:00:00 2001 From: Rex Xu Date: Tue, 26 Sep 2017 15:42:56 +0800 Subject: [PATCH] Implement extension GL_NV_shader_atomic_int64 --- SPIRV/GlslangToSpv.cpp | 7 +- Test/baseResults/spv.atomicInt64.comp.out | 215 +++++++++++++++++++++ Test/spv.atomicInt64.comp | 79 ++++++++ glslang/MachineIndependent/Initialize.cpp | 26 +++ glslang/MachineIndependent/ParseHelper.cpp | 17 ++ glslang/MachineIndependent/Versions.cpp | 2 + glslang/MachineIndependent/Versions.h | 1 + gtests/Spv.FromFile.cpp | 3 +- 8 files changed, 347 insertions(+), 3 deletions(-) create mode 100644 Test/baseResults/spv.atomicInt64.comp.out create mode 100644 Test/spv.atomicInt64.comp diff --git a/SPIRV/GlslangToSpv.cpp b/SPIRV/GlslangToSpv.cpp index 9dec286cd0..54dc61f356 100755 --- a/SPIRV/GlslangToSpv.cpp +++ b/SPIRV/GlslangToSpv.cpp @@ -4749,12 +4749,12 @@ spv::Id TGlslangToSpvTraverser::createAtomicOperation(glslang::TOperator op, spv case glslang::EOpAtomicMin: case glslang::EOpImageAtomicMin: case glslang::EOpAtomicCounterMin: - opCode = typeProxy == glslang::EbtUint ? spv::OpAtomicUMin : spv::OpAtomicSMin; + opCode = (typeProxy == glslang::EbtUint || typeProxy == glslang::EbtUint64) ? spv::OpAtomicUMin : spv::OpAtomicSMin; break; case glslang::EOpAtomicMax: case glslang::EOpImageAtomicMax: case glslang::EOpAtomicCounterMax: - opCode = typeProxy == glslang::EbtUint ? spv::OpAtomicUMax : spv::OpAtomicSMax; + opCode = (typeProxy == glslang::EbtUint || typeProxy == glslang::EbtUint64) ? spv::OpAtomicUMax : spv::OpAtomicSMax; break; case glslang::EOpAtomicAnd: case glslang::EOpImageAtomicAnd: @@ -4795,6 +4795,9 @@ spv::Id TGlslangToSpvTraverser::createAtomicOperation(glslang::TOperator op, spv break; } + if (typeProxy == glslang::EbtInt64 || typeProxy == glslang::EbtUint64) + builder.addCapability(spv::CapabilityInt64Atomics); + // Sort out the operands // - mapping from glslang -> SPV // - there are extra SPV operands with no glslang source diff --git a/Test/baseResults/spv.atomicInt64.comp.out b/Test/baseResults/spv.atomicInt64.comp.out new file mode 100644 index 0000000000..31a1d632f4 --- /dev/null +++ b/Test/baseResults/spv.atomicInt64.comp.out @@ -0,0 +1,215 @@ +spv.atomicInt64.comp +// Module Version 10000 +// Generated by (magic number): 80001 +// Id's are bound by 149 + + Capability Shader + Capability Int64 + Capability Int64Atomics + 1: ExtInstImport "GLSL.std.450" + MemoryModel Logical GLSL450 + EntryPoint GLCompute 4 "main" + ExecutionMode 4 LocalSize 16 16 1 + Source GLSL 450 + SourceExtension "GL_ARB_gpu_shader_int64" + SourceExtension "GL_NV_shader_atomic_int64" + Name 4 "main" + Name 8 "i64" + Name 12 "u64" + Name 14 "Buffer" + MemberName 14(Buffer) 0 "i64" + MemberName 14(Buffer) 1 "u64" + Name 16 "buf" + Name 84 "Struct" + MemberName 84(Struct) 0 "i64" + MemberName 84(Struct) 1 "u64" + Name 86 "s" + MemberDecorate 14(Buffer) 0 Offset 0 + MemberDecorate 14(Buffer) 1 Offset 8 + Decorate 14(Buffer) BufferBlock + Decorate 16(buf) DescriptorSet 0 + Decorate 16(buf) Binding 0 + Decorate 148 BuiltIn WorkgroupSize + 2: TypeVoid + 3: TypeFunction 2 + 6: TypeInt 64 1 + 7: TypePointer Function 6(int) + 9: 6(int) Constant 0 0 + 10: TypeInt 64 0 + 11: TypePointer Function 10(int) + 13: 10(int) Constant 0 0 + 14(Buffer): TypeStruct 6(int) 10(int) + 15: TypePointer Uniform 14(Buffer) + 16(buf): 15(ptr) Variable Uniform + 17: TypeInt 32 1 + 18: 17(int) Constant 0 + 19: TypePointer Uniform 6(int) + 21: 6(int) Constant 4294967272 4294967295 + 22: TypeInt 32 0 + 23: 22(int) Constant 1 + 24: 22(int) Constant 0 + 28: 17(int) Constant 1 + 29: TypePointer Uniform 10(int) + 31: 10(int) Constant 15 0 + 84(Struct): TypeStruct 6(int) 10(int) + 85: TypePointer Workgroup 84(Struct) + 86(s): 85(ptr) Variable Workgroup + 87: TypePointer Workgroup 6(int) + 92: TypePointer Workgroup 10(int) + 146: TypeVector 22(int) 3 + 147: 22(int) Constant 16 + 148: 146(ivec3) ConstantComposite 147 147 23 + 4(main): 2 Function None 3 + 5: Label + 8(i64): 7(ptr) Variable Function + 12(u64): 11(ptr) Variable Function + Store 8(i64) 9 + Store 12(u64) 13 + 20: 19(ptr) AccessChain 16(buf) 18 + 25: 6(int) AtomicSMin 20 23 24 21 + 26: 6(int) Load 8(i64) + 27: 6(int) IAdd 26 25 + Store 8(i64) 27 + 30: 29(ptr) AccessChain 16(buf) 28 + 32: 10(int) AtomicUMin 30 23 24 31 + 33: 10(int) Load 12(u64) + 34: 10(int) IAdd 33 32 + Store 12(u64) 34 + 35: 19(ptr) AccessChain 16(buf) 18 + 36: 6(int) AtomicSMax 35 23 24 21 + 37: 6(int) Load 8(i64) + 38: 6(int) IAdd 37 36 + Store 8(i64) 38 + 39: 29(ptr) AccessChain 16(buf) 28 + 40: 10(int) AtomicUMax 39 23 24 31 + 41: 10(int) Load 12(u64) + 42: 10(int) IAdd 41 40 + Store 12(u64) 42 + 43: 19(ptr) AccessChain 16(buf) 18 + 44: 6(int) AtomicAnd 43 23 24 21 + 45: 6(int) Load 8(i64) + 46: 6(int) IAdd 45 44 + Store 8(i64) 46 + 47: 29(ptr) AccessChain 16(buf) 28 + 48: 10(int) AtomicAnd 47 23 24 31 + 49: 10(int) Load 12(u64) + 50: 10(int) IAdd 49 48 + Store 12(u64) 50 + 51: 19(ptr) AccessChain 16(buf) 18 + 52: 6(int) AtomicOr 51 23 24 21 + 53: 6(int) Load 8(i64) + 54: 6(int) IAdd 53 52 + Store 8(i64) 54 + 55: 29(ptr) AccessChain 16(buf) 28 + 56: 10(int) AtomicOr 55 23 24 31 + 57: 10(int) Load 12(u64) + 58: 10(int) IAdd 57 56 + Store 12(u64) 58 + 59: 19(ptr) AccessChain 16(buf) 18 + 60: 6(int) AtomicXor 59 23 24 21 + 61: 6(int) Load 8(i64) + 62: 6(int) IAdd 61 60 + Store 8(i64) 62 + 63: 29(ptr) AccessChain 16(buf) 28 + 64: 10(int) AtomicXor 63 23 24 31 + 65: 10(int) Load 12(u64) + 66: 10(int) IAdd 65 64 + Store 12(u64) 66 + 67: 19(ptr) AccessChain 16(buf) 18 + 68: 6(int) AtomicIAdd 67 23 24 21 + 69: 6(int) Load 8(i64) + 70: 6(int) IAdd 69 68 + Store 8(i64) 70 + 71: 19(ptr) AccessChain 16(buf) 18 + 72: 6(int) AtomicExchange 71 23 24 21 + 73: 6(int) Load 8(i64) + 74: 6(int) IAdd 73 72 + Store 8(i64) 74 + 75: 19(ptr) AccessChain 16(buf) 18 + 76: 6(int) Load 8(i64) + 77: 6(int) AtomicCompareExchange 75 23 24 24 76 21 + 78: 6(int) Load 8(i64) + 79: 6(int) IAdd 78 77 + Store 8(i64) 79 + 80: 6(int) Load 8(i64) + 81: 19(ptr) AccessChain 16(buf) 18 + Store 81 80 + 82: 10(int) Load 12(u64) + 83: 29(ptr) AccessChain 16(buf) 28 + Store 83 82 + Store 8(i64) 9 + Store 12(u64) 13 + 88: 87(ptr) AccessChain 86(s) 18 + 89: 6(int) AtomicSMin 88 23 24 21 + 90: 6(int) Load 8(i64) + 91: 6(int) IAdd 90 89 + Store 8(i64) 91 + 93: 92(ptr) AccessChain 86(s) 28 + 94: 10(int) AtomicUMin 93 23 24 31 + 95: 10(int) Load 12(u64) + 96: 10(int) IAdd 95 94 + Store 12(u64) 96 + 97: 87(ptr) AccessChain 86(s) 18 + 98: 6(int) AtomicSMax 97 23 24 21 + 99: 6(int) Load 8(i64) + 100: 6(int) IAdd 99 98 + Store 8(i64) 100 + 101: 92(ptr) AccessChain 86(s) 28 + 102: 10(int) AtomicUMax 101 23 24 31 + 103: 10(int) Load 12(u64) + 104: 10(int) IAdd 103 102 + Store 12(u64) 104 + 105: 87(ptr) AccessChain 86(s) 18 + 106: 6(int) AtomicAnd 105 23 24 21 + 107: 6(int) Load 8(i64) + 108: 6(int) IAdd 107 106 + Store 8(i64) 108 + 109: 92(ptr) AccessChain 86(s) 28 + 110: 10(int) AtomicAnd 109 23 24 31 + 111: 10(int) Load 12(u64) + 112: 10(int) IAdd 111 110 + Store 12(u64) 112 + 113: 87(ptr) AccessChain 86(s) 18 + 114: 6(int) AtomicOr 113 23 24 21 + 115: 6(int) Load 8(i64) + 116: 6(int) IAdd 115 114 + Store 8(i64) 116 + 117: 92(ptr) AccessChain 86(s) 28 + 118: 10(int) AtomicOr 117 23 24 31 + 119: 10(int) Load 12(u64) + 120: 10(int) IAdd 119 118 + Store 12(u64) 120 + 121: 87(ptr) AccessChain 86(s) 18 + 122: 6(int) AtomicXor 121 23 24 21 + 123: 6(int) Load 8(i64) + 124: 6(int) IAdd 123 122 + Store 8(i64) 124 + 125: 92(ptr) AccessChain 86(s) 28 + 126: 10(int) AtomicXor 125 23 24 31 + 127: 10(int) Load 12(u64) + 128: 10(int) IAdd 127 126 + Store 12(u64) 128 + 129: 87(ptr) AccessChain 86(s) 18 + 130: 6(int) AtomicIAdd 129 23 24 21 + 131: 6(int) Load 8(i64) + 132: 6(int) IAdd 131 130 + Store 8(i64) 132 + 133: 87(ptr) AccessChain 86(s) 18 + 134: 6(int) AtomicExchange 133 23 24 21 + 135: 6(int) Load 8(i64) + 136: 6(int) IAdd 135 134 + Store 8(i64) 136 + 137: 87(ptr) AccessChain 86(s) 18 + 138: 6(int) Load 8(i64) + 139: 6(int) AtomicCompareExchange 137 23 24 24 138 21 + 140: 6(int) Load 8(i64) + 141: 6(int) IAdd 140 139 + Store 8(i64) 141 + 142: 6(int) Load 8(i64) + 143: 87(ptr) AccessChain 86(s) 18 + Store 143 142 + 144: 10(int) Load 12(u64) + 145: 92(ptr) AccessChain 86(s) 28 + Store 145 144 + Return + FunctionEnd diff --git a/Test/spv.atomicInt64.comp b/Test/spv.atomicInt64.comp new file mode 100644 index 0000000000..a56c7ec030 --- /dev/null +++ b/Test/spv.atomicInt64.comp @@ -0,0 +1,79 @@ +#version 450 core + +#extension GL_ARB_gpu_shader_int64: enable +#extension GL_NV_shader_atomic_int64: enable + +layout(local_size_x = 16, local_size_y = 16) in; + +layout(binding = 0) buffer Buffer +{ + int64_t i64; + uint64_t u64; +} buf; + +struct Struct +{ + int64_t i64; + uint64_t u64; +}; + +shared Struct s; + +void main() +{ + const int64_t i64c = -24; + const uint64_t u64c = 0xF00000000F; + + // Test shader storage block + int64_t i64 = 0; + uint64_t u64 = 0; + + i64 += atomicMin(buf.i64, i64c); + u64 += atomicMin(buf.u64, u64c); + + i64 += atomicMax(buf.i64, i64c); + u64 += atomicMax(buf.u64, u64c); + + i64 += atomicAnd(buf.i64, i64c); + u64 += atomicAnd(buf.u64, u64c); + + i64 += atomicOr(buf.i64, i64c); + u64 += atomicOr(buf.u64, u64c); + + i64 += atomicXor(buf.i64, i64c); + u64 += atomicXor(buf.u64, u64c); + + i64 += atomicAdd(buf.i64, i64c); + i64 += atomicExchange(buf.i64, i64c); + i64 += atomicCompSwap(buf.i64, i64c, i64); + + buf.i64 = i64; + buf.u64 = u64; + + // Test shared variable + i64 = 0; + u64 = 0; + + i64 += atomicMin(s.i64, i64c); + u64 += atomicMin(s.u64, u64c); + + i64 += atomicMax(s.i64, i64c); + u64 += atomicMax(s.u64, u64c); + + i64 += atomicAnd(s.i64, i64c); + u64 += atomicAnd(s.u64, u64c); + + i64 += atomicOr(s.i64, i64c); + u64 += atomicOr(s.u64, u64c); + + i64 += atomicXor(s.i64, i64c); + u64 += atomicXor(s.u64, u64c); + + i64 += atomicAdd(s.i64, i64c); + i64 += atomicExchange(s.i64, i64c); + i64 += atomicCompSwap(s.i64, i64c, i64); + + s.i64 = i64; + s.u64 = u64; +} + diff --git a/glslang/MachineIndependent/Initialize.cpp b/glslang/MachineIndependent/Initialize.cpp index f8138ffc29..a837efddbe 100644 --- a/glslang/MachineIndependent/Initialize.cpp +++ b/glslang/MachineIndependent/Initialize.cpp @@ -923,6 +923,32 @@ void TBuiltIns::initialize(int version, EProfile profile, const SpvVersion& spvV "\n"); } +#ifdef NV_EXTENSIONS + if (profile != EEsProfile && version >= 440) { + commonBuiltins.append( + "uint64_t atomicMin(coherent volatile inout uint64_t, uint64_t);" + " int64_t atomicMin(coherent volatile inout int64_t, int64_t);" + + "uint64_t atomicMax(coherent volatile inout uint64_t, uint64_t);" + " int64_t atomicMax(coherent volatile inout int64_t, int64_t);" + + "uint64_t atomicAnd(coherent volatile inout uint64_t, uint64_t);" + " int64_t atomicAnd(coherent volatile inout int64_t, int64_t);" + + "uint64_t atomicOr (coherent volatile inout uint64_t, uint64_t);" + " int64_t atomicOr (coherent volatile inout int64_t, int64_t);" + + "uint64_t atomicXor(coherent volatile inout uint64_t, uint64_t);" + " int64_t atomicXor(coherent volatile inout int64_t, int64_t);" + + " int64_t atomicAdd(coherent volatile inout int64_t, int64_t);" + " int64_t atomicExchange(coherent volatile inout int64_t, int64_t);" + " int64_t atomicCompSwap(coherent volatile inout int64_t, int64_t, int64_t);" + + "\n"); + } +#endif + if ((profile == EEsProfile && version >= 310) || (profile != EEsProfile && version >= 450)) { commonBuiltins.append( diff --git a/glslang/MachineIndependent/ParseHelper.cpp b/glslang/MachineIndependent/ParseHelper.cpp index 7f721a04fe..33e35e8752 100644 --- a/glslang/MachineIndependent/ParseHelper.cpp +++ b/glslang/MachineIndependent/ParseHelper.cpp @@ -1551,6 +1551,23 @@ void TParseContext::builtInOpCheck(const TSourceLoc& loc, const TFunction& fnCan break; } +#ifdef NV_EXTENSIONS + case EOpAtomicAdd: + case EOpAtomicMin: + case EOpAtomicMax: + case EOpAtomicAnd: + case EOpAtomicOr: + case EOpAtomicXor: + case EOpAtomicExchange: + case EOpAtomicCompSwap: + { + if (arg0->getType().getBasicType() == EbtInt64 || arg0->getType().getBasicType() == EbtUint64) + requireExtensions(loc, 1, &E_GL_NV_shader_atomic_int64, fnCandidate.getName().c_str()); + + break; + } +#endif + case EOpInterpolateAtCentroid: case EOpInterpolateAtSample: case EOpInterpolateAtOffset: diff --git a/glslang/MachineIndependent/Versions.cpp b/glslang/MachineIndependent/Versions.cpp index 807fe42b66..b1893b37ea 100644 --- a/glslang/MachineIndependent/Versions.cpp +++ b/glslang/MachineIndependent/Versions.cpp @@ -211,6 +211,7 @@ void TParseVersions::initializeExtensionBehavior() extensionBehavior[E_GL_NV_viewport_array2] = EBhDisable; extensionBehavior[E_GL_NV_stereo_view_rendering] = EBhDisable; extensionBehavior[E_GL_NVX_multiview_per_view_attributes] = EBhDisable; + extensionBehavior[E_GL_NV_shader_atomic_int64] = EBhDisable; #endif // AEP @@ -343,6 +344,7 @@ void TParseVersions::getPreamble(std::string& preamble) "#define GL_NV_sample_mask_override_coverage 1\n" "#define GL_NV_geometry_shader_passthrough 1\n" "#define GL_NV_viewport_array2 1\n" + "#define GL_NV_shader_atomic_int64 1\n" #endif ; diff --git a/glslang/MachineIndependent/Versions.h b/glslang/MachineIndependent/Versions.h index 9399e9dd20..bd57103afe 100644 --- a/glslang/MachineIndependent/Versions.h +++ b/glslang/MachineIndependent/Versions.h @@ -182,6 +182,7 @@ const char* const E_SPV_NV_geometry_shader_passthrough = "GL_NV_geometr const char* const E_GL_NV_viewport_array2 = "GL_NV_viewport_array2"; const char* const E_GL_NV_stereo_view_rendering = "GL_NV_stereo_view_rendering"; const char* const E_GL_NVX_multiview_per_view_attributes = "GL_NVX_multiview_per_view_attributes"; +const char* const E_GL_NV_shader_atomic_int64 = "GL_NV_shader_atomic_int64"; // Arrays of extensions for the above viewportEXTs duplications diff --git a/gtests/Spv.FromFile.cpp b/gtests/Spv.FromFile.cpp index 4cfcf48bcb..c37e460817 100644 --- a/gtests/Spv.FromFile.cpp +++ b/gtests/Spv.FromFile.cpp @@ -410,7 +410,7 @@ INSTANTIATE_TEST_CASE_P( "spv.int16.frag", "spv.shaderBallotAMD.comp", "spv.shaderFragMaskAMD.frag", - "spv.textureGatherBiasLod.frag" + "spv.textureGatherBiasLod.frag", })), FileNameAsCustomTestSuffix ); @@ -428,6 +428,7 @@ INSTANTIATE_TEST_CASE_P( "spv.stereoViewRendering.tesc", "spv.multiviewPerViewAttributes.vert", "spv.multiviewPerViewAttributes.tesc", + "spv.atomicInt64.comp", })), FileNameAsCustomTestSuffix );