From de0fd64bedd23660f557833cc0108c3fb2be3918 Mon Sep 17 00:00:00 2001 From: Shilei Tian Date: Tue, 12 Nov 2024 23:11:05 -0500 Subject: [PATCH] [AMDGPU] Introduce a new generic target `gfx9-4-generic` (#115190) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch introduces a new generic target, `gfx9-4-generic`. Since it doesn’t support FP8 and XF32-related instructions, the patch includes several code reorganizations to accommodate these changes. --- clang/include/clang/Basic/Cuda.h | 1 + clang/lib/Basic/Cuda.cpp | 1 + clang/lib/Basic/Targets/NVPTX.cpp | 1 + clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp | 1 + clang/test/CodeGenOpenCL/amdgpu-features.cl | 9 +- .../builtins-amdgcn-gfx9-4-generic-err.cl | 38 +++++++ clang/test/Driver/amdgpu-macros.cl | 1 + clang/test/Driver/amdgpu-mcpu.cl | 2 + .../Misc/target-invalid-cpu-note/amdgcn.c | 1 + .../test/Misc/target-invalid-cpu-note/nvptx.c | 1 + llvm/docs/AMDGPUUsage.rst | 7 ++ llvm/include/llvm/BinaryFormat/ELF.h | 3 +- llvm/include/llvm/TargetParser/TargetParser.h | 3 +- llvm/lib/Object/ELFObjectFile.cpp | 2 + llvm/lib/ObjectYAML/ELFYAML.cpp | 1 + llvm/lib/Target/AMDGPU/AMDGPU.td | 31 ++++-- llvm/lib/Target/AMDGPU/GCNProcessors.td | 5 + .../MCTargetDesc/AMDGPUTargetStreamer.cpp | 5 + llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h | 1 + llvm/lib/TargetParser/TargetParser.cpp | 8 +- .../CodeGen/AMDGPU/directive-amdgcn-target.ll | 4 + .../CodeGen/AMDGPU/div-rem-by-constant-64.ll | 1 + llvm/test/CodeGen/AMDGPU/dst-sel-hazard.mir | 1 + .../CodeGen/AMDGPU/elf-header-flags-mach.ll | 2 + .../AMDGPU/generic-targets-require-v6.ll | 3 + .../AMDGPU/llvm.amdgcn.iglp.opt.exp.large.mir | 1 + .../AMDGPU/llvm.amdgcn.iglp.opt.exp.small.mir | 1 + .../AMDGPU/llvm.amdgcn.update.dpp.gfx90a.ll | 1 + .../AMDGPU/no-corresponding-integer-type.ll | 1 + .../MC/AMDGPU/gfx9_4_generic_unsupported.s | 104 ++++++++++++++++++ .../Object/AMDGPU/elf-header-flags-mach.yaml | 7 ++ .../llvm-objdump/ELF/AMDGPU/subtarget.ll | 6 + .../llvm-readobj/ELF/AMDGPU/elf-headers.test | 3 + llvm/tools/llvm-readobj/ELFDumper.cpp | 1 + 34 files changed, 245 insertions(+), 13 deletions(-) create mode 100644 clang/test/CodeGenOpenCL/builtins-amdgcn-gfx9-4-generic-err.cl create mode 100644 llvm/test/MC/AMDGPU/gfx9_4_generic_unsupported.s diff --git a/clang/include/clang/Basic/Cuda.h b/clang/include/clang/Basic/Cuda.h index 7b4f435dc39f29..721e8981af6ffc 100644 --- a/clang/include/clang/Basic/Cuda.h +++ b/clang/include/clang/Basic/Cuda.h @@ -103,6 +103,7 @@ enum class OffloadArch { GFX909, GFX90a, GFX90c, + GFX9_4_GENERIC, GFX940, GFX941, GFX942, diff --git a/clang/lib/Basic/Cuda.cpp b/clang/lib/Basic/Cuda.cpp index d765baef913e2f..59c932468cd891 100644 --- a/clang/lib/Basic/Cuda.cpp +++ b/clang/lib/Basic/Cuda.cpp @@ -121,6 +121,7 @@ static const OffloadArchToStringMap arch_names[] = { GFX(909), // gfx909 GFX(90a), // gfx90a GFX(90c), // gfx90c + {OffloadArch::GFX9_4_GENERIC, "gfx9-4-generic", "compute_amdgcn"}, GFX(940), // gfx940 GFX(941), // gfx941 GFX(942), // gfx942 diff --git a/clang/lib/Basic/Targets/NVPTX.cpp b/clang/lib/Basic/Targets/NVPTX.cpp index e0bd0b096324d8..0897032c4b8546 100644 --- a/clang/lib/Basic/Targets/NVPTX.cpp +++ b/clang/lib/Basic/Targets/NVPTX.cpp @@ -205,6 +205,7 @@ void NVPTXTargetInfo::getTargetDefines(const LangOptions &Opts, case OffloadArch::GFX909: case OffloadArch::GFX90a: case OffloadArch::GFX90c: + case OffloadArch::GFX9_4_GENERIC: case OffloadArch::GFX940: case OffloadArch::GFX941: case OffloadArch::GFX942: diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp index 598b946ad88dbb..43dc0e62284602 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp @@ -2301,6 +2301,7 @@ void CGOpenMPRuntimeGPU::processRequiresDirective(const OMPRequiresDecl *D) { case OffloadArch::GFX909: case OffloadArch::GFX90a: case OffloadArch::GFX90c: + case OffloadArch::GFX9_4_GENERIC: case OffloadArch::GFX940: case OffloadArch::GFX941: case OffloadArch::GFX942: diff --git a/clang/test/CodeGenOpenCL/amdgpu-features.cl b/clang/test/CodeGenOpenCL/amdgpu-features.cl index f3473346baae5a..8b56ec94f2c4ee 100644 --- a/clang/test/CodeGenOpenCL/amdgpu-features.cl +++ b/clang/test/CodeGenOpenCL/amdgpu-features.cl @@ -56,6 +56,8 @@ // RUN: %clang_cc1 -triple amdgcn -target-cpu gfx1103 -target-feature +wavefrontsize64 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1103-W64 %s +// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx9-4-generic -emit-llvm -o - %s | FileCheck --check-prefix=GFX9_4_Generic %s + // NOCPU-NOT: "target-features" // NOCPU-WAVE32: "target-features"="+wavefrontsize32" // NOCPU-WAVE64: "target-features"="+wavefrontsize64" @@ -82,9 +84,10 @@ // GFX909: "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" // GFX90A: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" // GFX90C: "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" -// GFX940: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+fp8-conversion-insts,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" -// GFX941: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+fp8-conversion-insts,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" -// GFX942: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+fp8-conversion-insts,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" +// GFX940: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+fp8-conversion-insts,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64,+xf32-insts" +// GFX941: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+fp8-conversion-insts,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64,+xf32-insts" +// GFX942: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+fp8-conversion-insts,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64,+xf32-insts" +// GFX9_4_Generic: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" // GFX1010: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dpp,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32" // GFX1011: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32" // GFX1012: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32" diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx9-4-generic-err.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx9-4-generic-err.cl new file mode 100644 index 00000000000000..dbeb8a6f2c1664 --- /dev/null +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx9-4-generic-err.cl @@ -0,0 +1,38 @@ +// REQUIRES: amdgpu-registered-target + +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -target-cpu gfx9-4-generic -verify -emit-llvm -o - %s + +typedef unsigned int uint; +typedef float float2 __attribute__((ext_vector_type(2))); +typedef float float4 __attribute__((ext_vector_type(4))); +typedef float float16 __attribute__((ext_vector_type(16))); +typedef int int2 __attribute__((ext_vector_type(2))); +typedef int int4 __attribute__((ext_vector_type(4))); + +void builtin_test_unsupported(uint a, uint b, int a_int, long a_long, float a_float, float b_float, + int2 a_int2, int4 a_int4, float2 a_float2, float4 a_float4, float16 a_float16) { + a_float4 = __builtin_amdgcn_mfma_f32_16x16x32_bf8_bf8(a_long, a_long, a_float4, 0, 0, 0); // expected-error {{'__builtin_amdgcn_mfma_f32_16x16x32_bf8_bf8' needs target feature fp8-insts}} + a_float4 = __builtin_amdgcn_mfma_f32_16x16x32_bf8_fp8(a_long, a_long, a_float4, 0, 0, 0); // expected-error {{'__builtin_amdgcn_mfma_f32_16x16x32_bf8_fp8' needs target feature fp8-insts}} + a_float4 = __builtin_amdgcn_mfma_f32_16x16x32_fp8_bf8(a_long, a_long, a_float4, 0, 0, 0); // expected-error {{'__builtin_amdgcn_mfma_f32_16x16x32_fp8_bf8' needs target feature fp8-insts}} + a_float4 = __builtin_amdgcn_mfma_f32_16x16x32_fp8_fp8(a_long, a_long, a_float4, 0, 0, 0); // expected-error {{'__builtin_amdgcn_mfma_f32_16x16x32_fp8_fp8' needs target feature fp8-insts}} + a_float16 = __builtin_amdgcn_mfma_f32_32x32x16_bf8_bf8(a_long, a_long, a_float16, 0, 0, 0); // expected-error {{'__builtin_amdgcn_mfma_f32_32x32x16_bf8_bf8' needs target feature fp8-insts}} + a_float16 = __builtin_amdgcn_mfma_f32_32x32x16_bf8_fp8(a_long, a_long, a_float16, 0, 0, 0); // expected-error {{'__builtin_amdgcn_mfma_f32_32x32x16_bf8_fp8' needs target feature fp8-insts}} + a_float16 = __builtin_amdgcn_mfma_f32_32x32x16_fp8_bf8(a_long, a_long, a_float16, 0, 0, 0); // expected-error {{'__builtin_amdgcn_mfma_f32_32x32x16_fp8_bf8' needs target feature fp8-insts}} + a_float16 = __builtin_amdgcn_mfma_f32_32x32x16_fp8_fp8(a_long, a_long, a_float16, 0, 0, 0); // expected-error {{'__builtin_amdgcn_mfma_f32_32x32x16_fp8_fp8' needs target feature fp8-insts}} + a_float4 = __builtin_amdgcn_smfmac_f32_16x16x64_bf8_bf8(a_int2, a_int4, a_float4, a_int, 0, 0); // expected-error {{'__builtin_amdgcn_smfmac_f32_16x16x64_bf8_bf8' needs target feature fp8-insts}} + a_float4 = __builtin_amdgcn_smfmac_f32_16x16x64_bf8_fp8(a_int2, a_int4, a_float4, a_int, 0, 0); // expected-error {{'__builtin_amdgcn_smfmac_f32_16x16x64_bf8_fp8' needs target feature fp8-insts}} + a_float4 = __builtin_amdgcn_smfmac_f32_16x16x64_fp8_bf8(a_int2, a_int4, a_float4, a_int, 0, 0); // expected-error {{'__builtin_amdgcn_smfmac_f32_16x16x64_fp8_bf8' needs target feature fp8-insts}} + a_float4 = __builtin_amdgcn_smfmac_f32_16x16x64_fp8_fp8(a_int2, a_int4, a_float4, a_int, 0, 0); // expected-error {{'__builtin_amdgcn_smfmac_f32_16x16x64_fp8_fp8' needs target feature fp8-insts}} + a_float16 = __builtin_amdgcn_smfmac_f32_32x32x32_bf8_bf8(a_int2, a_int4, a_float16, a_int, 0, 0); // expected-error {{'__builtin_amdgcn_smfmac_f32_32x32x32_bf8_bf8' needs target feature fp8-insts}} + a_float16 = __builtin_amdgcn_smfmac_f32_32x32x32_bf8_fp8(a_int2, a_int4, a_float16, a_int, 0, 0); // expected-error {{'__builtin_amdgcn_smfmac_f32_32x32x32_bf8_fp8' needs target feature fp8-insts}} + a_float16 = __builtin_amdgcn_smfmac_f32_32x32x32_fp8_bf8(a_int2, a_int4, a_float16, a_int, 0, 0); // expected-error {{'__builtin_amdgcn_smfmac_f32_32x32x32_fp8_bf8' needs target feature fp8-insts}} + a_float16 = __builtin_amdgcn_smfmac_f32_32x32x32_fp8_fp8(a_int2, a_int4, a_float16, a_int, 0, 0); // expected-error {{'__builtin_amdgcn_smfmac_f32_32x32x32_fp8_fp8' needs target feature fp8-insts}} + b = __builtin_amdgcn_cvt_f32_bf8(a, 0); // expected-error {{'__builtin_amdgcn_cvt_f32_bf8' needs target feature fp8-conversion-insts}} + b = __builtin_amdgcn_cvt_f32_fp8(a, 1); // expected-error {{'__builtin_amdgcn_cvt_f32_fp8' needs target feature fp8-conversion-insts}} + a_float2 = __builtin_amdgcn_cvt_pk_f32_bf8(a, false); // expected-error {{'__builtin_amdgcn_cvt_pk_f32_bf8' needs target feature fp8-conversion-insts}} + a_float2 = __builtin_amdgcn_cvt_pk_f32_fp8(a, true); // expected-error {{'__builtin_amdgcn_cvt_pk_f32_fp8' needs target feature fp8-conversion-insts}} + b = __builtin_amdgcn_cvt_pk_bf8_f32(a_float, b_float, a, false); // expected-error {{'__builtin_amdgcn_cvt_pk_bf8_f32' needs target feature fp8-conversion-insts}} + b = __builtin_amdgcn_cvt_pk_fp8_f32(a_float, b_float, a, true); // expected-error {{'__builtin_amdgcn_cvt_pk_fp8_f32' needs target feature fp8-conversion-insts}} + b = __builtin_amdgcn_cvt_sr_bf8_f32(a_float, b_float, a, 2); // expected-error {{'__builtin_amdgcn_cvt_sr_bf8_f32' needs target feature fp8-conversion-insts}} + b = __builtin_amdgcn_cvt_sr_fp8_f32(a_float, b_float, a, 3); // expected-error {{'__builtin_amdgcn_cvt_sr_fp8_f32' needs target feature fp8-conversion-insts}} +} diff --git a/clang/test/Driver/amdgpu-macros.cl b/clang/test/Driver/amdgpu-macros.cl index 2fedd10bb53445..d354f933c5ad78 100644 --- a/clang/test/Driver/amdgpu-macros.cl +++ b/clang/test/Driver/amdgpu-macros.cl @@ -133,6 +133,7 @@ // RUN: %clang -E -dM -target amdgcn -mcpu=gfx1201 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=32 -DCPU=gfx1201 -DFAMILY=GFX12 // RUN: %clang -E -dM -target amdgcn -mcpu=gfx9-generic %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=64 -DCPU=gfx9_generic -DFAMILY=GFX9 +// RUN: %clang -E -dM -target amdgcn -mcpu=gfx9-4-generic %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=64 -DCPU=gfx9_4_generic -DFAMILY=GFX9 // RUN: %clang -E -dM -target amdgcn -mcpu=gfx10-1-generic %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=32 -DCPU=gfx10_1_generic -DFAMILY=GFX10 // RUN: %clang -E -dM -target amdgcn -mcpu=gfx10-3-generic %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=32 -DCPU=gfx10_3_generic -DFAMILY=GFX10 // RUN: %clang -E -dM -target amdgcn -mcpu=gfx11-generic %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=32 -DCPU=gfx11_generic -DFAMILY=GFX11 diff --git a/clang/test/Driver/amdgpu-mcpu.cl b/clang/test/Driver/amdgpu-mcpu.cl index 42ce33db6eec07..ba578435072985 100644 --- a/clang/test/Driver/amdgpu-mcpu.cl +++ b/clang/test/Driver/amdgpu-mcpu.cl @@ -118,6 +118,7 @@ // RUN: %clang -### -target amdgcn -mcpu=gfx1201 %s 2>&1 | FileCheck --check-prefix=GFX1201 %s // RUN: %clang -### -target amdgcn -mcpu=gfx9-generic %s 2>&1 | FileCheck --check-prefix=GFX9_GENERIC %s +// RUN: %clang -### -target amdgcn -mcpu=gfx9-4-generic %s 2>&1 | FileCheck --check-prefix=GFX9_4_GENERIC %s // RUN: %clang -### -target amdgcn -mcpu=gfx10-1-generic %s 2>&1 | FileCheck --check-prefix=GFX10_1_GENERIC %s // RUN: %clang -### -target amdgcn -mcpu=gfx10-3-generic %s 2>&1 | FileCheck --check-prefix=GFX10_3_GENERIC %s // RUN: %clang -### -target amdgcn -mcpu=gfx11-generic %s 2>&1 | FileCheck --check-prefix=GFX11_GENERIC %s @@ -172,6 +173,7 @@ // GFX1201: "-target-cpu" "gfx1201" // GFX9_GENERIC: "-target-cpu" "gfx9-generic" +// GFX9_4_GENERIC: "-target-cpu" "gfx9-4-generic" // GFX10_1_GENERIC: "-target-cpu" "gfx10-1-generic" // GFX10_3_GENERIC: "-target-cpu" "gfx10-3-generic" // GFX11_GENERIC: "-target-cpu" "gfx11-generic" diff --git a/clang/test/Misc/target-invalid-cpu-note/amdgcn.c b/clang/test/Misc/target-invalid-cpu-note/amdgcn.c index b3ddbd53a0391b..4e675871f1e5bd 100644 --- a/clang/test/Misc/target-invalid-cpu-note/amdgcn.c +++ b/clang/test/Misc/target-invalid-cpu-note/amdgcn.c @@ -74,4 +74,5 @@ // CHECK-SAME: {{^}}, gfx10-3-generic // CHECK-SAME: {{^}}, gfx11-generic // CHECK-SAME: {{^}}, gfx12-generic +// CHECK-SAME: {{^}}, gfx9-4-generic // CHECK-SAME: {{$}} diff --git a/clang/test/Misc/target-invalid-cpu-note/nvptx.c b/clang/test/Misc/target-invalid-cpu-note/nvptx.c index a59e1c6fab1c49..44fe07065b2428 100644 --- a/clang/test/Misc/target-invalid-cpu-note/nvptx.c +++ b/clang/test/Misc/target-invalid-cpu-note/nvptx.c @@ -50,6 +50,7 @@ // CHECK-SAME: {{^}}, gfx909 // CHECK-SAME: {{^}}, gfx90a // CHECK-SAME: {{^}}, gfx90c +// CHECK-SAME: {{^}}, gfx9-4-generic // CHECK-SAME: {{^}}, gfx940 // CHECK-SAME: {{^}}, gfx941 // CHECK-SAME: {{^}}, gfx942 diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst index 5b83ea428c0bff..b76ebe3ec94d12 100644 --- a/llvm/docs/AMDGPUUsage.rst +++ b/llvm/docs/AMDGPUUsage.rst @@ -576,6 +576,12 @@ Generic processor code objects are versioned. See :ref:`amdgpu-generic-processor - ``v_dot2_f32_f16`` + ``gfx9-4-generic`` ``amdgcn`` - ``gfx940`` - xnack - Absolute flat FP8 and BF8 instructions, + - ``gfx941`` - sramecc scratch FP8 and BF8 conversion instructions, + - ``gfx942`` as well as instructions with XF32 format support + are not available. + + ``gfx10-1-generic`` ``amdgcn`` - ``gfx1010`` - xnack - Absolute flat - The following instructions are - ``gfx1011`` - wavefrontsize64 scratch not available on ``gfx1011`` - ``gfx1012`` - cumode and ``gfx1012`` @@ -2180,6 +2186,7 @@ The AMDGPU backend uses the following ELF header: *reserved* 0x057 Reserved. ``EF_AMDGPU_MACH_AMDGCN_GFX1153`` 0x058 ``gfx1153``. ``EF_AMDGPU_MACH_AMDGCN_GFX12_GENERIC`` 0x059 ``gfx12-generic`` + ``EF_AMDGPU_MACH_AMDGCN_GFX9_4_GENERIC`` 0x05f ``gfx9-4-generic`` ========================================== ========== ============================= Sections diff --git a/llvm/include/llvm/BinaryFormat/ELF.h b/llvm/include/llvm/BinaryFormat/ELF.h index c591a96232f115..6c05ea7208e1f1 100644 --- a/llvm/include/llvm/BinaryFormat/ELF.h +++ b/llvm/include/llvm/BinaryFormat/ELF.h @@ -822,11 +822,12 @@ enum : unsigned { EF_AMDGPU_MACH_AMDGCN_RESERVED_0X57 = 0x057, EF_AMDGPU_MACH_AMDGCN_GFX1153 = 0x058, EF_AMDGPU_MACH_AMDGCN_GFX12_GENERIC = 0x059, + EF_AMDGPU_MACH_AMDGCN_GFX9_4_GENERIC = 0x05f, // clang-format on // First/last AMDGCN-based processors. EF_AMDGPU_MACH_AMDGCN_FIRST = EF_AMDGPU_MACH_AMDGCN_GFX600, - EF_AMDGPU_MACH_AMDGCN_LAST = EF_AMDGPU_MACH_AMDGCN_GFX12_GENERIC, + EF_AMDGPU_MACH_AMDGCN_LAST = EF_AMDGPU_MACH_AMDGCN_GFX9_4_GENERIC, // Indicates if the "xnack" target feature is enabled for all code contained // in the object. diff --git a/llvm/include/llvm/TargetParser/TargetParser.h b/llvm/include/llvm/TargetParser/TargetParser.h index ae86ff39083d89..c6db4dfd7f5159 100644 --- a/llvm/include/llvm/TargetParser/TargetParser.h +++ b/llvm/include/llvm/TargetParser/TargetParser.h @@ -119,9 +119,10 @@ enum GPUKind : uint32_t { GK_GFX10_3_GENERIC = 194, GK_GFX11_GENERIC = 195, GK_GFX12_GENERIC = 196, + GK_GFX9_4_GENERIC = 197, GK_AMDGCN_GENERIC_FIRST = GK_GFX9_GENERIC, - GK_AMDGCN_GENERIC_LAST = GK_GFX12_GENERIC, + GK_AMDGCN_GENERIC_LAST = GK_GFX9_4_GENERIC, }; /// Instruction set architecture version. diff --git a/llvm/lib/Object/ELFObjectFile.cpp b/llvm/lib/Object/ELFObjectFile.cpp index 5096877d2a4b00..9dc39936ffd8bb 100644 --- a/llvm/lib/Object/ELFObjectFile.cpp +++ b/llvm/lib/Object/ELFObjectFile.cpp @@ -602,6 +602,8 @@ StringRef ELFObjectFileBase::getAMDGPUCPUName() const { // Generic AMDGCN targets case ELF::EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC: return "gfx9-generic"; + case ELF::EF_AMDGPU_MACH_AMDGCN_GFX9_4_GENERIC: + return "gfx9-4-generic"; case ELF::EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC: return "gfx10-1-generic"; case ELF::EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC: diff --git a/llvm/lib/ObjectYAML/ELFYAML.cpp b/llvm/lib/ObjectYAML/ELFYAML.cpp index bd816a6d704d44..130b8798ab4a46 100644 --- a/llvm/lib/ObjectYAML/ELFYAML.cpp +++ b/llvm/lib/ObjectYAML/ELFYAML.cpp @@ -631,6 +631,7 @@ void ScalarBitSetTraits::bitset(IO &IO, BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1200, EF_AMDGPU_MACH); BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1201, EF_AMDGPU_MACH); BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC, EF_AMDGPU_MACH); + BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX9_4_GENERIC, EF_AMDGPU_MACH); BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC, EF_AMDGPU_MACH); BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC, EF_AMDGPU_MACH); BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC, EF_AMDGPU_MACH); diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index bde61a1f7e58df..c8ae010414dc40 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -1451,11 +1451,7 @@ def FeatureISAVersion9_4_Common : FeatureSet< FeatureDPALU_DPP, FeaturePackedFP32Ops, FeatureMAIInsts, - FeatureFP8Insts, - FeatureFP8ConversionInsts, - FeatureCvtFP8VOP1Bug, FeaturePkFmacF16Inst, - FeatureXF32Insts, FeatureAtomicFaddRtnInsts, FeatureAtomicFaddNoRtnInsts, FeatureAtomicBufferGlobalPkAddF16Insts, @@ -1476,15 +1472,36 @@ def FeatureISAVersion9_4_Common : FeatureSet< def FeatureISAVersion9_4_0 : FeatureSet< !listconcat(FeatureISAVersion9_4_Common.Features, - [FeatureForceStoreSC0SC1])>; + [ + FeatureForceStoreSC0SC1, + FeatureFP8Insts, + FeatureFP8ConversionInsts, + FeatureCvtFP8VOP1Bug, + FeatureXF32Insts + ])>; def FeatureISAVersion9_4_1 : FeatureSet< !listconcat(FeatureISAVersion9_4_Common.Features, - [FeatureForceStoreSC0SC1])>; + [ + FeatureForceStoreSC0SC1, + FeatureFP8Insts, + FeatureFP8ConversionInsts, + FeatureCvtFP8VOP1Bug, + FeatureXF32Insts + ])>; def FeatureISAVersion9_4_2 : FeatureSet< !listconcat(FeatureISAVersion9_4_Common.Features, - [])>; + [ + FeatureFP8Insts, + FeatureFP8ConversionInsts, + FeatureCvtFP8VOP1Bug, + FeatureXF32Insts + ])>; + +def FeatureISAVersion9_4_Generic : FeatureSet< + !listconcat(FeatureISAVersion9_4_Common.Features, + [FeatureRequiresCOV6])>; def FeatureISAVersion10_Common : FeatureSet< [FeatureGFX10, diff --git a/llvm/lib/Target/AMDGPU/GCNProcessors.td b/llvm/lib/Target/AMDGPU/GCNProcessors.td index 547941633fda61..067043d290b760 100644 --- a/llvm/lib/Target/AMDGPU/GCNProcessors.td +++ b/llvm/lib/Target/AMDGPU/GCNProcessors.td @@ -209,6 +209,11 @@ def : ProcessorModel<"gfx9-generic", SIQuarterSpeedModel, FeatureISAVersion9_Generic.Features >; +// [gfx940, gfx941, gfx942] +def : ProcessorModel<"gfx9-4-generic", SIDPGFX940FullSpeedModel, + FeatureISAVersion9_4_Generic.Features +>; + //===----------------------------------------------------------------------===// // GCN GFX10. //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp index 5c625c3d83ff1b..507725b91a9bee 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp @@ -119,6 +119,7 @@ StringRef AMDGPUTargetStreamer::getArchNameFromElfMach(unsigned ElfMach) { case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1200: AK = GK_GFX1200; break; case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1201: AK = GK_GFX1201; break; case ELF::EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC: AK = GK_GFX9_GENERIC; break; + case ELF::EF_AMDGPU_MACH_AMDGCN_GFX9_4_GENERIC: AK = GK_GFX9_4_GENERIC; break; case ELF::EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC: AK = GK_GFX10_1_GENERIC; break; case ELF::EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC: AK = GK_GFX10_3_GENERIC; break; case ELF::EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC: AK = GK_GFX11_GENERIC; break; @@ -204,6 +205,7 @@ unsigned AMDGPUTargetStreamer::getElfMach(StringRef GPU) { case GK_GFX1200: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1200; case GK_GFX1201: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1201; case GK_GFX9_GENERIC: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC; + case GK_GFX9_4_GENERIC: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX9_4_GENERIC; case GK_GFX10_1_GENERIC: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC; case GK_GFX10_3_GENERIC: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC; case GK_GFX11_GENERIC: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC; @@ -821,6 +823,9 @@ unsigned AMDGPUTargetELFStreamer::getEFlagsV6() { case AMDGPU::GK_GFX9_GENERIC: Version = GenericVersion::GFX9; break; + case AMDGPU::GK_GFX9_4_GENERIC: + Version = GenericVersion::GFX9_4; + break; case AMDGPU::GK_GFX10_1_GENERIC: Version = GenericVersion::GFX10_1; break; diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h index beebe320b2cf3a..88a6d75b72c7d0 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -46,6 +46,7 @@ struct IsaVersion; /// within a generic family. namespace GenericVersion { static constexpr unsigned GFX9 = 1; +static constexpr unsigned GFX9_4 = 1; static constexpr unsigned GFX10_1 = 1; static constexpr unsigned GFX10_3 = 1; static constexpr unsigned GFX11 = 1; diff --git a/llvm/lib/TargetParser/TargetParser.cpp b/llvm/lib/TargetParser/TargetParser.cpp index 059d7b4f5ff2d0..7dfb8c021a8a5f 100644 --- a/llvm/lib/TargetParser/TargetParser.cpp +++ b/llvm/lib/TargetParser/TargetParser.cpp @@ -134,6 +134,7 @@ constexpr GPUInfo AMDGCNGPUs[] = { {{"gfx10-3-generic"}, {"gfx10-3-generic"}, GK_GFX10_3_GENERIC, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP}, {{"gfx11-generic"}, {"gfx11-generic"}, GK_GFX11_GENERIC, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP}, {{"gfx12-generic"}, {"gfx12-generic"}, GK_GFX12_GENERIC, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP}, + {{"gfx9-4-generic"}, {"gfx9-4-generic"}, GK_GFX9_4_GENERIC, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK|FEATURE_SRAMECC}, // clang-format on }; @@ -155,6 +156,7 @@ const GPUInfo *getArchEntry(AMDGPU::GPUKind AK, ArrayRef Table) { StringRef llvm::AMDGPU::getArchFamilyNameAMDGCN(GPUKind AK) { switch (AK) { case AMDGPU::GK_GFX9_GENERIC: + case AMDGPU::GK_GFX9_4_GENERIC: return "gfx9"; case AMDGPU::GK_GFX10_1_GENERIC: case AMDGPU::GK_GFX10_3_GENERIC: @@ -296,6 +298,7 @@ AMDGPU::IsaVersion AMDGPU::getIsaVersion(StringRef GPU) { // TODO: Split up this API depending on its caller so // generic target handling is more obvious and less risky. case GK_GFX9_GENERIC: return {9, 0, 0}; + case GK_GFX9_4_GENERIC: return {9, 4, 0}; case GK_GFX10_1_GENERIC: return {10, 1, 0}; case GK_GFX10_3_GENERIC: return {10, 3, 0}; case GK_GFX11_GENERIC: return {11, 0, 3}; @@ -466,9 +469,12 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T, case GK_GFX942: case GK_GFX941: case GK_GFX940: - Features["gfx940-insts"] = true; Features["fp8-insts"] = true; Features["fp8-conversion-insts"] = true; + Features["xf32-insts"] = true; + [[fallthrough]]; + case GK_GFX9_4_GENERIC: + Features["gfx940-insts"] = true; Features["atomic-ds-pk-add-16-insts"] = true; Features["atomic-flat-pk-add-16-insts"] = true; Features["atomic-global-pk-add-bf16-inst"] = true; diff --git a/llvm/test/CodeGen/AMDGPU/directive-amdgcn-target.ll b/llvm/test/CodeGen/AMDGPU/directive-amdgcn-target.ll index 5986d2d38ef1ad..4eac26e853c2a0 100644 --- a/llvm/test/CodeGen/AMDGPU/directive-amdgcn-target.ll +++ b/llvm/test/CodeGen/AMDGPU/directive-amdgcn-target.ll @@ -112,6 +112,8 @@ ; RUN: llc --amdhsa-code-object-version=6 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx9-generic -mattr=-xnack < %s | FileCheck --check-prefixes=GFX9_GENERIC_NOXNACK %s ; RUN: llc --amdhsa-code-object-version=6 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx9-generic -mattr=+xnack < %s | FileCheck --check-prefixes=GFX9_GENERIC_XNACK %s +; RUN: llc --amdhsa-code-object-version=6 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx9-4-generic -mattr=-xnack < %s | FileCheck --check-prefixes=GFX9_4_GENERIC_NOXNACK %s +; RUN: llc --amdhsa-code-object-version=6 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx9-4-generic -mattr=+xnack < %s | FileCheck --check-prefixes=GFX9_4_GENERIC_XNACK %s ; RUN: llc --amdhsa-code-object-version=6 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx10-1-generic -mattr=-xnack < %s | FileCheck --check-prefixes=GFX10_1_GENERIC_NOXNACK %s ; RUN: llc --amdhsa-code-object-version=6 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx10-1-generic -mattr=+xnack < %s | FileCheck --check-prefixes=GFX10_1_GENERIC_XNACK %s ; RUN: llc --amdhsa-code-object-version=6 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx10-3-generic < %s | FileCheck --check-prefixes=GFX10_3_GENERIC %s @@ -210,6 +212,8 @@ ; GFX9_GENERIC_NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx9-generic:xnack-" ; GFX9_GENERIC_XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx9-generic:xnack+" +; GFX9_4_GENERIC_NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx9-4-generic:xnack-" +; GFX9_4_GENERIC_XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx9-4-generic:xnack+" ; GFX10_1_GENERIC_NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx10-1-generic:xnack-" ; GFX10_1_GENERIC_XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx10-1-generic:xnack+" ; GFX10_3_GENERIC: .amdgcn_target "amdgcn-amd-amdhsa--gfx10-3-generic" diff --git a/llvm/test/CodeGen/AMDGPU/div-rem-by-constant-64.ll b/llvm/test/CodeGen/AMDGPU/div-rem-by-constant-64.ll index 662de47413654f..1c6808b613427e 100644 --- a/llvm/test/CodeGen/AMDGPU/div-rem-by-constant-64.ll +++ b/llvm/test/CodeGen/AMDGPU/div-rem-by-constant-64.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -o - %s | FileCheck -check-prefixes=GFX9 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -o - %s | FileCheck -check-prefixes=GFX942 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx9-4-generic --amdhsa-code-object-version=6 -o - %s | FileCheck -check-prefixes=GFX942 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 -o - %s | FileCheck -check-prefixes=GFX1030 %s ; Sample test to check how we deal with division/modulos by 64 bit constants. diff --git a/llvm/test/CodeGen/AMDGPU/dst-sel-hazard.mir b/llvm/test/CodeGen/AMDGPU/dst-sel-hazard.mir index e24817078d8bc9..524e074bb69de4 100644 --- a/llvm/test/CodeGen/AMDGPU/dst-sel-hazard.mir +++ b/llvm/test/CodeGen/AMDGPU/dst-sel-hazard.mir @@ -1,5 +1,6 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 # RUN: llc -mtriple=amdgcn -mcpu=gfx942 -run-pass post-RA-hazard-rec -o - %s | FileCheck -check-prefix=HAZARD %s +# RUN: llc -mtriple=amdgcn -mcpu=gfx9-4-generic -run-pass post-RA-hazard-rec -o - %s | FileCheck -check-prefix=HAZARD %s # RUN: llc -mtriple=amdgcn -mcpu=gfx90a -run-pass post-RA-hazard-rec -o - %s | FileCheck -check-prefix=NOHAZARD %s --- diff --git a/llvm/test/CodeGen/AMDGPU/elf-header-flags-mach.ll b/llvm/test/CodeGen/AMDGPU/elf-header-flags-mach.ll index f293c52bf6bfb2..f1f4edb94a6178 100644 --- a/llvm/test/CodeGen/AMDGPU/elf-header-flags-mach.ll +++ b/llvm/test/CodeGen/AMDGPU/elf-header-flags-mach.ll @@ -80,6 +80,7 @@ ; RUN: llc -filetype=obj -mtriple=amdgcn -mcpu=gfx1201 < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX1201 %s ; RUN: llc -filetype=obj --amdhsa-code-object-version=6 -mtriple=amdgcn -mcpu=gfx9-generic < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX9_GENERIC %s +; RUN: llc -filetype=obj --amdhsa-code-object-version=6 -mtriple=amdgcn -mcpu=gfx9-4-generic < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX9_4_GENERIC %s ; RUN: llc -filetype=obj --amdhsa-code-object-version=6 -mtriple=amdgcn -mcpu=gfx10-1-generic < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX10_1_GENERIC %s ; RUN: llc -filetype=obj --amdhsa-code-object-version=6 -mtriple=amdgcn -mcpu=gfx10-3-generic < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX10_3_GENERIC %s ; RUN: llc -filetype=obj --amdhsa-code-object-version=6 -mtriple=amdgcn -mcpu=gfx11-generic < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX11_GENERIC %s @@ -161,6 +162,7 @@ ; GFX1201: EF_AMDGPU_MACH_AMDGCN_GFX1201 (0x4E) ; GFX9_GENERIC: EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC (0x51) +; GFX9_4_GENERIC: EF_AMDGPU_MACH_AMDGCN_GFX9_4_GENERIC (0x5F) ; GFX10_1_GENERIC: EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC (0x52) ; GFX10_3_GENERIC: EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC (0x53) ; GFX11_GENERIC: EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC (0x54) diff --git a/llvm/test/CodeGen/AMDGPU/generic-targets-require-v6.ll b/llvm/test/CodeGen/AMDGPU/generic-targets-require-v6.ll index d30cf1531a06b1..d58dc5db8c80f1 100644 --- a/llvm/test/CodeGen/AMDGPU/generic-targets-require-v6.ll +++ b/llvm/test/CodeGen/AMDGPU/generic-targets-require-v6.ll @@ -1,16 +1,19 @@ ; RUN: not llc -mtriple=amdgcn -mcpu=gfx9-generic --amdhsa-code-object-version=5 -o - %s 2>&1 | FileCheck --check-prefix=GFX9-V5 %s +; RUN: not llc -mtriple=amdgcn -mcpu=gfx9-4-generic --amdhsa-code-object-version=5 -o - %s 2>&1 | FileCheck --check-prefix=GFX9-4-V5 %s ; RUN: not llc -mtriple=amdgcn -mcpu=gfx10-1-generic --amdhsa-code-object-version=5 -o - %s 2>&1 | FileCheck --check-prefix=GFX101-V5 %s ; RUN: not llc -mtriple=amdgcn -mcpu=gfx10-3-generic --amdhsa-code-object-version=5 -o - %s 2>&1 | FileCheck --check-prefix=GFX103-V5 %s ; RUN: not llc -mtriple=amdgcn -mcpu=gfx11-generic --amdhsa-code-object-version=5 -o - %s 2>&1 | FileCheck --check-prefix=GFX11-V5 %s ; RUN: not llc -mtriple=amdgcn -mcpu=gfx12-generic --amdhsa-code-object-version=5 -o - %s 2>&1 | FileCheck --check-prefix=GFX12-V5 %s ; RUN: llc -mtriple=amdgcn -mcpu=gfx9-generic --amdhsa-code-object-version=6 -o - %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx9-4-generic --amdhsa-code-object-version=6 -o - %s ; RUN: llc -mtriple=amdgcn -mcpu=gfx10-1-generic --amdhsa-code-object-version=6 -o - %s ; RUN: llc -mtriple=amdgcn -mcpu=gfx10-3-generic --amdhsa-code-object-version=6 -o - %s ; RUN: llc -mtriple=amdgcn -mcpu=gfx11-generic --amdhsa-code-object-version=6 -o - %s ; RUN: llc -mtriple=amdgcn -mcpu=gfx12-generic --amdhsa-code-object-version=6 -o - %s ; GFX9-V5: gfx9-generic is only available on code object version 6 or better +; GFX9-4-V5: gfx9-4-generic is only available on code object version 6 or better ; GFX101-V5: gfx10-1-generic is only available on code object version 6 or better ; GFX103-V5: gfx10-3-generic is only available on code object version 6 or better ; GFX11-V5: gfx11-generic is only available on code object version 6 or better diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.iglp.opt.exp.large.mir b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.iglp.opt.exp.large.mir index da1d9972e42dcf..c3562708b15d88 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.iglp.opt.exp.large.mir +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.iglp.opt.exp.large.mir @@ -1,5 +1,6 @@ # NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -start-before=machine-scheduler -verify-misched -o - %s | FileCheck -check-prefix=GCN %s +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx9-4-generic --amdhsa-code-object-version=6 -start-before=machine-scheduler -verify-misched -o - %s | FileCheck -check-prefix=GCN %s --- | define amdgpu_kernel void @largeInterleave() #0 { ret void } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.iglp.opt.exp.small.mir b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.iglp.opt.exp.small.mir index 0473e017f193cb..6150cb5cd947ec 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.iglp.opt.exp.small.mir +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.iglp.opt.exp.small.mir @@ -1,5 +1,6 @@ # NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -start-before=machine-scheduler -verify-misched -o - %s | FileCheck -check-prefix=GCN %s +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx9-4-generic --amdhsa-code-object-version=6 -start-before=machine-scheduler -verify-misched -o - %s | FileCheck -check-prefix=GCN %s --- | define amdgpu_kernel void @smallInterleave() #0 { ret void } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.update.dpp.gfx90a.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.update.dpp.gfx90a.ll index a3d789c1ccc36f..daec7e9b91e71e 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.update.dpp.gfx90a.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.update.dpp.gfx90a.ll @@ -2,6 +2,7 @@ ; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx90a < %s | FileCheck --check-prefixes=GCN,GFX90A %s ; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx942 < %s | FileCheck --check-prefixes=GCN,GFX942 %s ; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx942 < %s | FileCheck --check-prefixes=GCN,GFX942 %s +; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx9-4-generic --amdhsa-code-object-version=6 < %s | FileCheck --check-prefixes=GCN,GFX942 %s ; DPP control value 337 is valid for 64-bit DPP on gfx942 diff --git a/llvm/test/CodeGen/AMDGPU/no-corresponding-integer-type.ll b/llvm/test/CodeGen/AMDGPU/no-corresponding-integer-type.ll index 5201f188afd5f8..df717d0ae497d3 100644 --- a/llvm/test/CodeGen/AMDGPU/no-corresponding-integer-type.ll +++ b/llvm/test/CodeGen/AMDGPU/no-corresponding-integer-type.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 %s -o - | FileCheck %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx9-4-generic --amdhsa-code-object-version=6 %s -o - | FileCheck %s define void @no_corresponding_integer_type(i8 %arg, ptr addrspace(1) %ptr) { ; CHECK-LABEL: no_corresponding_integer_type: diff --git a/llvm/test/MC/AMDGPU/gfx9_4_generic_unsupported.s b/llvm/test/MC/AMDGPU/gfx9_4_generic_unsupported.s new file mode 100644 index 00000000000000..6ade556f21a1d1 --- /dev/null +++ b/llvm/test/MC/AMDGPU/gfx9_4_generic_unsupported.s @@ -0,0 +1,104 @@ +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx9-4-generic -mattr=+wavefrontsize32 %s 2>&1 | FileCheck --implicit-check-not=error: %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx9-4-generic -mattr=+wavefrontsize64 %s 2>&1 | FileCheck --implicit-check-not=error: %s + +v_mfma_f32_16x16x8_xf32 a[0:3], v[2:3], v[4:5], a[2:5] +// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU + +v_mfma_f32_16x16x8xf32 a[0:3], v[2:3], v[4:5], a[2:5] +// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU + +v_mfma_f32_32x32x4_xf32 a[0:15], v[2:3], v[4:5], a[18:33] +// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU + +v_mfma_f32_32x32x4xf32 a[0:15], v[2:3], v[4:5], a[18:33] +// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU + +v_cvt_f32_bf8 v1, 3 +// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU + +v_cvt_f32_bf8_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf +// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU + +v_cvt_f32_bf8_e64 v5, v1 +// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU + +v_cvt_f32_bf8_sdwa v5, v1 src0_sel:BYTE_0 +// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU + +v_cvt_f32_fp8 v1, 3 +// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU + +v_cvt_f32_fp8_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf +// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU + +v_cvt_f32_fp8_e64 v5, v1 +// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU + +v_cvt_f32_fp8_sdwa v5, v1 src0_sel:BYTE_0 +// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU + +v_cvt_pk_f32_fp8 v[0:1], v3 quad_perm:[0,2,1,1] row_mask:0xf bank_mask:0xf +// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU + +v_cvt_pk_f32_fp8_dpp v[10:11], v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf +// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU + +v_cvt_pk_f32_fp8_sdwa v[10:11], v1 src0_sel:WORD_0 +// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU + +v_cvt_pk_f32_bf8 v[0:1], v3 quad_perm:[0,2,1,1] row_mask:0xf bank_mask:0xf +// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU + +v_cvt_pk_f32_bf8_dpp v[10:11], v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf +// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU + +v_cvt_pk_f32_bf8_sdwa v[10:11], v1 src0_sel:WORD_0 +// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU + +v_mfma_f32_16x16x32_bf8_bf8 a[0:3], v[2:3], v[4:5], a[0:3] +// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU + +v_mfma_f32_16x16x32_fp8_bf8 a[0:3], v[2:3], v[4:5], a[0:3] +// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU + +v_mfma_f32_16x16x32_bf8_fp8 a[0:3], v[2:3], v[4:5], a[0:3] +// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU + +v_mfma_f32_16x16x32_fp8_fp8 a[0:3], v[2:3], v[4:5], a[0:3] +// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU + +v_mfma_f32_32x32x16_bf8_bf8 a[0:15], v[2:3], v[4:5], a[0:15] +// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU + +v_mfma_f32_32x32x16_bf8_fp8 a[0:15], v[2:3], v[4:5], a[0:15] +// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU + +v_mfma_f32_32x32x16_fp8_bf8 a[0:15], v[2:3], v[4:5], a[0:15] +// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU + +v_mfma_f32_32x32x16_fp8_fp8 a[0:15], v[2:3], v[4:5], a[0:15] +// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU + +v_smfmac_f32_16x16x64_bf8_bf8 a[0:3], v[2:3], a[4:7], v1 +// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU + +v_smfmac_f32_16x16x64_bf8_fp8 a[0:3], v[2:3], a[4:7], v1 +// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU + +v_smfmac_f32_16x16x64_fp8_bf8 a[0:3], v[2:3], a[4:7], v1 +// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU + +v_smfmac_f32_16x16x64_fp8_fp8 a[0:3], v[2:3], a[4:7], v1 +// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU + +v_smfmac_f32_32x32x32_bf8_bf8 a[0:15], v[2:3], a[4:7], v1 +// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU + +v_smfmac_f32_32x32x32_bf8_fp8 a[0:15], v[2:3], a[4:7], v1 +// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU + +v_smfmac_f32_32x32x32_fp8_bf8 a[0:15], v[2:3], a[4:7], v1 +// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU + +v_smfmac_f32_32x32x32_fp8_fp8 a[0:15], v[2:3], a[4:7], v1 +// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU diff --git a/llvm/test/Object/AMDGPU/elf-header-flags-mach.yaml b/llvm/test/Object/AMDGPU/elf-header-flags-mach.yaml index 37234dba7d9b4c..9c79ea588f6247 100644 --- a/llvm/test/Object/AMDGPU/elf-header-flags-mach.yaml +++ b/llvm/test/Object/AMDGPU/elf-header-flags-mach.yaml @@ -250,6 +250,10 @@ # RUN: llvm-readobj -S --file-headers %t.o.AMDGCN_GFX9_GENERIC | FileCheck --check-prefixes=ELF-AMDGCN-ALL,ELF-AMDGCN-GFX9_GENERIC %s # RUN: obj2yaml %t.o.AMDGCN_GFX9_GENERIC | FileCheck --check-prefixes=YAML-AMDGCN-ALL,YAML-AMDGCN-GFX9_GENERIC %s +# RUN: sed -e 's//64/' -e 's//AMDGCN_GFX9_4_GENERIC/' %s | yaml2obj -o %t.o.AMDGCN_GFX9_4_GENERIC +# RUN: llvm-readobj -S --file-headers %t.o.AMDGCN_GFX9_4_GENERIC | FileCheck --check-prefixes=ELF-AMDGCN-ALL,ELF-AMDGCN-GFX9_4_GENERIC %s +# RUN: obj2yaml %t.o.AMDGCN_GFX9_4_GENERIC | FileCheck --check-prefixes=YAML-AMDGCN-ALL,YAML-AMDGCN-GFX9_4_GENERIC %s + # RUN: sed -e 's//64/' -e 's//AMDGCN_GFX10_1_GENERIC/' %s | yaml2obj -o %t.o.AMDGCN_GFX10_1_GENERIC # RUN: llvm-readobj -S --file-headers %t.o.AMDGCN_GFX10_1_GENERIC | FileCheck --check-prefixes=ELF-AMDGCN-ALL,ELF-AMDGCN-GFX10_1_GENERIC %s # RUN: obj2yaml %t.o.AMDGCN_GFX10_1_GENERIC | FileCheck --check-prefixes=YAML-AMDGCN-ALL,YAML-AMDGCN-GFX10_1_GENERIC %s @@ -473,6 +477,9 @@ # ELF-AMDGCN-GFX9_GENERIC: EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC (0x51) # YAML-AMDGCN-GFX9_GENERIC: Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC ] +# ELF-AMDGCN-GFX9_4_GENERIC: EF_AMDGPU_MACH_AMDGCN_GFX9_4_GENERIC (0x5F) +# YAML-AMDGCN-GFX9_4_GENERIC: Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX9_4_GENERIC ] + # ELF-AMDGCN-GFX10_1_GENERIC: EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC (0x52) # YAML-AMDGCN-GFX10_1_GENERIC: Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC ] diff --git a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/subtarget.ll b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/subtarget.ll index c38f6b4e7833cd..45071ecb751321 100644 --- a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/subtarget.ll +++ b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/subtarget.ll @@ -137,6 +137,12 @@ define amdgpu_kernel void @test_kernel() { ; ----------------------------------GFX9--------------------------------------- ; + +; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=6 -mcpu=gfx9-4-generic -filetype=obj -O0 -o %t.o %s +; RUN: llvm-objdump -D --arch-name=amdgcn -mllvm --amdhsa-code-object-version=6 --mcpu=gfx9-4-generic %t.o > %t-specify.txt +; RUN: llvm-objdump -D -mllvm --amdhsa-code-object-version=6 %t.o > %t-detect.txt +; RUN: diff %t-specify.txt %t-detect.txt + ; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=6 -mcpu=gfx9-generic -filetype=obj -O0 -o %t.o %s ; RUN: llvm-objdump -D --arch-name=amdgcn -mllvm --amdhsa-code-object-version=6 --mcpu=gfx9-generic %t.o > %t-specify.txt ; RUN: llvm-objdump -D -mllvm --amdhsa-code-object-version=6 %t.o > %t-detect.txt diff --git a/llvm/test/tools/llvm-readobj/ELF/AMDGPU/elf-headers.test b/llvm/test/tools/llvm-readobj/ELF/AMDGPU/elf-headers.test index 78acbd657b7635..34c22dca3aa183 100644 --- a/llvm/test/tools/llvm-readobj/ELF/AMDGPU/elf-headers.test +++ b/llvm/test/tools/llvm-readobj/ELF/AMDGPU/elf-headers.test @@ -364,6 +364,9 @@ # RUN: yaml2obj %s -o %t -DABI_VERSION=4 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC # RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=4 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC -DFLAG_VALUE=0x51 +# RUN: yaml2obj %s -o %t -DABI_VERSION=4 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX9_4_GENERIC +# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=4 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX9_4_GENERIC -DFLAG_VALUE=0x5F + # RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1100 # RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1100 -DFLAG_VALUE=0x41 diff --git a/llvm/tools/llvm-readobj/ELFDumper.cpp b/llvm/tools/llvm-readobj/ELFDumper.cpp index 7d92a492d8b181..1012cd020d525e 100644 --- a/llvm/tools/llvm-readobj/ELFDumper.cpp +++ b/llvm/tools/llvm-readobj/ELFDumper.cpp @@ -1641,6 +1641,7 @@ const EnumEntry ElfHeaderMipsFlags[] = { ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1200, "gfx1200"), \ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1201, "gfx1201"), \ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC, "gfx9-generic"), \ + ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX9_4_GENERIC, "gfx9-4-generic"), \ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC, "gfx10-1-generic"), \ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC, "gfx10-3-generic"), \ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC, "gfx11-generic"), \