Skip to content

Commit

Permalink
[AMDGPU] Introduce a new generic target gfx9-4-generic (llvm#115190)
Browse files Browse the repository at this point in the history
This patch introduces a new generic target, `gfx9-4-generic`. Since it doesn’t support FP8 and XF32-related instructions, the patch includes several code reorganizations to accommodate these changes.
  • Loading branch information
shiltian authored Nov 13, 2024
1 parent 5a5502b commit de0fd64
Show file tree
Hide file tree
Showing 34 changed files with 245 additions and 13 deletions.
1 change: 1 addition & 0 deletions clang/include/clang/Basic/Cuda.h
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,7 @@ enum class OffloadArch {
GFX909,
GFX90a,
GFX90c,
GFX9_4_GENERIC,
GFX940,
GFX941,
GFX942,
Expand Down
1 change: 1 addition & 0 deletions clang/lib/Basic/Cuda.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,7 @@ static const OffloadArchToStringMap arch_names[] = {
GFX(909), // gfx909
GFX(90a), // gfx90a
GFX(90c), // gfx90c
{OffloadArch::GFX9_4_GENERIC, "gfx9-4-generic", "compute_amdgcn"},
GFX(940), // gfx940
GFX(941), // gfx941
GFX(942), // gfx942
Expand Down
1 change: 1 addition & 0 deletions clang/lib/Basic/Targets/NVPTX.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -205,6 +205,7 @@ void NVPTXTargetInfo::getTargetDefines(const LangOptions &Opts,
case OffloadArch::GFX909:
case OffloadArch::GFX90a:
case OffloadArch::GFX90c:
case OffloadArch::GFX9_4_GENERIC:
case OffloadArch::GFX940:
case OffloadArch::GFX941:
case OffloadArch::GFX942:
Expand Down
1 change: 1 addition & 0 deletions clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2301,6 +2301,7 @@ void CGOpenMPRuntimeGPU::processRequiresDirective(const OMPRequiresDecl *D) {
case OffloadArch::GFX909:
case OffloadArch::GFX90a:
case OffloadArch::GFX90c:
case OffloadArch::GFX9_4_GENERIC:
case OffloadArch::GFX940:
case OffloadArch::GFX941:
case OffloadArch::GFX942:
Expand Down
9 changes: 6 additions & 3 deletions clang/test/CodeGenOpenCL/amdgpu-features.cl
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,8 @@

// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx1103 -target-feature +wavefrontsize64 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1103-W64 %s

// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx9-4-generic -emit-llvm -o - %s | FileCheck --check-prefix=GFX9_4_Generic %s

// NOCPU-NOT: "target-features"
// NOCPU-WAVE32: "target-features"="+wavefrontsize32"
// NOCPU-WAVE64: "target-features"="+wavefrontsize64"
Expand All @@ -82,9 +84,10 @@
// GFX909: "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
// GFX90A: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
// GFX90C: "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
// GFX940: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+fp8-conversion-insts,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
// GFX941: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+fp8-conversion-insts,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
// GFX942: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+fp8-conversion-insts,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
// GFX940: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+fp8-conversion-insts,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64,+xf32-insts"
// GFX941: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+fp8-conversion-insts,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64,+xf32-insts"
// GFX942: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+fp8-conversion-insts,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64,+xf32-insts"
// GFX9_4_Generic: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
// GFX1010: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dpp,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"
// GFX1011: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"
// GFX1012: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"
Expand Down
38 changes: 38 additions & 0 deletions clang/test/CodeGenOpenCL/builtins-amdgcn-gfx9-4-generic-err.cl
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
// REQUIRES: amdgpu-registered-target

// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -target-cpu gfx9-4-generic -verify -emit-llvm -o - %s

typedef unsigned int uint;
typedef float float2 __attribute__((ext_vector_type(2)));
typedef float float4 __attribute__((ext_vector_type(4)));
typedef float float16 __attribute__((ext_vector_type(16)));
typedef int int2 __attribute__((ext_vector_type(2)));
typedef int int4 __attribute__((ext_vector_type(4)));

void builtin_test_unsupported(uint a, uint b, int a_int, long a_long, float a_float, float b_float,
int2 a_int2, int4 a_int4, float2 a_float2, float4 a_float4, float16 a_float16) {
a_float4 = __builtin_amdgcn_mfma_f32_16x16x32_bf8_bf8(a_long, a_long, a_float4, 0, 0, 0); // expected-error {{'__builtin_amdgcn_mfma_f32_16x16x32_bf8_bf8' needs target feature fp8-insts}}
a_float4 = __builtin_amdgcn_mfma_f32_16x16x32_bf8_fp8(a_long, a_long, a_float4, 0, 0, 0); // expected-error {{'__builtin_amdgcn_mfma_f32_16x16x32_bf8_fp8' needs target feature fp8-insts}}
a_float4 = __builtin_amdgcn_mfma_f32_16x16x32_fp8_bf8(a_long, a_long, a_float4, 0, 0, 0); // expected-error {{'__builtin_amdgcn_mfma_f32_16x16x32_fp8_bf8' needs target feature fp8-insts}}
a_float4 = __builtin_amdgcn_mfma_f32_16x16x32_fp8_fp8(a_long, a_long, a_float4, 0, 0, 0); // expected-error {{'__builtin_amdgcn_mfma_f32_16x16x32_fp8_fp8' needs target feature fp8-insts}}
a_float16 = __builtin_amdgcn_mfma_f32_32x32x16_bf8_bf8(a_long, a_long, a_float16, 0, 0, 0); // expected-error {{'__builtin_amdgcn_mfma_f32_32x32x16_bf8_bf8' needs target feature fp8-insts}}
a_float16 = __builtin_amdgcn_mfma_f32_32x32x16_bf8_fp8(a_long, a_long, a_float16, 0, 0, 0); // expected-error {{'__builtin_amdgcn_mfma_f32_32x32x16_bf8_fp8' needs target feature fp8-insts}}
a_float16 = __builtin_amdgcn_mfma_f32_32x32x16_fp8_bf8(a_long, a_long, a_float16, 0, 0, 0); // expected-error {{'__builtin_amdgcn_mfma_f32_32x32x16_fp8_bf8' needs target feature fp8-insts}}
a_float16 = __builtin_amdgcn_mfma_f32_32x32x16_fp8_fp8(a_long, a_long, a_float16, 0, 0, 0); // expected-error {{'__builtin_amdgcn_mfma_f32_32x32x16_fp8_fp8' needs target feature fp8-insts}}
a_float4 = __builtin_amdgcn_smfmac_f32_16x16x64_bf8_bf8(a_int2, a_int4, a_float4, a_int, 0, 0); // expected-error {{'__builtin_amdgcn_smfmac_f32_16x16x64_bf8_bf8' needs target feature fp8-insts}}
a_float4 = __builtin_amdgcn_smfmac_f32_16x16x64_bf8_fp8(a_int2, a_int4, a_float4, a_int, 0, 0); // expected-error {{'__builtin_amdgcn_smfmac_f32_16x16x64_bf8_fp8' needs target feature fp8-insts}}
a_float4 = __builtin_amdgcn_smfmac_f32_16x16x64_fp8_bf8(a_int2, a_int4, a_float4, a_int, 0, 0); // expected-error {{'__builtin_amdgcn_smfmac_f32_16x16x64_fp8_bf8' needs target feature fp8-insts}}
a_float4 = __builtin_amdgcn_smfmac_f32_16x16x64_fp8_fp8(a_int2, a_int4, a_float4, a_int, 0, 0); // expected-error {{'__builtin_amdgcn_smfmac_f32_16x16x64_fp8_fp8' needs target feature fp8-insts}}
a_float16 = __builtin_amdgcn_smfmac_f32_32x32x32_bf8_bf8(a_int2, a_int4, a_float16, a_int, 0, 0); // expected-error {{'__builtin_amdgcn_smfmac_f32_32x32x32_bf8_bf8' needs target feature fp8-insts}}
a_float16 = __builtin_amdgcn_smfmac_f32_32x32x32_bf8_fp8(a_int2, a_int4, a_float16, a_int, 0, 0); // expected-error {{'__builtin_amdgcn_smfmac_f32_32x32x32_bf8_fp8' needs target feature fp8-insts}}
a_float16 = __builtin_amdgcn_smfmac_f32_32x32x32_fp8_bf8(a_int2, a_int4, a_float16, a_int, 0, 0); // expected-error {{'__builtin_amdgcn_smfmac_f32_32x32x32_fp8_bf8' needs target feature fp8-insts}}
a_float16 = __builtin_amdgcn_smfmac_f32_32x32x32_fp8_fp8(a_int2, a_int4, a_float16, a_int, 0, 0); // expected-error {{'__builtin_amdgcn_smfmac_f32_32x32x32_fp8_fp8' needs target feature fp8-insts}}
b = __builtin_amdgcn_cvt_f32_bf8(a, 0); // expected-error {{'__builtin_amdgcn_cvt_f32_bf8' needs target feature fp8-conversion-insts}}
b = __builtin_amdgcn_cvt_f32_fp8(a, 1); // expected-error {{'__builtin_amdgcn_cvt_f32_fp8' needs target feature fp8-conversion-insts}}
a_float2 = __builtin_amdgcn_cvt_pk_f32_bf8(a, false); // expected-error {{'__builtin_amdgcn_cvt_pk_f32_bf8' needs target feature fp8-conversion-insts}}
a_float2 = __builtin_amdgcn_cvt_pk_f32_fp8(a, true); // expected-error {{'__builtin_amdgcn_cvt_pk_f32_fp8' needs target feature fp8-conversion-insts}}
b = __builtin_amdgcn_cvt_pk_bf8_f32(a_float, b_float, a, false); // expected-error {{'__builtin_amdgcn_cvt_pk_bf8_f32' needs target feature fp8-conversion-insts}}
b = __builtin_amdgcn_cvt_pk_fp8_f32(a_float, b_float, a, true); // expected-error {{'__builtin_amdgcn_cvt_pk_fp8_f32' needs target feature fp8-conversion-insts}}
b = __builtin_amdgcn_cvt_sr_bf8_f32(a_float, b_float, a, 2); // expected-error {{'__builtin_amdgcn_cvt_sr_bf8_f32' needs target feature fp8-conversion-insts}}
b = __builtin_amdgcn_cvt_sr_fp8_f32(a_float, b_float, a, 3); // expected-error {{'__builtin_amdgcn_cvt_sr_fp8_f32' needs target feature fp8-conversion-insts}}
}
1 change: 1 addition & 0 deletions clang/test/Driver/amdgpu-macros.cl
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,7 @@
// RUN: %clang -E -dM -target amdgcn -mcpu=gfx1201 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=32 -DCPU=gfx1201 -DFAMILY=GFX12

// RUN: %clang -E -dM -target amdgcn -mcpu=gfx9-generic %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=64 -DCPU=gfx9_generic -DFAMILY=GFX9
// RUN: %clang -E -dM -target amdgcn -mcpu=gfx9-4-generic %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=64 -DCPU=gfx9_4_generic -DFAMILY=GFX9
// RUN: %clang -E -dM -target amdgcn -mcpu=gfx10-1-generic %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=32 -DCPU=gfx10_1_generic -DFAMILY=GFX10
// RUN: %clang -E -dM -target amdgcn -mcpu=gfx10-3-generic %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=32 -DCPU=gfx10_3_generic -DFAMILY=GFX10
// RUN: %clang -E -dM -target amdgcn -mcpu=gfx11-generic %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=32 -DCPU=gfx11_generic -DFAMILY=GFX11
Expand Down
2 changes: 2 additions & 0 deletions clang/test/Driver/amdgpu-mcpu.cl
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,7 @@
// RUN: %clang -### -target amdgcn -mcpu=gfx1201 %s 2>&1 | FileCheck --check-prefix=GFX1201 %s

// RUN: %clang -### -target amdgcn -mcpu=gfx9-generic %s 2>&1 | FileCheck --check-prefix=GFX9_GENERIC %s
// RUN: %clang -### -target amdgcn -mcpu=gfx9-4-generic %s 2>&1 | FileCheck --check-prefix=GFX9_4_GENERIC %s
// RUN: %clang -### -target amdgcn -mcpu=gfx10-1-generic %s 2>&1 | FileCheck --check-prefix=GFX10_1_GENERIC %s
// RUN: %clang -### -target amdgcn -mcpu=gfx10-3-generic %s 2>&1 | FileCheck --check-prefix=GFX10_3_GENERIC %s
// RUN: %clang -### -target amdgcn -mcpu=gfx11-generic %s 2>&1 | FileCheck --check-prefix=GFX11_GENERIC %s
Expand Down Expand Up @@ -172,6 +173,7 @@
// GFX1201: "-target-cpu" "gfx1201"

// GFX9_GENERIC: "-target-cpu" "gfx9-generic"
// GFX9_4_GENERIC: "-target-cpu" "gfx9-4-generic"
// GFX10_1_GENERIC: "-target-cpu" "gfx10-1-generic"
// GFX10_3_GENERIC: "-target-cpu" "gfx10-3-generic"
// GFX11_GENERIC: "-target-cpu" "gfx11-generic"
Expand Down
1 change: 1 addition & 0 deletions clang/test/Misc/target-invalid-cpu-note/amdgcn.c
Original file line number Diff line number Diff line change
Expand Up @@ -74,4 +74,5 @@
// CHECK-SAME: {{^}}, gfx10-3-generic
// CHECK-SAME: {{^}}, gfx11-generic
// CHECK-SAME: {{^}}, gfx12-generic
// CHECK-SAME: {{^}}, gfx9-4-generic
// CHECK-SAME: {{$}}
1 change: 1 addition & 0 deletions clang/test/Misc/target-invalid-cpu-note/nvptx.c
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@
// CHECK-SAME: {{^}}, gfx909
// CHECK-SAME: {{^}}, gfx90a
// CHECK-SAME: {{^}}, gfx90c
// CHECK-SAME: {{^}}, gfx9-4-generic
// CHECK-SAME: {{^}}, gfx940
// CHECK-SAME: {{^}}, gfx941
// CHECK-SAME: {{^}}, gfx942
Expand Down
7 changes: 7 additions & 0 deletions llvm/docs/AMDGPUUsage.rst
Original file line number Diff line number Diff line change
Expand Up @@ -576,6 +576,12 @@ Generic processor code objects are versioned. See :ref:`amdgpu-generic-processor
- ``v_dot2_f32_f16``


``gfx9-4-generic`` ``amdgcn`` - ``gfx940`` - xnack - Absolute flat FP8 and BF8 instructions,
- ``gfx941`` - sramecc scratch FP8 and BF8 conversion instructions,
- ``gfx942`` as well as instructions with XF32 format support
are not available.


``gfx10-1-generic`` ``amdgcn`` - ``gfx1010`` - xnack - Absolute flat - The following instructions are
- ``gfx1011`` - wavefrontsize64 scratch not available on ``gfx1011``
- ``gfx1012`` - cumode and ``gfx1012``
Expand Down Expand Up @@ -2180,6 +2186,7 @@ The AMDGPU backend uses the following ELF header:
*reserved* 0x057 Reserved.
``EF_AMDGPU_MACH_AMDGCN_GFX1153`` 0x058 ``gfx1153``.
``EF_AMDGPU_MACH_AMDGCN_GFX12_GENERIC`` 0x059 ``gfx12-generic``
``EF_AMDGPU_MACH_AMDGCN_GFX9_4_GENERIC`` 0x05f ``gfx9-4-generic``
========================================== ========== =============================

Sections
Expand Down
3 changes: 2 additions & 1 deletion llvm/include/llvm/BinaryFormat/ELF.h
Original file line number Diff line number Diff line change
Expand Up @@ -822,11 +822,12 @@ enum : unsigned {
EF_AMDGPU_MACH_AMDGCN_RESERVED_0X57 = 0x057,
EF_AMDGPU_MACH_AMDGCN_GFX1153 = 0x058,
EF_AMDGPU_MACH_AMDGCN_GFX12_GENERIC = 0x059,
EF_AMDGPU_MACH_AMDGCN_GFX9_4_GENERIC = 0x05f,
// clang-format on

// First/last AMDGCN-based processors.
EF_AMDGPU_MACH_AMDGCN_FIRST = EF_AMDGPU_MACH_AMDGCN_GFX600,
EF_AMDGPU_MACH_AMDGCN_LAST = EF_AMDGPU_MACH_AMDGCN_GFX12_GENERIC,
EF_AMDGPU_MACH_AMDGCN_LAST = EF_AMDGPU_MACH_AMDGCN_GFX9_4_GENERIC,

// Indicates if the "xnack" target feature is enabled for all code contained
// in the object.
Expand Down
3 changes: 2 additions & 1 deletion llvm/include/llvm/TargetParser/TargetParser.h
Original file line number Diff line number Diff line change
Expand Up @@ -119,9 +119,10 @@ enum GPUKind : uint32_t {
GK_GFX10_3_GENERIC = 194,
GK_GFX11_GENERIC = 195,
GK_GFX12_GENERIC = 196,
GK_GFX9_4_GENERIC = 197,

GK_AMDGCN_GENERIC_FIRST = GK_GFX9_GENERIC,
GK_AMDGCN_GENERIC_LAST = GK_GFX12_GENERIC,
GK_AMDGCN_GENERIC_LAST = GK_GFX9_4_GENERIC,
};

/// Instruction set architecture version.
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/Object/ELFObjectFile.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -602,6 +602,8 @@ StringRef ELFObjectFileBase::getAMDGPUCPUName() const {
// Generic AMDGCN targets
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC:
return "gfx9-generic";
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX9_4_GENERIC:
return "gfx9-4-generic";
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC:
return "gfx10-1-generic";
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC:
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/ObjectYAML/ELFYAML.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -631,6 +631,7 @@ void ScalarBitSetTraits<ELFYAML::ELF_EF>::bitset(IO &IO,
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1200, EF_AMDGPU_MACH);
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1201, EF_AMDGPU_MACH);
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC, EF_AMDGPU_MACH);
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX9_4_GENERIC, EF_AMDGPU_MACH);
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC, EF_AMDGPU_MACH);
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC, EF_AMDGPU_MACH);
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC, EF_AMDGPU_MACH);
Expand Down
31 changes: 24 additions & 7 deletions llvm/lib/Target/AMDGPU/AMDGPU.td
Original file line number Diff line number Diff line change
Expand Up @@ -1451,11 +1451,7 @@ def FeatureISAVersion9_4_Common : FeatureSet<
FeatureDPALU_DPP,
FeaturePackedFP32Ops,
FeatureMAIInsts,
FeatureFP8Insts,
FeatureFP8ConversionInsts,
FeatureCvtFP8VOP1Bug,
FeaturePkFmacF16Inst,
FeatureXF32Insts,
FeatureAtomicFaddRtnInsts,
FeatureAtomicFaddNoRtnInsts,
FeatureAtomicBufferGlobalPkAddF16Insts,
Expand All @@ -1476,15 +1472,36 @@ def FeatureISAVersion9_4_Common : FeatureSet<

def FeatureISAVersion9_4_0 : FeatureSet<
!listconcat(FeatureISAVersion9_4_Common.Features,
[FeatureForceStoreSC0SC1])>;
[
FeatureForceStoreSC0SC1,
FeatureFP8Insts,
FeatureFP8ConversionInsts,
FeatureCvtFP8VOP1Bug,
FeatureXF32Insts
])>;

def FeatureISAVersion9_4_1 : FeatureSet<
!listconcat(FeatureISAVersion9_4_Common.Features,
[FeatureForceStoreSC0SC1])>;
[
FeatureForceStoreSC0SC1,
FeatureFP8Insts,
FeatureFP8ConversionInsts,
FeatureCvtFP8VOP1Bug,
FeatureXF32Insts
])>;

def FeatureISAVersion9_4_2 : FeatureSet<
!listconcat(FeatureISAVersion9_4_Common.Features,
[])>;
[
FeatureFP8Insts,
FeatureFP8ConversionInsts,
FeatureCvtFP8VOP1Bug,
FeatureXF32Insts
])>;

def FeatureISAVersion9_4_Generic : FeatureSet<
!listconcat(FeatureISAVersion9_4_Common.Features,
[FeatureRequiresCOV6])>;

def FeatureISAVersion10_Common : FeatureSet<
[FeatureGFX10,
Expand Down
5 changes: 5 additions & 0 deletions llvm/lib/Target/AMDGPU/GCNProcessors.td
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,11 @@ def : ProcessorModel<"gfx9-generic", SIQuarterSpeedModel,
FeatureISAVersion9_Generic.Features
>;

// [gfx940, gfx941, gfx942]
def : ProcessorModel<"gfx9-4-generic", SIDPGFX940FullSpeedModel,
FeatureISAVersion9_4_Generic.Features
>;

//===----------------------------------------------------------------------===//
// GCN GFX10.
//===----------------------------------------------------------------------===//
Expand Down
Loading

0 comments on commit de0fd64

Please sign in to comment.