[AMDGPU][MLIR] Replace gfx940 and gfx941 with gfx942 in MLIR #125836

ritter-x2a · 2025-02-05T10:59:40Z

gfx940 and gfx941 are no longer supported. This is one of a series of
PRs to remove them from the code base.

For SWDEV-512631

ritter-x2a · 2025-02-05T11:00:03Z

llvmbot · 2025-02-05T11:03:27Z

@llvm/pr-subscribers-mlir
@llvm/pr-subscribers-mlir-llvm
@llvm/pr-subscribers-backend-amdgpu
@llvm/pr-subscribers-mlir-gpu

@llvm/pr-subscribers-mlir-amdgpu

Author: Fabian Ritter (ritter-x2a)

Changes

gfx940 and gfx941 are no longer supported. This is one of a series of
PRs to remove them from the code base.

For SWDEV-512631

Full diff: https://github.com/llvm/llvm-project/pull/125836.diff

10 Files Affected:

(modified) mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td (+1-1)
(modified) mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td (+4-4)
(modified) mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp (+11-11)
(modified) mlir/lib/Conversion/ArithToAMDGPU/ArithToAMDGPU.cpp (+1-1)
(modified) mlir/lib/Dialect/AMDGPU/Transforms/EmulateAtomics.cpp (+1-7)
(modified) mlir/test/Conversion/AMDGPUToROCDL/8-bit-floats.mlir (+1-1)
(modified) mlir/test/Conversion/AMDGPUToROCDL/mfma.mlir (+1-1)
(modified) mlir/test/Conversion/ArithToAMDGPU/8-bit-float-saturation.mlir (+1-1)
(modified) mlir/test/Conversion/ArithToAMDGPU/8-bit-floats.mlir (+1-1)
(modified) mlir/unittests/Dialect/AMDGPU/AMDGPUUtilsTest.cpp (+8-12)

diff --git a/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td b/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td
index 69745addfd748ec..24f541587cba88a 100644
--- a/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td
+++ b/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td
@@ -602,7 +602,7 @@ def AMDGPU_MFMAOp :
     order (that is, v[0] will go to arg[7:0], v[1] to arg[15:8] and so on).
 
     The negateA, negateB, and negateC flags are only supported for double-precision
-    operations on gfx940+.
+    operations on gfx942+.
   }];
   let assemblyFormat = [{
     $sourceA `*` $sourceB `+` $destC
diff --git a/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td b/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td
index 974712c581537a9..8b4f7e49f573d68 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td
@@ -382,11 +382,11 @@ def ROCDL_mfma_f32_16x16x4bf16_1k : ROCDL_Mfma_IntrOp<"mfma.f32.16x16x4bf16.1k">
 def ROCDL_mfma_f32_4x4x4bf16_1k : ROCDL_Mfma_IntrOp<"mfma.f32.4x4x4bf16.1k">;
 def ROCDL_mfma_f32_32x32x8bf16_1k : ROCDL_Mfma_IntrOp<"mfma.f32.32x32x8bf16.1k">;
 def ROCDL_mfma_f32_16x16x16bf16_1k : ROCDL_Mfma_IntrOp<"mfma.f32.16x16x16bf16.1k">;
-// Note: in gfx940, unlike in gfx90a, the f64 xdlops use the "blgp" argument as a
-// NEG bitfield. See IntrinsicsAMDGPU.td for more info.
+// Note: in gfx942, unlike in gfx90a, the f64 xdlops use the "blgp" argument as
+// a NEG bitfield. See IntrinsicsAMDGPU.td for more info.
 def ROCDL_mfma_f64_16x16x4f64 : ROCDL_Mfma_IntrOp<"mfma.f64.16x16x4f64">;
 def ROCDL_mfma_f64_4x4x4f64 : ROCDL_Mfma_IntrOp<"mfma.f64.4x4x4f64">;
-// New in gfx940.
+// New in gfx942.
 def ROCDL_mfma_i32_16x16x32_i8 : ROCDL_Mfma_IntrOp<"mfma.i32.16x16x32.i8">;
 def ROCDL_mfma_i32_32x32x16_i8 : ROCDL_Mfma_IntrOp<"mfma.i32.32x32x16.i8">;
 def ROCDL_mfma_f32_16x16x8_xf32 : ROCDL_Mfma_IntrOp<"mfma.f32.16x16x8.xf32">;
@@ -409,7 +409,7 @@ def ROCDL_mfma_f32_32x32x16_f16 : ROCDL_Mfma_IntrOp<"mfma.f32.32x32x16.f16">;
 def ROCDL_mfma_scale_f32_16x16x128_f8f6f4 : ROCDL_Mfma_OO_IntrOp<"mfma.scale.f32.16x16x128.f8f6f4", [0,1]>;
 def ROCDL_mfma_scale_f32_32x32x64_f8f6f4 : ROCDL_Mfma_OO_IntrOp<"mfma.scale.f32.32x32x64.f8f6f4", [0,1]>;
 
-// 2:4 Sparsity ops (GFX940)
+// 2:4 Sparsity ops (GFX942)
 def ROCDL_smfmac_f32_16x16x32_f16 : ROCDL_Mfma_IntrOp<"smfmac.f32.16x16x32.f16">;
 def ROCDL_smfmac_f32_32x32x16_f16 : ROCDL_Mfma_IntrOp<"smfmac.f32.32x32x16.f16">;
 def ROCDL_smfmac_f32_16x16x32_bf16 : ROCDL_Mfma_IntrOp<"smfmac.f32.16x16x32.bf16">;
diff --git a/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp b/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp
index 51f5d7a161b9030..f67d174d7d7c9a6 100644
--- a/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp
+++ b/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp
@@ -80,7 +80,7 @@ namespace {
 // Define commonly used chipsets versions for convenience.
 constexpr Chipset kGfx908 = Chipset(9, 0, 8);
 constexpr Chipset kGfx90a = Chipset(9, 0, 0xa);
-constexpr Chipset kGfx940 = Chipset(9, 4, 0);
+constexpr Chipset kGfx942 = Chipset(9, 4, 2);
 
 /// Define lowering patterns for raw buffer ops
 template <typename GpuOp, typename Intrinsic>
@@ -483,7 +483,7 @@ static std::optional<StringRef> mfmaOpToIntrinsic(MFMAOp mfma,
     destElem = destType.getElementType();
 
   if (sourceElem.isF32() && destElem.isF32()) {
-    if (mfma.getReducePrecision() && chipset >= kGfx940) {
+    if (mfma.getReducePrecision() && chipset >= kGfx942) {
       if (m == 32 && n == 32 && k == 4 && b == 1)
         return ROCDL::mfma_f32_32x32x4_xf32::getOperationName();
       if (m == 16 && n == 16 && k == 8 && b == 1)
@@ -551,9 +551,9 @@ static std::optional<StringRef> mfmaOpToIntrinsic(MFMAOp mfma,
       return ROCDL::mfma_i32_32x32x8i8::getOperationName();
     if (m == 16 && n == 16 && k == 16 && b == 1)
       return ROCDL::mfma_i32_16x16x16i8::getOperationName();
-    if (m == 32 && n == 32 && k == 16 && b == 1 && chipset >= kGfx940)
+    if (m == 32 && n == 32 && k == 16 && b == 1 && chipset >= kGfx942)
       return ROCDL::mfma_i32_32x32x16_i8::getOperationName();
-    if (m == 16 && n == 16 && k == 32 && b == 1 && chipset >= kGfx940)
+    if (m == 16 && n == 16 && k == 32 && b == 1 && chipset >= kGfx942)
       return ROCDL::mfma_i32_16x16x32_i8::getOperationName();
   }
 
@@ -565,7 +565,7 @@ static std::optional<StringRef> mfmaOpToIntrinsic(MFMAOp mfma,
   }
 
   if (isa<Float8E5M2FNUZType>(sourceElem) && destElem.isF32() &&
-      chipset >= kGfx940) {
+      chipset >= kGfx942) {
     // Known to be correct because there are no scalar f8 instructions and
     // because a length mismatch will have been caught by the verifier.
     Type sourceBElem =
@@ -585,7 +585,7 @@ static std::optional<StringRef> mfmaOpToIntrinsic(MFMAOp mfma,
   }
 
   if (isa<Float8E4M3FNUZType>(sourceElem) && destElem.isF32() &&
-      chipset >= kGfx940) {
+      chipset >= kGfx942) {
     Type sourceBElem =
         cast<VectorType>(mfma.getSourceB().getType()).getElementType();
     if (m == 16 && n == 16 && k == 32 && b == 1) {
@@ -653,8 +653,8 @@ struct MFMAOpLowering : public ConvertOpToLLVMPattern<MFMAOp> {
       return op->emitOpError("MFMA only supported on gfx908+");
     uint32_t getBlgpField = static_cast<uint32_t>(op.getBlgp());
     if (op.getNegateA() || op.getNegateB() || op.getNegateC()) {
-      if (chipset < kGfx940)
-        return op.emitOpError("negation unsupported on older than gfx940");
+      if (chipset < kGfx942)
+        return op.emitOpError("negation unsupported on older than gfx942");
       getBlgpField |=
           op.getNegateA() | (op.getNegateB() << 1) | (op.getNegateC() << 2);
     }
@@ -775,7 +775,7 @@ LogicalResult ExtPackedFp8OpLowering::matchAndRewrite(
     ExtPackedFp8Op op, ExtPackedFp8OpAdaptor adaptor,
     ConversionPatternRewriter &rewriter) const {
   Location loc = op.getLoc();
-  if (chipset.majorVersion != 9 || chipset < kGfx940)
+  if (chipset.majorVersion != 9 || chipset < kGfx942)
     return rewriter.notifyMatchFailure(
         loc, "Fp8 conversion instructions are not available on target "
              "architecture and their emulation is not implemented");
@@ -819,7 +819,7 @@ LogicalResult PackedTrunc2xFp8OpLowering::matchAndRewrite(
     PackedTrunc2xFp8Op op, PackedTrunc2xFp8OpAdaptor adaptor,
     ConversionPatternRewriter &rewriter) const {
   Location loc = op.getLoc();
-  if (chipset.majorVersion != 9 || chipset < kGfx940)
+  if (chipset.majorVersion != 9 || chipset < kGfx942)
     return rewriter.notifyMatchFailure(
         loc, "Fp8 conversion instructions are not available on target "
              "architecture and their emulation is not implemented");
@@ -856,7 +856,7 @@ LogicalResult PackedStochRoundFp8OpLowering::matchAndRewrite(
     PackedStochRoundFp8Op op, PackedStochRoundFp8OpAdaptor adaptor,
     ConversionPatternRewriter &rewriter) const {
   Location loc = op.getLoc();
-  if (chipset.majorVersion != 9 || chipset < kGfx940)
+  if (chipset.majorVersion != 9 || chipset < kGfx942)
     return rewriter.notifyMatchFailure(
         loc, "Fp8 conversion instructions are not available on target "
              "architecture and their emulation is not implemented");
diff --git a/mlir/lib/Conversion/ArithToAMDGPU/ArithToAMDGPU.cpp b/mlir/lib/Conversion/ArithToAMDGPU/ArithToAMDGPU.cpp
index 33370566996eee5..338a2124149a844 100644
--- a/mlir/lib/Conversion/ArithToAMDGPU/ArithToAMDGPU.cpp
+++ b/mlir/lib/Conversion/ArithToAMDGPU/ArithToAMDGPU.cpp
@@ -381,7 +381,7 @@ void ArithToAMDGPUConversionPass::runOnOperation() {
   }
 
   bool convertFP8Arithmetic =
-      maybeChipset->majorVersion == 9 && *maybeChipset >= Chipset(9, 4, 0);
+      maybeChipset->majorVersion == 9 && *maybeChipset >= Chipset(9, 4, 2);
   arith::populateArithToAMDGPUConversionPatterns(
       patterns, convertFP8Arithmetic, saturateFP8Truncf, allowPackedF16Rtz,
       *maybeChipset);
diff --git a/mlir/lib/Dialect/AMDGPU/Transforms/EmulateAtomics.cpp b/mlir/lib/Dialect/AMDGPU/Transforms/EmulateAtomics.cpp
index 77f972e0e589445..7459a6503cddfb4 100644
--- a/mlir/lib/Dialect/AMDGPU/Transforms/EmulateAtomics.cpp
+++ b/mlir/lib/Dialect/AMDGPU/Transforms/EmulateAtomics.cpp
@@ -179,7 +179,7 @@ void mlir::amdgpu::populateAmdgpuEmulateAtomicsPatterns(
   }
   // gfx9 has no to a very limited support for floating-point min and max.
   if (chipset.majorVersion == 9) {
-    if (chipset >= Chipset(9, 0, 0xa) && chipset != Chipset(9, 4, 1)) {
+    if (chipset >= Chipset(9, 0, 0xa)) {
       // gfx90a supports f64 max (and min, but we don't have a min wrapper right
       // now) but all other types need to be emulated.
       target.addDynamicallyLegalOp<RawBufferAtomicFmaxOp>(
@@ -189,12 +189,6 @@ void mlir::amdgpu::populateAmdgpuEmulateAtomicsPatterns(
     } else {
       target.addIllegalOp<RawBufferAtomicFmaxOp>();
     }
-    if (chipset == Chipset(9, 4, 1)) {
-      // gfx941 requires non-CAS atomics to be implemented with CAS loops.
-      // The workaround here mirrors HIP and OpenMP.
-      target.addIllegalOp<RawBufferAtomicFaddOp, RawBufferAtomicFmaxOp,
-                          RawBufferAtomicSmaxOp, RawBufferAtomicUminOp>();
-    }
   }
   patterns.add<
       RawBufferAtomicByCasPattern<RawBufferAtomicFaddOp, arith::AddFOp>,
diff --git a/mlir/test/Conversion/AMDGPUToROCDL/8-bit-floats.mlir b/mlir/test/Conversion/AMDGPUToROCDL/8-bit-floats.mlir
index 7818a525d17b537..a313aaffdf5ccca 100644
--- a/mlir/test/Conversion/AMDGPUToROCDL/8-bit-floats.mlir
+++ b/mlir/test/Conversion/AMDGPUToROCDL/8-bit-floats.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -convert-amdgpu-to-rocdl=chipset=gfx940 | FileCheck %s
+// RUN: mlir-opt %s -convert-amdgpu-to-rocdl=chipset=gfx942 | FileCheck %s
 
 // CHECK-LABEL: func @ext_scalar
 // CHECK: [[V:%.+]] = builtin.unrealized_conversion_cast %{{.+}} : f8E5M2FNUZ to i8
diff --git a/mlir/test/Conversion/AMDGPUToROCDL/mfma.mlir b/mlir/test/Conversion/AMDGPUToROCDL/mfma.mlir
index f8a60d37801ebec..52db1421dc3c686 100644
--- a/mlir/test/Conversion/AMDGPUToROCDL/mfma.mlir
+++ b/mlir/test/Conversion/AMDGPUToROCDL/mfma.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -convert-amdgpu-to-rocdl=chipset=gfx940 -cse | FileCheck %s
+// RUN: mlir-opt %s -convert-amdgpu-to-rocdl=chipset=gfx942 -cse | FileCheck %s
 func.func @mfma_to_rocdl(%arg0 : f32, %arg1 : vector<32xf32>,
                     %arg2 : vector<16xf32>, %arg3 : vector<4xf32>,
                     %arg4 : vector<4xf16>, %arg5 : vector<4xi8>,
diff --git a/mlir/test/Conversion/ArithToAMDGPU/8-bit-float-saturation.mlir b/mlir/test/Conversion/ArithToAMDGPU/8-bit-float-saturation.mlir
index cd921da2294e13b..07a428566d48852 100644
--- a/mlir/test/Conversion/ArithToAMDGPU/8-bit-float-saturation.mlir
+++ b/mlir/test/Conversion/ArithToAMDGPU/8-bit-float-saturation.mlir
@@ -1,5 +1,5 @@
 // RUN: mlir-opt --split-input-file %s \
-// RUN:   --pass-pipeline='builtin.module(func.func(convert-arith-to-amdgpu{chipset=gfx940 saturate-fp8-truncf=true}))' \
+// RUN:   --pass-pipeline='builtin.module(func.func(convert-arith-to-amdgpu{chipset=gfx942 saturate-fp8-truncf=true}))' \
 // RUN:   | FileCheck %s
 
 // CHECK-LABEL: func.func @scalar_trunc
diff --git a/mlir/test/Conversion/ArithToAMDGPU/8-bit-floats.mlir b/mlir/test/Conversion/ArithToAMDGPU/8-bit-floats.mlir
index bd90facb6154408..da834ff4a111526 100644
--- a/mlir/test/Conversion/ArithToAMDGPU/8-bit-floats.mlir
+++ b/mlir/test/Conversion/ArithToAMDGPU/8-bit-floats.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt --split-input-file %s -convert-arith-to-amdgpu="chipset=gfx940" | FileCheck %s
+// RUN: mlir-opt --split-input-file %s -convert-arith-to-amdgpu="chipset=gfx942" | FileCheck %s
 
 // CHECK-LABEL: func.func @scalar_ext
 // CHECK-SAME: ([[V:%.+]]: f8E5M2FNUZ)
diff --git a/mlir/unittests/Dialect/AMDGPU/AMDGPUUtilsTest.cpp b/mlir/unittests/Dialect/AMDGPU/AMDGPUUtilsTest.cpp
index 976ff2e7382edf7..570d56f3c6ff139 100644
--- a/mlir/unittests/Dialect/AMDGPU/AMDGPUUtilsTest.cpp
+++ b/mlir/unittests/Dialect/AMDGPU/AMDGPUUtilsTest.cpp
@@ -19,11 +19,11 @@ TEST(ChipsetTest, Parsing) {
   EXPECT_EQ(chipset->minorVersion, 0u);
   EXPECT_EQ(chipset->steppingVersion, 0xau);
 
-  chipset = Chipset::parse("gfx940");
+  chipset = Chipset::parse("gfx942");
   ASSERT_TRUE(succeeded(chipset));
   EXPECT_EQ(chipset->majorVersion, 9u);
   EXPECT_EQ(chipset->minorVersion, 4u);
-  EXPECT_EQ(chipset->steppingVersion, 0u);
+  EXPECT_EQ(chipset->steppingVersion, 2u);
 
   chipset = Chipset::parse("gfx1103");
   ASSERT_TRUE(succeeded(chipset));
@@ -36,30 +36,26 @@ TEST(ChipsetTest, ParsingInvalid) {
   EXPECT_TRUE(failed(Chipset::parse("navi33")));
   EXPECT_TRUE(failed(Chipset::parse("rdna2")));
   EXPECT_TRUE(failed(Chipset::parse("sm_80")));
-  EXPECT_TRUE(failed(Chipset::parse("GFX940")));
-  EXPECT_TRUE(failed(Chipset::parse("Gfx940")));
+  EXPECT_TRUE(failed(Chipset::parse("GFX942")));
+  EXPECT_TRUE(failed(Chipset::parse("Gfx942")));
   EXPECT_TRUE(failed(Chipset::parse("gfx9")));
-  EXPECT_TRUE(failed(Chipset::parse("gfx_940")));
-  EXPECT_TRUE(failed(Chipset::parse("gfx940_")));
+  EXPECT_TRUE(failed(Chipset::parse("gfx_942")));
+  EXPECT_TRUE(failed(Chipset::parse("gfx942_")));
   EXPECT_TRUE(failed(Chipset::parse("gfxmeow")));
   EXPECT_TRUE(failed(Chipset::parse("gfx1fff")));
 }
 
 TEST(ChipsetTest, Comparison) {
-  EXPECT_EQ(Chipset(9, 4, 0), Chipset(9, 4, 0));
-  EXPECT_NE(Chipset(9, 4, 0), Chipset(9, 4, 2));
+  EXPECT_EQ(Chipset(9, 4, 2), Chipset(9, 4, 2));
   EXPECT_NE(Chipset(9, 0, 0), Chipset(10, 0, 0));
 
   EXPECT_LT(Chipset(9, 0, 0), Chipset(10, 0, 0));
   EXPECT_LT(Chipset(9, 0, 0), Chipset(9, 4, 2));
-  EXPECT_LE(Chipset(9, 4, 1), Chipset(9, 4, 1));
   EXPECT_FALSE(Chipset(9, 4, 2) < Chipset(9, 4, 2));
-  EXPECT_FALSE(Chipset(9, 4, 2) < Chipset(9, 4, 0));
 
   EXPECT_GT(Chipset(9, 0, 0xa), Chipset(9, 0, 8));
   EXPECT_GE(Chipset(9, 0, 0xa), Chipset(9, 0, 0xa));
-  EXPECT_FALSE(Chipset(9, 4, 1) >= Chipset(9, 4, 2));
-  EXPECT_FALSE(Chipset(9, 0, 0xa) >= Chipset(9, 4, 0));
+  EXPECT_FALSE(Chipset(9, 0, 0xa) >= Chipset(9, 4, 2));
 }
 
 } // namespace

arsenm

These seem too aggressive. we're not deleting the 940 target, and all the same properties apply

mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td

mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td

ritter-x2a · 2025-02-05T12:29:59Z

As far as I'm aware deleting the 940 target is the goal (the PRs for that don't exist yet, but should end up in this stack)

krzysz00 · 2025-02-06T04:25:34Z

Ok so I'm going to take issue with jumping the constants in a bunch of tests to gfx8942. Unless we're planning to completely wipe gfx940/1 from the codebase - which seems extremely unusual and like bad practice ... gfx940 and 941 are real targets that did really exist and might need code compiled for them in the future - we should still allow compiling for there targets while updating tests and such to gfx942.

And as to the atomics emulation pass ... what's the harm in keeping some old workaround around?

krzysz00

Ok yeah, just to be safe, blocker on actually making things that work fine unsupported do this doesn't land overnight, ping me tomorrow

ritter-x2a · 2025-02-06T07:39:05Z

Ok so I'm going to take issue with jumping the constants in a bunch of tests to gfx8942. Unless we're planning to completely wipe gfx940/1 from the codebase - which seems extremely unusual and like bad practice ... gfx940 and 941 are real targets that did really exist and might need code compiled for them in the future - we should still allow compiling for there targets while updating tests and such to gfx942.

And as to the atomics emulation pass ... what's the harm in keeping some old workaround around?

I'll bring this issue up for discussion in the next compiler team meeting.

krzysz00 · 2025-02-06T09:13:11Z

I'll try and remember to go to that

krzysz00

Per further discussion, given the broader context of these changes, this is fine assuming @arsenm's comments about the documentation are addressed.

kuhar

Since this essentially breaks logic for gfx940 and gfx941, should we assert in code like Chipset that these are not used and silently miscompiled?

ritter-x2a · 2025-02-11T10:46:28Z

Since this essentially breaks logic for gfx940 and gfx941, should we assert in code like Chipset that these are not used and silently miscompiled?

@kuhar as far as I can see that would be the first check for invalid targets in this part of the code base. I don't think we should treat gfx940/gfx941 differently than any other invalid target. We will only merge this PR together with the rest of the stack, which will remove gfx940\gfx941 code generation from the rest of LLVM as well.

ritter-x2a · 2025-02-19T08:48:56Z

Merge activity

Feb 19, 3:48 AM EST: A user started a stack merge that includes this pull request via Graphite.
Feb 19, 3:59 AM EST: Graphite rebased this pull request as part of a merge.
Feb 19, 4:03 AM EST: Graphite rebased this pull request as part of a merge.
Feb 19, 4:05 AM EST: A user merged this pull request with Graphite.

gfx940 and gfx941 are no longer supported. This is one of a series of PRs to remove them from the code base. For SWDEV-512631

This was referenced Feb 5, 2025

[AMDGPU] Add missing gfx architectures to AddFlangOffloadRuntime.cmake #125827

Merged

[AMDGPU][NFC] Replace gfx940 and gfx941 with gfx942 in llvm/test #125711

Merged

[AMDGPU] Replace gfx940 and gfx941 with gfx942 in offload and libclc #125826

Merged

ritter-x2a requested review from krzysz00 and kuhar February 5, 2025 11:01

ritter-x2a marked this pull request as ready for review February 5, 2025 11:02

llvmbot added backend:AMDGPU mlir:llvm mlir:gpu mlir mlir:amdgpu labels Feb 5, 2025

arsenm reviewed Feb 5, 2025

View reviewed changes

mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td Outdated Show resolved Hide resolved

mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td Outdated Show resolved Hide resolved

kuhar mentioned this pull request Feb 6, 2025

Refresh compile/run examples in deployment configuration guides. iree-org/iree#19920

Merged

krzysz00 requested changes Feb 6, 2025

View reviewed changes

krzysz00 approved these changes Feb 10, 2025

View reviewed changes

ritter-x2a force-pushed the users/ritter-x2a/rm-gfx940-gfx941-mlir branch from 656adb4 to 0c95e13 Compare February 11, 2025 09:26

ritter-x2a force-pushed the users/ritter-x2a/add-missing-flang-offload-archs branch from 97f4e5a to 175aff5 Compare February 11, 2025 09:26

kuhar reviewed Feb 11, 2025

View reviewed changes

ritter-x2a force-pushed the users/ritter-x2a/rm-gfx940-gfx941-mlir branch from 0c95e13 to 8dd1c0a Compare February 13, 2025 09:31

ritter-x2a force-pushed the users/ritter-x2a/add-missing-flang-offload-archs branch from 175aff5 to 55e8283 Compare February 13, 2025 09:31

ritter-x2a force-pushed the users/ritter-x2a/rm-gfx940-gfx941-mlir branch from 8dd1c0a to c27c8b4 Compare February 13, 2025 14:57

ritter-x2a force-pushed the users/ritter-x2a/add-missing-flang-offload-archs branch 2 times, most recently from 51b377d to 876dba7 Compare February 18, 2025 08:22

ritter-x2a force-pushed the users/ritter-x2a/rm-gfx940-gfx941-mlir branch from c27c8b4 to 61eb3cc Compare February 18, 2025 08:22

ritter-x2a force-pushed the users/ritter-x2a/add-missing-flang-offload-archs branch from 876dba7 to 21cf1a0 Compare February 19, 2025 08:56

Base automatically changed from users/ritter-x2a/add-missing-flang-offload-archs to main February 19, 2025 08:58

ritter-x2a force-pushed the users/ritter-x2a/rm-gfx940-gfx941-mlir branch from 61eb3cc to a86e6a8 Compare February 19, 2025 08:59

ritter-x2a added 2 commits February 19, 2025 09:02

[AMDGPU][MLIR] Replace gfx940 and gfx941 with gfx942 in MLIR

5980c82

gfx940 and gfx941 are no longer supported. This is one of a series of PRs to remove them from the code base. For SWDEV-512631

Use "gfx94x" in docs and comments.

d623fc8

ritter-x2a force-pushed the users/ritter-x2a/rm-gfx940-gfx941-mlir branch from a86e6a8 to d623fc8 Compare February 19, 2025 09:02

ritter-x2a merged commit 8900e41 into main Feb 19, 2025
5 of 8 checks passed

ritter-x2a deleted the users/ritter-x2a/rm-gfx940-gfx941-mlir branch February 19, 2025 09:05

joaosaffran mentioned this pull request Feb 26, 2025

[libc++] Set feature-test macro __cpp_lib_atomic_float (#127559) joaosaffran/llvm-project#1

Closed

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

[AMDGPU][MLIR] Replace gfx940 and gfx941 with gfx942 in MLIR #125836

[AMDGPU][MLIR] Replace gfx940 and gfx941 with gfx942 in MLIR #125836

Uh oh!

ritter-x2a commented Feb 5, 2025

Uh oh!

ritter-x2a commented Feb 5, 2025 •

edited

Loading

Uh oh!

llvmbot commented Feb 5, 2025 •

edited

Loading

Uh oh!

arsenm left a comment

Uh oh!

Uh oh!

Uh oh!

ritter-x2a commented Feb 5, 2025

Uh oh!

krzysz00 commented Feb 6, 2025

Uh oh!

krzysz00 left a comment

Uh oh!

ritter-x2a commented Feb 6, 2025

Uh oh!

krzysz00 commented Feb 6, 2025

Uh oh!

krzysz00 left a comment

Uh oh!

kuhar left a comment

Uh oh!

ritter-x2a commented Feb 11, 2025

Uh oh!

ritter-x2a commented Feb 19, 2025 •

edited

Loading

Uh oh!

Uh oh!

Uh oh!

[AMDGPU][MLIR] Replace gfx940 and gfx941 with gfx942 in MLIR #125836

[AMDGPU][MLIR] Replace gfx940 and gfx941 with gfx942 in MLIR #125836

Uh oh!

Conversation

ritter-x2a commented Feb 5, 2025

Uh oh!

ritter-x2a commented Feb 5, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

llvmbot commented Feb 5, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

arsenm left a comment

Choose a reason for hiding this comment

Uh oh!

Uh oh!

Uh oh!

ritter-x2a commented Feb 5, 2025

Uh oh!

krzysz00 commented Feb 6, 2025

Uh oh!

krzysz00 left a comment

Choose a reason for hiding this comment

Uh oh!

ritter-x2a commented Feb 6, 2025

Uh oh!

krzysz00 commented Feb 6, 2025

Uh oh!

krzysz00 left a comment

Choose a reason for hiding this comment

Uh oh!

kuhar left a comment

Choose a reason for hiding this comment

Uh oh!

ritter-x2a commented Feb 11, 2025

Uh oh!

ritter-x2a commented Feb 19, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Merge activity

Uh oh!

Uh oh!

Uh oh!

ritter-x2a commented Feb 5, 2025 •

edited

Loading

llvmbot commented Feb 5, 2025 •

edited

Loading

ritter-x2a commented Feb 19, 2025 •

edited

Loading