Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 4 additions & 5 deletions mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@

#include "mlir/Dialect/Affine/Utils.h"
#include "mlir/Dialect/Arith/Utils/Utils.h"
#include "mlir/Dialect/Index/IR/IndexOps.h"
#include "mlir/Dialect/Utils/IndexingUtils.h"
#include "mlir/Dialect/XeGPU/IR/XeGPU.h"
#include "mlir/Dialect/XeGPU/uArch/IntelGpuXe2.h"
Expand Down Expand Up @@ -61,7 +60,7 @@ genCoordinates(OpBuilder &builder, Location loc,
// Get the offset of `subShape` within a distribution unit.
SmallVector<Value> distUnitLocalOffset = llvm::map_to_vector(
llvm::zip(delinearizedId, subShape), [&](const auto &t) -> Value {
return builder.createOrFold<index::MulOp>(
return builder.createOrFold<arith::MulIOp>(
loc, std::get<0>(t),
builder.createOrFold<arith::ConstantIndexOp>(loc, std::get<1>(t)));
});
Expand All @@ -84,7 +83,7 @@ genCoordinates(OpBuilder &builder, Location loc,
// Do not go beyond `srcShape` bounds.
SmallVector<Value> mods = llvm::map_to_vector(
llvm::zip_equal(adds, srcShape), [&](const auto &t) -> Value {
return builder.createOrFold<index::RemUOp>(
return builder.createOrFold<arith::RemUIOp>(
loc, std::get<0>(t),
arith::ConstantIndexOp::create(builder, loc, std::get<1>(t)));
});
Expand Down Expand Up @@ -343,7 +342,7 @@ LayoutAttr::delinearizeId(OpBuilder &builder, Location loc, Value linearId) {
/// e.g., linearId=22, dimSize=4: 22 % 4 = 2 (we're at position 2 within
/// this dimension)
result[dimIdx] =
builder.createOrFold<index::RemUOp>(loc, remaining, dimSizeVal);
builder.createOrFold<arith::RemUIOp>(loc, remaining, dimSizeVal);

/// Update remaining for the next dimension by removing what we've already
/// processed. Division tells us "how many complete groups of this dimension
Expand All @@ -352,7 +351,7 @@ LayoutAttr::delinearizeId(OpBuilder &builder, Location loc, Value linearId) {
/// no next dimension to process
if (i < order.size() - 1) {
remaining =
builder.createOrFold<index::DivUOp>(loc, remaining, dimSizeVal);
builder.createOrFold<arith::DivUIOp>(loc, remaining, dimSizeVal);
}
}
return result;
Expand Down
3 changes: 1 addition & 2 deletions mlir/lib/Dialect/XeGPU/Utils/XeGPUUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@

#include "mlir/Dialect/XeGPU/Utils/XeGPUUtils.h"
#include "mlir/Dialect/GPU/IR/GPUDialect.h"
#include "mlir/Dialect/Index/IR/IndexOps.h"
#include "mlir/Dialect/LLVMIR/XeVMDialect.h"
#include "mlir/Dialect/SCF/Transforms/Patterns.h"
#include "mlir/Dialect/Utils/IndexingUtils.h"
Expand Down Expand Up @@ -527,7 +526,7 @@ SmallVector<OpFoldResult> xegpu::addElementwise(OpBuilder &builder,
for (auto [l, r] : llvm::zip_equal(lhs, rhs)) {
auto lval = getValueOrCreateConstantIndexOp(builder, loc, l);
auto rval = getValueOrCreateConstantIndexOp(builder, loc, r);
results.push_back(builder.createOrFold<index::AddOp>(loc, lval, rval));
results.push_back(builder.createOrFold<arith::AddIOp>(loc, lval, rval));
}
return results;
}
Expand Down
24 changes: 12 additions & 12 deletions mlir/test/Dialect/XeGPU/subgroup-distribute.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -271,11 +271,11 @@ gpu.module @xevm_module{
// CHECK: %[[C2:.*]] = arith.constant 2 : index
// CHECK: %[[C8:.*]] = arith.constant 8 : index
// CHECK: %[[LANE_ID:.*]] = gpu.lane_id
// CHECK: %[[REMU1:.*]] = index.remu %[[LANE_ID]], %[[C8]]
// CHECK: %[[DIVU:.*]] = index.divu %[[LANE_ID]], %[[C8]]
// CHECK: %[[REMU2:.*]] = index.remu %[[DIVU]], %[[C2]]
// CHECK: %[[REMU3:.*]] = index.remu %[[REMU2]], %[[C2]]
// CHECK: %[[REMU4:.*]] = index.remu %[[REMU1]], %[[C8]]
// CHECK: %[[REMU1:.*]] = arith.remui %[[LANE_ID]], %[[C8]]
// CHECK: %[[DIVU:.*]] = arith.divui %[[LANE_ID]], %[[C8]]
// CHECK: %[[REMU2:.*]] = arith.remui %[[DIVU]], %[[C2]]
// CHECK: %[[REMU3:.*]] = arith.remui %[[REMU2]], %[[C2]]
// CHECK: %[[REMU4:.*]] = arith.remui %[[REMU1]], %[[C8]]
// CHECK: %[[MAT:.*]] = xegpu.load_matrix %arg0[%[[REMU3]], %[[REMU4]]] : !xegpu.mem_desc<32x32xf32>, index, index -> vector<1x1xf32>
// CHECK: xegpu.store_matrix %[[MAT]], %arg0[%[[REMU3]], %[[REMU4]]] : vector<1x1xf32>, !xegpu.mem_desc<32x32xf32>, index, index
gpu.module @xevm_module{
Expand All @@ -294,13 +294,13 @@ gpu.module @xevm_module{
// CHECK: %[[C4:.*]] = arith.constant 4 : index
// CHECK: %[[C1:.*]] = arith.constant 1 : index
// CHECK: %[[LANE_ID:.*]] = gpu.lane_id
// CHECK: %[[REMU1:.*]] = index.remu %[[LANE_ID]], %[[C4]]
// CHECK: %[[DIVU:.*]] = index.divu %[[LANE_ID]], %[[C4]]
// CHECK: %[[REMU2:.*]] = index.remu %[[DIVU]], %[[C4]]
// CHECK: %[[MUL:.*]] = index.mul %[[REMU2]], %[[C2]]
// CHECK: %[[REMU3:.*]] = index.remu %[[MUL]], %[[C8]]
// CHECK: %[[REMU4:.*]] = index.remu %[[REMU1]], %[[C4]]
// CHECK: %[[ADD:.*]] = index.add %[[REMU4]], %[[C1]]
// CHECK: %[[REMU1:.*]] = arith.remui %[[LANE_ID]], %[[C4]]
// CHECK: %[[DIVU:.*]] = arith.divui %[[LANE_ID]], %[[C4]]
// CHECK: %[[REMU2:.*]] = arith.remui %[[DIVU]], %[[C4]]
// CHECK: %[[MUL:.*]] = arith.muli %[[REMU2]], %[[C2]]
// CHECK: %[[REMU3:.*]] = arith.remui %[[MUL]], %[[C8]]
// CHECK: %[[REMU4:.*]] = arith.remui %[[REMU1]], %[[C4]]
// CHECK: %[[ADD:.*]] = arith.addi %[[REMU4]], %[[C1]]
// CHECK: %[[MAT:.*]] = xegpu.load_matrix %arg0[%[[REMU3]], %[[ADD]]] : !xegpu.mem_desc<32x32xf32>, index, index -> vector<2x1xf32>
// CHECK: xegpu.store_matrix %[[MAT]], %arg0[%[[REMU3]], %[[ADD]]] : vector<2x1xf32>, !xegpu.mem_desc<32x32xf32>, index, index
gpu.module @xevm_module{
Expand Down
18 changes: 8 additions & 10 deletions mlir/test/Dialect/XeGPU/xegpu-attr-interface.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,10 @@
gpu.module @test {
gpu.func @slice_attr() -> vector<128xindex> {
// CHECK-DAG: %[[SGID:.*]] = gpu.subgroup_id : index
// CHECK-DAG: %[[DIVU:.*]] = index.divu %[[SGID]], %[[C8:.*]]
// CHECK-DAG: %[[REMU:.*]] = index.remu %[[DIVU]], %[[C4:.*]]
// CHECK-DAG: %[[MUL:.*]] = index.mul %[[REMU]], %[[C32:.*]]
// CHECK-DAG: %[[MOD:.*]] = index.remu %[[MUL]], %[[C128:.*]]
// CHECK-DAG: %[[DIVU:.*]] = arith.divui %[[SGID]], %[[C8:.*]]
// CHECK-DAG: %[[REMU:.*]] = arith.remui %[[DIVU]], %[[C4:.*]]
// CHECK-DAG: %[[MUL:.*]] = arith.muli %[[REMU]], %[[C32:.*]]
// CHECK-DAG: %[[MOD:.*]] = arith.remui %[[MUL]], %[[C128:.*]]
// CHECK-DAG: %[[BASE:.*]] = vector.step : vector<32xindex>
// CHECK-DAG: %[[CAST:.*]] = vector.broadcast %[[MOD]] : index to vector<32xindex>
// CHECK-DAG: %[[ADD:.*]] = arith.addi %[[BASE]], %[[CAST]] : vector<32xindex>
Expand All @@ -16,11 +16,10 @@ gpu.module @test {

gpu.func @nested_slice_attr() -> vector<128xindex> {
// CHECK-DAG: %[[SGID:.*]] = gpu.subgroup_id : index
// CHECK-DAG: %[[DIVU1:.*]] = index.divu %[[SGID]], %[[C1:.*]]
// CHECK-DAG: %[[DIVU2:.*]] = index.divu %[[DIVU1]], %[[C8:.*]]
// CHECK-DAG: %[[REMU:.*]] = index.remu %[[DIVU2]], %[[C4:.*]]
// CHECK-DAG: %[[MUL:.*]] = index.mul %[[REMU]], %[[C32:.*]]
// CHECK-DAG: %[[MOD:.*]] = index.remu %[[MUL]], %[[C128:.*]]
// CHECK-DAG: %[[DIVU2:.*]] = arith.divui %[[SGID]], %[[C8:.*]]
// CHECK-DAG: %[[REMU:.*]] = arith.remui %[[DIVU2]], %[[C4:.*]]
// CHECK-DAG: %[[MUL:.*]] = arith.muli %[[REMU]], %[[C32:.*]]
// CHECK-DAG: %[[MOD:.*]] = arith.remui %[[MUL]], %[[C128:.*]]
// CHECK-DAG: %[[BASE:.*]] = vector.step : vector<32xindex>
// CHECK-DAG: %[[CAST:.*]] = vector.broadcast %[[MOD]] : index to vector<32xindex>
// CHECK-DAG: %[[ADD:.*]] = arith.addi %[[BASE]], %[[CAST]] : vector<32xindex>
Expand All @@ -29,4 +28,3 @@ gpu.module @test {
}

}

14 changes: 7 additions & 7 deletions mlir/test/Dialect/XeGPU/xegpu-wg-to-sg-rr.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -16,18 +16,18 @@ gpu.module @test_round_robin_assignment {
gpu.func @create_nd_tdesc_with_shared_data(%src: memref<256x128xf32>) {
// CHECK: %[[SGID:.*]] = gpu.subgroup_id : index
// CHECK: %[[C4:.*]] = arith.constant 4 : index
// CHECK: %[[IDX:.*]] = index.remu %[[SGID]], %[[C4]]
// CHECK: %[[IDY_DIV:.*]] = index.divu %[[SGID]], %[[C4]]
// CHECK: %[[IDX:.*]] = arith.remui %[[SGID]], %[[C4]]
// CHECK: %[[IDY_DIV:.*]] = arith.divui %[[SGID]], %[[C4]]
// CHECK: %[[C8:.*]] = arith.constant 8 : index
// CHECK: %[[IDY:.*]] = index.remu %[[IDY_DIV]], %[[C8]]
// CHECK: %[[IDY:.*]] = arith.remui %[[IDY_DIV]], %[[C8]]
// CHECK: %[[C16:.*]] = arith.constant 16 : index
// CHECK: %[[LY:.*]] = index.mul %[[IDY]], %[[C16]]
// CHECK: %[[LY:.*]] = arith.muli %[[IDY]], %[[C16]]
// CHECK: %[[C64:.*]] = arith.constant 64 : index
// CHECK: %[[LX:.*]] = index.mul %[[IDX]], %[[C64]]
// CHECK: %[[LX:.*]] = arith.muli %[[IDX]], %[[C64]]
// CHECK: %[[C128:.*]] = arith.constant 128 : index
// CHECK: %[[OFFY:.*]] = index.remu %[[LY]], %[[C128]]
// CHECK: %[[OFFY:.*]] = arith.remui %[[LY]], %[[C128]]
// CHECK: %[[C64_1:.*]] = arith.constant 64 : index
// CHECK: %[[OFFX:.*]] = index.remu %[[LX]], %[[C64_1]]
// CHECK: %[[OFFX:.*]] = arith.remui %[[LX]], %[[C64_1]]
// CHECK: xegpu.create_nd_tdesc %[[ARG_0]][%[[OFFY]], %[[OFFX]]] : memref<256x128xf32> -> !xegpu.tensor_desc<16x64xf32>
%tdesc = xegpu.create_nd_tdesc %src[0, 0] : memref<256x128xf32>
-> !xegpu.tensor_desc<128x64xf32, #xegpu.layout<sg_layout = [8, 4], sg_data = [16, 64]>>
Expand Down
42 changes: 19 additions & 23 deletions mlir/test/Dialect/XeGPU/xegpu-wg-to-sg-unify-ops-rr.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -90,30 +90,27 @@ gpu.module @test_distribution {
gpu.return
}

// CHECK-LABEL: non_splat_constant
gpu.func @non_splat_constant() {
// CHECK-DAG: %[[BASECST:.*]] = arith.constant dense<{{.*}}> : vector<2x1xindex>
// CHECK-DAG: %[[CST:.*]] = arith.constant dense<{{.*}}0{{.*}}, {{.*}}16{{.*}}> : vector<2x1xindex>
// CHECK-DAG: %[[SGID:.*]] = gpu.subgroup_id : index
// CHECK-DAG: %[[REMU1:.*]] = index.remu %[[SGID]], %[[C1:.*]]
// CHECK-DAG: %[[DIVU:.*]] = index.divu %[[SGID]], %[[C1:.*]]
// CHECK-DAG: %[[REMU2:.*]] = index.remu %[[DIVU]], %[[C8:.*]]
// CHECK-DAG: %[[MUL:.*]] = index.mul %[[REMU2]], %[[C2:.*]]
// CHECK-DAG: %[[REMU3:.*]] = index.remu %[[MUL]], %[[C32:.*]]
// CHECK-DAG: %[[REMU4:.*]] = index.remu %[[REMU1]], %[[C1:.*]]
// CHECK-DAG: %[[ADD16:.*]] = arith.addi %[[MUL]], %[[C16:.*]] : index
// CHECK-DAG: %[[REMU5:.*]] = index.remu %[[ADD16]], %[[C32:.*]]
// CHECK-DAG: %[[REMU6:.*]] = index.remu %[[REMU1]], %[[C1:.*]]
// CHECK-DAG: %[[STRIDE1:.*]] = arith.muli %[[REMU3]], %[[C16:.*]] : index
// CHECK-DAG: %[[ADDSTRIDES:.*]] = arith.addi %[[C0:.*]], %[[STRIDE1]] : index
// CHECK-DAG: %[[STRIDE2:.*]] = arith.muli %[[REMU4]], %[[C0:.*]] : index
// CHECK-DAG: %[[ADDSTRIDES1:.*]] = arith.addi %[[ADDSTRIDES]], %[[STRIDE2]] : index
// CHECK-DAG: %[[BCAST1:.*]] = vector.broadcast %[[ADDSTRIDES1]] : index to vector<2x1xindex>
// CHECK-DAG: %[[RESULT1:.*]] = arith.addi %[[BASECST]], %[[BCAST1]] : vector<2x1xindex>
// CHECK-DAG: %[[STRIDE3:.*]] = arith.muli %[[REMU5]], %[[C16:.*]] : index
// CHECK-DAG: %[[ADDSTRIDES2:.*]] = arith.addi %[[C0:.*]], %[[STRIDE3]] : index
// CHECK-DAG: %[[STRIDE4:.*]] = arith.muli %[[REMU6]], %[[C0:.*]] : index
// CHECK-DAG: %[[ADDSTRIDES3:.*]] = arith.addi %[[ADDSTRIDES2]], %[[STRIDE4]] : index
// CHECK-DAG: %[[BCAST2:.*]] = vector.broadcast %[[ADDSTRIDES3]] : index to vector<2x1xindex>
// CHECK-DAG: %[[RESULT2:.*]] = arith.addi %[[BASECST]], %[[BCAST2]] : vector<2x1xindex>
// CHECK-DAG: %[[T1:.*]] = arith.remui %[[SGID]], %[[C8:.*]] : index
// CHECK-DAG: %[[T2:.*]] = arith.muli %[[T1]], %[[C2:.*]] : index
// CHECK-DAG: %[[T3:.*]] = arith.remui %[[T2]], %[[C32:.*]] : index
// CHECK-DAG: %[[T4:.*]] = arith.addi %[[T2]], %[[C16:.*]] : index
// CHECK-DAG: %[[T5:.*]] = arith.remui %[[T4]], %[[C32_6:.*]] : index
// CHECK-DAG: %[[T6:.*]] = arith.muli %[[T3]], %[[C16_10:.*]] : index
// CHECK-DAG: %[[T7:.*]] = arith.addi %[[C0_11:.*]], %[[T6]] : index
// CHECK-DAG: %[[T8:.*]] = arith.muli %[[C0_4:.*]], %[[C0_9:.*]] : index
// CHECK-DAG: %[[T9:.*]] = arith.addi %[[T7]], %[[T8]] : index
// CHECK-DAG: %[[T10:.*]] = vector.broadcast %[[T9]] : index to vector<2x1xindex>
// CHECK-DAG: %[[T11:.*]] = arith.addi %[[CST]], %[[T10]] : vector<2x1xindex>
// CHECK-DAG: %[[T12:.*]] = arith.muli %[[T5]], %[[C16_10:.*]] : index
// CHECK-DAG: %[[T13:.*]] = arith.addi %[[C0_12:.*]], %[[T12]] : index
// CHECK-DAG: %[[T14:.*]] = arith.muli %[[C0_8:.*]], %[[C0_9:.*]] : index
// CHECK-DAG: %[[T15:.*]] = arith.addi %[[T13]], %[[T14]] : index
// CHECK-DAG: %[[T16:.*]] = vector.broadcast %[[T15]] : index to vector<2x1xindex>
// CHECK-DAG: %[[T17:.*]] = arith.addi %[[CST]], %[[T16]] : vector<2x1xindex>
%cst_2 = arith.constant {layout_result_0 = #xegpu.layout<sg_layout = [8, 1], sg_data = [2, 1]>} dense<[[0], [16], [32], [48], [64], [80], [96], [112], [128], [144], [160], [176], [192], [208], [224], [240], [256], [272], [288], [304], [320], [336], [352], [368], [384], [400], [416], [432], [448], [464], [480], [496]]> : vector<32x1xindex>
gpu.return
}
Expand All @@ -139,4 +136,3 @@ gpu.module @test_distribution {
gpu.return
}
}

Loading