Skip to content

[MLIR] Fix rewrite of ops with vector operands to LLVM on GPU #127844

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Feb 19, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 34 additions & 17 deletions mlir/lib/Conversion/GPUCommon/GPUOpsLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#include "GPUOpsLowering.h"

#include "mlir/Conversion/GPUCommon/GPUCommonPass.h"
#include "mlir/Conversion/LLVMCommon/VectorPattern.h"
#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
#include "mlir/IR/Attributes.h"
#include "mlir/IR/Builders.h"
Expand Down Expand Up @@ -586,22 +587,15 @@ LogicalResult GPUPrintfOpToVPrintfLowering::matchAndRewrite(
return success();
}

/// Unrolls op if it's operating on vectors.
LogicalResult impl::scalarizeVectorOp(Operation *op, ValueRange operands,
ConversionPatternRewriter &rewriter,
const LLVMTypeConverter &converter) {
/// Helper for impl::scalarizeVectorOp. Scalarizes vectors to elements.
/// Used either directly (for ops on 1D vectors) or as the callback passed to
/// detail::handleMultidimensionalVectors (for ops on higher-rank vectors).
static Value scalarizeVectorOpHelper(Operation *op, ValueRange operands,
Type llvm1DVectorTy,
ConversionPatternRewriter &rewriter,
const LLVMTypeConverter &converter) {
TypeRange operandTypes(operands);
if (llvm::none_of(operandTypes, llvm::IsaPred<VectorType>)) {
return rewriter.notifyMatchFailure(op, "expected vector operand");
}
if (op->getNumRegions() != 0 || op->getNumSuccessors() != 0)
return rewriter.notifyMatchFailure(op, "expected no region/successor");
if (op->getNumResults() != 1)
return rewriter.notifyMatchFailure(op, "expected single result");
VectorType vectorType = dyn_cast<VectorType>(op->getResult(0).getType());
if (!vectorType)
return rewriter.notifyMatchFailure(op, "expected vector result");

VectorType vectorType = cast<VectorType>(llvm1DVectorTy);
Location loc = op->getLoc();
Value result = rewriter.create<LLVM::PoisonOp>(loc, vectorType);
Type indexType = converter.convertType(rewriter.getIndexType());
Expand All @@ -621,9 +615,32 @@ LogicalResult impl::scalarizeVectorOp(Operation *op, ValueRange operands,
result = rewriter.create<LLVM::InsertElementOp>(
loc, result, scalarOp->getResult(0), index);
}
return result;
}

rewriter.replaceOp(op, result);
return success();
/// Unrolls `op` to array/vector elements.
///
/// Dispatches on the types of `operands`:
///  * If any operand is a `VectorType`, the op is replaced by the value built
///    by `scalarizeVectorOpHelper` for the op's (single, vector) result type.
///  * Otherwise, if any operand is an `LLVM::LLVMArrayType`, the rewrite is
///    delegated to `LLVM::detail::handleMultidimensionalVectors`, with
///    `scalarizeVectorOpHelper` as the per-1-D-vector callback.
///  * Otherwise the match fails.
///
/// \param op        Operation to unroll.
/// \param operands  Operand values to inspect and forward to the helper.
/// \param rewriter  Rewriter used to replace `op` and report match failures.
/// \param converter Type converter forwarded to the helper / N-D handler.
/// \returns success() when `op` was replaced; a match failure otherwise.
LogicalResult impl::scalarizeVectorOp(Operation *op, ValueRange operands,
                                      ConversionPatternRewriter &rewriter,
                                      const LLVMTypeConverter &converter) {
  TypeRange operandTypes(operands);
  if (llvm::any_of(operandTypes, llvm::IsaPred<VectorType>)) {
    // Report a match failure rather than asserting: indexing result 0 and
    // `cast<VectorType>` are only valid for ops with exactly one vector result.
    if (op->getNumResults() != 1)
      return rewriter.notifyMatchFailure(op, "expected single result");
    auto vectorType = dyn_cast<VectorType>(op->getResult(0).getType());
    if (!vectorType)
      return rewriter.notifyMatchFailure(op, "expected vector result");

    rewriter.replaceOp(op, scalarizeVectorOpHelper(op, operands, vectorType,
                                                   rewriter, converter));
    return success();
  }

  if (llvm::any_of(operandTypes, llvm::IsaPred<LLVM::LLVMArrayType>)) {
    return LLVM::detail::handleMultidimensionalVectors(
        op, operands, converter,
        // The callback receives its own operand range; it is named distinctly
        // so it does not shadow the outer `operands`.
        [&](Type llvm1DVectorTy, ValueRange slicedOperands) -> Value {
          return scalarizeVectorOpHelper(op, slicedOperands, llvm1DVectorTy,
                                         rewriter, converter);
        },
        rewriter);
  }

  return rewriter.notifyMatchFailure(op, "no llvm.array or vector to unroll");
}

static IntegerAttr wrapNumericMemorySpace(MLIRContext *ctx, unsigned space) {
Expand Down
5 changes: 3 additions & 2 deletions mlir/lib/Conversion/GPUCommon/GPUOpsLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -172,13 +172,13 @@ struct GPUReturnOpLowering : public ConvertOpToLLVMPattern<gpu::ReturnOp> {
};

namespace impl {
/// Unrolls op to array/vector elements.
///
/// Inspects the types of `operands`: ops with a vector operand are rewritten
/// element by element; ops with an llvm.array operand are handed to
/// LLVM::detail::handleMultidimensionalVectors, which invokes the same
/// element-wise rewrite through a callback. On success `op` has been replaced
/// through `rewriter`. Returns a match failure (via
/// `rewriter.notifyMatchFailure`) when the op cannot be unrolled, e.g. when no
/// operand is a vector or an llvm.array.
LogicalResult scalarizeVectorOp(Operation *op, ValueRange operands,
ConversionPatternRewriter &rewriter,
const LLVMTypeConverter &converter);
} // namespace impl

/// Rewriting that unrolls SourceOp to scalars if it's operating on vectors.
/// Unrolls SourceOp to array/vector elements.
template <typename SourceOp>
struct ScalarizeVectorOpLowering : public ConvertOpToLLVMPattern<SourceOp> {
public:
Expand All @@ -191,6 +191,7 @@ struct ScalarizeVectorOpLowering : public ConvertOpToLLVMPattern<SourceOp> {
*this->getTypeConverter());
}
};

} // namespace mlir

#endif // MLIR_CONVERSION_GPUCOMMON_GPUOPSLOWERING_H_
51 changes: 51 additions & 0 deletions mlir/test/Conversion/MathToROCDL/math-to-rocdl.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -513,3 +513,54 @@ module {
"test.possible_terminator"() : () -> ()
}) : () -> ()
}

// -----

module @test_module {
// CHECK: llvm.func @__ocml_sin_f16(f16) -> f16
// CHECK-LABEL: func @math_sin_vector_1d
func.func @math_sin_vector_1d(%arg : vector<4xf16>) -> vector<4xf16> {
// CHECK: llvm.extractelement {{.*}} : vector<4xf16>
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we want to check to see these actually getting passed in to a sin call?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done!

// CHECK: llvm.call @__ocml_sin_f16(%{{.*}}) : (f16) -> f16
// CHECK: llvm.insertelement {{.*}} : vector<4xf16>
// CHECK: llvm.extractelement {{.*}} : vector<4xf16>
// CHECK: llvm.call @__ocml_sin_f16(%{{.*}}) : (f16) -> f16
// CHECK: llvm.insertelement {{.*}} : vector<4xf16>
// CHECK: llvm.extractelement {{.*}} : vector<4xf16>
// CHECK: llvm.call @__ocml_sin_f16(%{{.*}}) : (f16) -> f16
// CHECK: llvm.insertelement {{.*}} : vector<4xf16>
// CHECK: llvm.extractelement {{.*}} : vector<4xf16>
// CHECK: llvm.call @__ocml_sin_f16(%{{.*}}) : (f16) -> f16
// CHECK: llvm.insertelement {{.*}} : vector<4xf16>
%result = math.sin %arg : vector<4xf16>
func.return %result : vector<4xf16>
}
}

// -----

// Lowering of math.sin on a 2-D vector (vector<2x2xf16>). The expected output
// casts the operand to !llvm.array<2 x vector<2xf16>>, then for each of the two
// inner vector<2xf16> values: extracts it from the array, calls
// @__ocml_sin_f16 once per element (extractelement / call / insertelement),
// and inserts the rebuilt vector back into the array.
module @test_module {
// CHECK: llvm.func @__ocml_sin_f16(f16) -> f16
// CHECK-LABEL: func @math_sin_vector_2d
func.func @math_sin_vector_2d(%arg : vector<2x2xf16>) -> vector<2x2xf16> {
// CHECK: builtin.unrealized_conversion_cast {{.*}} : vector<2x2xf16> to !llvm.array<2 x vector<2xf16>>
// CHECK: llvm.extractvalue {{.*}} : !llvm.array<2 x vector<2xf16>>
// CHECK: llvm.extractelement {{.*}} : vector<2xf16>
// CHECK: llvm.call @__ocml_sin_f16(%{{.*}}) : (f16) -> f16
// CHECK: llvm.insertelement {{.*}} : vector<2xf16>
// CHECK: llvm.extractelement {{.*}} : vector<2xf16>
// CHECK: llvm.call @__ocml_sin_f16(%{{.*}}) : (f16) -> f16
// CHECK: llvm.insertelement {{.*}} : vector<2xf16>
// CHECK: llvm.insertvalue {{.*}} : !llvm.array<2 x vector<2xf16>>
// CHECK: llvm.extractvalue {{.*}} : !llvm.array<2 x vector<2xf16>>
// CHECK: llvm.extractelement {{.*}} : vector<2xf16>
// CHECK: llvm.call @__ocml_sin_f16(%{{.*}}) : (f16) -> f16
// CHECK: llvm.insertelement {{.*}} : vector<2xf16>
// CHECK: llvm.extractelement {{.*}} : vector<2xf16>
// CHECK: llvm.call @__ocml_sin_f16(%{{.*}}) : (f16) -> f16
// CHECK: llvm.insertelement {{.*}} : vector<2xf16>
// CHECK: llvm.insertvalue {{.*}} : !llvm.array<2 x vector<2xf16>>
%result = math.sin %arg : vector<2x2xf16>
func.return %result : vector<2x2xf16>
}
}