-
Notifications
You must be signed in to change notification settings - Fork 13.6k
[Matrix] Propagate shape information through (f)abs insts #141704
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
@llvm/pr-subscribers-llvm-transforms Author: Jon Roelofs (jroelofs) ChangesFull diff: https://github.com/llvm/llvm-project/pull/141704.diff 2 Files Affected:
diff --git a/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp b/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
index 56d4be513ea6f..a3f1ca1644002 100644
--- a/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
+++ b/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
@@ -229,10 +229,20 @@ static bool isUniformShape(Value *V) {
if (!I)
return true;
+ if (auto *II = dyn_cast<IntrinsicInst>(V))
+ switch (II->getIntrinsicID()) {
+ case Intrinsic::abs:
+ case Intrinsic::fabs:
+ return true;
+ default:
+ return false;
+ }
+
switch (I->getOpcode()) {
case Instruction::FAdd:
case Instruction::FSub:
case Instruction::FMul: // Scalar multiply.
+ case Instruction::FDiv:
case Instruction::FNeg:
case Instruction::Add:
case Instruction::Mul:
@@ -624,7 +634,7 @@ class LowerMatrixIntrinsics {
case Intrinsic::matrix_column_major_store:
return true;
default:
- return false;
+ return isUniformShape(II);
}
return isUniformShape(V) || isa<StoreInst>(V) || isa<LoadInst>(V);
}
@@ -1130,6 +1140,9 @@ class LowerMatrixIntrinsics {
case Intrinsic::matrix_column_major_store:
LowerColumnMajorStore(Inst);
break;
+ case Intrinsic::abs:
+ case Intrinsic::fabs:
+ return VisitUniformIntrinsic(cast<IntrinsicInst>(Inst));
default:
return false;
}
@@ -2167,6 +2180,8 @@ class LowerMatrixIntrinsics {
return Builder.CreateFAdd(LHS, RHS);
case Instruction::FMul:
return Builder.CreateFMul(LHS, RHS);
+ case Instruction::FDiv:
+ return Builder.CreateFDiv(LHS, RHS);
case Instruction::FSub:
return Builder.CreateFSub(LHS, RHS);
default:
@@ -2220,6 +2235,44 @@ class LowerMatrixIntrinsics {
return true;
}
+ /// Lower uniform shape intrinsics, if shape information is available.
+ bool VisitUniformIntrinsic(IntrinsicInst *Inst) {
+ auto I = ShapeMap.find(Inst);
+ if (I == ShapeMap.end())
+ return false;
+
+ IRBuilder<> Builder(Inst);
+ ShapeInfo &Shape = I->second;
+
+ MatrixTy Result;
+
+ switch (Inst->getIntrinsicID()) {
+ case Intrinsic::abs:
+ case Intrinsic::fabs: {
+ Value *Op = Inst->getOperand(0);
+
+ MatrixTy M = getMatrix(Op, Shape, Builder);
+
+ Builder.setFastMathFlags(getFastMathFlags(Inst));
+
+ for (unsigned I = 0; I < Shape.getNumVectors(); ++I)
+ switch (Inst->getIntrinsicID()) {
+ case Intrinsic::abs:
+ Result.addVector(Builder.CreateBinaryIntrinsic(Intrinsic::abs, M.getVector(I), Inst->getOperand(1)));
+ break;
+ case Intrinsic::fabs:
+ Result.addVector(Builder.CreateUnaryIntrinsic(Inst->getIntrinsicID(), M.getVector(I)));
+ break;
+ }
+
+ finalizeLowering(Inst, Result.addNumComputeOps(getNumOps(Result.getVectorTy()) * Result.getNumVectors()), Builder);
+ return true;
+ }
+ default:
+ llvm_unreachable("unexpected intrinsic");
+ }
+ }
+
/// Helper to linearize a matrix expression tree into a string. Currently
/// matrix expressions are linarized by starting at an expression leaf and
/// linearizing bottom up.
diff --git a/llvm/test/Transforms/LowerMatrixIntrinsics/binop.ll b/llvm/test/Transforms/LowerMatrixIntrinsics/binop.ll
new file mode 100644
index 0000000000000..1eacb2a32e07d
--- /dev/null
+++ b/llvm/test/Transforms/LowerMatrixIntrinsics/binop.ll
@@ -0,0 +1,69 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -passes='lower-matrix-intrinsics' -S < %s | FileCheck %s
+
+define void @fdiv_2x2(ptr %num, ptr %denom, ptr %out) {
+; CHECK-LABEL: @fdiv_2x2(
+; CHECK-NEXT: [[COL_LOAD:%.*]] = load <2 x double>, ptr [[NUM:%.*]], align 32
+; CHECK-NEXT: [[VEC_GEP:%.*]] = getelementptr double, ptr [[NUM]], i64 2
+; CHECK-NEXT: [[COL_LOAD1:%.*]] = load <2 x double>, ptr [[VEC_GEP]], align 16
+; CHECK-NEXT: [[COL_LOAD2:%.*]] = load <2 x double>, ptr [[DENOM:%.*]], align 32
+; CHECK-NEXT: [[VEC_GEP3:%.*]] = getelementptr double, ptr [[DENOM]], i64 2
+; CHECK-NEXT: [[COL_LOAD4:%.*]] = load <2 x double>, ptr [[VEC_GEP3]], align 16
+; CHECK-NEXT: [[TMP1:%.*]] = fdiv <2 x double> [[COL_LOAD]], [[COL_LOAD2]]
+; CHECK-NEXT: [[TMP2:%.*]] = fdiv <2 x double> [[COL_LOAD1]], [[COL_LOAD4]]
+; CHECK-NEXT: store <2 x double> [[TMP1]], ptr [[OUT:%.*]], align 32
+; CHECK-NEXT: [[VEC_GEP5:%.*]] = getelementptr double, ptr [[OUT]], i64 2
+; CHECK-NEXT: store <2 x double> [[TMP2]], ptr [[VEC_GEP5]], align 16
+; CHECK-NEXT: ret void
+;
+ %numv = load <4 x double>, ptr %num
+ %denomv = load <4 x double>, ptr %denom
+ %div = fdiv <4 x double> %numv, %denomv
+ %divt = call <4 x double> @llvm.matrix.transpose(<4 x double> %div, i32 2, i32 2)
+ %divtt = call <4 x double> @llvm.matrix.transpose(<4 x double> %divt, i32 2, i32 2)
+ store <4 x double> %divtt, ptr %out
+ ret void
+}
+
+define void @fabs_2x2f64(ptr %in, ptr %out) {
+; CHECK-LABEL: @fabs_2x2f64(
+; CHECK-NEXT: [[COL_LOAD:%.*]] = load <2 x double>, ptr [[IN:%.*]], align 32
+; CHECK-NEXT: [[VEC_GEP:%.*]] = getelementptr double, ptr [[IN]], i64 2
+; CHECK-NEXT: [[COL_LOAD1:%.*]] = load <2 x double>, ptr [[VEC_GEP]], align 16
+; CHECK-NEXT: [[TMP1:%.*]] = call <2 x double> @llvm.fabs.v2f64(<2 x double> [[COL_LOAD]])
+; CHECK-NEXT: [[TMP2:%.*]] = call <2 x double> @llvm.fabs.v2f64(<2 x double> [[COL_LOAD1]])
+; CHECK-NEXT: store <2 x double> [[TMP1]], ptr [[OUT:%.*]], align 32
+; CHECK-NEXT: [[VEC_GEP2:%.*]] = getelementptr double, ptr [[OUT]], i64 2
+; CHECK-NEXT: store <2 x double> [[TMP2]], ptr [[VEC_GEP2]], align 16
+; CHECK-NEXT: ret void
+;
+ %load = load <4 x double>, ptr %in
+ %fabs = call <4 x double> @llvm.fabs.v4f64(<4 x double> %load)
+ %fabst = call <4 x double> @llvm.matrix.transpose(<4 x double> %fabs, i32 2, i32 2)
+ %fabstt = call <4 x double> @llvm.matrix.transpose(<4 x double> %fabst, i32 2, i32 2)
+ store <4 x double> %fabstt, ptr %out
+ ret void
+}
+
+define void @fabs_2x2i32(ptr %in, ptr %out) {
+; CHECK-LABEL: @fabs_2x2i32(
+; CHECK-NEXT: [[COL_LOAD:%.*]] = load <2 x i32>, ptr [[IN:%.*]], align 16
+; CHECK-NEXT: [[VEC_GEP:%.*]] = getelementptr i32, ptr [[IN]], i64 2
+; CHECK-NEXT: [[COL_LOAD1:%.*]] = load <2 x i32>, ptr [[VEC_GEP]], align 8
+; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i32> @llvm.abs.v2i32(<2 x i32> [[COL_LOAD]], i1 false)
+; CHECK-NEXT: [[TMP2:%.*]] = call <2 x i32> @llvm.abs.v2i32(<2 x i32> [[COL_LOAD1]], i1 false)
+; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.abs.v2i32(<2 x i32> [[TMP1]], i1 true)
+; CHECK-NEXT: [[TMP4:%.*]] = call <2 x i32> @llvm.abs.v2i32(<2 x i32> [[TMP2]], i1 true)
+; CHECK-NEXT: store <2 x i32> [[TMP3]], ptr [[OUT:%.*]], align 16
+; CHECK-NEXT: [[VEC_GEP2:%.*]] = getelementptr i32, ptr [[OUT]], i64 2
+; CHECK-NEXT: store <2 x i32> [[TMP4]], ptr [[VEC_GEP2]], align 8
+; CHECK-NEXT: ret void
+;
+ %load = load <4 x i32>, ptr %in
+ %abs = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %load, i1 false)
+ %abst = call <4 x i32> @llvm.matrix.transpose(<4 x i32> %abs, i32 2, i32 2)
+ %abstt = call <4 x i32> @llvm.matrix.transpose(<4 x i32> %abst, i32 2, i32 2)
+ %absabstt = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %abstt, i1 true)
+ store <4 x i32> %absabstt, ptr %out
+ ret void
+}
|
This is a stacked PR. Its counterpart should be merged first: #141705 |
✅ With the latest revision this PR passed the C/C++ code formatter. |
a70843a
to
41eeb1e
Compare
84767fb
to
0b5ffd2
Compare
Co-authored-by: Florian Hahn <flo@fhahn.com>
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM, thanks
No description provided.