-
Notifications
You must be signed in to change notification settings - Fork 13.6k
[Matrix] Add a Remark when matrices get flattened #142078
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
✅ With the latest revision this PR passed the C/C++ code formatter. |
232fe4e
to
f53d419
Compare
This is a potential source of overhead, which we might be able to alleviate in some cases. For example, static element extracts, or shuffles that pluck out a specific row.
f53d419
to
cd456be
Compare
@llvm/pr-subscribers-llvm-ir @llvm/pr-subscribers-llvm-transforms Author: Jon Roelofs (jroelofs) ChangesThis is a potential source of overhead, which we might be able to alleviate in some cases. For example, static element extracts, or shuffles that pluck out a specific row. Full diff: https://github.com/llvm/llvm-project/pull/142078.diff 6 Files Affected:
diff --git a/llvm/include/llvm/IR/DiagnosticInfo.h b/llvm/include/llvm/IR/DiagnosticInfo.h
index 862be04e0fc00..3fc2ada523dae 100644
--- a/llvm/include/llvm/IR/DiagnosticInfo.h
+++ b/llvm/include/llvm/IR/DiagnosticInfo.h
@@ -516,6 +516,7 @@ class LLVM_ABI DiagnosticInfoOptimizationBase
explicit Argument(StringRef Str = "") : Key("String"), Val(Str) {}
LLVM_ABI Argument(StringRef Key, const Value *V);
+ LLVM_ABI Argument(StringRef Key, const Instruction &I);
LLVM_ABI Argument(StringRef Key, const Type *T);
LLVM_ABI Argument(StringRef Key, StringRef S);
Argument(StringRef Key, const char *S) : Argument(Key, StringRef(S)) {};
diff --git a/llvm/lib/IR/DiagnosticInfo.cpp b/llvm/lib/IR/DiagnosticInfo.cpp
index 0f1291b8bd8be..fdeb8082e54d5 100644
--- a/llvm/lib/IR/DiagnosticInfo.cpp
+++ b/llvm/lib/IR/DiagnosticInfo.cpp
@@ -25,6 +25,7 @@
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
@@ -211,6 +212,9 @@ DiagnosticInfoOptimizationBase::Argument::Argument(StringRef Key,
else if (isa<Constant>(V)) {
raw_string_ostream OS(Val);
V->printAsOperand(OS, /*PrintType=*/false);
+ } else if (auto *I = dyn_cast<IntrinsicInst>(V)) {
+ raw_string_ostream OS(Val);
+ OS << "call " << *V->getType() << " @" << I->getCalledFunction()->getName();
} else if (auto *I = dyn_cast<Instruction>(V)) {
Val = I->getOpcodeName();
} else if (auto *MD = dyn_cast<MetadataAsValue>(V)) {
@@ -219,6 +223,14 @@ DiagnosticInfoOptimizationBase::Argument::Argument(StringRef Key,
}
}
+DiagnosticInfoOptimizationBase::Argument::Argument(StringRef Key,
+ const Instruction &I)
+ : Key(std::string(Key)) {
+ Loc = I.getDebugLoc();
+ raw_string_ostream OS(Val);
+ OS << I;
+}
+
DiagnosticInfoOptimizationBase::Argument::Argument(StringRef Key, const Type *T)
: Key(std::string(Key)) {
raw_string_ostream OS(Val);
diff --git a/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp b/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
index 20279bf69dd59..32ad0c61e00c4 100644
--- a/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
+++ b/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
@@ -569,6 +569,41 @@ class LowerMatrixIntrinsics {
SplitVecs.push_back(V);
}
+ if (ORE) {
+ if (Instruction *Inst = dyn_cast<Instruction>(MatrixVal)) {
+ if (Found != Inst2ColumnMatrix.end()) {
+ ORE->emit([&]() {
+ // FIXME: SplitVecs.size() doesn't count the shuffles that
+ // embedInVector created.
+ return OptimizationRemarkAnalysis(DEBUG_TYPE, "mismatched-shape",
+ Inst)
+ << "matrix reshape from "
+ << ore::NV("FromRows", Found->second.getNumRows()) << "x"
+ << ore::NV("FromCols", Found->second.getNumColumns())
+ << " to " << ore::NV("ToRows", SI.NumRows) << "x"
+ << ore::NV("ToCols", SI.NumColumns) << " using at least "
+ << ore::NV("Shuffles", SplitVecs.size()) << " shuffles";
+ });
+ } else if (!ShapeMap.contains(MatrixVal)) {
+ ORE->emit([&]() {
+ return OptimizationRemarkMissed(DEBUG_TYPE,
+ "unknown-shape-lowering-def", Inst)
+ << "splitting a " << ore::NV("Rows", SI.NumRows) << "x"
+ << ore::NV("Cols", SI.NumColumns) << " matrix "
+ << " with " << ore::NV("Shuffles", SplitVecs.size())
+ << " shuffles because we do not have a shape-aware lowering "
+ "for its def: "
+ << ore::NV("Instr", Inst) << ore::setExtraArgs()
+ << ore::NV("Opcode", Inst->getOpcodeName());
+ });
+ } else {
+ // The ShapeMap has it, so it's a case where we're being lowered
+ // before the def, and we expect that InstCombine will clean things up
+ // afterward.
+ }
+ }
+ }
+
return {SplitVecs};
}
@@ -1352,11 +1387,24 @@ class LowerMatrixIntrinsics {
ToRemove.push_back(Inst);
Value *Flattened = nullptr;
for (Use &U : llvm::make_early_inc_range(Inst->uses())) {
- if (!ShapeMap.contains(U.getUser())) {
- if (!Flattened)
- Flattened = Matrix.embedInVector(Builder);
- U.set(Flattened);
+ if (ShapeMap.contains(U.getUser()))
+ continue;
+
+ if (!Flattened) {
+ Flattened = Matrix.embedInVector(Builder);
+ if (ORE)
+ if (Instruction *User = dyn_cast<Instruction>(U.getUser()))
+ ORE->emit(OptimizationRemarkMissed(
+ DEBUG_TYPE, "unknown-shape-lowering-use", User)
+ << "flattening a " << ore::NV("Rows", Matrix.getNumRows())
+ << "x" << ore::NV("Cols", Matrix.getNumColumns())
+ << " matrix " << ore::NV("Source", *Inst)
+ << " because we do not have a shape-aware lowering for "
+ "its user:"
+ << ore::NV("Instr", *User) << ore::setExtraArgs()
+ << ore::NV("Opcode", User->getOpcodeName()));
}
+ U.set(Flattened);
}
}
diff --git a/llvm/test/Transforms/LowerMatrixIntrinsics/flatten.ll b/llvm/test/Transforms/LowerMatrixIntrinsics/flatten.ll
new file mode 100644
index 0000000000000..2752fddd21477
--- /dev/null
+++ b/llvm/test/Transforms/LowerMatrixIntrinsics/flatten.ll
@@ -0,0 +1,38 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -passes='lower-matrix-intrinsics' -S < %s | FileCheck %s
+; RUN: opt -passes=lower-matrix-intrinsics -pass-remarks-missed=lower-matrix-intrinsics < %s -pass-remarks-output=%t -disable-output && FileCheck --input-file %t %s --check-prefix=REMARK
+
+define void @diag_3x3(ptr %in, ptr %out) {
+; REMARK-LABEL: --- !Missed
+; REMARK-NEXT: Pass: lower-matrix-intrinsics
+; REMARK-NEXT: Name: unknown-shape-lowering-use
+; REMARK-NEXT: Function: diag_3x3
+; REMARK-NEXT: Args:
+; REMARK-NEXT: - String: 'flattening a '
+; REMARK-NEXT: - Rows: '3'
+; REMARK-NEXT: - String: x
+; REMARK-NEXT: - Cols: '3'
+; REMARK-NEXT: - String: ' matrix '
+; REMARK-NEXT: - Source: ' %{{.*}} = call <9 x float> @llvm.matrix.column.major.load.v9f32.i64(ptr %{{.*}}, i64 3, i1 false, i32 3, i32 3)'
+; REMARK-NEXT: - String: ' because we do not have a shape-aware lowering for its user:'
+; REMARK-NEXT: - Instr: ' %{{.*}} = shufflevector <9 x float> %{{.*}}, <9 x float> poison, <3 x i32> <i32 0, i32 4, i32 8>'
+; REMARK-NEXT: - Opcode: shufflevector
+; REMARK-NEXT: ...
+; CHECK-LABEL: @diag_3x3(
+; CHECK-NEXT: [[COL_LOAD:%.*]] = load <3 x float>, ptr [[IN:%.*]], align 4
+; CHECK-NEXT: [[VEC_GEP:%.*]] = getelementptr float, ptr [[IN]], i64 3
+; CHECK-NEXT: [[COL_LOAD1:%.*]] = load <3 x float>, ptr [[VEC_GEP]], align 4
+; CHECK-NEXT: [[VEC_GEP2:%.*]] = getelementptr float, ptr [[IN]], i64 6
+; CHECK-NEXT: [[COL_LOAD3:%.*]] = load <3 x float>, ptr [[VEC_GEP2]], align 4
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <3 x float> [[COL_LOAD]], <3 x float> [[COL_LOAD1]], <6 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5>
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <3 x float> [[COL_LOAD3]], <3 x float> poison, <6 x i32> <i32 0, i32 1, i32 2, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <6 x float> [[TMP1]], <6 x float> [[TMP2]], <9 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>
+; CHECK-NEXT: [[DIAG:%.*]] = shufflevector <9 x float> [[TMP3]], <9 x float> poison, <3 x i32> <i32 0, i32 4, i32 8>
+; CHECK-NEXT: store <3 x float> [[DIAG]], ptr [[OUT:%.*]], align 16
+; CHECK-NEXT: ret void
+;
+ %inv = call <9 x float> @llvm.matrix.column.major.load(ptr %in, i64 3, i1 false, i32 3, i32 3)
+ %diag = shufflevector <9 x float> %inv, <9 x float> poison, <3 x i32> <i32 0, i32 4, i32 8>
+ store <3 x float> %diag, ptr %out
+ ret void
+}
diff --git a/llvm/test/Transforms/LowerMatrixIntrinsics/transpose-fold.ll b/llvm/test/Transforms/LowerMatrixIntrinsics/transpose-fold.ll
index ef2e224f69c5e..0c7930fb2492a 100644
--- a/llvm/test/Transforms/LowerMatrixIntrinsics/transpose-fold.ll
+++ b/llvm/test/Transforms/LowerMatrixIntrinsics/transpose-fold.ll
@@ -1,9 +1,46 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
-; RUN: opt -passes='lower-matrix-intrinsics' -S < %s | FileCheck %s
+; RUN: opt -passes='lower-matrix-intrinsics' -S < %s -pass-remarks-output=%t | FileCheck %s && FileCheck --input-file %t --check-prefix=REMARK %s
; We can only fold matching transposes.
define void @reshape(ptr %in, ptr %out) {
+; REMARK: --- !Analysis
+; REMARK-NEXT: Pass: lower-matrix-intrinsics
+; REMARK-NEXT: Name: mismatched-shape
+; REMARK-NEXT: Function: reshape
+; REMARK-NEXT: Args:
+; REMARK-NEXT: - String: 'matrix reshape from '
+; REMARK-NEXT: - FromRows: '4'
+; REMARK-NEXT: - String: x
+; REMARK-NEXT: - FromCols: '1'
+; REMARK-NEXT: - String: ' to '
+; REMARK-NEXT: - ToRows: '2'
+; REMARK-NEXT: - String: x
+; REMARK-NEXT: - ToCols: '2'
+; REMARK-NEXT: - String: ' using at least '
+; REMARK-NEXT: - Shuffles: '2'
+; REMARK-NEXT: - String: ' shuffles'
+; REMARK-NEXT: ...
+; REMARK-NEXT: --- !Passed
+; REMARK-NEXT: Pass: lower-matrix-intrinsics
+; REMARK-NEXT: Name: matrix-lowered
+; REMARK-NEXT: Function: reshape
+; REMARK-NEXT: Args:
+; REMARK-NEXT: - String: 'Lowered with '
+; REMARK-NEXT: - NumStores: '8'
+; REMARK-NEXT: - String: ' stores, '
+; REMARK-NEXT: - NumLoads: '8'
+; REMARK-NEXT: - String: ' loads, '
+; REMARK-NEXT: - NumComputeOps: '8'
+; REMARK-NEXT: - String: ' compute ops, '
+; REMARK-NEXT: - NumExposedTransposes: '1'
+; REMARK-NEXT: - String: ' exposed transposes'
+; REMARK-NEXT: - String: |
+; REMARK-NEXT:{{ }}
+; REMARK-NEXT: store(
+; REMARK-NEXT: transpose.4x1.double(load(addr %in)),
+; REMARK-NEXT: addr %out)
+; REMARK-NEXT: ...
; CHECK-LABEL: define void @reshape(
; CHECK-SAME: ptr [[IN:%.*]], ptr [[OUT:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
diff --git a/llvm/test/Transforms/LowerMatrixIntrinsics/transpose-opts.ll b/llvm/test/Transforms/LowerMatrixIntrinsics/transpose-opts.ll
index 57d038e5cf947..88531d11bb8f1 100644
--- a/llvm/test/Transforms/LowerMatrixIntrinsics/transpose-opts.ll
+++ b/llvm/test/Transforms/LowerMatrixIntrinsics/transpose-opts.ll
@@ -50,7 +50,36 @@ entry:
}
define void @multiply_ntt(ptr %A, ptr %B, ptr %C, ptr %R) {
-; REMARK: Pass: lower-matrix-intrinsics
+; REMARK-LABEL: Name: unknown-shape-lowering-use
+; REMARK-NEXT: Function: multiply_ntt
+; REMARK-NEXT: Args:
+; REMARK-NEXT: - String: 'flattening a '
+; REMARK-NEXT: - Rows: '2'
+; REMARK-NEXT: - String: x
+; REMARK-NEXT: - Cols: '3'
+; REMARK-NEXT: - String: ' matrix '
+; REMARK-NEXT: - Source: ' %{{.*}} = load <6 x double>, ptr %{{.*}}, align 16'
+; REMARK-NEXT: - String: ' because we do not have a shape-aware lowering for its user:'
+; REMARK-NEXT: - Instr: ' %{{.*}} = shufflevector <6 x double> %{{.*}}, <6 x double> poison, <2 x i32> <i32 4, i32 5>'
+; REMARK-NEXT: - Opcode: shufflevector
+; REMARK-NEXT: ...
+; REMARK-NEXT: --- !Missed
+; REMARK-NEXT: Pass: lower-matrix-intrinsics
+; REMARK-LABEL: Name: unknown-shape-lowering-use
+; REMARK-NEXT: Function: multiply_ntt
+; REMARK-NEXT: Args:
+; REMARK-NEXT: - String: 'flattening a '
+; REMARK-NEXT: - Rows: '4'
+; REMARK-NEXT: - String: x
+; REMARK-NEXT: - Cols: '3'
+; REMARK-NEXT: - String: ' matrix '
+; REMARK-NEXT: - Source: ' %{{.*}} = call <12 x double> @llvm.matrix.multiply.v12f64.v8f64.v6f64(<8 x double> %{{.*}}, <6 x double> %{{.*}}, i32 4, i32 2, i32 3)'
+; REMARK-NEXT: - String: ' because we do not have a shape-aware lowering for its user:'
+; REMARK-NEXT: - Instr: ' %{{.*}} = shufflevector <12 x double> %{{.*}}, <12 x double> poison, <4 x i32> <i32 8, i32 9, i32 10, i32 11>'
+; REMARK-NEXT: - Opcode: shufflevector
+; REMARK-NEXT: ...
+; REMARK-NEXT: --- !Passed
+; REMARK-NEXT: Pass: lower-matrix-intrinsics
; REMARK-NEXT: Name: matrix-lowered
; REMARK-NEXT: Function: multiply_ntt
; REMARK-NEXT: Args:
@@ -438,7 +467,36 @@ entry:
}
define void @multiply_nt_t(ptr %A, ptr %B, ptr %C) {
-; REMARK: Pass: lower-matrix-intrinsics
+; REMARK-LABEL: Name: unknown-shape-lowering-use
+; REMARK-NEXT: Function: multiply_nt_t
+; REMARK-NEXT: Args:
+; REMARK-NEXT: - String: 'flattening a '
+; REMARK-NEXT: - Rows: '2'
+; REMARK-NEXT: - String: x
+; REMARK-NEXT: - Cols: '3'
+; REMARK-NEXT: - String: ' matrix '
+; REMARK-NEXT: - Source: ' %{{.*}} = load <6 x double>, ptr %{{.*}}, align 16'
+; REMARK-NEXT: - String: ' because we do not have a shape-aware lowering for its user:'
+; REMARK-NEXT: - Instr: ' %{{.*}} = shufflevector <6 x double> %{{.*}}, <6 x double> poison, <2 x i32> <i32 4, i32 5>'
+; REMARK-NEXT: - Opcode: shufflevector
+; REMARK-NEXT: ...
+; REMARK-NEXT: --- !Missed
+; REMARK-NEXT: Pass: lower-matrix-intrinsics
+; REMARK-LABEL: Name: unknown-shape-lowering-use
+; REMARK-NEXT: Function: multiply_nt_t
+; REMARK-NEXT: Args:
+; REMARK-NEXT: - String: 'flattening a '
+; REMARK-NEXT: - Rows: '4'
+; REMARK-NEXT: - String: x
+; REMARK-NEXT: - Cols: '3'
+; REMARK-NEXT: - String: ' matrix '
+; REMARK-NEXT: - Source: ' %{{.*}} = load <12 x double>, ptr %{{.*}}, align 16'
+; REMARK-NEXT: - String: ' because we do not have a shape-aware lowering for its user:'
+; REMARK-NEXT: - Instr: ' %{{.*}} = shufflevector <12 x double> %{{.*}}, <12 x double> poison, <4 x i32> <i32 8, i32 9, i32 10, i32 11>'
+; REMARK-NEXT: - Opcode: shufflevector
+; REMARK-NEXT: ...
+; REMARK-NEXT: --- !Passed
+; REMARK-NEXT: Pass: lower-matrix-intrinsics
; REMARK-NEXT: Name: matrix-lowered
; REMARK-NEXT: Function: multiply_nt_t
; REMARK-NEXT: Args:
@@ -573,6 +631,33 @@ entry:
}
define void @multiply_ntt_t(ptr %A, ptr %B, ptr %C, ptr %R) {
+; REMARK-LABEL: Name: unknown-shape-lowering-use
+; REMARK-NEXT: Function: multiply_ntt_t
+; REMARK-NEXT: Args:
+; REMARK-NEXT: - String: 'flattening a '
+; REMARK-NEXT: - Rows: '3'
+; REMARK-NEXT: - String: x
+; REMARK-NEXT: - Cols: '3'
+; REMARK-NEXT: - String: ' matrix '
+; REMARK-NEXT: - Source: ' %{{.*}} = load <9 x double>, ptr %{{.*}}, align 16'
+; REMARK-NEXT: - String: ' because we do not have a shape-aware lowering for its user:'
+; REMARK-NEXT: - Instr: ' %{{.*}} = shufflevector <9 x double> %{{.*}}, <9 x double> poison, <3 x i32> <i32 6, i32 7, i32 8>'
+; REMARK-NEXT: - Opcode: shufflevector
+; REMARK-NEXT: ...
+; REMARK-LABEL: Pass: lower-matrix-intrinsics
+; REMARK-NEXT: Name: unknown-shape-lowering-use
+; REMARK-NEXT: Function: multiply_ntt_t
+; REMARK-NEXT: Args:
+; REMARK-NEXT: - String: 'flattening a '
+; REMARK-NEXT: - Rows: '3'
+; REMARK-NEXT: - String: x
+; REMARK-NEXT: - Cols: '3'
+; REMARK-NEXT: - String: ' matrix '
+; REMARK-NEXT: - Source: ' %{{.*}} = call <9 x double> @llvm.matrix.multiply.v9f64.v9f64.v9f64(<9 x double> %{{.*}}, <9 x double> %{{.*}}, i32 3, i32 3, i32 3)'
+; REMARK-NEXT: - String: ' because we do not have a shape-aware lowering for its user:'
+; REMARK-NEXT: - Instr: ' %{{.*}} = shufflevector <9 x double> %{{.*}}, <9 x double> poison, <3 x i32> <i32 6, i32 7, i32 8>'
+; REMARK-NEXT: - Opcode: shufflevector
+; REMARK-NEXT: ...
; REMARK: Pass: lower-matrix-intrinsics
; REMARK-NEXT: Name: matrix-lowered
; REMARK-NEXT: Function: multiply_ntt_t
|
; REMARK-NEXT: - String: x | ||
; REMARK-NEXT: - Cols: '3' | ||
; REMARK-NEXT: - String: ' matrix ' | ||
; REMARK-NEXT: - Source: ' %{{.*}} = call <9 x float> @llvm.matrix.column.major.load.v9f32.i64(ptr %{{.*}}, i64 3, i1 false, i32 3, i32 3)' |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
IIRC we tried to avoid direct LLVM IR references in the remarks, to make them more approachable to non-LLVM developers. Might be worth keeping consistent with the syntax we use in emitRemarks
, although the printing there could probably also be improved.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This remark isn’t actionable for anyone other than an LLVM developer working on this pass. Maybe we need another category for dev focused diagnostics… or I should shove this under -debug-only=lower-matrix-intrinsics
.
This is a potential source of overhead, which we might be able to alleviate in some cases. For example, static element extracts, or shuffles that pluck out a specific row. In some cases, we expect that InstCombine will clean them up, so this is not always a pure signal of overhead.