-
Notifications
You must be signed in to change notification settings - Fork 13.3k
[CodeExtractor] Improve debug info for input values. #136016
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
@llvm/pr-subscribers-llvm-transforms Author: Abid Qadeer (abidh) ChangesIf we use
it will look like the extracted function shown below (with some irrelevent details removed).
You will notice that it has replaced the usage of values that were in the parent function (%1 and %2) with the arguments to the new function. But it did not do the same thing with This is not just a theoretical limitations. This PR tries to address this problem. It iterates over the input to the extracted function and looks at their debug uses. If they were present in the new function, it updates their location. Otherwise it materialize a similar usage in the new function. Most of these changes are localized in Full diff: https://github.com/llvm/llvm-project/pull/136016.diff 6 Files Affected:
diff --git a/llvm/include/llvm/Transforms/Utils/CodeExtractor.h b/llvm/include/llvm/Transforms/Utils/CodeExtractor.h
index 60c5def3472b6..d23229750ca68 100644
--- a/llvm/include/llvm/Transforms/Utils/CodeExtractor.h
+++ b/llvm/include/llvm/Transforms/Utils/CodeExtractor.h
@@ -290,7 +290,8 @@ class CodeExtractorAnalysisCache {
void emitFunctionBody(const ValueSet &inputs, const ValueSet &outputs,
const ValueSet &StructValues, Function *newFunction,
StructType *StructArgTy, BasicBlock *header,
- const ValueSet &SinkingCands);
+ const ValueSet &SinkingCands,
+ SmallVector<Value *> &NewValues);
/// Generates a Basic Block that calls the extracted function.
CallInst *emitReplacerCall(const ValueSet &inputs, const ValueSet &outputs,
diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp
index 2b055020022be..13083b62a4701 100644
--- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp
+++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp
@@ -1242,11 +1242,18 @@ static void eraseDebugIntrinsicsWithNonLocalRefs(Function &F) {
}
}
-/// Fix up the debug info in the old and new functions by pointing line
-/// locations and debug intrinsics to the new subprogram scope, and by deleting
-/// intrinsics which point to values outside of the new function.
-static void fixupDebugInfoPostExtraction(Function &OldFunc, Function &NewFunc,
- CallInst &TheCall) {
+/// Fix up the debug info in the old and new functions. Following changes are
+/// done.
+/// 1. If a debug record points to a value that has been replaced, update the
+/// record to use the new value.
+/// 2. If an Input value that has been replaced was used as a location of a
+/// debug record in the Parent function, then materealize a similar record in
+/// the new function.
+/// 3. Point line locations and debug intrinsics to the new subprogram scope
+/// 4. Remove intrinsics which point to values outside of the new function.
+static void fixupDebugInfoPostExtraction(
+ Function &OldFunc, Function &NewFunc, CallInst &TheCall,
+ const SetVector<Value *> &Inputs, const SmallVector<Value *> &NewValues) {
DISubprogram *OldSP = OldFunc.getSubprogram();
LLVMContext &Ctx = OldFunc.getContext();
@@ -1273,14 +1280,50 @@ static void fixupDebugInfoPostExtraction(Function &OldFunc, Function &NewFunc,
/*LineNo=*/0, SPType, /*ScopeLine=*/0, DINode::FlagZero, SPFlags);
NewFunc.setSubprogram(NewSP);
+ auto UpdateOrInsertDebugRecord = [&](auto *DR, Value *OldLoc, Value *NewLoc,
+ DIExpression *Expr, bool Declare) {
+ if (DR->getParent()->getParent() == &NewFunc)
+ DR->replaceVariableLocationOp(OldLoc, NewLoc);
+ else {
+ if (Declare)
+ DIB.insertDeclare(NewLoc, DR->getVariable(), Expr, DR->getDebugLoc(),
+ &NewFunc.getEntryBlock());
+ else
+ DIB.insertDbgValueIntrinsic(
+ NewLoc, DR->getVariable(), Expr, DR->getDebugLoc(),
+ NewFunc.getEntryBlock().getTerminator()->getIterator());
+ }
+ };
+ for (auto [Input, NewVal] : zip_equal(Inputs, NewValues)) {
+ SmallVector<DbgVariableIntrinsic *, 1> DbgUsers;
+ SmallVector<DbgVariableRecord *, 1> DPUsers;
+ findDbgUsers(DbgUsers, Input, &DPUsers);
+ DIExpression *Expr = DIB.createExpression();
+
+ // Iterate the debud users of the Input values. If they are in the extracted
+ // function then update their location with the new value. If they are in
+ // the parent function then create a similar debug record.
+ for (auto *DVI : DbgUsers) {
+ UpdateOrInsertDebugRecord(DVI, Input, NewVal, Expr,
+ isa<DbgDeclareInst>(DVI));
+ }
+ for (auto *DVR : DPUsers)
+ UpdateOrInsertDebugRecord(DVR, Input, NewVal, Expr,
+ DVR->isDbgDeclare());
+ }
+
auto IsInvalidLocation = [&NewFunc](Value *Location) {
- // Location is invalid if it isn't a constant or an instruction, or is an
- // instruction but isn't in the new function.
- if (!Location ||
- (!isa<Constant>(Location) && !isa<Instruction>(Location)))
+ // Location is invalid if it isn't a constant, an instruction or an
+ // argument, or is an instruction/argument but isn't in the new function.
+ if (!Location || (!isa<Constant>(Location) && !isa<Argument>(Location) &&
+ !isa<Instruction>(Location)))
return true;
- Instruction *LocationInst = dyn_cast<Instruction>(Location);
- return LocationInst && LocationInst->getFunction() != &NewFunc;
+
+ if (Argument *Arg = dyn_cast<Argument>(Location))
+ return Arg->getParent() != &NewFunc;
+ else if (Instruction *LocationInst = dyn_cast<Instruction>(Location))
+ return LocationInst->getFunction() != &NewFunc;
+ return false;
};
// Debug intrinsics in the new function need to be updated in one of two
@@ -1509,9 +1552,10 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC,
inputs, outputs, EntryFreq, oldFunction->getName() + "." + SuffixToUse,
StructValues, StructTy);
newFunction->IsNewDbgInfoFormat = oldFunction->IsNewDbgInfoFormat;
+ SmallVector<Value *> NewValues;
emitFunctionBody(inputs, outputs, StructValues, newFunction, StructTy, header,
- SinkingCands);
+ SinkingCands, NewValues);
std::vector<Value *> Reloads;
CallInst *TheCall = emitReplacerCall(
@@ -1521,7 +1565,8 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC,
insertReplacerCall(oldFunction, header, TheCall->getParent(), outputs,
Reloads, ExitWeights);
- fixupDebugInfoPostExtraction(*oldFunction, *newFunction, *TheCall);
+ fixupDebugInfoPostExtraction(*oldFunction, *newFunction, *TheCall, inputs,
+ NewValues);
LLVM_DEBUG(if (verifyFunction(*newFunction, &errs())) {
newFunction->dump();
@@ -1586,7 +1631,8 @@ Type *CodeExtractor::getSwitchType() {
void CodeExtractor::emitFunctionBody(
const ValueSet &inputs, const ValueSet &outputs,
const ValueSet &StructValues, Function *newFunction,
- StructType *StructArgTy, BasicBlock *header, const ValueSet &SinkingCands) {
+ StructType *StructArgTy, BasicBlock *header, const ValueSet &SinkingCands,
+ SmallVector<Value *> &NewValues) {
Function *oldFunction = header->getParent();
LLVMContext &Context = oldFunction->getContext();
@@ -1618,7 +1664,6 @@ void CodeExtractor::emitFunctionBody(
// Rewrite all users of the inputs in the extracted region to use the
// arguments (or appropriate addressing into struct) instead.
- SmallVector<Value *> NewValues;
for (unsigned i = 0, e = inputs.size(), aggIdx = 0; i != e; ++i) {
Value *RewriteVal;
if (StructValues.contains(inputs[i])) {
diff --git a/llvm/test/Transforms/CodeExtractor/LoopExtractor_alloca.ll b/llvm/test/Transforms/CodeExtractor/LoopExtractor_alloca.ll
index ff1ed786cefc4..b932a7dc0bf9f 100644
--- a/llvm/test/Transforms/CodeExtractor/LoopExtractor_alloca.ll
+++ b/llvm/test/Transforms/CodeExtractor/LoopExtractor_alloca.ll
@@ -19,6 +19,7 @@
; CHECK-LABEL: define internal void @test.loop1(ptr %v1)
; CHECK-NEXT: newFuncRoot:
+; CHECK-NEXT: #dbg_value
; CHECK-NEXT: br
define void @test() {
diff --git a/llvm/test/Transforms/CodeExtractor/input-value-debug.ll b/llvm/test/Transforms/CodeExtractor/input-value-debug.ll
new file mode 100644
index 0000000000000..d9723d61b4d63
--- /dev/null
+++ b/llvm/test/Transforms/CodeExtractor/input-value-debug.ll
@@ -0,0 +1,53 @@
+; RUN: opt -passes=hotcoldsplit -hotcoldsplit-threshold=0 -S < %s | FileCheck %s
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @foo(i32 %a, i32 %b) !dbg !2 {
+entry:
+ %1 = alloca i32, i64 1, align 4
+ %2 = alloca i32, i64 1, align 4
+ store i32 %a, ptr %1, align 4
+ #dbg_declare(ptr %1, !8, !DIExpression(), !1)
+ #dbg_value(i32 %b, !9, !DIExpression(), !1)
+ br i1 undef, label %if.then, label %if.end
+if.then: ; preds = %entry
+ ret void
+
+if.end: ; preds = %entry
+ store i32 10, ptr %1, align 4
+ %3 = add i32 %b, 1
+ store i32 1, ptr %2, align 4
+ call void @sink(i32 %3)
+ #dbg_declare(ptr %2, !10, !DIExpression(), !1)
+ ret void
+}
+
+declare void @sink(i32) cold
+
+!llvm.dbg.cu = !{!6}
+!llvm.module.flags = !{!0}
+!0 = !{i32 2, !"Debug Info Version", i32 3}
+!1 = !DILocation(line: 11, column: 7, scope: !2)
+!2 = distinct !DISubprogram(name: "foo", scope: !3, file: !3, type: !4, spFlags: DISPFlagDefinition, unit: !6)
+!3 = !DIFile(filename: "test.c", directory: "")
+!4 = !DISubroutineType(cc: DW_CC_program, types: !5)
+!5 = !{}
+!6 = distinct !DICompileUnit(language: DW_LANG_C, file: !3)
+!7 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
+!8 = !DILocalVariable(name: "a", scope: !2, file: !3, type: !7)
+!9 = !DILocalVariable(name: "b", scope: !2, file: !3, type: !7)
+!10 = !DILocalVariable(name: "c", scope: !2, file: !3, type: !7)
+
+; CHECK: define {{.*}}@foo.cold.1(ptr %[[ARG0:.*]], i32 %[[ARG1:.*]], ptr %[[ARG2:.*]]){{.*}} !dbg ![[FN:.*]] {
+; CHECK-NEXT: newFuncRoot:
+; CHECK-NEXT: #dbg_declare(ptr %[[ARG0]], ![[V1:[0-9]+]], {{.*}})
+; CHECK-NEXT: #dbg_value(i32 %[[ARG1]], ![[V2:[0-9]+]], {{.*}})
+; CHECK-NEXT: br
+; CHECK: if.end:
+; CHECK: #dbg_declare(ptr %[[ARG2]], ![[V3:[0-9]+]], {{.*}})
+; CHECK: }
+
+; CHECK: ![[V1]] = !DILocalVariable(name: "a", scope: ![[FN]]{{.*}})
+; CHECK: ![[V2]] = !DILocalVariable(name: "b", scope: ![[FN]]{{.*}})
+; CHECK: ![[V3]] = !DILocalVariable(name: "c", scope: ![[FN]]{{.*}})
diff --git a/llvm/test/Transforms/HotColdSplit/transfer-debug-info.ll b/llvm/test/Transforms/HotColdSplit/transfer-debug-info.ll
index e8c1b464ab0c6..3f69f0c200dad 100644
--- a/llvm/test/Transforms/HotColdSplit/transfer-debug-info.ll
+++ b/llvm/test/Transforms/HotColdSplit/transfer-debug-info.ll
@@ -8,10 +8,6 @@ target triple = "x86_64-apple-macosx10.14.0"
; CHECK-LABEL: define {{.*}}@foo.cold.1
-; - The llvm.dbg.value intrinsic pointing to an argument in @foo (%arg1) is
-; dropped
-; CHECK-NOT: #dbg_value
-
; - Instructions without locations in the original function have no
; location in the new function
; CHECK: [[ADD1:%.*]] = add i32 %{{.*}}, 1{{$}}
diff --git a/llvm/unittests/Transforms/Utils/CodeExtractorTest.cpp b/llvm/unittests/Transforms/Utils/CodeExtractorTest.cpp
index cfe07a2f6c461..c54e93b59e04b 100644
--- a/llvm/unittests/Transforms/Utils/CodeExtractorTest.cpp
+++ b/llvm/unittests/Transforms/Utils/CodeExtractorTest.cpp
@@ -17,6 +17,7 @@
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Verifier.h"
+#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IRReader/IRReader.h"
#include "llvm/Support/SourceMgr.h"
#include "gtest/gtest.h"
@@ -731,4 +732,88 @@ TEST(CodeExtractor, OpenMPAggregateArgs) {
EXPECT_FALSE(verifyFunction(*Outlined));
EXPECT_FALSE(verifyFunction(*Func));
}
+
+TEST(CodeExtractor, ArgsDebugInfo) {
+ LLVMContext Ctx;
+ SMDiagnostic Err;
+ std::unique_ptr<Module> M(parseAssemblyString(R"ir(
+
+ target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+ target triple = "x86_64-unknown-linux-gnu"
+
+ define void @foo(i32 %a, i32 %b) !dbg !2 {
+ %1 = alloca i32, i64 1, align 4, !dbg !1
+ store i32 %a, ptr %1, align 4, !dbg !1
+ #dbg_declare(ptr %1, !8, !DIExpression(), !1)
+ #dbg_value(i32 %b, !9, !DIExpression(), !1)
+ br label %entry
+
+ entry:
+ br label %extract
+
+ extract:
+ store i32 10, ptr %1, align 4, !dbg !1
+ %2 = add i32 %b, 1, !dbg !1
+ br label %exit
+
+ exit:
+ ret void
+ }
+ !llvm.dbg.cu = !{!6}
+ !llvm.module.flags = !{!0}
+ !0 = !{i32 2, !"Debug Info Version", i32 3}
+ !1 = !DILocation(line: 11, column: 7, scope: !2)
+ !2 = distinct !DISubprogram(name: "foo", scope: !3, file: !3, type: !4, spFlags: DISPFlagDefinition, unit: !6)
+ !3 = !DIFile(filename: "test.f90", directory: "")
+ !4 = !DISubroutineType(cc: DW_CC_program, types: !5)
+ !5 = !{null}
+ !6 = distinct !DICompileUnit(language: DW_LANG_Fortran95, file: !3)
+ !7 = !DIBasicType(name: "integer", size: 32, encoding: DW_ATE_signed)
+ !8 = !DILocalVariable(name: "a", scope: !2, file: !3, type: !7)
+ !9 = !DILocalVariable(name: "b", scope: !2, file: !3, type: !7)
+
+ )ir",
+ Err, Ctx));
+
+ Function *Func = M->getFunction("foo");
+ SmallVector<BasicBlock *, 1> Blocks{getBlockByName(Func, "extract")};
+
+ auto TestExtracted = [&](bool AggregateArgs) {
+ CodeExtractor CE(Blocks, /* DominatorTree */ nullptr, AggregateArgs);
+ EXPECT_TRUE(CE.isEligible());
+ CodeExtractorAnalysisCache CEAC(*Func);
+ SetVector<Value *> Inputs, Outputs, SinkingCands, HoistingCands;
+ BasicBlock *CommonExit = nullptr;
+ CE.findAllocas(CEAC, SinkingCands, HoistingCands, CommonExit);
+ CE.findInputsOutputs(Inputs, Outputs, SinkingCands);
+ Function *Outlined = CE.extractCodeRegion(CEAC, Inputs, Outputs);
+ Outlined->dump();
+ EXPECT_TRUE(Outlined);
+ BasicBlock &EB = Outlined->getEntryBlock();
+ Instruction *Term = EB.getTerminator();
+ EXPECT_TRUE(Term);
+ EXPECT_TRUE(Term->hasDbgRecords());
+ for (DbgVariableRecord &DVR : filterDbgVars(Term->getDbgRecordRange())) {
+ DILocalVariable *Var = DVR.getVariable();
+ EXPECT_TRUE(Var);
+ if (DVR.isDbgDeclare())
+ EXPECT_TRUE(Var->getName() == "a");
+ else
+ EXPECT_TRUE(Var->getName() == "b");
+ for (Value *Loc : DVR.location_ops()) {
+ if (Instruction *I = dyn_cast<Instruction>(Loc))
+ EXPECT_TRUE(I->getParent() == &EB);
+ else if (Argument *A = dyn_cast<Argument>(Loc))
+ EXPECT_TRUE(A->getParent() == Outlined);
+ }
+ }
+ EXPECT_FALSE(verifyFunction(*Outlined));
+ };
+ // Test with both true and false for AggregateArgs.
+ TestExtracted(true);
+ TestExtracted(false);
+ EXPECT_FALSE(verifyFunction(*Func));
+
+}
+
} // end anonymous namespace
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
✅ With the latest revision this PR passed the undef deprecator. |
If we use CodeExtractor to extract the block1 into a new function, it will look like the extracted function shown below (with some irrelevent details removed). define void @foo() !dbg !2 { entry: %1 = alloca i32, i64 1, align 4 %2 = alloca i32, i64 1, align 4 #dbg_declare(ptr %1, !8, !DIExpression(), !1) br label %block1 block1: store i32 1, ptr %1, align 4 store i32 2, ptr %2, align 4 #dbg_declare(ptr %2, !10, !DIExpression(), !1) ret void } define internal void @Extracted(ptr %arg0, ptr %arg1) { newFuncRoot: br label %block1 block1: store i32 1, ptr %arg0, align 4 store i32 2, ptr %arg1, align 4 ret void } You will notice that it has replaced the usage of values that were in the parent function (%1 and %2) with the arguments to the new function. But it did not do the same thing with #dbg_declare which was simply dropped because its location pointed to a value outside of the new function. Similarly arg0 is without any debug record, although the value that it replaced had one and we could materealize one for it based on that. This is not just a theoratical limitations. CodeExtractor is used to create functions that implmenent many of the OpenMP constructs in OMPIRBuilder. As a result of these limitations, the debug information is missing from the created functions. This PR tries to address this problem. It iterates over the input to the extraced function and looks at their debug uses. If they were present in the new function, it updates their location. Otherwise it materealize a similar usage in the new function. Most of these changes are localised in fixupDebugInfoPostExtraction. Only other change is to propogate function inputs and the replacement values to the it.
Co-authored-by: Tim Gymnich <tim@gymni.ch>
Co-authored-by: Tim Gymnich <tim@gymni.ch>
Change the type used of a parameter.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
|
||
if (Argument *Arg = dyn_cast<Argument>(Location)) | ||
return Arg->getParent() != &NewFunc; | ||
else if (Instruction *LocationInst = dyn_cast<Instruction>(Location)) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
else if (Instruction *LocationInst = dyn_cast<Instruction>(Location)) | |
if (Instruction *LocationInst = dyn_cast<Instruction>(Location)) |
/// 1. If a debug record points to a value that has been replaced, update the | ||
/// record to use the new value. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
/// 1. If a debug record points to a value that has been replaced, update the | |
/// record to use the new value. | |
/// 1. If a debug record points to a value that has been replaced, update the | |
/// record to use the new value. |
[nit] indention
if (DR->getParent()->getParent() == &NewFunc) | ||
DR->replaceVariableLocationOp(OldLoc, NewLoc); | ||
else { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
if (DR->getParent()->getParent() == &NewFunc) | |
DR->replaceVariableLocationOp(OldLoc, NewLoc); | |
else { | |
if (DR->getParent()->getParent() == &NewFunc) { | |
DR->replaceVariableLocationOp(OldLoc, NewLoc); | |
return; | |
} |
https://llvm.org/docs/CodingStandards.html#use-early-exits-and-continue-to-simplify-code
if (Declare) | ||
DIB.insertDeclare(NewLoc, DR->getVariable(), Expr, DR->getDebugLoc(), | ||
&NewFunc.getEntryBlock()); | ||
else |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
if (Declare) | |
DIB.insertDeclare(NewLoc, DR->getVariable(), Expr, DR->getDebugLoc(), | |
&NewFunc.getEntryBlock()); | |
else | |
if (Declare) { | |
DIB.insertDeclare(NewLoc, DR->getVariable(), Expr, DR->getDebugLoc(), | |
&NewFunc.getEntryBlock()); | |
return; | |
} |
llvm.org/docs/CodingStandards.html#use-early-exits-and-continue-to-simplify-code
Fix style and formatting issues.
If we use
CodeExtractor
to extract the block1 into a new function,it will look like the extracted function shown below (with some irrelevent details removed).
You will notice that it has replaced the usage of values that were in the parent function (%1 and %2) with the arguments to the new function. But it did not do the same thing with
#dbg_declare
which was simply dropped because its location pointed to a value outside of the new function. Similarly arg0 is without any debug record, although the value that it replaced had one and we could materialize one for it based on that.This is not just a theoretical limitations.
CodeExtractor
is used to create functions that implement many of theOpenMP
constructs inOMPIRBuilder
. As a result of these limitations, the debug information is missing from the created functions.This PR tries to address this problem. It iterates over the input to the extracted function and looks at their debug uses. If they were present in the new function, it updates their location. Otherwise it materialize a similar usage in the new function.
Most of these changes are localized in
fixupDebugInfoPostExtraction
. Only other change is to propagate function inputs and the replacement values to it.