-
Notifications
You must be signed in to change notification settings - Fork 14.7k
[BOLT] Add dump-dot-func option for selective function CFG dumping #153007
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
@llvm/pr-subscribers-bolt Author: (yafet-a) ChangesChange:
Usage:
Full diff: https://github.com/llvm/llvm-project/pull/153007.diff 6 Files Affected:
diff --git a/bolt/docs/CommandLineArgumentReference.md b/bolt/docs/CommandLineArgumentReference.md
index f3881c9a640a9..de54184ff8db1 100644
--- a/bolt/docs/CommandLineArgumentReference.md
+++ b/bolt/docs/CommandLineArgumentReference.md
@@ -138,6 +138,11 @@
Dump function CFGs to graphviz format after each stage;enable '-print-loops'
for color-coded blocks
+- `--dump-dot-func=<func1,func2,func3...>`
+
+ Dump function CFGs to graphviz format for specified functions only;
+ takes function name patterns (regex supported)
+
- `--dump-linux-exceptions`
Dump Linux kernel exception table
diff --git a/bolt/include/bolt/Utils/CommandLineOpts.h b/bolt/include/bolt/Utils/CommandLineOpts.h
index a75b6bf720ec4..859d6f3bf6774 100644
--- a/bolt/include/bolt/Utils/CommandLineOpts.h
+++ b/bolt/include/bolt/Utils/CommandLineOpts.h
@@ -15,6 +15,12 @@
#include "llvm/Support/CommandLine.h"
+namespace llvm {
+namespace bolt {
+class BinaryFunction;
+}
+} // namespace llvm
+
namespace opts {
enum HeatmapModeKind {
@@ -100,6 +106,9 @@ extern llvm::cl::opt<unsigned> Verbosity;
/// Return true if we should process all functions in the binary.
bool processAllFunctions();
+/// Return true if we should dump dot graphs for the given function.
+bool shouldDumpDot(const llvm::bolt::BinaryFunction &Function);
+
enum GadgetScannerKind { GS_PACRET, GS_PAUTH, GS_ALL };
extern llvm::cl::bits<GadgetScannerKind> GadgetScannersToRun;
diff --git a/bolt/lib/Rewrite/BinaryPassManager.cpp b/bolt/lib/Rewrite/BinaryPassManager.cpp
index 996d2e972599d..0ddb73f828878 100644
--- a/bolt/lib/Rewrite/BinaryPassManager.cpp
+++ b/bolt/lib/Rewrite/BinaryPassManager.cpp
@@ -52,6 +52,7 @@ namespace opts {
extern cl::opt<bool> PrintAll;
extern cl::opt<bool> PrintDynoStats;
extern cl::opt<bool> DumpDotAll;
+extern bool shouldDumpDot(const bolt::BinaryFunction &Function);
extern cl::opt<std::string> AsmDump;
extern cl::opt<bolt::PLTCall::OptType> PLT;
extern cl::opt<bolt::IdenticalCodeFolding::ICFLevel, false,
@@ -340,7 +341,7 @@ Error BinaryFunctionPassManager::runPasses() {
Function.print(BC.outs(), Message);
- if (opts::DumpDotAll)
+ if (opts::shouldDumpDot(Function))
Function.dumpGraphForPass(PassIdName);
}
}
diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp
index fe4a23cc01382..69dcc8ce08a5b 100644
--- a/bolt/lib/Rewrite/RewriteInstance.cpp
+++ b/bolt/lib/Rewrite/RewriteInstance.cpp
@@ -114,6 +114,35 @@ cl::opt<bool> DumpDotAll(
"enable '-print-loops' for color-coded blocks"),
cl::Hidden, cl::cat(BoltCategory));
+cl::list<std::string> DumpDotFunc(
+ "dump-dot-func", cl::CommaSeparated,
+ cl::desc(
+ "dump function CFGs to graphviz format for specified functions only;"
+ "takes function name patterns (regex supported)"),
+ cl::value_desc("func1,func2,func3,..."), cl::Hidden, cl::cat(BoltCategory));
+
+bool shouldDumpDot(const bolt::BinaryFunction &Function) {
+ // If dump-dot-all is enabled, dump all functions
+ if (DumpDotAll)
+ return !Function.isIgnored();
+
+ // If no specific functions specified in dump-dot-func, don't dump any
+ if (DumpDotFunc.empty())
+ return false;
+
+ if (Function.isIgnored())
+ return false;
+
+ // Check if function matches any of the specified patterns
+ for (const std::string &Name : DumpDotFunc) {
+ if (Function.hasNameRegex(Name)) {
+ return true;
+ }
+ }
+
+ return false;
+}
+
static cl::list<std::string>
ForceFunctionNames("funcs",
cl::CommaSeparated,
@@ -3570,7 +3599,7 @@ void RewriteInstance::postProcessFunctions() {
if (opts::PrintAll || opts::PrintCFG)
Function.print(BC->outs(), "after building cfg");
- if (opts::DumpDotAll)
+ if (opts::shouldDumpDot(Function))
Function.dumpGraphForPass("00_build-cfg");
if (opts::PrintLoopInfo) {
diff --git a/bolt/test/Inputs/multi-func.c b/bolt/test/Inputs/multi-func.c
new file mode 100644
index 0000000000000..622cc285570e0
--- /dev/null
+++ b/bolt/test/Inputs/multi-func.c
@@ -0,0 +1,24 @@
+#include <stdio.h>
+
+// Multiple functions to test selective dumping
+int add(int a, int b) { return a + b; }
+
+int multiply(int a, int b) { return a * b; }
+
+int main_helper() {
+ printf("Helper function\n");
+ return 42;
+}
+
+int main_secondary() { return add(5, 3); }
+
+void other_function() { printf("Other function\n"); }
+
+int main() {
+ int result = add(10, 20);
+ result = multiply(result, 2);
+ main_helper();
+ main_secondary();
+ other_function();
+ return result;
+}
\ No newline at end of file
diff --git a/bolt/test/dump-dot-func.test b/bolt/test/dump-dot-func.test
new file mode 100644
index 0000000000000..3b62130ef1f6d
--- /dev/null
+++ b/bolt/test/dump-dot-func.test
@@ -0,0 +1,43 @@
+# Test the --dump-dot-func option with multiple functions
+
+RUN: %clang %p/Inputs/multi-func.c -o %t.exe -Wl,-q
+
+# Test 1: --dump-dot-func with specific function name
+RUN: llvm-bolt %t.exe -o %t.bolt1 --dump-dot-func=add --print-only=add -v=1 2>&1 | FileCheck %s --check-prefix=ADD
+RUN: ls add-*.dot | wc -l | FileCheck %s --check-prefix=COUNT-ONE
+
+# Test 2: --dump-dot-func with regex pattern (main.*)
+RUN: llvm-bolt %t.exe -o %t.bolt2 --dump-dot-func="main.*" --print-only=main,main_helper,main_secondary -v=1 2>&1 | FileCheck %s --check-prefix=MAIN-REGEX
+
+# Test 3: --dump-dot-func with multiple specific functions
+RUN: llvm-bolt %t.exe -o %t.bolt3 --dump-dot-func=add,multiply --print-only=add,multiply -v=1 2>&1 | FileCheck %s --check-prefix=MULTI
+
+# Test 4: No option specified should create no dot files
+RUN: llvm-bolt %t.exe -o %t.bolt4 --print-only=main 2>&1 | FileCheck %s --check-prefix=NONE
+
+# Test 5: --dump-dot-func with non-existent function
+RUN: llvm-bolt %t.exe -o %t.bolt5 --dump-dot-func=nonexistent --print-only=main -v=1 2>&1 | FileCheck %s --check-prefix=NONEXISTENT
+
+# Test 6: Backward compatibility - --dump-dot-all should still work
+RUN: llvm-bolt %t.exe -o %t.bolt6 --dump-dot-all --print-only=main -v=1 2>&1 | FileCheck %s --check-prefix=ALL
+
+# Check that specific functions are dumped
+ADD: BOLT-INFO: dumping CFG to add-00_build-cfg.dot
+
+MAIN-REGEX-DAG: BOLT-INFO: dumping CFG to main-00_build-cfg.dot
+MAIN-REGEX-DAG: BOLT-INFO: dumping CFG to main_helper-00_build-cfg.dot
+MAIN-REGEX-DAG: BOLT-INFO: dumping CFG to main_secondary-00_build-cfg.dot
+
+MULTI-DAG: BOLT-INFO: dumping CFG to add-00_build-cfg.dot
+MULTI-DAG: BOLT-INFO: dumping CFG to multiply-00_build-cfg.dot
+
+# Should be no dumping messages when no option is specified
+NONE-NOT: BOLT-INFO: dumping CFG
+
+# Should be no dumping messages for non-existent function
+NONEXISTENT-NOT: BOLT-INFO: dumping CFG
+
+ALL: BOLT-INFO: dumping CFG to main-00_build-cfg.dot
+
+# Count checks
+COUNT-ONE: 1
\ No newline at end of file
|
I think this will be a very useful feature. For C++, name mangling might be slightly annoying, but I guess it is what it is and the mangled name needs to be provided. Anyway, a test-case for C++ might be good. |
…mangled names work
Thanks for the suggestion - I have added a cpp test case and updated the tests to verify that passing in the mangled names works correctly. For the matter of allowing unmangled names afaiu this seems to be a present limitation in the other bolt flags that pass in multiple functions and use There are ways to work around this limitation and get cpp mangled names recognised, such as using substring matching with the mangled names (e.g., If the desire to support unmangled names for C++ functions is high enough, this could be addressed in a separate PR and applied consistently across all the function selection flags. However, given that this hasn't been implemented for the existing flags yet, I suspect it may not be a high priority for most users. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Looking a bit closer, I now see that options --dump-dot-func
and --print-only
go hand in hand. It looks like --dump-dot piggy backs on --print, which is fine, but having to specify both looks inconvenient for a user, and also error prone.
So, my question is, can we only use --dump-dot-func to dump a CFG dot file if we want that?
bolt/test/dump-dot-func.test
Outdated
RUN: ls add-*.dot | wc -l | FileCheck %s --check-prefix=COUNT-ONE | ||
# Test 1: --dump-dot-func with specific function name (mangled) | ||
RUN: llvm-bolt %t.exe -o %t.bolt1 --dump-dot-func=_Z3addii --print-only=_Z3addii -v=1 2>&1 | FileCheck %s --check-prefix=ADD | ||
RUN: ls _Z3addii-*.dot | wc -l | FileCheck %s --check-prefix=COUNT-ONE |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I see that we are checking for the existing of exactly 1 dot file here, but can we not do this without tools ls and wc?
COUNT-ONE: dumping CFG to _Z3addii.dot
COUNT-ONE-NOT: dumping CFG
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This can also be done with test -f if that is preferred. Since the test file is small enough, we can test -f the dot files we expect and test ! -f the ones we don't expect and ensure those checks are being met. Is this preferable? I have provided it in the latest commit da220c5
bolt/test/dump-dot-func.test
Outdated
RUN: llvm-bolt %t.exe -o %t.bolt4 --print-only=main 2>&1 | FileCheck %s --check-prefix=NONE | ||
|
||
# Test 5: --dump-dot-func with non-existent function | ||
RUN: llvm-bolt %t.exe -o %t.bolt5 --dump-dot-func=nonexistent --print-only=main -v=1 2>&1 | FileCheck %s --check-prefix=NONEXISTENT |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What is the current behaviour if a non-existent function is provided, e.g. a user typo? Is this silently ignored? For bonus points we could emit a diagnostic.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, currently it just silently ignores it. I can add a warning to inform the user but assumed they would know from the lack of .dot file being created
Yes, just dump-dot-func alone is sufficient to dump a CFG dot file and it doesn't rely on the implementation of print-only The inclusion of the print-only flag in the tests is just so that the dot files generated by the tests don't clutter the test directory. Hence the tests check for the diagnostic tldr: If print-only is enabled, dot files aren't outputted so leads to less clutter in test directory void BinaryFunction::dumpGraphForPass(std::string Annotation) const {
if (!opts::shouldPrint(*this))
return; // exits early here and hence doesnt call dumpGraphToFile hence no .dot file created
std::string Filename = constructFilename(getPrintName(), Annotation, ".dot");
if (opts::Verbosity >= 1)
BC.outs() << "BOLT-INFO: dumping CFG to " << Filename << "\n";
dumpGraphToFile(Filename);
}``` |
RUN: %clang++ %p/Inputs/multi-func.cpp -o %t.exe -Wl,-q | ||
|
||
# Test 1: --dump-dot-func with specific function name (mangled) | ||
RUN: rm -f *.dot |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You're using a lot of linux commands in your tests, like rm
and test
. Ideally, we just pattern match the output of the compiler, and not have all these dependencies. So, first, I don't think you need to clean up temp files, so you can omit the rm
. Second, can we not simply match BOLT-INFO: dumping CFG
for all of these tests and the -NOT variant for the negative tests?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I had assumed it was ok given other tests with REQUIRED: system-linux had them. But I see now how simple pattern matching is preferred. Thanks, I've made the update
Change:
--dump-dot-func
command-line option that allows users to dump CFGs only for specific functions instead of dumping all functions (the current only available option being--dump-dot-all
)Usage:
--dump-dot-func=main,helper
or--dump-dot-func="init.*
") to generate .dot files only for functions of interestTesting
The introduced test
dump-dot-func.test
confirms the new option does the following:dump-dot-func
can correctly filter a specified functionsdump-dot-all
continues to work as expected