-
Notifications
You must be signed in to change notification settings - Fork 13.6k
[Sanitizer] Use % patterns in report paths #141820
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-compiler-rt-sanitizer Author: Ellis Hoag (ellishg) ChangesSupport llvm-project/compiler-rt/lib/profile/InstrProfilingFile.c Lines 999 to 1017 in 4bf67cd
In particular, the Also, avoid using Full diff: https://github.com/llvm/llvm-project/pull/141820.diff 3 Files Affected:
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_file.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_file.cpp
index 96af270f9d8b5..b55c4ff093c1b 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_file.cpp
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_file.cpp
@@ -96,12 +96,75 @@ static void RecursiveCreateParentDirs(char *path) {
}
}
+/// Parse the report path \p pattern and copy the parsed path to \p dest.
+///
+/// * `%%` becomes `%`
+/// * `%H` expands to the environment variable `HOME`
+/// * `%t` expands to the environment variable `TMPDIR`
+/// * `%p` expands to the process ID (PID)
+static void ParseAndSetPath(const char *pattern, char *dest,
+ const uptr dest_size) {
+ CHECK(pattern);
+ CHECK(dest);
+ CHECK_GT(dest_size, 1);
+ dest[0] = '\0';
+ uptr next_substr_start_idx = 0;
+ for (uptr i = 0; i < internal_strlen(pattern) - 1; i++) {
+ if (pattern[i] != '%')
+ continue;
+ int bytes_to_copy = i - next_substr_start_idx;
+ // Copy over previous substring.
+ CHECK_LT(internal_strlcat(dest, pattern + next_substr_start_idx,
+ internal_strlen(dest) + bytes_to_copy + 1),
+ dest_size);
+ const char *str_to_concat;
+ switch (pattern[++i]) {
+ case '%':
+ str_to_concat = "%";
+ break;
+ case 'H':
+ str_to_concat = GetEnv("HOME");
+ break;
+ case 't':
+ str_to_concat = GetEnv("TMPDIR");
+ break;
+ case 'p': {
+ // Use printf directly to write the PID since it's not a static string.
+ int remaining_capacity = dest_size - internal_strlen(dest);
+ int bytes_copied =
+ internal_snprintf(dest + internal_strlen(dest), remaining_capacity,
+ "%ld", internal_getpid());
+ CHECK_GT(bytes_copied, 0);
+ CHECK_LT(bytes_copied, remaining_capacity);
+ str_to_concat = "";
+ break;
+ }
+ default: {
+ // Invalid pattern: fallback to original pattern.
+ const char *message = "ERROR: Unexpected pattern: ";
+ WriteToFile(kStderrFd, message, internal_strlen(message));
+ WriteToFile(kStderrFd, pattern, internal_strlen(pattern));
+ WriteToFile(kStderrFd, "\n", internal_strlen("\n"));
+ CHECK_LT(internal_strlcpy(dest, pattern, dest_size), dest_size);
+ return;
+ }
+ }
+ CHECK(str_to_concat);
+ CHECK_LT(internal_strlcat(dest, str_to_concat, dest_size), dest_size);
+ next_substr_start_idx = i + 1;
+ }
+ CHECK_LT(internal_strlcat(dest, pattern + next_substr_start_idx, dest_size),
+ dest_size);
+}
+
void ReportFile::SetReportPath(const char *path) {
if (path) {
uptr len = internal_strlen(path);
if (len > sizeof(path_prefix) - 100) {
- Report("ERROR: Path is too long: %c%c%c%c%c%c%c%c...\n", path[0], path[1],
- path[2], path[3], path[4], path[5], path[6], path[7]);
+ const char *message = "ERROR: Path is too long: ";
+ WriteToFile(kStderrFd, message, internal_strlen(message));
+ WriteToFile(kStderrFd, path, 8);
+ WriteToFile(kStderrFd, "\n", internal_strlen("\n"));
Die();
}
}
@@ -115,7 +178,7 @@ void ReportFile::SetReportPath(const char *path) {
} else if (internal_strcmp(path, "stdout") == 0) {
fd = kStdoutFd;
} else {
- internal_snprintf(path_prefix, kMaxPathLength, "%s", path);
+ ParseAndSetPath(path, path_prefix, kMaxPathLength);
RecursiveCreateParentDirs(path_prefix);
}
}
diff --git a/compiler-rt/test/sanitizer_common/TestCases/Posix/sanitizer_set_report_path_fail.cpp b/compiler-rt/test/sanitizer_common/TestCases/Posix/sanitizer_set_report_path_fail.cpp
new file mode 100644
index 0000000000000..c86e1e7e25522
--- /dev/null
+++ b/compiler-rt/test/sanitizer_common/TestCases/Posix/sanitizer_set_report_path_fail.cpp
@@ -0,0 +1,19 @@
+// RUN: %clangxx -O2 %s -o %t
+// RUN: not %env %run %t 2>&1 | FileCheck %s --check-prefix=ERROR1
+// RUN: not %env %run %t A 2>&1 | FileCheck %s --check-prefix=ERROR2
+
+#include <sanitizer/common_interface_defs.h>
+#include <stdio.h>
+
+int main(int argc, char **argv) {
+ char buff[4096];
+ if (argc == 1) {
+ // Try setting again with an invalid/inaccessible directory.
+ sprintf(buff, "%s/report", argv[0]);
+ // ERROR1: Can't create directory: {{.*}}
+ } else {
+ snprintf(buff, sizeof(buff), "%04095d", 42);
+ // ERROR2: Path is too long: 00000000
+ }
+ __sanitizer_set_report_path(buff);
+}
diff --git a/compiler-rt/test/sanitizer_common/TestCases/Posix/sanitizer_set_report_path_test.cpp b/compiler-rt/test/sanitizer_common/TestCases/Posix/sanitizer_set_report_path_test.cpp
index ab1017a2efc07..9d7ed80b44ccb 100644
--- a/compiler-rt/test/sanitizer_common/TestCases/Posix/sanitizer_set_report_path_test.cpp
+++ b/compiler-rt/test/sanitizer_common/TestCases/Posix/sanitizer_set_report_path_test.cpp
@@ -1,27 +1,42 @@
// Test __sanitizer_set_report_path and __sanitizer_get_report_path:
// RUN: %clangxx -O2 %s -o %t
-// RUN: not %run %t 2>&1 | FileCheck %s
+// RUN: %env HOME=%t.homedir TMPDIR=%t.tmpdir %run %t 2>%t.err | FileCheck %s
+// RUN: FileCheck %s --input-file=%t.err --check-prefix=ERROR
-#include <assert.h>
#include <sanitizer/common_interface_defs.h>
#include <stdio.h>
#include <string.h>
-volatile int *null = 0;
-
int main(int argc, char **argv) {
- char buff[1000];
+ char buff[4096];
sprintf(buff, "%s.report_path/report", argv[0]);
__sanitizer_set_report_path(buff);
- assert(strncmp(buff, __sanitizer_get_report_path(), strlen(buff)) == 0);
+ // CHECK: {{.*}}.report_path/report.[[PID:[0-9]+]]
+ printf("%s\n", __sanitizer_get_report_path());
- // Try setting again with an invalid/inaccessible directory.
- char buff_bad[1000];
- sprintf(buff_bad, "%s/report", argv[0]);
- fprintf(stderr, "Expected bad report path: %s\n", buff_bad);
- // CHECK: Expected bad report path: [[BADPATH:.*]]/report
- __sanitizer_set_report_path(buff_bad);
- assert(strncmp(buff, __sanitizer_get_report_path(), strlen(buff)) == 0);
-}
+ strcpy(buff, "%H/foo");
+ __sanitizer_set_report_path(buff);
+ // CHECK: [[T:.*]].homedir/foo.[[PID]]
+ printf("%s\n", __sanitizer_get_report_path());
-// CHECK: ERROR: Can't create directory: [[BADPATH]]
+ strcpy(buff, "%t/foo");
+ __sanitizer_set_report_path(buff);
+ // CHECK: [[T]].tmpdir/foo.[[PID]]
+ printf("%s\n", __sanitizer_get_report_path());
+
+ strcpy(buff, "%H/%p/%%foo");
+ __sanitizer_set_report_path(buff);
+ // CHECK: [[T]].homedir/[[PID]]/%foo.[[PID]]
+ printf("%s\n", __sanitizer_get_report_path());
+
+ strcpy(buff, "%%foo%%bar");
+ __sanitizer_set_report_path(buff);
+ // CHECK: %foo%bar.[[PID]]
+ printf("%s\n", __sanitizer_get_report_path());
+
+ strcpy(buff, "%%foo%ba%%r");
+ __sanitizer_set_report_path(buff);
+ // ERROR: Unexpected pattern: %%foo%ba%%r
+ // CHECK: %%foo%ba%%r.[[PID]]
+ printf("%s\n", __sanitizer_get_report_path());
+}
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks! Mostly looks good, I just have a few questions/suggestions below.
@@ -0,0 +1,19 @@ | |||
// RUN: %clangxx -O2 %s -o %t | |||
// RUN: not %env %run %t 2>&1 | FileCheck %s --check-prefix=ERROR1 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
add comment as to what each of these invocations is testing
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Added a comment next to the snprintf()
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks but I meant up here - could you also add a 1-sentence comment before each run line as to what it is testing? (Reading the test from the top down I was initially confused about the "A" argument)
@@ -96,12 +96,75 @@ static void RecursiveCreateParentDirs(char *path) { | |||
} | |||
} | |||
|
|||
/// Parse the report path \p pattern and copy the parsed path to \p dest. | |||
/// | |||
/// * `%%` becomes `%` |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why support this one? "%" isn't a common filename character
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Just for completeness. Is there any reason we shouldn't handle this case?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It just didn't seem like it would be useful in practice, but I'm not strongly opposed.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
IMO having this for completeness is a good idea. As a user it would be infuriating if we didn't if I happened to need it for whatever reason.
/// Parse the report path \p pattern and copy the parsed path to \p dest. | ||
/// | ||
/// * `%%` becomes `%` | ||
/// * `%H` expands to the environment variable `HOME` |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This one doesn't seem to be supported on the PGO side, why is it more useful here?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is actually the main motivation for this PR. I'd like some way to resolve the home directory at runtime. I think it could also be useful on the PGO siee
/// * `%%` becomes `%` | ||
/// * `%H` expands to the environment variable `HOME` | ||
/// * `%t` expands to the environment variable `TMPDIR` | ||
/// * `%p` expands to the process ID (PID) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Since the PID is already appended, is it useful?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
That's true. I mainly implemented this for completeness compared to PGO
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
ok, again not strongly opposed
// CHECK: %foo%bar.[[PID]] | ||
printf("%s\n", __sanitizer_get_report_path()); | ||
|
||
strcpy(buff, "%%foo%ba%%r"); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Suggest removing the "%%" from this one to make it clearer that what is being tested is the "%b".
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If we can't parse a path, we fallback to the original path. I want to test that the other %%
isn't parsed to %
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
lgtm
@@ -96,12 +96,75 @@ static void RecursiveCreateParentDirs(char *path) { | |||
} | |||
} | |||
|
|||
/// Parse the report path \p pattern and copy the parsed path to \p dest. | |||
/// | |||
/// * `%%` becomes `%` |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It just didn't seem like it would be useful in practice, but I'm not strongly opposed.
/// * `%%` becomes `%` | ||
/// * `%H` expands to the environment variable `HOME` | ||
/// * `%t` expands to the environment variable `TMPDIR` | ||
/// * `%p` expands to the process ID (PID) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
ok, again not strongly opposed
@@ -0,0 +1,19 @@ | |||
// RUN: %clangxx -O2 %s -o %t | |||
// RUN: not %env %run %t 2>&1 | FileCheck %s --check-prefix=ERROR1 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks but I meant up here - could you also add a 1-sentence comment before each run line as to what it is testing? (Reading the test from the top down I was initially confused about the "A" argument)
const uptr dest_size) { | ||
CHECK(pattern); | ||
CHECK(dest); | ||
CHECK_GT(dest_size, 1); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why GT
and not GE
?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If dest_size = 1
then that only leave space for the null byte. I guess an empty path is valid, so maybe I should allow that
} | ||
} | ||
CHECK(str_to_concat); | ||
CHECK_LT(internal_strlcat(dest, str_to_concat, dest_size), dest_size); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Confirming that LT
is because of the trailing \0
?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yeah, strlcat returns the size of the final string, not including the null byte.
Looks like this test is failing. I'll try to see if I can forward fix. Otherwise I'll revert |
This test is broken on android by #141820 https://lab.llvm.org/buildbot/#/builders/186/builds/9498 > FileCheck error: '' is empty. I suspect that on android printf only works if its emitted to stderr because this use to work https://github.com/llvm/llvm-project/blob/a2ce5647200ad40ae356affd44db7d054de444d2/compiler-rt/test/sanitizer_common/TestCases/Posix/sanitizer_set_report_path_test.cpp#L21-L22 Only emit to stderr and see if that fixes the test.
This test is broken on android by llvm/llvm-project#141820 https://lab.llvm.org/buildbot/#/builders/186/builds/9498 > FileCheck error: '' is empty. I suspect that on android printf only works if its emitted to stderr because this use to work https://github.com/llvm/llvm-project/blob/a2ce5647200ad40ae356affd44db7d054de444d2/compiler-rt/test/sanitizer_common/TestCases/Posix/sanitizer_set_report_path_test.cpp#L21-L22 Only emit to stderr and see if that fixes the test.
I attempted to fix android tests in #142207 (broken by #141820). They are still failing but now I have more info. https://lab.llvm.org/buildbot/#/builders/186/builds/9504/steps/16/logs/stdio ERROR: Can't open file: //foo.8862 (reason: 30) I believe the reason is that on android the HOME and TMPDIR environment variables are not being set correctly, or they are not read correctly. (#142234 (comment))
I attempted to fix android tests in llvm/llvm-project#142207 (broken by llvm/llvm-project#141820). They are still failing but now I have more info. https://lab.llvm.org/buildbot/#/builders/186/builds/9504/steps/16/logs/stdio ERROR: Can't open file: //foo.8862 (reason: 30) I believe the reason is that on android the HOME and TMPDIR environment variables are not being set correctly, or they are not read correctly. (llvm/llvm-project#142234 (comment))
Support
%
patterns in sanitizer report paths, similar to the patterns supported in IRPGOllvm-project/compiler-rt/lib/profile/InstrProfilingFile.c
Lines 999 to 1017 in 4bf67cd
%%
becomes%
%H
expands to the environment variableHOME
%t
expands to the environment variableTMPDIR
%p
expands to the process ID (PID)In particular, the
%H
pattern is useful to resolve the home directory at runtime, which is not possible before this PR.Also, avoid using
Report()
before the report path has been set.