Suppress compiler optimizations which may break container poisoning.

Many compiler optimizations, even some in Asan, assume that size of the object on the stack is fixed. Compiler takes into account the lifetime of the stack object, but it does not expect that size of the object can change. This assumption allow to accesses when ever object is alive. However, this assumption does not work well with custom poisoning of objects by sanitizers. `Cord` may poison parts of the object to improve a precision of the bug detection. But compiler still may load from poisoned parts of the object, assuming it's alive. So without the patch we can have `false-positive`, because compiler reordered load from poisoned local before size-check. llvm/llvm-project#100639 Alternative is to make compiler optimization to know about possibility of local object resize, but it's not worth of effort, as it's needed only for sanitizers so far. Another minor issue fixed with this patch is `false-negative`. llvm/llvm-project#100640 Asan drops instrumentation of memory accesses proven to be inbound of local variable. However if we if we take into account `custom poisoning` we will have to pessimize too often: any random function call can potentially `resize` a variable by `poisoning`. We are already using similar `poisoning` workaround in libc++: https://github.com/llvm/llvm-project/pull/79536/files#diff-534bc2907ddb3b074ded1353d18fd7d578daf1707943b3039bab4ed975aba3b3R772 PiperOrigin-RevId: 656129711 Change-Id: I6d78997da6d31c7ab979a00b84dc9b3b7cffc26f
abseil · Jul 25, 2024 · e342b7f · e342b7f
1 parent bd3ae17
commit e342b7f
Show file tree

Hide file tree

Showing 2 changed files with 52 additions and 13 deletions.
diff --git a/absl/strings/cord_test.cc b/absl/strings/cord_test.cc
@@ -3278,6 +3278,26 @@ TEST_P(CordTest, ChecksummedEmptyCord) {
   EXPECT_EQ(absl::HashOf(c3), absl::HashOf(absl::string_view()));
 }
 
+// This must not be static to avoid aggressive optimizations.
+ABSL_ATTRIBUTE_WEAK
+size_t FalseReport(const absl::Cord& a, bool f);
+
+ABSL_ATTRIBUTE_NOINLINE
+size_t FalseReport(const absl::Cord& a, bool f) {
+  absl::Cord b;
+  const absl::Cord& ref = f ? b : a;
+  // Test that sanitizers report nothing here. Without
+  // InlineData::Rep::annotated_this() compiler can unconditionally load
+  // poisoned parts, assuming that local variable is fully accessible.
+  return ref.size();
+}
+
+TEST(CordSanitizerTest, SanitizesCordFalseReport) {
+  absl::Cord c;
+  for (int i = 0; i < 1000; ++i) c.Append("a");
+  FalseReport(c, false);
+}
+
 TEST(CrcCordTest, ChecksummedEmptyCordEstimateMemoryUsage) {
   absl::Cord cord;
   cord.SetExpectedChecksum(0);

diff --git a/absl/strings/internal/cord_internal.h b/absl/strings/internal/cord_internal.h
@@ -713,35 +713,54 @@ class InlineData {
                GetOrNull(chars, 13),
                GetOrNull(chars, 14)} {}
 
+#ifdef ABSL_INTERNAL_CORD_HAVE_SANITIZER
+    // Break compiler optimization for cases when value is allocated on the
+    // stack. Compiler assumes that the the variable is fully accessible
+    // regardless of our poisoning.
+    // False report: https://github.com/llvm/llvm-project/issues/100639
+    // Missing report: https://github.com/llvm/llvm-project/issues/100640
+    const Rep* self() const {
+      const Rep* volatile ptr = this;
+      return ptr;
+    }
+    Rep* self() {
+      Rep* volatile ptr = this;
+      return ptr;
+    }
+#else
+    constexpr const Rep* self() const { return this; }
+    constexpr Rep* self() { return this; }
+#endif
+
     // Disable sanitizer as we must always be able to read `tag`.
     ABSL_CORD_INTERNAL_NO_SANITIZE
     int8_t tag() const { return reinterpret_cast<const int8_t*>(this)[0]; }
-    void set_tag(int8_t rhs) { reinterpret_cast<int8_t*>(this)[0] = rhs; }
+    void set_tag(int8_t rhs) { reinterpret_cast<int8_t*>(self())[0] = rhs; }
 
-    char* as_chars() { return data + 1; }
-    const char* as_chars() const { return data + 1; }
+    char* as_chars() { return self()->data + 1; }
+    const char* as_chars() const { return self()->data + 1; }
 
-    bool is_tree() const { return (tag() & 1) != 0; }
+    bool is_tree() const { return (self()->tag() & 1) != 0; }
 
     size_t inline_size() const {
-      ABSL_ASSERT(!is_tree());
-      return static_cast<size_t>(tag()) >> 1;
+      ABSL_ASSERT(!self()->is_tree());
+      return static_cast<size_t>(self()->tag()) >> 1;
     }
 
     void set_inline_size(size_t size) {
       ABSL_ASSERT(size <= kMaxInline);
-      set_tag(static_cast<int8_t>(size << 1));
+      self()->set_tag(static_cast<int8_t>(size << 1));
     }
 
-    CordRep* tree() const { return as_tree.rep; }
-    void set_tree(CordRep* rhs) { as_tree.rep = rhs; }
+    CordRep* tree() const { return self()->as_tree.rep; }
+    void set_tree(CordRep* rhs) { self()->as_tree.rep = rhs; }
 
-    cordz_info_t cordz_info() const { return as_tree.cordz_info; }
-    void set_cordz_info(cordz_info_t rhs) { as_tree.cordz_info = rhs; }
+    cordz_info_t cordz_info() const { return self()->as_tree.cordz_info; }
+    void set_cordz_info(cordz_info_t rhs) { self()->as_tree.cordz_info = rhs; }
 
     void make_tree(CordRep* tree) {
-      as_tree.rep = tree;
-      as_tree.cordz_info = kNullCordzInfo;
+      self()->as_tree.rep = tree;
+      self()->as_tree.cordz_info = kNullCordzInfo;
     }
 
 #ifdef ABSL_INTERNAL_CORD_HAVE_SANITIZER