From 313fca6520b43d95abb73e7c78a252a60ee4cf48 Mon Sep 17 00:00:00 2001
From: Pavel Labath <pavel@labath.sk>
Date: Wed, 15 Jul 2020 15:42:00 +0200
Subject: [PATCH 1/8] [lldb/test] Remove JOIN_CMD from Makefile.rules

It's possible to achieve the same effect by providing multi-step recipe
instead of a single-step recipe where the step happens to contain
multiple commands.
---
 .../Python/lldbsuite/test/make/Makefile.rules | 34 +++++++++----------
 1 file changed, 16 insertions(+), 18 deletions(-)

diff --git a/lldb/packages/Python/lldbsuite/test/make/Makefile.rules b/lldb/packages/Python/lldbsuite/test/make/Makefile.rules
index 5316c51899c7a8..b9a6937650d058 100644
--- a/lldb/packages/Python/lldbsuite/test/make/Makefile.rules
+++ b/lldb/packages/Python/lldbsuite/test/make/Makefile.rules
@@ -82,11 +82,9 @@ endif
 # we strictly required double-quotes
 #----------------------------------------------------------------------
 ifeq "$(HOST_OS)" "Windows_NT"
-	JOIN_CMD = &
 	QUOTE = "
 	FIXUP_SYNTAX_HIGHLIGHTING_IN_MY_EDITOR = "
 else
-	JOIN_CMD = ;
 	QUOTE = '
 	FIXUP_SYNTAX_HIGHLIGHTING_IN_MY_EDITOR = '
 endif
@@ -729,28 +727,28 @@ endif
 # and the -MM option will list all non-system dependencies.
 #----------------------------------------------------------------------
 %.d: %.c
-	@rm -f $@ $(JOIN_CMD) \
-	$(CC) -M $(CFLAGS) $< > $@.tmp && \
-	sed $(QUOTE)s,\($*\)\.o[ :]*,\1.o $@ : ,g$(QUOTE) < $@.tmp > $@ $(JOIN_CMD) \
-	rm -f $@.tmp
+	@rm -f $@
+	@$(CC) -M $(CFLAGS) $< > $@.tmp && \
+	sed $(QUOTE)s,\($*\)\.o[ :]*,\1.o $@ : ,g$(QUOTE) < $@.tmp > $@
+	@rm -f $@.tmp
 
 %.d: %.cpp
-	@rm -f $@ $(JOIN_CMD) \
-	$(CXX) -M $(CXXFLAGS) $< > $@.tmp && \
-	sed $(QUOTE)s,\($*\)\.o[ :]*,\1.o $@ : ,g$(QUOTE) < $@.tmp > $@ $(JOIN_CMD) \
-	rm -f $@.tmp
+	@rm -f $@
+	@$(CXX) -M $(CXXFLAGS) $< > $@.tmp && \
+	sed $(QUOTE)s,\($*\)\.o[ :]*,\1.o $@ : ,g$(QUOTE) < $@.tmp > $@
+	@rm -f $@.tmp
 
 %.d: %.m
-	@rm -f $@ $(JOIN_CMD) \
-	$(CC) -M $(CFLAGS) $< > $@.tmp && \
-	sed $(QUOTE)s,\($*\)\.o[ :]*,\1.o $@ : ,g$(QUOTE) < $@.tmp > $@ $(JOIN_CMD) \
-	rm -f $@.tmp
+	@rm -f $@
+	@$(CC) -M $(CFLAGS) $< > $@.tmp && \
+	sed $(QUOTE)s,\($*\)\.o[ :]*,\1.o $@ : ,g$(QUOTE) < $@.tmp > $@
+	@rm -f $@.tmp
 
 %.d: %.mm
-	@rm -f $@ $(JOIN_CMD) \
-	$(CXX) -M $(CXXFLAGS) $< > $@.tmp && \
-	sed $(QUOTE)s,\($*\)\.o[ :]*,\1.o $@ : ,g$(QUOTE) < $@.tmp > $@ $(JOIN_CMD) \
-	rm -f $@.tmp
+	@rm -f $@
+	@$(CXX) -M $(CXXFLAGS) $< > $@.tmp && \
+	sed $(QUOTE)s,\($*\)\.o[ :]*,\1.o $@ : ,g$(QUOTE) < $@.tmp > $@
+	@rm -f $@.tmp
 
 #----------------------------------------------------------------------
 # Include all of the makefiles for each source file so we don't have

From 37b96d51d0cfc82a64598aaae2a567fa77e44de9 Mon Sep 17 00:00:00 2001
From: Tim Northover <t.p.northover@gmail.com>
Date: Wed, 15 Jul 2020 09:49:49 +0100
Subject: [PATCH 2/8] CodeGenPrep: remove AssertingVH references before
 deleting dead instructions.

CodeGenPrepare keeps fairly close track of various instructions it's
seen, particularly GEPs, in maps and vectors. However, sometimes those
instructions become dead and get removed while it's still executing.
This triggers AssertingVH references to them in an asserts build and
could lead to miscompiles in a release build (I've only seen a later
segfault though).

So this patch adds a callback to
RecursivelyDeleteTriviallyDeadInstructions which can make sure the
instruction about to be deleted is removed from CodeGenPrepare's data
structures.
---
 llvm/include/llvm/Transforms/Utils/Local.h    | 12 +++--
 llvm/lib/CodeGen/CodeGenPrepare.cpp           | 52 +++++++++++++++----
 llvm/lib/Transforms/Utils/Local.cpp           | 18 +++++--
 .../Transforms/CodeGenPrepare/ARM/dead-gep.ll | 19 +++++++
 4 files changed, 84 insertions(+), 17 deletions(-)
 create mode 100644 llvm/test/Transforms/CodeGenPrepare/ARM/dead-gep.ll

diff --git a/llvm/include/llvm/Transforms/Utils/Local.h b/llvm/include/llvm/Transforms/Utils/Local.h
index f55e336f1f6aa6..3fab3bc21a0785 100644
--- a/llvm/include/llvm/Transforms/Utils/Local.h
+++ b/llvm/include/llvm/Transforms/Utils/Local.h
@@ -160,7 +160,9 @@ bool wouldInstructionBeTriviallyDead(Instruction *I,
 /// recursively. Return true if any instructions were deleted.
 bool RecursivelyDeleteTriviallyDeadInstructions(
     Value *V, const TargetLibraryInfo *TLI = nullptr,
-    MemorySSAUpdater *MSSAU = nullptr);
+    MemorySSAUpdater *MSSAU = nullptr,
+    std::function<void(Value *)> AboutToDeleteCallback =
+        std::function<void(Value *)>());
 
 /// Delete all of the instructions in `DeadInsts`, and all other instructions
 /// that deleting these in turn causes to be trivially dead.
@@ -172,7 +174,9 @@ bool RecursivelyDeleteTriviallyDeadInstructions(
 /// empty afterward.
 void RecursivelyDeleteTriviallyDeadInstructions(
     SmallVectorImpl<WeakTrackingVH> &DeadInsts,
-    const TargetLibraryInfo *TLI = nullptr, MemorySSAUpdater *MSSAU = nullptr);
+    const TargetLibraryInfo *TLI = nullptr, MemorySSAUpdater *MSSAU = nullptr,
+    std::function<void(Value *)> AboutToDeleteCallback =
+        std::function<void(Value *)>());
 
 /// Same functionality as RecursivelyDeleteTriviallyDeadInstructions, but allow
 /// instructions that are not trivially dead. These will be ignored.
@@ -180,7 +184,9 @@ void RecursivelyDeleteTriviallyDeadInstructions(
 /// were found and deleted.
 bool RecursivelyDeleteTriviallyDeadInstructionsPermissive(
     SmallVectorImpl<WeakTrackingVH> &DeadInsts,
-    const TargetLibraryInfo *TLI = nullptr, MemorySSAUpdater *MSSAU = nullptr);
+    const TargetLibraryInfo *TLI = nullptr, MemorySSAUpdater *MSSAU = nullptr,
+    std::function<void(Value *)> AboutToDeleteCallback =
+        std::function<void(Value *)>());
 
 /// If the specified value is an effectively dead PHI node, due to being a
 /// def-use chain of single-use nodes that either forms a cycle or is terminated
diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index e8b8e6c93cf0d2..465ba08dbfcdb5 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -376,6 +376,7 @@ class TypePromotionTransaction;
       return *DT;
     }
 
+    void removeAllAssertingVHReferences(Value *V);
     bool eliminateFallThrough(Function &F);
     bool eliminateMostlyEmptyBlocks(Function &F);
     BasicBlock *findDestBlockOfMergeableEmptyBlock(BasicBlock *BB);
@@ -383,6 +384,7 @@ class TypePromotionTransaction;
     void eliminateMostlyEmptyBlock(BasicBlock *BB);
     bool isMergingEmptyBlockProfitable(BasicBlock *BB, BasicBlock *DestBB,
                                        bool isPreheader);
+    bool makeBitReverse(Instruction &I);
     bool optimizeBlock(BasicBlock &BB, bool &ModifiedDT);
     bool optimizeInst(Instruction *I, bool &ModifiedDT);
     bool optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
@@ -601,6 +603,33 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
   return EverMadeChange;
 }
 
+/// An instruction is about to be deleted, so remove all references to it in our
+/// GEP-tracking data strcutures.
+void CodeGenPrepare::removeAllAssertingVHReferences(Value *V) {
+  LargeOffsetGEPMap.erase(V);
+  NewGEPBases.erase(V);
+
+  auto GEP = dyn_cast<GetElementPtrInst>(V);
+  if (!GEP)
+    return;
+
+  LargeOffsetGEPID.erase(GEP);
+
+  auto VecI = LargeOffsetGEPMap.find(GEP->getPointerOperand());
+  if (VecI == LargeOffsetGEPMap.end())
+    return;
+
+  auto &GEPVector = VecI->second;
+  const auto &I = std::find_if(GEPVector.begin(), GEPVector.end(),
+                               [=](auto &Elt) { return Elt.first == GEP; });
+  if (I == GEPVector.end())
+    return;
+
+  GEPVector.erase(I);
+  if (GEPVector.empty())
+    LargeOffsetGEPMap.erase(VecI);
+}
+
 // Verify BFI has been updated correctly by recomputing BFI and comparing them.
 void LLVM_ATTRIBUTE_UNUSED CodeGenPrepare::verifyBFIUpdates(Function &F) {
   DominatorTree NewDT(F);
@@ -5242,7 +5271,9 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
     WeakTrackingVH IterHandle(CurValue);
     BasicBlock *BB = CurInstIterator->getParent();
 
-    RecursivelyDeleteTriviallyDeadInstructions(Repl, TLInfo);
+    RecursivelyDeleteTriviallyDeadInstructions(
+        Repl, TLInfo, nullptr,
+        [&](Value *V) { removeAllAssertingVHReferences(V); });
 
     if (IterHandle != CurValue) {
       // If the iterator instruction was recursively deleted, start over at the
@@ -5363,7 +5394,9 @@ bool CodeGenPrepare::optimizeGatherScatterInst(Instruction *MemoryInst,
   // If we have no uses, recursively delete the value and all dead instructions
   // using it.
   if (Ptr->use_empty())
-    RecursivelyDeleteTriviallyDeadInstructions(Ptr, TLInfo);
+    RecursivelyDeleteTriviallyDeadInstructions(
+        Ptr, TLInfo, nullptr,
+        [&](Value *V) { removeAllAssertingVHReferences(V); });
 
   return true;
 }
@@ -6647,7 +6680,8 @@ bool CodeGenPrepare::optimizeShuffleVectorInst(ShuffleVectorInst *SVI) {
   Value *BC2 = Builder.CreateBitCast(Shuffle, SVIVecType);
 
   SVI->replaceAllUsesWith(BC2);
-  RecursivelyDeleteTriviallyDeadInstructions(SVI);
+  RecursivelyDeleteTriviallyDeadInstructions(
+      SVI, TLInfo, nullptr, [&](Value *V) { removeAllAssertingVHReferences(V); });
 
   // Also hoist the bitcast up to its operand if it they are not in the same
   // block.
@@ -7604,11 +7638,10 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, bool &ModifiedDT) {
 
 /// Given an OR instruction, check to see if this is a bitreverse
 /// idiom. If so, insert the new intrinsic and return true.
-static bool makeBitReverse(Instruction &I, const DataLayout &DL,
-                           const TargetLowering &TLI) {
+bool CodeGenPrepare::makeBitReverse(Instruction &I) {
   if (!I.getType()->isIntegerTy() ||
-      !TLI.isOperationLegalOrCustom(ISD::BITREVERSE,
-                                    TLI.getValueType(DL, I.getType(), true)))
+      !TLI->isOperationLegalOrCustom(ISD::BITREVERSE,
+                                     TLI->getValueType(*DL, I.getType(), true)))
     return false;
 
   SmallVector<Instruction*, 4> Insts;
@@ -7616,7 +7649,8 @@ static bool makeBitReverse(Instruction &I, const DataLayout &DL,
     return false;
   Instruction *LastInst = Insts.back();
   I.replaceAllUsesWith(LastInst);
-  RecursivelyDeleteTriviallyDeadInstructions(&I);
+  RecursivelyDeleteTriviallyDeadInstructions(
+      &I, TLInfo, nullptr, [&](Value *V) { removeAllAssertingVHReferences(V); });
   return true;
 }
 
@@ -7638,7 +7672,7 @@ bool CodeGenPrepare::optimizeBlock(BasicBlock &BB, bool &ModifiedDT) {
   while (MadeBitReverse) {
     MadeBitReverse = false;
     for (auto &I : reverse(BB)) {
-      if (makeBitReverse(I, *DL, *TLI)) {
+      if (makeBitReverse(I)) {
         MadeBitReverse = MadeChange = true;
         break;
       }
diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp
index da40c342af3ac6..3d163b8a86bcc8 100644
--- a/llvm/lib/Transforms/Utils/Local.cpp
+++ b/llvm/lib/Transforms/Utils/Local.cpp
@@ -453,21 +453,24 @@ bool llvm::wouldInstructionBeTriviallyDead(Instruction *I,
 /// trivially dead, delete them too, recursively.  Return true if any
 /// instructions were deleted.
 bool llvm::RecursivelyDeleteTriviallyDeadInstructions(
-    Value *V, const TargetLibraryInfo *TLI, MemorySSAUpdater *MSSAU) {
+    Value *V, const TargetLibraryInfo *TLI, MemorySSAUpdater *MSSAU,
+    std::function<void(Value *)> AboutToDeleteCallback) {
   Instruction *I = dyn_cast<Instruction>(V);
   if (!I || !isInstructionTriviallyDead(I, TLI))
     return false;
 
   SmallVector<WeakTrackingVH, 16> DeadInsts;
   DeadInsts.push_back(I);
-  RecursivelyDeleteTriviallyDeadInstructions(DeadInsts, TLI, MSSAU);
+  RecursivelyDeleteTriviallyDeadInstructions(DeadInsts, TLI, MSSAU,
+                                             AboutToDeleteCallback);
 
   return true;
 }
 
 bool llvm::RecursivelyDeleteTriviallyDeadInstructionsPermissive(
     SmallVectorImpl<WeakTrackingVH> &DeadInsts, const TargetLibraryInfo *TLI,
-    MemorySSAUpdater *MSSAU) {
+    MemorySSAUpdater *MSSAU,
+    std::function<void(Value *)> AboutToDeleteCallback) {
   unsigned S = 0, E = DeadInsts.size(), Alive = 0;
   for (; S != E; ++S) {
     auto *I = cast<Instruction>(DeadInsts[S]);
@@ -478,13 +481,15 @@ bool llvm::RecursivelyDeleteTriviallyDeadInstructionsPermissive(
   }
   if (Alive == E)
     return false;
-  RecursivelyDeleteTriviallyDeadInstructions(DeadInsts, TLI, MSSAU);
+  RecursivelyDeleteTriviallyDeadInstructions(DeadInsts, TLI, MSSAU,
+                                             AboutToDeleteCallback);
   return true;
 }
 
 void llvm::RecursivelyDeleteTriviallyDeadInstructions(
     SmallVectorImpl<WeakTrackingVH> &DeadInsts, const TargetLibraryInfo *TLI,
-    MemorySSAUpdater *MSSAU) {
+    MemorySSAUpdater *MSSAU,
+    std::function<void(Value *)> AboutToDeleteCallback) {
   // Process the dead instruction list until empty.
   while (!DeadInsts.empty()) {
     Value *V = DeadInsts.pop_back_val();
@@ -498,6 +503,9 @@ void llvm::RecursivelyDeleteTriviallyDeadInstructions(
     // Don't lose the debug info while deleting the instructions.
     salvageDebugInfo(*I);
 
+    if (AboutToDeleteCallback)
+      AboutToDeleteCallback(I);
+
     // Null out all of the instruction's operands to see if any operand becomes
     // dead as we go.
     for (Use &OpU : I->operands()) {
diff --git a/llvm/test/Transforms/CodeGenPrepare/ARM/dead-gep.ll b/llvm/test/Transforms/CodeGenPrepare/ARM/dead-gep.ll
new file mode 100644
index 00000000000000..a82cce01a29f5c
--- /dev/null
+++ b/llvm/test/Transforms/CodeGenPrepare/ARM/dead-gep.ll
@@ -0,0 +1,19 @@
+; RUN: opt -codegenprepare -S %s -o - | FileCheck %s
+target triple = "thumbv7-apple-ios7.0.0"
+
+
+%struct = type [1000 x i32]
+
+define void @test_dead_gep(%struct* %t0) {
+; CHECK-LABEL: define void @test_dead_gep
+; CHECK-NOT: getelementptr
+; CHECK: %t16 = load i32, i32* undef
+; CHECK: ret void
+
+  %t12 = getelementptr inbounds %struct, %struct* %t0, i32 1, i32 500
+  %t13 = load i32, i32* %t12, align 4
+  %t14 = icmp eq i32 %t13, 2
+  %t15 = select i1 %t14, i32* undef, i32* undef
+  %t16 = load i32, i32* %t15, align 4
+  ret void
+}

From 9c1c6a3fcca840b75a0ae818ac4e24e7460c397b Mon Sep 17 00:00:00 2001
From: Raphael Isemann <teemperor@gmail.com>
Date: Wed, 15 Jul 2020 16:26:37 +0200
Subject: [PATCH 3/8] Revert "[lldb] Use the basename of the Python test for
 the log name instead of the class name"

This reverts commit 29aab9b5c748b28b231e2ca0f9b95453638ade1a.

It seems on Windows the file name is just always "lldbsuite.test.lldbtest" for
all tests and that breaks pretty much everything. Reverting until we have
a better solution.
---
 lldb/test/API/CMakeLists.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lldb/test/API/CMakeLists.txt b/lldb/test/API/CMakeLists.txt
index 23b211e14cfa56..34f3522c8dfec8 100644
--- a/lldb/test/API/CMakeLists.txt
+++ b/lldb/test/API/CMakeLists.txt
@@ -38,7 +38,7 @@ set(LLDB_TEST_USER_ARGS
 set(LLDB_TEST_COMMON_ARGS
   -s
   ${CMAKE_BINARY_DIR}/lldb-test-traces
-  -S fm
+  -S nm
   -u CXXFLAGS
   -u CFLAGS
   )

From 00e3a1ddec95c0b48ce216220d7e3481dab3bc78 Mon Sep 17 00:00:00 2001
From: Joachim Protze <protze@itc.rwth-aachen.de>
Date: Wed, 15 Jul 2020 16:45:27 +0200
Subject: [PATCH 4/8] [TSan] Optimize handling of racy address

This patch splits the handling of racy address and racy stack into separate
functions. If a race was already reported for the address, we can avoid the
cost for collecting the involved stacks.

This patch also removes the race condition in storing the racy address / racy
stack. This race condition allowed all threads to report the race.

This patch changes the transitive suppression of reports. Previously
suppression could transitively chain memory location and racy stacks.
Now racy memory and racy stack are separate suppressions.

Reviewed by: dvyukov

Differential Revision: https://reviews.llvm.org/D83625
---
 compiler-rt/lib/tsan/rtl/tsan_rtl_report.cpp | 103 +++++++++----------
 1 file changed, 50 insertions(+), 53 deletions(-)

diff --git a/compiler-rt/lib/tsan/rtl/tsan_rtl_report.cpp b/compiler-rt/lib/tsan/rtl/tsan_rtl_report.cpp
index 949beac1c5513f..3354546c2a1076 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_rtl_report.cpp
+++ b/compiler-rt/lib/tsan/rtl/tsan_rtl_report.cpp
@@ -439,65 +439,61 @@ void RestoreStack(int tid, const u64 epoch, VarSizeStackTrace *stk,
   ExtractTagFromStack(stk, tag);
 }
 
-static bool HandleRacyStacks(ThreadState *thr, VarSizeStackTrace traces[2],
-                             uptr addr_min, uptr addr_max) {
-  bool equal_stack = false;
-  RacyStacks hash;
-  bool equal_address = false;
-  RacyAddress ra0 = {addr_min, addr_max};
-  {
-    ReadLock lock(&ctx->racy_mtx);
-    if (flags()->suppress_equal_stacks) {
-      hash.hash[0] = md5_hash(traces[0].trace, traces[0].size * sizeof(uptr));
-      hash.hash[1] = md5_hash(traces[1].trace, traces[1].size * sizeof(uptr));
-      for (uptr i = 0; i < ctx->racy_stacks.Size(); i++) {
-        if (hash == ctx->racy_stacks[i]) {
-          VPrintf(2,
-              "ThreadSanitizer: suppressing report as doubled (stack)\n");
-          equal_stack = true;
-          break;
-        }
-      }
-    }
-    if (flags()->suppress_equal_addresses) {
-      for (uptr i = 0; i < ctx->racy_addresses.Size(); i++) {
-        RacyAddress ra2 = ctx->racy_addresses[i];
-        uptr maxbeg = max(ra0.addr_min, ra2.addr_min);
-        uptr minend = min(ra0.addr_max, ra2.addr_max);
-        if (maxbeg < minend) {
-          VPrintf(2, "ThreadSanitizer: suppressing report as doubled (addr)\n");
-          equal_address = true;
-          break;
-        }
-      }
+static bool FindRacyStacks(const RacyStacks &hash) {
+  for (uptr i = 0; i < ctx->racy_stacks.Size(); i++) {
+    if (hash == ctx->racy_stacks[i]) {
+      VPrintf(2, "ThreadSanitizer: suppressing report as doubled (stack)\n");
+      return true;
     }
   }
-  if (!equal_stack && !equal_address)
+  return false;
+}
+
+static bool HandleRacyStacks(ThreadState *thr, VarSizeStackTrace traces[2]) {
+  if (!flags()->suppress_equal_stacks)
     return false;
-  if (!equal_stack) {
-    Lock lock(&ctx->racy_mtx);
-    ctx->racy_stacks.PushBack(hash);
-  }
-  if (!equal_address) {
-    Lock lock(&ctx->racy_mtx);
-    ctx->racy_addresses.PushBack(ra0);
+  RacyStacks hash;
+  hash.hash[0] = md5_hash(traces[0].trace, traces[0].size * sizeof(uptr));
+  hash.hash[1] = md5_hash(traces[1].trace, traces[1].size * sizeof(uptr));
+  {
+    ReadLock lock(&ctx->racy_mtx);
+    if (FindRacyStacks(hash))
+      return true;
   }
-  return true;
+  Lock lock(&ctx->racy_mtx);
+  if (FindRacyStacks(hash))
+    return true;
+  ctx->racy_stacks.PushBack(hash);
+  return false;
 }
 
-static void AddRacyStacks(ThreadState *thr, VarSizeStackTrace traces[2],
-                          uptr addr_min, uptr addr_max) {
-  Lock lock(&ctx->racy_mtx);
-  if (flags()->suppress_equal_stacks) {
-    RacyStacks hash;
-    hash.hash[0] = md5_hash(traces[0].trace, traces[0].size * sizeof(uptr));
-    hash.hash[1] = md5_hash(traces[1].trace, traces[1].size * sizeof(uptr));
-    ctx->racy_stacks.PushBack(hash);
+static bool FindRacyAddress(const RacyAddress &ra0) {
+  for (uptr i = 0; i < ctx->racy_addresses.Size(); i++) {
+    RacyAddress ra2 = ctx->racy_addresses[i];
+    uptr maxbeg = max(ra0.addr_min, ra2.addr_min);
+    uptr minend = min(ra0.addr_max, ra2.addr_max);
+    if (maxbeg < minend) {
+      VPrintf(2, "ThreadSanitizer: suppressing report as doubled (addr)\n");
+      return true;
+    }
   }
-  if (flags()->suppress_equal_addresses) {
-    RacyAddress ra0 = {addr_min, addr_max};
-    ctx->racy_addresses.PushBack(ra0);
+  return false;
+}
+
+static bool HandleRacyAddress(ThreadState *thr, uptr addr_min, uptr addr_max) {
+  if (!flags()->suppress_equal_addresses)
+    return false;
+  RacyAddress ra0 = {addr_min, addr_max};
+  {
+    ReadLock lock(&ctx->racy_mtx);
+    if (FindRacyAddress(ra0))
+      return true;
   }
+  Lock lock(&ctx->racy_mtx);
+  if (FindRacyAddress(ra0))
+    return true;
+  ctx->racy_addresses.PushBack(ra0);
+  return false;
 }
 
 bool OutputReport(ThreadState *thr, const ScopedReport &srep) {
@@ -618,6 +614,8 @@ void ReportRace(ThreadState *thr) {
     if (IsExpectedReport(addr_min, addr_max - addr_min))
       return;
   }
+  if (HandleRacyAddress(thr, addr_min, addr_max))
+    return;
 
   ReportType typ = ReportTypeRace;
   if (thr->is_vptr_access && freed)
@@ -668,7 +666,7 @@ void ReportRace(ThreadState *thr) {
   if (IsFiredSuppression(ctx, typ, traces[1]))
     return;
 
-  if (HandleRacyStacks(thr, traces, addr_min, addr_max))
+  if (HandleRacyStacks(thr, traces))
     return;
 
   // If any of the accesses has a tag, treat this as an "external" race.
@@ -711,7 +709,6 @@ void ReportRace(ThreadState *thr) {
   if (!OutputReport(thr, rep))
     return;
 
-  AddRacyStacks(thr, traces, addr_min, addr_max);
 }
 
 void PrintCurrentStack(ThreadState *thr, uptr pc) {

From d6e79e3dd6df63425eb098f482be2c9744ad48eb Mon Sep 17 00:00:00 2001
From: "Joel E. Denny" <jdenny.ornl@gmail.com>
Date: Wed, 15 Jul 2020 11:17:00 -0400
Subject: [PATCH 5/8] [OpenMP][Docs] Update `present` map type modifier status

---
 clang/docs/OpenMPSupport.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/docs/OpenMPSupport.rst b/clang/docs/OpenMPSupport.rst
index 000f23141af307..26fbfab96bc8c3 100644
--- a/clang/docs/OpenMPSupport.rst
+++ b/clang/docs/OpenMPSupport.rst
@@ -268,5 +268,5 @@ want to help with the implementation.
 +------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+
 | loop extension               | Loop tiling transformation                                   | :part:`claimed`          |                                                                       |
 +------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+
-| device extension             | 'present' map type modifier                                  | :part:`claimed`          |                                                                       |
+| device extension             | 'present' map type modifier                                  | :part:`worked on`        | D83061, D83062                                                        |
 +------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+

From 24cd66d2190a093ed580451496513a65a4c7b8ce Mon Sep 17 00:00:00 2001
From: David Green <david.green@arm.com>
Date: Wed, 15 Jul 2020 14:49:57 +0100
Subject: [PATCH 6/8] [HardwareLoops] Add sibling loop test.

This missed being part of 9e03547cab69.
---
 .../Transforms/HardwareLoops/sibling-loops.ll | 94 +++++++++++++++++++
 1 file changed, 94 insertions(+)
 create mode 100644 llvm/test/Transforms/HardwareLoops/sibling-loops.ll

diff --git a/llvm/test/Transforms/HardwareLoops/sibling-loops.ll b/llvm/test/Transforms/HardwareLoops/sibling-loops.ll
new file mode 100644
index 00000000000000..e415e522da7b76
--- /dev/null
+++ b/llvm/test/Transforms/HardwareLoops/sibling-loops.ll
@@ -0,0 +1,94 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -hardware-loops -force-hardware-loops=true -hardware-loop-decrement=1 -hardware-loop-counter-bitwidth=32 -S %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-DEC
+
+define arm_aapcs_vfpcc void @test(i16* noalias nocapture readonly %off, i16* noalias nocapture %data, i16* noalias nocapture %dst, i32 %n) {
+; CHECK-LABEL: @test(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP252:%.*]] = icmp sgt i32 [[N:%.*]], 0
+; CHECK-NEXT:    br i1 [[CMP252]], label [[FOR_COND1_PREHEADER_US:%.*]], label [[FOR_COND_CLEANUP:%.*]]
+; CHECK:       for.cond1.preheader.us:
+; CHECK-NEXT:    [[I_057_US:%.*]] = phi i32 [ [[INC29_US:%.*]], [[FOR_COND_CLEANUP14_US:%.*]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[MUL_US:%.*]] = mul i32 [[I_057_US]], [[N]]
+; CHECK-NEXT:    call void @llvm.set.loop.iterations.i32(i32 [[N]])
+; CHECK-NEXT:    br label [[FOR_BODY4_US:%.*]]
+; CHECK:       for.body4.us:
+; CHECK-NEXT:    [[J_053_US:%.*]] = phi i32 [ 0, [[FOR_COND1_PREHEADER_US]] ], [ [[INC_US:%.*]], [[FOR_BODY4_US]] ]
+; CHECK-NEXT:    [[ARRAYIDX_US:%.*]] = getelementptr inbounds i16, i16* [[OFF:%.*]], i32 [[J_053_US]]
+; CHECK-NEXT:    [[L2:%.*]] = load i16, i16* [[ARRAYIDX_US]], align 2
+; CHECK-NEXT:    [[ARRAYIDX5_US:%.*]] = getelementptr inbounds i16, i16* [[DATA:%.*]], i32 [[J_053_US]]
+; CHECK-NEXT:    [[L3:%.*]] = load i16, i16* [[ARRAYIDX5_US]], align 2
+; CHECK-NEXT:    [[ADD_US:%.*]] = add i16 [[L3]], [[L2]]
+; CHECK-NEXT:    [[ADD8_US:%.*]] = add i32 [[J_053_US]], [[MUL_US]]
+; CHECK-NEXT:    [[ARRAYIDX9_US:%.*]] = getelementptr inbounds i16, i16* [[DATA]], i32 [[ADD8_US]]
+; CHECK-NEXT:    store i16 [[ADD_US]], i16* [[ARRAYIDX9_US]], align 2
+; CHECK-NEXT:    [[INC_US]] = add nuw nsw i32 [[J_053_US]], 1
+; CHECK-NEXT:    [[TMP0:%.*]] = call i1 @llvm.loop.decrement.i32(i32 1)
+; CHECK-NEXT:    br i1 [[TMP0]], label [[FOR_BODY4_US]], label [[FOR_BODY15_US_PREHEADER:%.*]]
+; CHECK:       for.body15.us.preheader:
+; CHECK-NEXT:    call void @llvm.set.loop.iterations.i32(i32 [[N]])
+; CHECK-NEXT:    br label [[FOR_BODY15_US:%.*]]
+; CHECK:       for.body15.us:
+; CHECK-NEXT:    [[J10_055_US:%.*]] = phi i32 [ [[INC26_US:%.*]], [[FOR_BODY15_US]] ], [ 0, [[FOR_BODY15_US_PREHEADER]] ]
+; CHECK-NEXT:    [[ARRAYIDX16_US:%.*]] = getelementptr inbounds i16, i16* [[OFF]], i32 [[J10_055_US]]
+; CHECK-NEXT:    [[L0:%.*]] = load i16, i16* [[ARRAYIDX16_US]], align 2
+; CHECK-NEXT:    [[ARRAYIDX18_US:%.*]] = getelementptr inbounds i16, i16* [[DATA]], i32 [[J10_055_US]]
+; CHECK-NEXT:    [[L1:%.*]] = load i16, i16* [[ARRAYIDX18_US]], align 2
+; CHECK-NEXT:    [[ADD20_US:%.*]] = add i16 [[L1]], [[L0]]
+; CHECK-NEXT:    [[ADD23_US:%.*]] = add i32 [[J10_055_US]], [[MUL_US]]
+; CHECK-NEXT:    [[ARRAYIDX24_US:%.*]] = getelementptr inbounds i16, i16* [[DST:%.*]], i32 [[ADD23_US]]
+; CHECK-NEXT:    store i16 [[ADD20_US]], i16* [[ARRAYIDX24_US]], align 2
+; CHECK-NEXT:    [[INC26_US]] = add nuw nsw i32 [[J10_055_US]], 1
+; CHECK-NEXT:    [[TMP1:%.*]] = call i1 @llvm.loop.decrement.i32(i32 1)
+; CHECK-NEXT:    br i1 [[TMP1]], label [[FOR_BODY15_US]], label [[FOR_COND_CLEANUP14_US]]
+; CHECK:       for.cond.cleanup14.us:
+; CHECK-NEXT:    [[INC29_US]] = add nuw i32 [[I_057_US]], 1
+; CHECK-NEXT:    [[EXITCOND94:%.*]] = icmp eq i32 [[INC29_US]], [[N]]
+; CHECK-NEXT:    br i1 [[EXITCOND94]], label [[FOR_COND_CLEANUP]], label [[FOR_COND1_PREHEADER_US]]
+; CHECK:       for.cond.cleanup:
+; CHECK-NEXT:    ret void
+;
+entry:
+  %cmp252 = icmp sgt i32 %n, 0
+  br i1 %cmp252, label %for.cond1.preheader.us, label %for.cond.cleanup
+
+for.cond1.preheader.us: ; preds = %entry, %for.cond.cleanup14.us
+  %i.057.us = phi i32 [ %inc29.us, %for.cond.cleanup14.us ], [ 0, %entry ]
+  %mul.us = mul i32 %i.057.us, %n
+  br label %for.body4.us
+
+for.body4.us: ; preds = %for.body4.us, %for.cond1.preheader.us
+  %j.053.us = phi i32 [ 0, %for.cond1.preheader.us ], [ %inc.us, %for.body4.us ]
+  %arrayidx.us = getelementptr inbounds i16, i16* %off, i32 %j.053.us
+  %l2 = load i16, i16* %arrayidx.us, align 2
+  %arrayidx5.us = getelementptr inbounds i16, i16* %data, i32 %j.053.us
+  %l3 = load i16, i16* %arrayidx5.us, align 2
+  %add.us = add i16 %l3, %l2
+  %add8.us = add i32 %j.053.us, %mul.us
+  %arrayidx9.us = getelementptr inbounds i16, i16* %data, i32 %add8.us
+  store i16 %add.us, i16* %arrayidx9.us, align 2
+  %inc.us = add nuw nsw i32 %j.053.us, 1
+  %exitcond = icmp eq i32 %inc.us, %n
+  br i1 %exitcond, label %for.body15.us, label %for.body4.us
+
+for.body15.us: ; preds = %for.body4.us, %for.body15.us
+  %j10.055.us = phi i32 [ %inc26.us, %for.body15.us ], [ 0, %for.body4.us ]
+  %arrayidx16.us = getelementptr inbounds i16, i16* %off, i32 %j10.055.us
+  %l0 = load i16, i16* %arrayidx16.us, align 2
+  %arrayidx18.us = getelementptr inbounds i16, i16* %data, i32 %j10.055.us
+  %l1 = load i16, i16* %arrayidx18.us, align 2
+  %add20.us = add i16 %l1, %l0
+  %add23.us = add i32 %j10.055.us, %mul.us
+  %arrayidx24.us = getelementptr inbounds i16, i16* %dst, i32 %add23.us
+  store i16 %add20.us, i16* %arrayidx24.us, align 2
+  %inc26.us = add nuw nsw i32 %j10.055.us, 1
+  %exitcond93 = icmp eq i32 %inc26.us, %n
+  br i1 %exitcond93, label %for.cond.cleanup14.us, label %for.body15.us
+
+for.cond.cleanup14.us: ; preds = %for.body15.us
+  %inc29.us = add nuw i32 %i.057.us, 1
+  %exitcond94 = icmp eq i32 %inc29.us, %n
+  br i1 %exitcond94, label %for.cond.cleanup, label %for.cond1.preheader.us
+
+for.cond.cleanup: ; preds = %for.cond.cleanup14.us, %entry
+  ret void
+}

From ad493300322099787cab5f3a9f7310af0f9b5e6c Mon Sep 17 00:00:00 2001
From: Frederik Gossen <frgossen@google.com>
Date: Wed, 15 Jul 2020 15:37:13 +0000
Subject: [PATCH 7/8] [MLIR][Shape] Fix `shape_of` lowering to `scf`

The use of the `scf.for` callback builder does not allow for a rollback of the
emitted conversions. Instead, we populate the loop body through the conversion
rewriter directly.

Differential Revision: https://reviews.llvm.org/D83873
---
 mlir/lib/Conversion/ShapeToSCF/ShapeToSCF.cpp | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/mlir/lib/Conversion/ShapeToSCF/ShapeToSCF.cpp b/mlir/lib/Conversion/ShapeToSCF/ShapeToSCF.cpp
index 55ebae99af53fc..1f1134757b3a1b 100644
--- a/mlir/lib/Conversion/ShapeToSCF/ShapeToSCF.cpp
+++ b/mlir/lib/Conversion/ShapeToSCF/ShapeToSCF.cpp
@@ -103,14 +103,15 @@ ShapeOfOpConverter::matchAndRewrite(ShapeOfOp op, ArrayRef<Value> operands,
   // Copy shape extents to stack-allocated memory.
   auto zeroVal = rewriter.create<ConstantIndexOp>(loc, 0);
   auto oneVal = rewriter.create<ConstantIndexOp>(loc, 1);
-  rewriter.create<scf::ForOp>(
-      loc, zeroVal, rankVal, oneVal, ValueRange(),
-      [&](OpBuilder &b, Location loc, Value iVal, ValueRange args) {
-        auto dimVal = b.create<DimOp>(loc, tensorVal, iVal);
-        auto dimIntVal = b.create<IndexCastOp>(loc, dimVal, i64Ty);
-        b.create<StoreOp>(loc, dimIntVal, memVal, ValueRange({iVal}));
-        b.create<scf::YieldOp>(loc);
-      });
+  auto loop = rewriter.create<scf::ForOp>(loc, zeroVal, rankVal, oneVal);
+  {
+    OpBuilder::InsertionGuard guard(rewriter);
+    rewriter.setInsertionPointToStart(loop.getBody());
+    auto iVal = loop.getInductionVar();
+    auto dimVal = rewriter.create<DimOp>(loc, tensorVal, iVal);
+    auto dimIntVal = rewriter.create<IndexCastOp>(loc, dimVal, i64Ty);
+    rewriter.create<StoreOp>(loc, dimIntVal, memVal, ValueRange{iVal});
+  }
 
   // Load extents to tensor value.
   auto shapeIntVal = rewriter.create<TensorLoadOp>(loc, memVal);

From d3849dddd267af300d76b57c055e89f1ad2622d0 Mon Sep 17 00:00:00 2001
From: Joachim Protze <protze@itc.rwth-aachen.de>
Date: Wed, 15 Jul 2020 17:39:30 +0200
Subject: [PATCH 8/8] Revert "[TSan] Optimize handling of racy address"

This reverts commit 00e3a1ddec95c0b48ce216220d7e3481dab3bc78.
The commit broke most build bots, investigating.
---
 compiler-rt/lib/tsan/rtl/tsan_rtl_report.cpp | 103 ++++++++++---------
 1 file changed, 53 insertions(+), 50 deletions(-)

diff --git a/compiler-rt/lib/tsan/rtl/tsan_rtl_report.cpp b/compiler-rt/lib/tsan/rtl/tsan_rtl_report.cpp
index 3354546c2a1076..949beac1c5513f 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_rtl_report.cpp
+++ b/compiler-rt/lib/tsan/rtl/tsan_rtl_report.cpp
@@ -439,61 +439,65 @@ void RestoreStack(int tid, const u64 epoch, VarSizeStackTrace *stk,
   ExtractTagFromStack(stk, tag);
 }
 
-static bool FindRacyStacks(const RacyStacks &hash) {
-  for (uptr i = 0; i < ctx->racy_stacks.Size(); i++) {
-    if (hash == ctx->racy_stacks[i]) {
-      VPrintf(2, "ThreadSanitizer: suppressing report as doubled (stack)\n");
-      return true;
-    }
-  }
-  return false;
-}
-
-static bool HandleRacyStacks(ThreadState *thr, VarSizeStackTrace traces[2]) {
-  if (!flags()->suppress_equal_stacks)
-    return false;
+static bool HandleRacyStacks(ThreadState *thr, VarSizeStackTrace traces[2],
+                             uptr addr_min, uptr addr_max) {
+  bool equal_stack = false;
   RacyStacks hash;
-  hash.hash[0] = md5_hash(traces[0].trace, traces[0].size * sizeof(uptr));
-  hash.hash[1] = md5_hash(traces[1].trace, traces[1].size * sizeof(uptr));
+  bool equal_address = false;
+  RacyAddress ra0 = {addr_min, addr_max};
   {
     ReadLock lock(&ctx->racy_mtx);
-    if (FindRacyStacks(hash))
-      return true;
-  }
-  Lock lock(&ctx->racy_mtx);
-  if (FindRacyStacks(hash))
-    return true;
-  ctx->racy_stacks.PushBack(hash);
-  return false;
-}
-
-static bool FindRacyAddress(const RacyAddress &ra0) {
-  for (uptr i = 0; i < ctx->racy_addresses.Size(); i++) {
-    RacyAddress ra2 = ctx->racy_addresses[i];
-    uptr maxbeg = max(ra0.addr_min, ra2.addr_min);
-    uptr minend = min(ra0.addr_max, ra2.addr_max);
-    if (maxbeg < minend) {
-      VPrintf(2, "ThreadSanitizer: suppressing report as doubled (addr)\n");
-      return true;
+    if (flags()->suppress_equal_stacks) {
+      hash.hash[0] = md5_hash(traces[0].trace, traces[0].size * sizeof(uptr));
+      hash.hash[1] = md5_hash(traces[1].trace, traces[1].size * sizeof(uptr));
+      for (uptr i = 0; i < ctx->racy_stacks.Size(); i++) {
+        if (hash == ctx->racy_stacks[i]) {
+          VPrintf(2,
+              "ThreadSanitizer: suppressing report as doubled (stack)\n");
+          equal_stack = true;
+          break;
+        }
+      }
+    }
+    if (flags()->suppress_equal_addresses) {
+      for (uptr i = 0; i < ctx->racy_addresses.Size(); i++) {
+        RacyAddress ra2 = ctx->racy_addresses[i];
+        uptr maxbeg = max(ra0.addr_min, ra2.addr_min);
+        uptr minend = min(ra0.addr_max, ra2.addr_max);
+        if (maxbeg < minend) {
+          VPrintf(2, "ThreadSanitizer: suppressing report as doubled (addr)\n");
+          equal_address = true;
+          break;
+        }
+      }
     }
   }
-  return false;
-}
-
-static bool HandleRacyAddress(ThreadState *thr, uptr addr_min, uptr addr_max) {
-  if (!flags()->suppress_equal_addresses)
+  if (!equal_stack && !equal_address)
     return false;
-  RacyAddress ra0 = {addr_min, addr_max};
-  {
-    ReadLock lock(&ctx->racy_mtx);
-    if (FindRacyAddress(ra0))
-      return true;
+  if (!equal_stack) {
+    Lock lock(&ctx->racy_mtx);
+    ctx->racy_stacks.PushBack(hash);
+  }
+  if (!equal_address) {
+    Lock lock(&ctx->racy_mtx);
+    ctx->racy_addresses.PushBack(ra0);
   }
+  return true;
+}
+
+static void AddRacyStacks(ThreadState *thr, VarSizeStackTrace traces[2],
+                          uptr addr_min, uptr addr_max) {
   Lock lock(&ctx->racy_mtx);
-  if (FindRacyAddress(ra0))
-    return true;
-  ctx->racy_addresses.PushBack(ra0);
-  return false;
+  if (flags()->suppress_equal_stacks) {
+    RacyStacks hash;
+    hash.hash[0] = md5_hash(traces[0].trace, traces[0].size * sizeof(uptr));
+    hash.hash[1] = md5_hash(traces[1].trace, traces[1].size * sizeof(uptr));
+    ctx->racy_stacks.PushBack(hash);
+  }
+  if (flags()->suppress_equal_addresses) {
+    RacyAddress ra0 = {addr_min, addr_max};
+    ctx->racy_addresses.PushBack(ra0);
+  }
 }
 
 bool OutputReport(ThreadState *thr, const ScopedReport &srep) {
@@ -614,8 +618,6 @@ void ReportRace(ThreadState *thr) {
     if (IsExpectedReport(addr_min, addr_max - addr_min))
       return;
   }
-  if (HandleRacyAddress(thr, addr_min, addr_max))
-    return;
 
   ReportType typ = ReportTypeRace;
   if (thr->is_vptr_access && freed)
@@ -666,7 +668,7 @@ void ReportRace(ThreadState *thr) {
   if (IsFiredSuppression(ctx, typ, traces[1]))
     return;
 
-  if (HandleRacyStacks(thr, traces))
+  if (HandleRacyStacks(thr, traces, addr_min, addr_max))
     return;
 
   // If any of the accesses has a tag, treat this as an "external" race.
@@ -709,6 +711,7 @@ void ReportRace(ThreadState *thr) {
   if (!OutputReport(thr, rep))
     return;
 
+  AddRacyStacks(thr, traces, addr_min, addr_max);
 }
 
 void PrintCurrentStack(ThreadState *thr, uptr pc) {