diff --git a/clang/cmake/caches/Fuchsia-stage2.cmake b/clang/cmake/caches/Fuchsia-stage2.cmake index 9ecf8f300e75e7..4d9b8526d0c4d1 100644 --- a/clang/cmake/caches/Fuchsia-stage2.cmake +++ b/clang/cmake/caches/Fuchsia-stage2.cmake @@ -5,7 +5,6 @@ set(LLVM_TARGETS_TO_BUILD X86;ARM;AArch64;RISCV CACHE STRING "") set(PACKAGE_VENDOR Fuchsia CACHE STRING "") set(LLVM_ENABLE_PROJECTS "clang;clang-tools-extra;lld;llvm" CACHE STRING "") -set(LLVM_ENABLE_RUNTIMES "compiler-rt;libcxx;libcxxabi;libunwind" CACHE STRING "") set(LLVM_ENABLE_BACKTRACES OFF CACHE BOOL "") if(NOT APPLE) @@ -22,6 +21,10 @@ set(LLVM_INCLUDE_GO_TESTS OFF CACHE BOOL "") set(LLVM_USE_RELATIVE_PATHS_IN_FILES ON CACHE BOOL "") set(LLVM_ENABLE_Z3_SOLVER OFF CACHE BOOL "") +if(MSVC) + set(LLVM_USE_CRT_RELEASE "MT" CACHE STRING "") +endif() + set(CLANG_DEFAULT_CXX_STDLIB libc++ CACHE STRING "") if(NOT APPLE) set(CLANG_DEFAULT_LINKER lld CACHE STRING "") @@ -39,6 +42,8 @@ set(ENABLE_X86_RELAX_RELOCATIONS ON CACHE BOOL "") set(CMAKE_BUILD_TYPE Release CACHE STRING "") if (APPLE) set(MACOSX_DEPLOYMENT_TARGET 10.7 CACHE STRING "") +elseif(MSVC) + set(CMAKE_MSVC_RUNTIME_LIBRARY "MultiThreaded" CACHE STRING "") endif() if(APPLE) @@ -65,6 +70,26 @@ if(APPLE) set(DARWIN_iossim_ARCHS i386;x86_64 CACHE STRING "") set(DARWIN_osx_ARCHS x86_64 CACHE STRING "") set(SANITIZER_MIN_OSX_VERSION 10.7 CACHE STRING "") + set(LLVM_ENABLE_RUNTIMES "compiler-rt;libcxx;libcxxabi;libunwind" CACHE STRING "") +endif() + +if(WIN32) + set(target "x86_64-pc-windows-msvc") + + list(APPEND BUILTIN_TARGETS "${target}") + set(BUILTINS_${target}_CMAKE_SYSTEM_NAME Windows CACHE STRING "") + set(BUILTINS_${target}_CMAKE_BUILD_TYPE RelWithDebInfo CACHE STRING "") + + list(APPEND RUNTIME_TARGETS "${target}") + set(RUNTIMES_${target}_CMAKE_SYSTEM_NAME Windows CACHE STRING "") + set(RUNTIMES_${target}_CMAKE_BUILD_TYPE RelWithDebInfo CACHE STRING "") + set(RUNTIMES_${target}_LIBCXX_ABI_VERSION 2 CACHE STRING "") + 
set(RUNTIMES_${target}_LIBCXX_HAS_WIN32_THREAD_API ON CACHE BOOL "") + set(RUNTIMES_${target}_LIBCXX_ENABLE_EXPERIMENTAL_LIBRARY OFF CACHE BOOL "") + set(RUNTIMES_${target}_LIBCXX_ENABLE_FILESYSTEM OFF CACHE BOOL "") + set(RUNTIMES_${target}_LIBCXX_ENABLE_ABI_LINKER_SCRIPT OFF CACHE BOOL "") + set(RUNTIMES_${target}_LIBCXX_ENABLE_SHARED OFF CACHE BOOL "") + set(RUNTIMES_${target}_LLVM_ENABLE_RUNTIMES "compiler-rt;libcxx" CACHE STRING "") endif() foreach(target aarch64-unknown-linux-gnu;armv7-unknown-linux-gnueabihf;i386-unknown-linux-gnu;x86_64-unknown-linux-gnu) @@ -73,6 +98,9 @@ foreach(target aarch64-unknown-linux-gnu;armv7-unknown-linux-gnueabihf;i386-unkn list(APPEND BUILTIN_TARGETS "${target}") set(BUILTINS_${target}_CMAKE_SYSTEM_NAME Linux CACHE STRING "") set(BUILTINS_${target}_CMAKE_BUILD_TYPE RelWithDebInfo CACHE STRING "") + set(BUILTINS_${target}_CMAKE_C_FLAGS "--target=${target}" CACHE STRING "") + set(BUILTINS_${target}_CMAKE_CXX_FLAGS "--target=${target}" CACHE STRING "") + set(BUILTINS_${target}_CMAKE_ASM_FLAGS "--target=${target}" CACHE STRING "") set(BUILTINS_${target}_CMAKE_SYSROOT ${LINUX_${target}_SYSROOT} CACHE STRING "") set(BUILTINS_${target}_CMAKE_SHARED_LINKER_FLAGS "-fuse-ld=lld" CACHE STRING "") set(BUILTINS_${target}_CMAKE_MODULE_LINKER_FLAGS "-fuse-ld=lld" CACHE STRING "") @@ -82,6 +110,9 @@ foreach(target aarch64-unknown-linux-gnu;armv7-unknown-linux-gnueabihf;i386-unkn list(APPEND RUNTIME_TARGETS "${target}") set(RUNTIMES_${target}_CMAKE_SYSTEM_NAME Linux CACHE STRING "") set(RUNTIMES_${target}_CMAKE_BUILD_TYPE RelWithDebInfo CACHE STRING "") + set(RUNTIMES_${target}_CMAKE_C_FLAGS "--target=${target}" CACHE STRING "") + set(RUNTIMES_${target}_CMAKE_CXX_FLAGS "--target=${target}" CACHE STRING "") + set(RUNTIMES_${target}_CMAKE_ASM_FLAGS "--target=${target}" CACHE STRING "") set(RUNTIMES_${target}_CMAKE_SYSROOT ${LINUX_${target}_SYSROOT} CACHE STRING "") set(RUNTIMES_${target}_CMAKE_SHARED_LINKER_FLAGS "-fuse-ld=lld" CACHE STRING "") 
set(RUNTIMES_${target}_CMAKE_MODULE_LINKER_FLAGS "-fuse-ld=lld" CACHE STRING "") @@ -100,9 +131,9 @@ foreach(target aarch64-unknown-linux-gnu;armv7-unknown-linux-gnueabihf;i386-unkn set(RUNTIMES_${target}_LIBCXX_ENABLE_STATIC_ABI_LIBRARY ON CACHE BOOL "") set(RUNTIMES_${target}_LIBCXX_ABI_VERSION 2 CACHE STRING "") set(RUNTIMES_${target}_LLVM_ENABLE_ASSERTIONS ON CACHE BOOL "") - set(RUNTIMES_${target}_LLVM_ENABLE_RUNTIMES "compiler-rt;libcxx;libcxxabi;libunwind" CACHE STRING "") set(RUNTIMES_${target}_SANITIZER_CXX_ABI "libc++" CACHE STRING "") set(RUNTIMES_${target}_SANITIZER_CXX_ABI_INTREE ON CACHE BOOL "") + set(RUNTIMES_${target}_LLVM_ENABLE_RUNTIMES "compiler-rt;libcxx;libcxxabi;libunwind" CACHE STRING "") # Use .build-id link. list(APPEND RUNTIME_BUILD_ID_LINK "${target}") @@ -115,7 +146,7 @@ if(FUCHSIA_SDK) set(FUCHSIA_x86_64_NAME x64) set(FUCHSIA_riscv64_NAME riscv64) foreach(target i386;x86_64;aarch64;riscv64) - set(FUCHSIA_${target}_COMPILER_FLAGS "-I${FUCHSIA_SDK}/pkg/fdio/include") + set(FUCHSIA_${target}_COMPILER_FLAGS "--target=${target}-unknown-fuchsia -I${FUCHSIA_SDK}/pkg/fdio/include") set(FUCHSIA_${target}_LINKER_FLAGS "-L${FUCHSIA_SDK}/arch/${FUCHSIA_${target}_NAME}/lib") set(FUCHSIA_${target}_SYSROOT "${FUCHSIA_SDK}/arch/${FUCHSIA_${target}_NAME}/sysroot") endforeach() diff --git a/clang/cmake/caches/Fuchsia.cmake b/clang/cmake/caches/Fuchsia.cmake index bb22a00fa5c7f9..8688b71ecc7534 100644 --- a/clang/cmake/caches/Fuchsia.cmake +++ b/clang/cmake/caches/Fuchsia.cmake @@ -5,7 +5,6 @@ set(LLVM_TARGETS_TO_BUILD X86;ARM;AArch64;RISCV CACHE STRING "") set(PACKAGE_VENDOR Fuchsia CACHE STRING "") set(LLVM_ENABLE_PROJECTS "clang;clang-tools-extra;lld;llvm" CACHE STRING "") -set(LLVM_ENABLE_RUNTIMES "compiler-rt;libcxx;libcxxabi;libunwind" CACHE STRING "") set(LLVM_ENABLE_BACKTRACES OFF CACHE BOOL "") set(LLVM_ENABLE_PER_TARGET_RUNTIME_DIR ON CACHE BOOL "") @@ -16,6 +15,10 @@ set(LLVM_INCLUDE_DOCS OFF CACHE BOOL "") set(LLVM_INCLUDE_EXAMPLES OFF CACHE 
BOOL "") set(LLVM_INCLUDE_GO_TESTS OFF CACHE BOOL "") +if(MSVC) + set(LLVM_USE_CRT_RELEASE "MT" CACHE STRING "") +endif() + set(CLANG_DEFAULT_CXX_STDLIB libc++ CACHE STRING "") if(NOT APPLE) set(CLANG_DEFAULT_LINKER lld CACHE STRING "") @@ -32,8 +35,10 @@ set(ENABLE_X86_RELAX_RELOCATIONS ON CACHE BOOL "") set(LLVM_ENABLE_ASSERTIONS ON CACHE BOOL "") set(CMAKE_BUILD_TYPE Release CACHE STRING "") -if (APPLE) +if(APPLE) set(MACOSX_DEPLOYMENT_TARGET 10.7 CACHE STRING "") +elseif(MSVC) + set(CMAKE_MSVC_RUNTIME_LIBRARY "MultiThreaded" CACHE STRING "") endif() if(APPLE) @@ -52,8 +57,19 @@ set(LIBCXXABI_USE_COMPILER_RT ON CACHE BOOL "") set(LIBCXXABI_USE_LLVM_UNWINDER ON CACHE BOOL "") set(LIBCXX_ABI_VERSION 2 CACHE STRING "") set(LIBCXX_ENABLE_SHARED OFF CACHE BOOL "") -set(LIBCXX_ENABLE_STATIC_ABI_LIBRARY ON CACHE BOOL "") -set(LIBCXX_USE_COMPILER_RT ON CACHE BOOL "") +if(WIN32) + set(LIBCXX_HAS_WIN32_THREAD_API ON CACHE BOOL "") + set(LIBCXX_ENABLE_EXPERIMENTAL_LIBRARY OFF CACHE BOOL "") + set(LIBCXX_ENABLE_FILESYSTEM OFF CACHE BOOL "") + set(LIBCXX_ENABLE_ABI_LINKER_SCRIPT OFF CACHE BOOL "") + set(LIBCXX_ENABLE_STATIC_ABI_LIBRARY OFF CACHE BOOL "") + set(BUILTINS_CMAKE_ARGS -DCMAKE_SYSTEM_NAME=Windows CACHE STRING "") + set(RUNTIMES_CMAKE_ARGS -DCMAKE_SYSTEM_NAME=Windows CACHE STRING "") + set(LLVM_ENABLE_RUNTIMES "compiler-rt;libcxx" CACHE STRING "") +else() + set(LIBCXX_ENABLE_STATIC_ABI_LIBRARY ON CACHE BOOL "") + set(LLVM_ENABLE_RUNTIMES "compiler-rt;libcxx;libcxxabi;libunwind" CACHE STRING "") +endif() if(BOOTSTRAP_CMAKE_SYSTEM_NAME) set(target "${BOOTSTRAP_CMAKE_CXX_COMPILER_TARGET}") diff --git a/lld/unittests/MachOTests/MachONormalizedFileBinaryReaderTests.cpp b/lld/unittests/MachOTests/MachONormalizedFileBinaryReaderTests.cpp index 07c1d4242e03ba..aad5f8afcfdc39 100644 --- a/lld/unittests/MachOTests/MachONormalizedFileBinaryReaderTests.cpp +++ b/lld/unittests/MachOTests/MachONormalizedFileBinaryReaderTests.cpp @@ -75,7 +75,7 @@ TEST(BinaryReaderTest, 
empty_obj_x86_64) { fromBinary(fileBytes, sizeof(fileBytes), "x86_64"); EXPECT_EQ(f->arch, lld::MachOLinkingContext::arch_x86_64); EXPECT_EQ((int)(f->fileType), MH_OBJECT); - EXPECT_EQ(f->flags, MH_SUBSECTIONS_VIA_SYMBOLS); + EXPECT_EQ((int)(f->flags), MH_SUBSECTIONS_VIA_SYMBOLS); EXPECT_TRUE(f->localSymbols.empty()); EXPECT_TRUE(f->globalSymbols.empty()); EXPECT_TRUE(f->undefinedSymbols.empty()); @@ -106,7 +106,7 @@ TEST(BinaryReaderTest, empty_obj_x86) { fromBinary(fileBytes, sizeof(fileBytes), "i386"); EXPECT_EQ(f->arch, lld::MachOLinkingContext::arch_x86); EXPECT_EQ((int)(f->fileType), MH_OBJECT); - EXPECT_EQ(f->flags, MH_SUBSECTIONS_VIA_SYMBOLS); + EXPECT_EQ((int)(f->flags), MH_SUBSECTIONS_VIA_SYMBOLS); EXPECT_TRUE(f->localSymbols.empty()); EXPECT_TRUE(f->globalSymbols.empty()); EXPECT_TRUE(f->undefinedSymbols.empty()); @@ -137,7 +137,7 @@ TEST(BinaryReaderTest, empty_obj_ppc) { fromBinary(fileBytes, sizeof(fileBytes), "ppc"); EXPECT_EQ(f->arch, lld::MachOLinkingContext::arch_ppc); EXPECT_EQ((int)(f->fileType), MH_OBJECT); - EXPECT_EQ(f->flags, MH_SUBSECTIONS_VIA_SYMBOLS); + EXPECT_EQ((int)(f->flags), MH_SUBSECTIONS_VIA_SYMBOLS); EXPECT_TRUE(f->localSymbols.empty()); EXPECT_TRUE(f->globalSymbols.empty()); EXPECT_TRUE(f->undefinedSymbols.empty()); @@ -168,7 +168,7 @@ TEST(BinaryReaderTest, empty_obj_armv7) { fromBinary(fileBytes, sizeof(fileBytes), "armv7"); EXPECT_EQ(f->arch, lld::MachOLinkingContext::arch_armv7); EXPECT_EQ((int)(f->fileType), MH_OBJECT); - EXPECT_EQ(f->flags, MH_SUBSECTIONS_VIA_SYMBOLS); + EXPECT_EQ((int)(f->flags), MH_SUBSECTIONS_VIA_SYMBOLS); EXPECT_TRUE(f->localSymbols.empty()); EXPECT_TRUE(f->globalSymbols.empty()); EXPECT_TRUE(f->undefinedSymbols.empty()); @@ -182,7 +182,7 @@ TEST(BinaryReaderTest, empty_obj_x86_64_arm7) { fromBinary(fileBytes, sizeof(fileBytes), "x86_64"); EXPECT_EQ(f->arch, lld::MachOLinkingContext::arch_x86_64); EXPECT_EQ((int)(f->fileType), MH_OBJECT); - EXPECT_EQ(f->flags, MH_SUBSECTIONS_VIA_SYMBOLS); + 
EXPECT_EQ((int)(f->flags), MH_SUBSECTIONS_VIA_SYMBOLS); EXPECT_TRUE(f->localSymbols.empty()); EXPECT_TRUE(f->globalSymbols.empty()); EXPECT_TRUE(f->undefinedSymbols.empty()); @@ -191,7 +191,7 @@ TEST(BinaryReaderTest, empty_obj_x86_64_arm7) { fromBinary(fileBytes, sizeof(fileBytes), "armv7"); EXPECT_EQ(f2->arch, lld::MachOLinkingContext::arch_armv7); EXPECT_EQ((int)(f2->fileType), MH_OBJECT); - EXPECT_EQ(f2->flags, MH_SUBSECTIONS_VIA_SYMBOLS); + EXPECT_EQ((int)(f2->flags), MH_SUBSECTIONS_VIA_SYMBOLS); EXPECT_TRUE(f2->localSymbols.empty()); EXPECT_TRUE(f2->globalSymbols.empty()); EXPECT_TRUE(f2->undefinedSymbols.empty()); @@ -268,7 +268,7 @@ TEST(BinaryReaderTest, hello_obj_x86_64) { EXPECT_EQ(f->arch, lld::MachOLinkingContext::arch_x86_64); EXPECT_EQ((int)(f->fileType), MH_OBJECT); - EXPECT_EQ(f->flags, MH_SUBSECTIONS_VIA_SYMBOLS); + EXPECT_EQ((int)(f->flags), MH_SUBSECTIONS_VIA_SYMBOLS); EXPECT_EQ(f->sections.size(), 2UL); const Section& text = f->sections[0]; EXPECT_TRUE(text.segmentName.equals("__TEXT")); @@ -393,7 +393,7 @@ TEST(BinaryReaderTest, hello_obj_x86) { EXPECT_EQ(f->arch, lld::MachOLinkingContext::arch_x86); EXPECT_EQ((int)(f->fileType), MH_OBJECT); - EXPECT_EQ(f->flags, MH_SUBSECTIONS_VIA_SYMBOLS); + EXPECT_EQ((int)(f->flags), MH_SUBSECTIONS_VIA_SYMBOLS); EXPECT_EQ(f->sections.size(), 2UL); const Section& text = f->sections[0]; EXPECT_TRUE(text.segmentName.equals("__TEXT")); @@ -525,7 +525,7 @@ TEST(BinaryReaderTest, hello_obj_armv7) { EXPECT_EQ(f->arch, lld::MachOLinkingContext::arch_armv7); EXPECT_EQ((int)(f->fileType), MH_OBJECT); - EXPECT_EQ(f->flags, MH_SUBSECTIONS_VIA_SYMBOLS); + EXPECT_EQ((int)(f->flags), MH_SUBSECTIONS_VIA_SYMBOLS); EXPECT_EQ(f->sections.size(), 2UL); const Section& text = f->sections[0]; EXPECT_TRUE(text.segmentName.equals("__TEXT")); @@ -669,7 +669,7 @@ TEST(BinaryReaderTest, hello_obj_ppc) { EXPECT_EQ(f->arch, lld::MachOLinkingContext::arch_ppc); EXPECT_EQ((int)(f->fileType), MH_OBJECT); - EXPECT_EQ(f->flags, 
MH_SUBSECTIONS_VIA_SYMBOLS); + EXPECT_EQ((int)(f->flags), MH_SUBSECTIONS_VIA_SYMBOLS); EXPECT_EQ(f->sections.size(), 2UL); const Section& text = f->sections[0]; EXPECT_TRUE(text.segmentName.equals("__TEXT")); diff --git a/lld/unittests/MachOTests/MachONormalizedFileYAMLTests.cpp b/lld/unittests/MachOTests/MachONormalizedFileYAMLTests.cpp index c1445ea7eacd79..6ceb197b4b84a7 100644 --- a/lld/unittests/MachOTests/MachONormalizedFileYAMLTests.cpp +++ b/lld/unittests/MachOTests/MachONormalizedFileYAMLTests.cpp @@ -50,7 +50,7 @@ TEST(ObjectFileYAML, empty_ppc) { "...\n"); EXPECT_EQ(f->arch, lld::MachOLinkingContext::arch_ppc); EXPECT_EQ(f->fileType, llvm::MachO::MH_OBJECT); - EXPECT_EQ(f->flags, llvm::MachO::MH_SUBSECTIONS_VIA_SYMBOLS); + EXPECT_EQ((int)(f->flags), llvm::MachO::MH_SUBSECTIONS_VIA_SYMBOLS); EXPECT_TRUE(f->sections.empty()); EXPECT_TRUE(f->localSymbols.empty()); EXPECT_TRUE(f->globalSymbols.empty()); @@ -66,7 +66,7 @@ TEST(ObjectFileYAML, empty_x86_64) { "...\n"); EXPECT_EQ(f->arch, lld::MachOLinkingContext::arch_x86_64); EXPECT_EQ(f->fileType, llvm::MachO::MH_OBJECT); - EXPECT_EQ(f->flags, llvm::MachO::MH_SUBSECTIONS_VIA_SYMBOLS); + EXPECT_EQ((int)(f->flags), llvm::MachO::MH_SUBSECTIONS_VIA_SYMBOLS); EXPECT_TRUE(f->sections.empty()); EXPECT_TRUE(f->localSymbols.empty()); EXPECT_TRUE(f->globalSymbols.empty()); @@ -82,7 +82,7 @@ TEST(ObjectFileYAML, empty_x86) { "...\n"); EXPECT_EQ(f->arch, lld::MachOLinkingContext::arch_x86); EXPECT_EQ(f->fileType, llvm::MachO::MH_OBJECT); - EXPECT_EQ(f->flags, llvm::MachO::MH_SUBSECTIONS_VIA_SYMBOLS); + EXPECT_EQ((int)(f->flags), llvm::MachO::MH_SUBSECTIONS_VIA_SYMBOLS); EXPECT_TRUE(f->sections.empty()); EXPECT_TRUE(f->localSymbols.empty()); EXPECT_TRUE(f->globalSymbols.empty()); @@ -98,7 +98,7 @@ TEST(ObjectFileYAML, empty_armv6) { "...\n"); EXPECT_EQ(f->arch, lld::MachOLinkingContext::arch_armv6); EXPECT_EQ(f->fileType, llvm::MachO::MH_OBJECT); - EXPECT_EQ(f->flags, llvm::MachO::MH_SUBSECTIONS_VIA_SYMBOLS); + 
EXPECT_EQ((int)(f->flags), llvm::MachO::MH_SUBSECTIONS_VIA_SYMBOLS); EXPECT_TRUE(f->sections.empty()); EXPECT_TRUE(f->localSymbols.empty()); EXPECT_TRUE(f->globalSymbols.empty()); @@ -114,7 +114,7 @@ TEST(ObjectFileYAML, empty_armv7) { "...\n"); EXPECT_EQ(f->arch, lld::MachOLinkingContext::arch_armv7); EXPECT_EQ(f->fileType, llvm::MachO::MH_OBJECT); - EXPECT_EQ(f->flags, llvm::MachO::MH_SUBSECTIONS_VIA_SYMBOLS); + EXPECT_EQ((int)(f->flags), llvm::MachO::MH_SUBSECTIONS_VIA_SYMBOLS); EXPECT_TRUE(f->sections.empty()); EXPECT_TRUE(f->localSymbols.empty()); EXPECT_TRUE(f->globalSymbols.empty()); @@ -130,7 +130,7 @@ TEST(ObjectFileYAML, empty_armv7s) { "...\n"); EXPECT_EQ(f->arch, lld::MachOLinkingContext::arch_armv7s); EXPECT_EQ(f->fileType, llvm::MachO::MH_OBJECT); - EXPECT_EQ(f->flags, llvm::MachO::MH_SUBSECTIONS_VIA_SYMBOLS); + EXPECT_EQ((int)(f->flags), llvm::MachO::MH_SUBSECTIONS_VIA_SYMBOLS); EXPECT_TRUE(f->sections.empty()); EXPECT_TRUE(f->localSymbols.empty()); EXPECT_TRUE(f->globalSymbols.empty()); @@ -151,7 +151,7 @@ TEST(ObjectFileYAML, roundTrip) { std::unique_ptr f2 = fromYAML(intermediate); EXPECT_EQ(f2->arch, lld::MachOLinkingContext::arch_x86_64); EXPECT_EQ((int)(f2->fileType), llvm::MachO::MH_OBJECT); - EXPECT_EQ(f2->flags, llvm::MachO::MH_SUBSECTIONS_VIA_SYMBOLS); + EXPECT_EQ((int)(f2->flags), llvm::MachO::MH_SUBSECTIONS_VIA_SYMBOLS); EXPECT_TRUE(f2->sections.empty()); EXPECT_TRUE(f2->localSymbols.empty()); EXPECT_TRUE(f2->globalSymbols.empty()); @@ -275,7 +275,7 @@ TEST(ObjectFileYAML, hello_x86_64) { "...\n"); EXPECT_EQ(f->arch, lld::MachOLinkingContext::arch_x86_64); EXPECT_EQ(f->fileType, llvm::MachO::MH_OBJECT); - EXPECT_EQ(f->flags, llvm::MachO::MH_SUBSECTIONS_VIA_SYMBOLS); + EXPECT_EQ((int)(f->flags), llvm::MachO::MH_SUBSECTIONS_VIA_SYMBOLS); EXPECT_EQ(f->sections.size(), 2UL); const Section& sect1 = f->sections[0]; @@ -405,7 +405,7 @@ TEST(ObjectFileYAML, hello_x86) { "...\n"); EXPECT_EQ(f->arch, lld::MachOLinkingContext::arch_x86); 
EXPECT_EQ(f->fileType, llvm::MachO::MH_OBJECT); - EXPECT_EQ(f->flags, llvm::MachO::MH_SUBSECTIONS_VIA_SYMBOLS); + EXPECT_EQ((int)(f->flags), llvm::MachO::MH_SUBSECTIONS_VIA_SYMBOLS); EXPECT_EQ(f->sections.size(), 2UL); const Section& sect1 = f->sections[0]; @@ -533,7 +533,7 @@ TEST(ObjectFileYAML, hello_armv6) { "...\n"); EXPECT_EQ(f->arch, lld::MachOLinkingContext::arch_armv6); EXPECT_EQ(f->fileType, llvm::MachO::MH_OBJECT); - EXPECT_EQ(f->flags, llvm::MachO::MH_SUBSECTIONS_VIA_SYMBOLS); + EXPECT_EQ((int)(f->flags), llvm::MachO::MH_SUBSECTIONS_VIA_SYMBOLS); EXPECT_EQ(f->sections.size(), 2UL); const Section& sect1 = f->sections[0]; @@ -673,7 +673,7 @@ TEST(ObjectFileYAML, hello_armv7) { "...\n"); EXPECT_EQ(f->arch, lld::MachOLinkingContext::arch_armv7); EXPECT_EQ(f->fileType, llvm::MachO::MH_OBJECT); - EXPECT_EQ(f->flags, llvm::MachO::MH_SUBSECTIONS_VIA_SYMBOLS); + EXPECT_EQ((int)(f->flags), llvm::MachO::MH_SUBSECTIONS_VIA_SYMBOLS); EXPECT_EQ(f->sections.size(), 2UL); const Section& sect1 = f->sections[0]; diff --git a/llvm/docs/CodingStandards.rst b/llvm/docs/CodingStandards.rst index 861ab05420fb68..99fb6af02a282a 100644 --- a/llvm/docs/CodingStandards.rst +++ b/llvm/docs/CodingStandards.rst @@ -1302,6 +1302,9 @@ loops wherever possible for all newly added code. For example: for (Instruction &I : *BB) ... use I ... +Usage of ``std::for_each()``/``llvm::for_each()`` functions is discouraged, +unless the callable object already exists. + Don't evaluate ``end()`` every time through a loop ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index dd869f98b5bcc0..effd5d6ab7d815 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -707,6 +707,33 @@ namespace { SmallVectorImpl &StoreNodes, unsigned NumStores, SDNode *RootNode); + /// This is a helper function for mergeConsecutiveStores.
Given a list of + /// store candidates, find the first N that are consecutive in memory. + /// Returns 0 if there are not at least 2 consecutive stores to try merging. + unsigned getConsecutiveStores(SmallVectorImpl &StoreNodes, + int64_t ElementSizeBytes) const; + + /// This is a helper function for mergeConsecutiveStores. It is used for + /// store chains that are composed entirely of constant values. + bool tryStoreMergeOfConstants(SmallVectorImpl &StoreNodes, + unsigned NumConsecutiveStores, + EVT MemVT, SDNode *Root, bool AllowVectors); + + /// This is a helper function for mergeConsecutiveStores. It is used for + /// store chains that are composed entirely of extracted vector elements. + /// When extracting multiple vector elements, try to store them in one + /// vector store rather than a sequence of scalar stores. + bool tryStoreMergeOfExtracts(SmallVectorImpl &StoreNodes, + unsigned NumConsecutiveStores, EVT MemVT, + SDNode *Root); + + /// This is a helper function for mergeConsecutiveStores. It is used for + /// store chains that are composed entirely of loaded values. + bool tryStoreMergeOfLoads(SmallVectorImpl &StoreNodes, + unsigned NumConsecutiveStores, EVT MemVT, + SDNode *Root, bool AllowVectors, + bool IsNonTemporalStore, bool IsNonTemporalLoad); + /// Merge consecutive store operations into a wide store. /// This optimization uses wide integers or vectors when possible. /// \return true if stores were merged. @@ -16237,76 +16264,22 @@ bool DAGCombiner::checkMergeStoreCandidatesForDependencies( return true; } -bool DAGCombiner::mergeConsecutiveStores(StoreSDNode *St) { - if (OptLevel == CodeGenOpt::None || !EnableStoreMerging) - return false; - - // TODO: Extend this function to merge stores of scalable vectors. - // (i.e. two stores can be merged to one - // store since we know is exactly twice as large as - // ). Until then, bail out for scalable vectors. 
- EVT MemVT = St->getMemoryVT(); - if (MemVT.isScalableVector()) - return false; - if (!MemVT.isSimple() || MemVT.getSizeInBits() * 2 > MaximumLegalStoreInBits) - return false; - - // This function cannot currently deal with non-byte-sized memory sizes. - int64_t ElementSizeBytes = MemVT.getStoreSize(); - if (ElementSizeBytes * 8 != (int64_t)MemVT.getSizeInBits()) - return false; - - // Do not bother looking at stored values that are not constants, loads, or - // extracted vector elements. - SDValue StoredVal = peekThroughBitcasts(St->getValue()); - StoreSource StoreSrc = getStoreSource(StoredVal); - if (StoreSrc == StoreSource::Unknown) - return false; - - SmallVector StoreNodes; - SDNode *RootNode; - // Find potential store merge candidates by searching through chain sub-DAG - getStoreMergeCandidates(St, StoreNodes, RootNode); - - // Check if there is anything to merge. - if (StoreNodes.size() < 2) - return false; - - // Sort the memory operands according to their distance from the - // base pointer. - llvm::sort(StoreNodes, [](MemOpLink LHS, MemOpLink RHS) { - return LHS.OffsetFromBase < RHS.OffsetFromBase; - }); - - unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1; - bool AllowVectors = !DAG.getMachineFunction().getFunction().hasFnAttribute( - Attribute::NoImplicitFloat); - - bool IsNonTemporalStore = St->isNonTemporal(); - bool IsNonTemporalLoad = StoreSrc == StoreSource::Load && - cast(StoredVal)->isNonTemporal(); - LLVMContext &Context = *DAG.getContext(); - const DataLayout &DL = DAG.getDataLayout(); - - // Store Merge attempts to merge the lowest stores. This generally - // works out as if successful, as the remaining stores are checked - // after the first collection of stores is merged. However, in the - // case that a non-mergeable store is found first, e.g., {p[-2], - // p[0], p[1], p[2], p[3]}, we would fail and miss the subsequent - // mergeable cases. 
To prevent this, we prune such stores from the - // front of StoreNodes here. - bool MadeChange = false; - while (StoreNodes.size() > 1) { +unsigned +DAGCombiner::getConsecutiveStores(SmallVectorImpl &StoreNodes, + int64_t ElementSizeBytes) const { + while (true) { + // Find a store past the width of the first store. size_t StartIdx = 0; while ((StartIdx + 1 < StoreNodes.size()) && StoreNodes[StartIdx].OffsetFromBase + ElementSizeBytes != - StoreNodes[StartIdx + 1].OffsetFromBase) + StoreNodes[StartIdx + 1].OffsetFromBase) ++StartIdx; // Bail if we don't have enough candidates to merge. if (StartIdx + 1 >= StoreNodes.size()) - return MadeChange; + return 0; + // Trim stores that overlapped with the first store. if (StartIdx) StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + StartIdx); @@ -16322,300 +16295,333 @@ bool DAGCombiner::mergeConsecutiveStores(StoreSDNode *St) { break; NumConsecutiveStores = i + 1; } + if (NumConsecutiveStores > 1) + return NumConsecutiveStores; - if (NumConsecutiveStores < 2) { - StoreNodes.erase(StoreNodes.begin(), - StoreNodes.begin() + NumConsecutiveStores); - continue; - } + // There are no consecutive stores at the start of the list. + // Remove the first store and try again. + StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 1); + } +} - if (StoreSrc == StoreSource::Constant) { - // Store the constants into memory as one consecutive store. 
- while (NumConsecutiveStores >= 2) { - LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode; - unsigned FirstStoreAS = FirstInChain->getAddressSpace(); - unsigned FirstStoreAlign = FirstInChain->getAlignment(); - unsigned LastLegalType = 1; - unsigned LastLegalVectorType = 1; - bool LastIntegerTrunc = false; - bool NonZero = false; - unsigned FirstZeroAfterNonZero = NumConsecutiveStores; - for (unsigned i = 0; i < NumConsecutiveStores; ++i) { - StoreSDNode *ST = cast(StoreNodes[i].MemNode); - SDValue StoredVal = ST->getValue(); - bool IsElementZero = false; - if (ConstantSDNode *C = dyn_cast(StoredVal)) - IsElementZero = C->isNullValue(); - else if (ConstantFPSDNode *C = dyn_cast(StoredVal)) - IsElementZero = C->getConstantFPValue()->isNullValue(); - if (IsElementZero) { - if (NonZero && FirstZeroAfterNonZero == NumConsecutiveStores) - FirstZeroAfterNonZero = i; - } - NonZero |= !IsElementZero; +bool DAGCombiner::tryStoreMergeOfConstants( + SmallVectorImpl &StoreNodes, unsigned NumConsecutiveStores, + EVT MemVT, SDNode *RootNode, bool AllowVectors) { + LLVMContext &Context = *DAG.getContext(); + const DataLayout &DL = DAG.getDataLayout(); + int64_t ElementSizeBytes = MemVT.getStoreSize(); + unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1; + bool MadeChange = false; - // Find a legal type for the constant store. - unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8; - EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits); - bool IsFast = false; + // Store the constants into memory as one consecutive store. 
+ while (NumConsecutiveStores >= 2) { + LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode; + unsigned FirstStoreAS = FirstInChain->getAddressSpace(); + unsigned FirstStoreAlign = FirstInChain->getAlignment(); + unsigned LastLegalType = 1; + unsigned LastLegalVectorType = 1; + bool LastIntegerTrunc = false; + bool NonZero = false; + unsigned FirstZeroAfterNonZero = NumConsecutiveStores; + for (unsigned i = 0; i < NumConsecutiveStores; ++i) { + StoreSDNode *ST = cast(StoreNodes[i].MemNode); + SDValue StoredVal = ST->getValue(); + bool IsElementZero = false; + if (ConstantSDNode *C = dyn_cast(StoredVal)) + IsElementZero = C->isNullValue(); + else if (ConstantFPSDNode *C = dyn_cast(StoredVal)) + IsElementZero = C->getConstantFPValue()->isNullValue(); + if (IsElementZero) { + if (NonZero && FirstZeroAfterNonZero == NumConsecutiveStores) + FirstZeroAfterNonZero = i; + } + NonZero |= !IsElementZero; - // Break early when size is too large to be legal. - if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits) - break; + // Find a legal type for the constant store. + unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8; + EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits); + bool IsFast = false; - if (TLI.isTypeLegal(StoreTy) && - TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) && - TLI.allowsMemoryAccess(Context, DL, StoreTy, - *FirstInChain->getMemOperand(), &IsFast) && - IsFast) { - LastIntegerTrunc = false; - LastLegalType = i + 1; - // Or check whether a truncstore is legal. 
- } else if (TLI.getTypeAction(Context, StoreTy) == - TargetLowering::TypePromoteInteger) { - EVT LegalizedStoredValTy = - TLI.getTypeToTransformTo(Context, StoredVal.getValueType()); - if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) && - TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, DAG) && - TLI.allowsMemoryAccess(Context, DL, StoreTy, - *FirstInChain->getMemOperand(), - &IsFast) && - IsFast) { - LastIntegerTrunc = true; - LastLegalType = i + 1; - } - } - - // We only use vectors if the constant is known to be zero or the - // target allows it and the function is not marked with the - // noimplicitfloat attribute. - if ((!NonZero || - TLI.storeOfVectorConstantIsCheap(MemVT, i + 1, FirstStoreAS)) && - AllowVectors) { - // Find a legal type for the vector store. - unsigned Elts = (i + 1) * NumMemElts; - EVT Ty = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts); - if (TLI.isTypeLegal(Ty) && TLI.isTypeLegal(MemVT) && - TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) && - TLI.allowsMemoryAccess( - Context, DL, Ty, *FirstInChain->getMemOperand(), &IsFast) && - IsFast) - LastLegalVectorType = i + 1; - } - } + // Break early when size is too large to be legal. + if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits) + break; - bool UseVector = (LastLegalVectorType > LastLegalType) && AllowVectors; - unsigned NumElem = (UseVector) ? LastLegalVectorType : LastLegalType; - - // Check if we found a legal integer type that creates a meaningful - // merge. - if (NumElem < 2) { - // We know that candidate stores are in order and of correct - // shape. While there is no mergeable sequence from the - // beginning one may start later in the sequence. The only - // reason a merge of size N could have failed where another of - // the same size would not have, is if the alignment has - // improved or we've dropped a non-zero value. Drop as many - // candidates as we can here. 
- unsigned NumSkip = 1; - while ( - (NumSkip < NumConsecutiveStores) && - (NumSkip < FirstZeroAfterNonZero) && - (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign)) - NumSkip++; - - StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip); - NumConsecutiveStores -= NumSkip; - continue; + if (TLI.isTypeLegal(StoreTy) && + TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) && + TLI.allowsMemoryAccess(Context, DL, StoreTy, + *FirstInChain->getMemOperand(), &IsFast) && + IsFast) { + LastIntegerTrunc = false; + LastLegalType = i + 1; + // Or check whether a truncstore is legal. + } else if (TLI.getTypeAction(Context, StoreTy) == + TargetLowering::TypePromoteInteger) { + EVT LegalizedStoredValTy = + TLI.getTypeToTransformTo(Context, StoredVal.getValueType()); + if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) && + TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, DAG) && + TLI.allowsMemoryAccess(Context, DL, StoreTy, + *FirstInChain->getMemOperand(), &IsFast) && + IsFast) { + LastIntegerTrunc = true; + LastLegalType = i + 1; } + } - // Check that we can merge these candidates without causing a cycle. - if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem, - RootNode)) { - StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem); - NumConsecutiveStores -= NumElem; - continue; - } + // We only use vectors if the constant is known to be zero or the + // target allows it and the function is not marked with the + // noimplicitfloat attribute. + if ((!NonZero || + TLI.storeOfVectorConstantIsCheap(MemVT, i + 1, FirstStoreAS)) && + AllowVectors) { + // Find a legal type for the vector store. 
+ unsigned Elts = (i + 1) * NumMemElts; + EVT Ty = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts); + if (TLI.isTypeLegal(Ty) && TLI.isTypeLegal(MemVT) && + TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) && + TLI.allowsMemoryAccess(Context, DL, Ty, + *FirstInChain->getMemOperand(), &IsFast) && + IsFast) + LastLegalVectorType = i + 1; + } + } - MadeChange |= mergeStoresOfConstantsOrVecElts( - StoreNodes, MemVT, NumElem, true, UseVector, LastIntegerTrunc); + bool UseVector = (LastLegalVectorType > LastLegalType) && AllowVectors; + unsigned NumElem = (UseVector) ? LastLegalVectorType : LastLegalType; + + // Check if we found a legal integer type that creates a meaningful + // merge. + if (NumElem < 2) { + // We know that candidate stores are in order and of correct + // shape. While there is no mergeable sequence from the + // beginning one may start later in the sequence. The only + // reason a merge of size N could have failed where another of + // the same size would not have, is if the alignment has + // improved or we've dropped a non-zero value. Drop as many + // candidates as we can here. + unsigned NumSkip = 1; + while ((NumSkip < NumConsecutiveStores) && + (NumSkip < FirstZeroAfterNonZero) && + (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign)) + NumSkip++; + + StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip); + NumConsecutiveStores -= NumSkip; + continue; + } - // Remove merged stores for next iteration. - StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem); - NumConsecutiveStores -= NumElem; - } + // Check that we can merge these candidates without causing a cycle. + if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem, + RootNode)) { + StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem); + NumConsecutiveStores -= NumElem; continue; } - // When extracting multiple vector elements, try to store them - // in one vector store rather than a sequence of scalar stores. 
- if (StoreSrc == StoreSource::Extract) { - // Loop on Consecutive Stores on success. - while (NumConsecutiveStores >= 2) { - LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode; - unsigned FirstStoreAS = FirstInChain->getAddressSpace(); - unsigned FirstStoreAlign = FirstInChain->getAlignment(); - unsigned NumStoresToMerge = 1; - for (unsigned i = 0; i < NumConsecutiveStores; ++i) { - // Find a legal type for the vector store. - unsigned Elts = (i + 1) * NumMemElts; - EVT Ty = - EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts); - bool IsFast = false; - - // Break early when size is too large to be legal. - if (Ty.getSizeInBits() > MaximumLegalStoreInBits) - break; + MadeChange |= mergeStoresOfConstantsOrVecElts( + StoreNodes, MemVT, NumElem, true, UseVector, LastIntegerTrunc); - if (TLI.isTypeLegal(Ty) && - TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) && - TLI.allowsMemoryAccess(Context, DL, Ty, - *FirstInChain->getMemOperand(), &IsFast) && - IsFast) - NumStoresToMerge = i + 1; - } + // Remove merged stores for next iteration. + StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem); + NumConsecutiveStores -= NumElem; + } + return MadeChange; +} - // Check if we found a legal integer type creating a meaningful - // merge. - if (NumStoresToMerge < 2) { - // We know that candidate stores are in order and of correct - // shape. While there is no mergeable sequence from the - // beginning one may start later in the sequence. The only - // reason a merge of size N could have failed where another of - // the same size would not have, is if the alignment has - // improved. Drop as many candidates as we can here. 
- unsigned NumSkip = 1; - while ( - (NumSkip < NumConsecutiveStores) && - (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign)) - NumSkip++; - - StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip); - NumConsecutiveStores -= NumSkip; - continue; - } +bool DAGCombiner::tryStoreMergeOfExtracts( + SmallVectorImpl &StoreNodes, unsigned NumConsecutiveStores, + EVT MemVT, SDNode *RootNode) { + LLVMContext &Context = *DAG.getContext(); + const DataLayout &DL = DAG.getDataLayout(); + unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1; + bool MadeChange = false; - // Check that we can merge these candidates without causing a cycle. - if (!checkMergeStoreCandidatesForDependencies( - StoreNodes, NumStoresToMerge, RootNode)) { - StoreNodes.erase(StoreNodes.begin(), - StoreNodes.begin() + NumStoresToMerge); - NumConsecutiveStores -= NumStoresToMerge; - continue; - } + // Loop on Consecutive Stores on success. + while (NumConsecutiveStores >= 2) { + LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode; + unsigned FirstStoreAS = FirstInChain->getAddressSpace(); + unsigned FirstStoreAlign = FirstInChain->getAlignment(); + unsigned NumStoresToMerge = 1; + for (unsigned i = 0; i < NumConsecutiveStores; ++i) { + // Find a legal type for the vector store. + unsigned Elts = (i + 1) * NumMemElts; + EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts); + bool IsFast = false; - MadeChange |= mergeStoresOfConstantsOrVecElts( - StoreNodes, MemVT, NumStoresToMerge, false, true, false); + // Break early when size is too large to be legal. 
+ if (Ty.getSizeInBits() > MaximumLegalStoreInBits) + break; - StoreNodes.erase(StoreNodes.begin(), - StoreNodes.begin() + NumStoresToMerge); - NumConsecutiveStores -= NumStoresToMerge; - } + if (TLI.isTypeLegal(Ty) && TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) && + TLI.allowsMemoryAccess(Context, DL, Ty, + *FirstInChain->getMemOperand(), &IsFast) && + IsFast) + NumStoresToMerge = i + 1; + } + + // Check if we found a legal integer type creating a meaningful + // merge. + if (NumStoresToMerge < 2) { + // We know that candidate stores are in order and of correct + // shape. While there is no mergeable sequence from the + // beginning one may start later in the sequence. The only + // reason a merge of size N could have failed where another of + // the same size would not have, is if the alignment has + // improved. Drop as many candidates as we can here. + unsigned NumSkip = 1; + while ((NumSkip < NumConsecutiveStores) && + (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign)) + NumSkip++; + + StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip); + NumConsecutiveStores -= NumSkip; continue; } - // Below we handle the case of multiple consecutive stores that - // come from multiple consecutive loads. We merge them into a single - // wide load and a single wide store. - assert(StoreSrc == StoreSource::Load && "Expected load source for store"); + // Check that we can merge these candidates without causing a cycle. + if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumStoresToMerge, + RootNode)) { + StoreNodes.erase(StoreNodes.begin(), + StoreNodes.begin() + NumStoresToMerge); + NumConsecutiveStores -= NumStoresToMerge; + continue; + } - // Look for load nodes which are used by the stored values. - SmallVector LoadNodes; + MadeChange |= mergeStoresOfConstantsOrVecElts( + StoreNodes, MemVT, NumStoresToMerge, false, true, false); - // Find acceptable loads. 
Loads need to have the same chain (token factor), - // must not be zext, volatile, indexed, and they must be consecutive. - BaseIndexOffset LdBasePtr; + StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumStoresToMerge); + NumConsecutiveStores -= NumStoresToMerge; + } + return MadeChange; +} - for (unsigned i = 0; i < NumConsecutiveStores; ++i) { - StoreSDNode *St = cast(StoreNodes[i].MemNode); - SDValue Val = peekThroughBitcasts(St->getValue()); - LoadSDNode *Ld = cast(Val); - - BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld, DAG); - // If this is not the first ptr that we check. - int64_t LdOffset = 0; - if (LdBasePtr.getBase().getNode()) { - // The base ptr must be the same. - if (!LdBasePtr.equalBaseIndex(LdPtr, DAG, LdOffset)) - break; - } else { - // Check that all other base pointers are the same as this one. - LdBasePtr = LdPtr; - } +bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl &StoreNodes, + unsigned NumConsecutiveStores, EVT MemVT, + SDNode *RootNode, bool AllowVectors, + bool IsNonTemporalStore, + bool IsNonTemporalLoad) { + LLVMContext &Context = *DAG.getContext(); + const DataLayout &DL = DAG.getDataLayout(); + int64_t ElementSizeBytes = MemVT.getStoreSize(); + unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1; + bool MadeChange = false; + + int64_t StartAddress = StoreNodes[0].OffsetFromBase; + + // Look for load nodes which are used by the stored values. + SmallVector LoadNodes; - // We found a potential memory operand to merge. - LoadNodes.push_back(MemOpLink(Ld, LdOffset)); + // Find acceptable loads. Loads need to have the same chain (token factor), + // must not be zext, volatile, indexed, and they must be consecutive. 
+ BaseIndexOffset LdBasePtr; + + for (unsigned i = 0; i < NumConsecutiveStores; ++i) { + StoreSDNode *St = cast(StoreNodes[i].MemNode); + SDValue Val = peekThroughBitcasts(St->getValue()); + LoadSDNode *Ld = cast(Val); + + BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld, DAG); + // If this is not the first ptr that we check. + int64_t LdOffset = 0; + if (LdBasePtr.getBase().getNode()) { + // The base ptr must be the same. + if (!LdBasePtr.equalBaseIndex(LdPtr, DAG, LdOffset)) + break; + } else { + // Check that all other base pointers are the same as this one. + LdBasePtr = LdPtr; } - while (NumConsecutiveStores >= 2 && LoadNodes.size() >= 2) { - // If we have load/store pair instructions and we only have two values, - // don't bother merging. - Align RequiredAlignment; - if (LoadNodes.size() == 2 && - TLI.hasPairedLoad(MemVT, RequiredAlignment) && - StoreNodes[0].MemNode->getAlign() >= RequiredAlignment) { - StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 2); - LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + 2); + // We found a potential memory operand to merge. + LoadNodes.push_back(MemOpLink(Ld, LdOffset)); + } + + while (NumConsecutiveStores >= 2 && LoadNodes.size() >= 2) { + // If we have load/store pair instructions and we only have two values, + // don't bother merging. 
+ Align RequiredAlignment; + if (LoadNodes.size() == 2 && TLI.hasPairedLoad(MemVT, RequiredAlignment) && + StoreNodes[0].MemNode->getAlign() >= RequiredAlignment) { + StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 2); + LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + 2); + break; + } + LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode; + unsigned FirstStoreAS = FirstInChain->getAddressSpace(); + unsigned FirstStoreAlign = FirstInChain->getAlignment(); + LoadSDNode *FirstLoad = cast(LoadNodes[0].MemNode); + unsigned FirstLoadAlign = FirstLoad->getAlignment(); + + // Scan the memory operations on the chain and find the first + // non-consecutive load memory address. These variables hold the index in + // the store node array. + + unsigned LastConsecutiveLoad = 1; + + // This variable refers to the size and not index in the array. + unsigned LastLegalVectorType = 1; + unsigned LastLegalIntegerType = 1; + bool isDereferenceable = true; + bool DoIntegerTruncate = false; + StartAddress = LoadNodes[0].OffsetFromBase; + SDValue FirstChain = FirstLoad->getChain(); + for (unsigned i = 1; i < LoadNodes.size(); ++i) { + // All loads must share the same chain. + if (LoadNodes[i].MemNode->getChain() != FirstChain) break; - } - LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode; - unsigned FirstStoreAS = FirstInChain->getAddressSpace(); - unsigned FirstStoreAlign = FirstInChain->getAlignment(); - LoadSDNode *FirstLoad = cast(LoadNodes[0].MemNode); - unsigned FirstLoadAlign = FirstLoad->getAlignment(); - - // Scan the memory operations on the chain and find the first - // non-consecutive load memory address. These variables hold the index in - // the store node array. - - unsigned LastConsecutiveLoad = 1; - - // This variable refers to the size and not index in the array. 
- unsigned LastLegalVectorType = 1; - unsigned LastLegalIntegerType = 1; - bool isDereferenceable = true; - bool DoIntegerTruncate = false; - StartAddress = LoadNodes[0].OffsetFromBase; - SDValue FirstChain = FirstLoad->getChain(); - for (unsigned i = 1; i < LoadNodes.size(); ++i) { - // All loads must share the same chain. - if (LoadNodes[i].MemNode->getChain() != FirstChain) - break; - int64_t CurrAddress = LoadNodes[i].OffsetFromBase; - if (CurrAddress - StartAddress != (ElementSizeBytes * i)) - break; - LastConsecutiveLoad = i; + int64_t CurrAddress = LoadNodes[i].OffsetFromBase; + if (CurrAddress - StartAddress != (ElementSizeBytes * i)) + break; + LastConsecutiveLoad = i; - if (isDereferenceable && !LoadNodes[i].MemNode->isDereferenceable()) - isDereferenceable = false; + if (isDereferenceable && !LoadNodes[i].MemNode->isDereferenceable()) + isDereferenceable = false; - // Find a legal type for the vector store. - unsigned Elts = (i + 1) * NumMemElts; - EVT StoreTy = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts); + // Find a legal type for the vector store. + unsigned Elts = (i + 1) * NumMemElts; + EVT StoreTy = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts); - // Break early when size is too large to be legal. - if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits) - break; + // Break early when size is too large to be legal. 
+ if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits) + break; - bool IsFastSt = false; - bool IsFastLd = false; - if (TLI.isTypeLegal(StoreTy) && - TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) && - TLI.allowsMemoryAccess(Context, DL, StoreTy, - *FirstInChain->getMemOperand(), &IsFastSt) && - IsFastSt && - TLI.allowsMemoryAccess(Context, DL, StoreTy, - *FirstLoad->getMemOperand(), &IsFastLd) && - IsFastLd) { - LastLegalVectorType = i + 1; - } + bool IsFastSt = false; + bool IsFastLd = false; + if (TLI.isTypeLegal(StoreTy) && + TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) && + TLI.allowsMemoryAccess(Context, DL, StoreTy, + *FirstInChain->getMemOperand(), &IsFastSt) && + IsFastSt && + TLI.allowsMemoryAccess(Context, DL, StoreTy, + *FirstLoad->getMemOperand(), &IsFastLd) && + IsFastLd) { + LastLegalVectorType = i + 1; + } - // Find a legal type for the integer store. - unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8; - StoreTy = EVT::getIntegerVT(Context, SizeInBits); - if (TLI.isTypeLegal(StoreTy) && - TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) && + // Find a legal type for the integer store. + unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8; + StoreTy = EVT::getIntegerVT(Context, SizeInBits); + if (TLI.isTypeLegal(StoreTy) && + TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) && + TLI.allowsMemoryAccess(Context, DL, StoreTy, + *FirstInChain->getMemOperand(), &IsFastSt) && + IsFastSt && + TLI.allowsMemoryAccess(Context, DL, StoreTy, + *FirstLoad->getMemOperand(), &IsFastLd) && + IsFastLd) { + LastLegalIntegerType = i + 1; + DoIntegerTruncate = false; + // Or check whether a truncstore and extload is legal. 
+ } else if (TLI.getTypeAction(Context, StoreTy) == + TargetLowering::TypePromoteInteger) { + EVT LegalizedStoredValTy = TLI.getTypeToTransformTo(Context, StoreTy); + if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) && + TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, DAG) && + TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValTy, StoreTy) && + TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValTy, StoreTy) && + TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValTy, StoreTy) && TLI.allowsMemoryAccess(Context, DL, StoreTy, *FirstInChain->getMemOperand(), &IsFastSt) && IsFastSt && @@ -16623,146 +16629,211 @@ bool DAGCombiner::mergeConsecutiveStores(StoreSDNode *St) { *FirstLoad->getMemOperand(), &IsFastLd) && IsFastLd) { LastLegalIntegerType = i + 1; - DoIntegerTruncate = false; - // Or check whether a truncstore and extload is legal. - } else if (TLI.getTypeAction(Context, StoreTy) == - TargetLowering::TypePromoteInteger) { - EVT LegalizedStoredValTy = TLI.getTypeToTransformTo(Context, StoreTy); - if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) && - TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, DAG) && - TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValTy, - StoreTy) && - TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValTy, - StoreTy) && - TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValTy, StoreTy) && - TLI.allowsMemoryAccess(Context, DL, StoreTy, - *FirstInChain->getMemOperand(), - &IsFastSt) && - IsFastSt && - TLI.allowsMemoryAccess(Context, DL, StoreTy, - *FirstLoad->getMemOperand(), &IsFastLd) && - IsFastLd) { - LastLegalIntegerType = i + 1; - DoIntegerTruncate = true; - } + DoIntegerTruncate = true; } } + } - // Only use vector types if the vector type is larger than the integer - // type. If they are the same, use integers. 
- bool UseVectorTy = - LastLegalVectorType > LastLegalIntegerType && AllowVectors; - unsigned LastLegalType = - std::max(LastLegalVectorType, LastLegalIntegerType); - - // We add +1 here because the LastXXX variables refer to location while - // the NumElem refers to array/index size. - unsigned NumElem = - std::min(NumConsecutiveStores, LastConsecutiveLoad + 1); - NumElem = std::min(LastLegalType, NumElem); - - if (NumElem < 2) { - // We know that candidate stores are in order and of correct - // shape. While there is no mergeable sequence from the - // beginning one may start later in the sequence. The only - // reason a merge of size N could have failed where another of - // the same size would not have is if the alignment or either - // the load or store has improved. Drop as many candidates as we - // can here. - unsigned NumSkip = 1; - while ((NumSkip < LoadNodes.size()) && - (LoadNodes[NumSkip].MemNode->getAlignment() <= FirstLoadAlign) && - (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign)) - NumSkip++; - StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip); - LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumSkip); - NumConsecutiveStores -= NumSkip; - continue; - } + // Only use vector types if the vector type is larger than the integer + // type. If they are the same, use integers. + bool UseVectorTy = + LastLegalVectorType > LastLegalIntegerType && AllowVectors; + unsigned LastLegalType = + std::max(LastLegalVectorType, LastLegalIntegerType); + + // We add +1 here because the LastXXX variables refer to location while + // the NumElem refers to array/index size. + unsigned NumElem = std::min(NumConsecutiveStores, LastConsecutiveLoad + 1); + NumElem = std::min(LastLegalType, NumElem); + + if (NumElem < 2) { + // We know that candidate stores are in order and of correct + // shape. While there is no mergeable sequence from the + // beginning one may start later in the sequence. 
The only + // reason a merge of size N could have failed where another of + // the same size would not have is if the alignment or either + // the load or store has improved. Drop as many candidates as we + // can here. + unsigned NumSkip = 1; + while ((NumSkip < LoadNodes.size()) && + (LoadNodes[NumSkip].MemNode->getAlignment() <= FirstLoadAlign) && + (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign)) + NumSkip++; + StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip); + LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumSkip); + NumConsecutiveStores -= NumSkip; + continue; + } - // Check that we can merge these candidates without causing a cycle. - if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem, - RootNode)) { - StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem); - LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem); - NumConsecutiveStores -= NumElem; - continue; - } + // Check that we can merge these candidates without causing a cycle. + if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem, + RootNode)) { + StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem); + LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem); + NumConsecutiveStores -= NumElem; + continue; + } - // Find if it is better to use vectors or integers to load and store - // to memory. - EVT JointMemOpVT; - if (UseVectorTy) { - // Find a legal type for the vector store. - unsigned Elts = NumElem * NumMemElts; - JointMemOpVT = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts); - } else { - unsigned SizeInBits = NumElem * ElementSizeBytes * 8; - JointMemOpVT = EVT::getIntegerVT(Context, SizeInBits); - } + // Find if it is better to use vectors or integers to load and store + // to memory. + EVT JointMemOpVT; + if (UseVectorTy) { + // Find a legal type for the vector store. 
+ unsigned Elts = NumElem * NumMemElts; + JointMemOpVT = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts); + } else { + unsigned SizeInBits = NumElem * ElementSizeBytes * 8; + JointMemOpVT = EVT::getIntegerVT(Context, SizeInBits); + } + + SDLoc LoadDL(LoadNodes[0].MemNode); + SDLoc StoreDL(StoreNodes[0].MemNode); + + // The merged loads are required to have the same incoming chain, so + // using the first's chain is acceptable. + + SDValue NewStoreChain = getMergeStoreChains(StoreNodes, NumElem); + AddToWorklist(NewStoreChain.getNode()); + + MachineMemOperand::Flags LdMMOFlags = + isDereferenceable ? MachineMemOperand::MODereferenceable + : MachineMemOperand::MONone; + if (IsNonTemporalLoad) + LdMMOFlags |= MachineMemOperand::MONonTemporal; + + MachineMemOperand::Flags StMMOFlags = IsNonTemporalStore + ? MachineMemOperand::MONonTemporal + : MachineMemOperand::MONone; + + SDValue NewLoad, NewStore; + if (UseVectorTy || !DoIntegerTruncate) { + NewLoad = DAG.getLoad( + JointMemOpVT, LoadDL, FirstLoad->getChain(), FirstLoad->getBasePtr(), + FirstLoad->getPointerInfo(), FirstLoadAlign, LdMMOFlags); + NewStore = DAG.getStore( + NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(), + FirstInChain->getPointerInfo(), FirstStoreAlign, StMMOFlags); + } else { // This must be the truncstore/extload case + EVT ExtendedTy = + TLI.getTypeToTransformTo(*DAG.getContext(), JointMemOpVT); + NewLoad = DAG.getExtLoad(ISD::EXTLOAD, LoadDL, ExtendedTy, + FirstLoad->getChain(), FirstLoad->getBasePtr(), + FirstLoad->getPointerInfo(), JointMemOpVT, + FirstLoadAlign, LdMMOFlags); + NewStore = DAG.getTruncStore(NewStoreChain, StoreDL, NewLoad, + FirstInChain->getBasePtr(), + FirstInChain->getPointerInfo(), JointMemOpVT, + FirstInChain->getAlignment(), + FirstInChain->getMemOperand()->getFlags()); + } + + // Transfer chain users from old loads to the new load. 
+ for (unsigned i = 0; i < NumElem; ++i) { + LoadSDNode *Ld = cast(LoadNodes[i].MemNode); + DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), + SDValue(NewLoad.getNode(), 1)); + } + + // Replace all stores with the new store. Recursively remove corresponding + // values if they are no longer used. + for (unsigned i = 0; i < NumElem; ++i) { + SDValue Val = StoreNodes[i].MemNode->getOperand(1); + CombineTo(StoreNodes[i].MemNode, NewStore); + if (Val.getNode()->use_empty()) + recursivelyDeleteUnusedNodes(Val.getNode()); + } + + MadeChange = true; + StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem); + LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem); + NumConsecutiveStores -= NumElem; + } + return MadeChange; +} - SDLoc LoadDL(LoadNodes[0].MemNode); - SDLoc StoreDL(StoreNodes[0].MemNode); - - // The merged loads are required to have the same incoming chain, so - // using the first's chain is acceptable. - - SDValue NewStoreChain = getMergeStoreChains(StoreNodes, NumElem); - AddToWorklist(NewStoreChain.getNode()); - - MachineMemOperand::Flags LdMMOFlags = - isDereferenceable ? MachineMemOperand::MODereferenceable - : MachineMemOperand::MONone; - if (IsNonTemporalLoad) - LdMMOFlags |= MachineMemOperand::MONonTemporal; - - MachineMemOperand::Flags StMMOFlags = - IsNonTemporalStore ? 
MachineMemOperand::MONonTemporal - : MachineMemOperand::MONone; - - SDValue NewLoad, NewStore; - if (UseVectorTy || !DoIntegerTruncate) { - NewLoad = - DAG.getLoad(JointMemOpVT, LoadDL, FirstLoad->getChain(), - FirstLoad->getBasePtr(), FirstLoad->getPointerInfo(), - FirstLoadAlign, LdMMOFlags); - NewStore = DAG.getStore( - NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(), - FirstInChain->getPointerInfo(), FirstStoreAlign, StMMOFlags); - } else { // This must be the truncstore/extload case - EVT ExtendedTy = - TLI.getTypeToTransformTo(*DAG.getContext(), JointMemOpVT); - NewLoad = DAG.getExtLoad(ISD::EXTLOAD, LoadDL, ExtendedTy, - FirstLoad->getChain(), FirstLoad->getBasePtr(), - FirstLoad->getPointerInfo(), JointMemOpVT, - FirstLoadAlign, LdMMOFlags); - NewStore = DAG.getTruncStore(NewStoreChain, StoreDL, NewLoad, - FirstInChain->getBasePtr(), - FirstInChain->getPointerInfo(), - JointMemOpVT, FirstInChain->getAlignment(), - FirstInChain->getMemOperand()->getFlags()); - } +bool DAGCombiner::mergeConsecutiveStores(StoreSDNode *St) { + if (OptLevel == CodeGenOpt::None || !EnableStoreMerging) + return false; - // Transfer chain users from old loads to the new load. - for (unsigned i = 0; i < NumElem; ++i) { - LoadSDNode *Ld = cast(LoadNodes[i].MemNode); - DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), - SDValue(NewLoad.getNode(), 1)); - } + // TODO: Extend this function to merge stores of scalable vectors. + // (i.e. two stores can be merged to one + // store since we know is exactly twice as large as + // ). Until then, bail out for scalable vectors. + EVT MemVT = St->getMemoryVT(); + if (MemVT.isScalableVector()) + return false; + if (!MemVT.isSimple() || MemVT.getSizeInBits() * 2 > MaximumLegalStoreInBits) + return false; - // Replace all stores with the new store. Recursively remove corresponding - // values if they are no longer used. 
- for (unsigned i = 0; i < NumElem; ++i) { - SDValue Val = StoreNodes[i].MemNode->getOperand(1); - CombineTo(StoreNodes[i].MemNode, NewStore); - if (Val.getNode()->use_empty()) - recursivelyDeleteUnusedNodes(Val.getNode()); - } + // This function cannot currently deal with non-byte-sized memory sizes. + int64_t ElementSizeBytes = MemVT.getStoreSize(); + if (ElementSizeBytes * 8 != (int64_t)MemVT.getSizeInBits()) + return false; - MadeChange = true; - StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem); - LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem); - NumConsecutiveStores -= NumElem; + // Do not bother looking at stored values that are not constants, loads, or + // extracted vector elements. + SDValue StoredVal = peekThroughBitcasts(St->getValue()); + const StoreSource StoreSrc = getStoreSource(StoredVal); + if (StoreSrc == StoreSource::Unknown) + return false; + + SmallVector StoreNodes; + SDNode *RootNode; + // Find potential store merge candidates by searching through chain sub-DAG + getStoreMergeCandidates(St, StoreNodes, RootNode); + + // Check if there is anything to merge. + if (StoreNodes.size() < 2) + return false; + + // Sort the memory operands according to their distance from the + // base pointer. + llvm::sort(StoreNodes, [](MemOpLink LHS, MemOpLink RHS) { + return LHS.OffsetFromBase < RHS.OffsetFromBase; + }); + + bool AllowVectors = !DAG.getMachineFunction().getFunction().hasFnAttribute( + Attribute::NoImplicitFloat); + bool IsNonTemporalStore = St->isNonTemporal(); + bool IsNonTemporalLoad = StoreSrc == StoreSource::Load && + cast(StoredVal)->isNonTemporal(); + + // Store Merge attempts to merge the lowest stores. This generally + // works out as if successful, as the remaining stores are checked + // after the first collection of stores is merged. 
However, in the + // case that a non-mergeable store is found first, e.g., {p[-2], + // p[0], p[1], p[2], p[3]}, we would fail and miss the subsequent + // mergeable cases. To prevent this, we prune such stores from the + // front of StoreNodes here. + bool MadeChange = false; + while (StoreNodes.size() > 1) { + unsigned NumConsecutiveStores = + getConsecutiveStores(StoreNodes, ElementSizeBytes); + // There are no more stores in the list to examine. + if (NumConsecutiveStores == 0) + return MadeChange; + + // We have at least 2 consecutive stores. Try to merge them. + assert(NumConsecutiveStores >= 2 && "Expected at least 2 stores"); + switch (StoreSrc) { + case StoreSource::Constant: + MadeChange |= tryStoreMergeOfConstants(StoreNodes, NumConsecutiveStores, + MemVT, RootNode, AllowVectors); + break; + + case StoreSource::Extract: + MadeChange |= tryStoreMergeOfExtracts(StoreNodes, NumConsecutiveStores, + MemVT, RootNode); + break; + + case StoreSource::Load: + MadeChange |= tryStoreMergeOfLoads(StoreNodes, NumConsecutiveStores, + MemVT, RootNode, AllowVectors, + IsNonTemporalStore, IsNonTemporalLoad); + break; + + default: + llvm_unreachable("Unhandled store source type"); } } return MadeChange; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp index f6c29ac1d2fa92..755d5ef5cb10c1 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp @@ -239,7 +239,7 @@ void AMDGPUTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE, unsigned GCNTTIImpl::getHardwareNumberOfRegisters(bool Vec) const { // The concept of vector registers doesn't really exist. Some packed vector // operations operate on the normal 32-bit registers. 
- return 256; + return MaxVGPRs; } unsigned GCNTTIImpl::getNumberOfRegisters(bool Vec) const { @@ -248,6 +248,13 @@ unsigned GCNTTIImpl::getNumberOfRegisters(bool Vec) const { return getHardwareNumberOfRegisters(Vec) >> 3; } +unsigned GCNTTIImpl::getNumberOfRegisters(unsigned RCID) const { + const SIRegisterInfo *TRI = ST->getRegisterInfo(); + const TargetRegisterClass *RC = TRI->getRegClass(RCID); + unsigned NumVGPRs = (TRI->getRegSizeInBits(*RC) + 31) / 32; + return getHardwareNumberOfRegisters(false) / NumVGPRs; +} + unsigned GCNTTIImpl::getRegisterBitWidth(bool Vector) const { return 32; } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h index 508ed061e935c0..b8a027c79bfc23 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h @@ -74,6 +74,7 @@ class GCNTTIImpl final : public BasicTTIImplBase { AMDGPUTTIImpl CommonTTI; bool IsGraphicsShader; bool HasFP32Denormals; + unsigned MaxVGPRs; const FeatureBitset InlineFeatureIgnoreList = { // Codegen control options which don't matter. 
@@ -133,7 +134,11 @@ class GCNTTIImpl final : public BasicTTIImplBase { TLI(ST->getTargetLowering()), CommonTTI(TM, F), IsGraphicsShader(AMDGPU::isShader(F.getCallingConv())), - HasFP32Denormals(AMDGPU::SIModeRegisterDefaults(F).allFP32Denormals()) {} + HasFP32Denormals(AMDGPU::SIModeRegisterDefaults(F).allFP32Denormals()), + MaxVGPRs(ST->getMaxNumVGPRs( + std::max(ST->getWavesPerEU(F).first, + ST->getWavesPerEUForWorkGroup( + ST->getFlatWorkGroupSizes(F).second)))) {} bool hasBranchDivergence() { return true; } bool useGPUDivergenceAnalysis() const; @@ -148,6 +153,7 @@ class GCNTTIImpl final : public BasicTTIImplBase { unsigned getHardwareNumberOfRegisters(bool Vector) const; unsigned getNumberOfRegisters(bool Vector) const; + unsigned getNumberOfRegisters(unsigned RCID) const; unsigned getRegisterBitWidth(bool Vector) const; unsigned getMinVectorRegisterBitWidth() const; unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize, diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp index 59cb2b994a4b3b..dbaf221db9fc99 100644 --- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp +++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp @@ -46,7 +46,6 @@ static uint64_t adjustFixupValue(unsigned Kind, uint64_t Value) { case PPC::fixup_ppc_half16ds: return Value & 0xfffc; case PPC::fixup_ppc_pcrel34: - case PPC::fixup_ppc_imm34: return Value & 0x3ffffffff; } } @@ -69,7 +68,6 @@ static unsigned getFixupKindNumBytes(unsigned Kind) { case PPC::fixup_ppc_br24_notoc: return 4; case PPC::fixup_ppc_pcrel34: - case PPC::fixup_ppc_imm34: case FK_Data_8: return 8; case PPC::fixup_ppc_nofixup: @@ -102,7 +100,6 @@ class PPCAsmBackend : public MCAsmBackend { { "fixup_ppc_half16", 0, 16, 0 }, { "fixup_ppc_half16ds", 0, 14, 0 }, { "fixup_ppc_pcrel34", 0, 34, MCFixupKindInfo::FKF_IsPCRel }, - { "fixup_ppc_imm34", 0, 34, 0 }, { "fixup_ppc_nofixup", 0, 0, 0 } }; const static MCFixupKindInfo 
InfosLE[PPC::NumTargetFixupKinds] = { @@ -115,7 +112,6 @@ class PPCAsmBackend : public MCAsmBackend { { "fixup_ppc_half16", 0, 16, 0 }, { "fixup_ppc_half16ds", 2, 14, 0 }, { "fixup_ppc_pcrel34", 0, 34, MCFixupKindInfo::FKF_IsPCRel }, - { "fixup_ppc_imm34", 0, 34, 0 }, { "fixup_ppc_nofixup", 0, 0, 0 } }; diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp index 1af08ec5539d35..d8b3301e97f129 100644 --- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp +++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp @@ -409,9 +409,6 @@ unsigned PPCELFObjectWriter::getRelocType(MCContext &Ctx, const MCValue &Target, break; } break; - case PPC::fixup_ppc_imm34: - llvm_unreachable("Unsupported Modifier for fixup_ppc_imm34."); - break; case FK_Data_8: switch (Modifier) { default: llvm_unreachable("Unsupported Modifier"); diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h index 73292f7b7938f7..2fb8947fd4e0f0 100644 --- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h +++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h @@ -43,9 +43,6 @@ enum Fixups { // A 34-bit fixup corresponding to PC-relative paddi. fixup_ppc_pcrel34, - // A 34-bit fixup corresponding to Non-PC-relative paddi. - fixup_ppc_imm34, - /// Not a true fixup, but ties a symbol to a call to __tls_get_addr for the /// TLS general and local dynamic models, or inserts the thread-pointer /// register number. 
diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp index 8c0e0a80b1e2c7..fb65e7320f2b0c 100644 --- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp +++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp @@ -104,36 +104,20 @@ unsigned PPCMCCodeEmitter::getImm16Encoding(const MCInst &MI, unsigned OpNo, return 0; } -uint64_t PPCMCCodeEmitter::getImm34Encoding(const MCInst &MI, unsigned OpNo, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI, - MCFixupKind Fixup) const { +uint64_t +PPCMCCodeEmitter::getImm34Encoding(const MCInst &MI, unsigned OpNo, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { const MCOperand &MO = MI.getOperand(OpNo); - assert(!MO.isReg() && "Not expecting a register for this operand."); - if (MO.isImm()) + if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO, Fixups, STI); // Add a fixup for the immediate field. - Fixups.push_back(MCFixup::create(0, MO.getExpr(), Fixup)); + Fixups.push_back(MCFixup::create(0, MO.getExpr(), + (MCFixupKind)PPC::fixup_ppc_pcrel34)); return 0; } -uint64_t -PPCMCCodeEmitter::getImm34EncodingNoPCRel(const MCInst &MI, unsigned OpNo, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const { - return getImm34Encoding(MI, OpNo, Fixups, STI, - (MCFixupKind)PPC::fixup_ppc_imm34); -} - -uint64_t -PPCMCCodeEmitter::getImm34EncodingPCRel(const MCInst &MI, unsigned OpNo, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const { - return getImm34Encoding(MI, OpNo, Fixups, STI, - (MCFixupKind)PPC::fixup_ppc_pcrel34); -} - unsigned PPCMCCodeEmitter::getMemRIEncoding(const MCInst &MI, unsigned OpNo, SmallVectorImpl &Fixups, const MCSubtargetInfo &STI) const { diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.h b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.h index 4504cc6a7405ed..588aa76bd80642 100644 --- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.h +++ 
b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.h @@ -52,14 +52,7 @@ class PPCMCCodeEmitter : public MCCodeEmitter { const MCSubtargetInfo &STI) const; uint64_t getImm34Encoding(const MCInst &MI, unsigned OpNo, SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI, - MCFixupKind Fixup) const; - uint64_t getImm34EncodingNoPCRel(const MCInst &MI, unsigned OpNo, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const; - uint64_t getImm34EncodingPCRel(const MCInst &MI, unsigned OpNo, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const; + const MCSubtargetInfo &STI) const; unsigned getMemRIEncoding(const MCInst &MI, unsigned OpNo, SmallVectorImpl &Fixups, const MCSubtargetInfo &STI) const; diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td index 39a90bf9b346ec..673ab63039cf72 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td @@ -757,13 +757,7 @@ def PPCS34ImmAsmOperand : AsmOperandClass { } def s34imm : Operand { let PrintMethod = "printS34ImmOperand"; - let EncoderMethod = "getImm34EncodingNoPCRel"; - let ParserMatchClass = PPCS34ImmAsmOperand; - let DecoderMethod = "decodeSImmOperand<34>"; -} -def s34imm_pcrel : Operand { - let PrintMethod = "printS34ImmOperand"; - let EncoderMethod = "getImm34EncodingPCRel"; + let EncoderMethod = "getImm34Encoding"; let ParserMatchClass = PPCS34ImmAsmOperand; let DecoderMethod = "decodeSImmOperand<34>"; } diff --git a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td index 91bb912e572697..2c21d0a175ad48 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td +++ b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td @@ -432,7 +432,7 @@ let Predicates = [PrefixInstrs] in { let Interpretation64Bit = 1, isCodeGenOnly = 1 in { defm PADDI8 : MLS_DForm_R_SI34_RTA5_p<14, (outs g8rc:$RT), (ins g8rc:$RA, s34imm:$SI), - (ins immZero:$RA, s34imm_pcrel:$SI), + (ins immZero:$RA, s34imm:$SI), "paddi $RT, 
$RA, $SI", IIC_LdStLFD>; let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in { def PLI8 : MLS_DForm_SI34_RT5<14, (outs g8rc:$RT), @@ -442,7 +442,7 @@ let Predicates = [PrefixInstrs] in { } defm PADDI : MLS_DForm_R_SI34_RTA5_p<14, (outs gprc:$RT), (ins gprc:$RA, s34imm:$SI), - (ins immZero:$RA, s34imm_pcrel:$SI), + (ins immZero:$RA, s34imm:$SI), "paddi $RT, $RA, $SI", IIC_LdStLFD>; let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in { def PLI : MLS_DForm_SI34_RT5<14, (outs gprc:$RT), diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp index e810b3de25bc85..d1c1e541882516 100644 --- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -123,13 +123,8 @@ STATISTIC(NumReassoc , "Number of reassociations"); DEBUG_COUNTER(VisitCounter, "instcombine-visit", "Controls which instructions are visited"); -// FIXME: these limits eventually should be as low as 2. 
static constexpr unsigned InstCombineDefaultMaxIterations = 1000; -#ifndef NDEBUG -static constexpr unsigned InstCombineDefaultInfiniteLoopThreshold = 100; -#else static constexpr unsigned InstCombineDefaultInfiniteLoopThreshold = 1000; -#endif static cl::opt EnableCodeSinking("instcombine-code-sinking", cl::desc("Enable code sinking"), diff --git a/llvm/lib/Transforms/Utils/PredicateInfo.cpp b/llvm/lib/Transforms/Utils/PredicateInfo.cpp index d320f488c5c511..c81efd77aa5ff3 100644 --- a/llvm/lib/Transforms/Utils/PredicateInfo.cpp +++ b/llvm/lib/Transforms/Utils/PredicateInfo.cpp @@ -896,18 +896,21 @@ class PredicateInfoAnnotatedWriter : public AssemblyAnnotationWriter { PB->From->printAsOperand(OS); OS << ","; PB->To->printAsOperand(OS); - OS << "] }\n"; + OS << "]"; } else if (const auto *PS = dyn_cast(PI)) { OS << "; switch predicate info { CaseValue: " << *PS->CaseValue << " Switch:" << *PS->Switch << " Edge: ["; PS->From->printAsOperand(OS); OS << ","; PS->To->printAsOperand(OS); - OS << "] }\n"; + OS << "]"; } else if (const auto *PA = dyn_cast(PI)) { OS << "; assume predicate info {" - << " Comparison:" << *PA->Condition << " }\n"; + << " Comparison:" << *PA->Condition; } + OS << ", RenamedOp: "; + PI->RenamedOp->printAsOperand(OS, false); + OS << " }\n"; } } }; diff --git a/llvm/test/MC/PowerPC/ppc64-errors-emit-obj.s b/llvm/test/MC/PowerPC/ppc64-errors-emit-obj.s deleted file mode 100644 index 0d2c879380e0a6..00000000000000 --- a/llvm/test/MC/PowerPC/ppc64-errors-emit-obj.s +++ /dev/null @@ -1,7 +0,0 @@ -# RUN: not --crash llvm-mc -triple powerpc64-- --filetype=obj < %s 2> %t -# RUN: FileCheck < %t %s -# RUN: not --crash llvm-mc -triple powerpc64le-- --filetype=obj < %s 2> %t -# RUN: FileCheck < %t %s - -# CHECK: Unsupported Modifier for fixup_ppc_imm34. 
-paddi 3, 13, symbol@toc, 0 diff --git a/llvm/test/Reduce/remove-attributes-from-intrinsic-like-functions.ll b/llvm/test/Reduce/remove-attributes-from-intrinsic-like-functions.ll new file mode 100644 index 00000000000000..60df12e94feb26 --- /dev/null +++ b/llvm/test/Reduce/remove-attributes-from-intrinsic-like-functions.ll @@ -0,0 +1,40 @@ +; Just because a function is named like an intrinsic does not mean we should skip it's attributes. +; +; RUN: llvm-reduce --test FileCheck --test-arg --check-prefixes=CHECK-ALL,CHECK-INTERESTINGNESS --test-arg %s --test-arg --input-file %s -o %t +; RUN: cat %t | FileCheck --check-prefixes=CHECK-ALL,CHECK-FINAL %s + +; CHECK-ALL: declare i32 @llvm.not.really.an.intrinsic(i32, i32) #0 +declare i32 @llvm.not.really.an.intrinsic(i32, i32) #0 + +define i32 @t(i32 %a) { +; CHECK-ALL-LABEL: @t( + +; CHECK-INTERESTINGNESS: %r = +; CHECK-INTERESTINGNESS-SAME: call +; CHECK-INTERESTINGNESS-SAME: "arg0" +; CHECK-INTERESTINGNESS-SAME: i32 @llvm.not.really.an.intrinsic(i32 +; CHECK-INTERESTINGNESS-SAME: "arg3" +; CHECK-INTERESTINGNESS-SAME: %a +; CHECK-INTERESTINGNESS-SAME: i32 +; CHECK-INTERESTINGNESS-SAME: %a +; CHECK-INTERESTINGNESS-SAME: #1 + +; CHECK-FINAL: %r = call "arg0" i32 @llvm.not.really.an.intrinsic(i32 "arg3" %a, i32 %a) #1 +; CHECK-ALL: ret i32 %r + + %r = call "arg0" "arg1" i32 @llvm.not.really.an.intrinsic(i32 "arg2" "arg3" %a, i32 %a) "arg4" "arg5" + ret i32 %r +} + +; CHECK-INTERESTINGNESS: attributes #0 = { +; CHECK-INTERESTINGNESS-SAME: "arg6" + +; CHECK-INTERESTINGNESS: attributes #1 = { +; CHECK-INTERESTINGNESS-SAME: "arg4" + +; CHECK-FINAL: attributes #0 = { "arg6" } +; CHECK-FINAL: attributes #1 = { "arg4" } + +; CHECK-ALL-NOT: attributes # + +attributes #0 = { "arg6" "arg7" } diff --git a/llvm/test/Reduce/remove-attributes-from-intrinsics.ll b/llvm/test/Reduce/remove-attributes-from-intrinsics.ll new file mode 100644 index 00000000000000..7a8a8f0eb1146c --- /dev/null +++ 
b/llvm/test/Reduce/remove-attributes-from-intrinsics.ll @@ -0,0 +1,38 @@ +; We can't actually put attributes on intrinsic declarations, only on call sites. +; +; RUN: llvm-reduce --test FileCheck --test-arg --check-prefixes=CHECK-ALL,CHECK-INTERESTINGNESS --test-arg %s --test-arg --input-file %s -o %t +; RUN: cat %t | FileCheck --check-prefixes=CHECK-ALL,CHECK-FINAL %s + +define i32 @t(i32 %a) { +; CHECK-ALL-LABEL: @t( + +; CHECK-INTERESTINGNESS: %r = +; CHECK-INTERESTINGNESS-SAME: call +; CHECK-INTERESTINGNESS-SAME: "arg0" +; CHECK-INTERESTINGNESS-SAME: i32 @llvm.uadd.sat.i32(i32 +; CHECK-INTERESTINGNESS-SAME: "arg3" +; CHECK-INTERESTINGNESS-SAME: %a +; CHECK-INTERESTINGNESS-SAME: i32 +; CHECK-INTERESTINGNESS-SAME: %a +; CHECK-INTERESTINGNESS-SAME: #1 + +; CHECK-FINAL: %r = call "arg0" i32 @llvm.uadd.sat.i32(i32 "arg3" %a, i32 %a) #1 +; CHECK-ALL: ret i32 %r + + %r = call "arg0" "arg1" i32 @llvm.uadd.sat.i32(i32 "arg2" "arg3" %a, i32 %a) "arg4" "arg5" + ret i32 %r +} + +; CHECK-ALL: declare i32 @llvm.uadd.sat.i32(i32, i32) #0 +declare i32 @llvm.uadd.sat.i32(i32, i32) #0 + +; CHECK-ALL: attributes #0 = { nounwind readnone speculatable willreturn } + +; CHECK-INTERESTINGNESS: attributes #1 = { +; CHECK-INTERESTINGNESS-SAME: "arg4" + +; CHECK-FINAL: attributes #1 = { "arg4" } + +; CHECK-ALL-NOT: attributes # + +attributes #0 = { "arg6" "arg7" } diff --git a/llvm/test/Reduce/remove-call-site-attributes.ll b/llvm/test/Reduce/remove-call-site-attributes.ll new file mode 100644 index 00000000000000..e8f50355812abc --- /dev/null +++ b/llvm/test/Reduce/remove-call-site-attributes.ll @@ -0,0 +1,38 @@ +; Test that llvm-reduce can remove uninteresting operand bundles from calls. 
+; +; RUN: llvm-reduce --test FileCheck --test-arg --check-prefixes=CHECK-ALL,CHECK-INTERESTINGNESS --test-arg %s --test-arg --input-file %s -o %t +; RUN: cat %t | FileCheck --check-prefixes=CHECK-ALL,CHECK-FINAL %s + +; CHECK-ALL: declare i32 @f1(i32, i32) +declare i32 @f1(i32, i32) + +; CHECK-FINAL-LABEL: define i32 @interesting(i32 %arg0, i32 %arg1) { +; CHECK-FINAL-NEXT: entry: +; CHECK-FINAL-NEXT: %r = call "attr0" i32 @f1(i32 "attr4" %arg0, i32 %arg1) #0 +; CHECK-FINAL-NEXT: ret i32 %r +; CHECK-FINAL-NEXT: } +define i32 @interesting(i32 %arg0, i32 %arg1) { +entry: +; CHECK-INTERESTINGNESS-LABEL: @interesting( + +; CHECK-INTERESTINGNESS: %r = call +; CHECK-INTERESTINGNESS-SAME: "attr0" +; CHECK-INTERESTINGNESS-SAME: i32 @f1( +; CHECK-INTERESTINGNESS-SAME: i32 +; CHECK-INTERESTINGNESS-SAME: "attr4" +; CHECK-INTERESTINGNESS-SAME: %arg0 +; CHECK-INTERESTINGNESS-SAME: i32 +; CHECK-INTERESTINGNESS-SAME: %arg1 +; CHECK-INTERESTINGNESS-SAME: #0 +; CHECK-INTERESTINGNESS: ret i32 %r + + %r = call "attr0" "attr1" "attr2" i32 @f1(i32 "attr3" "attr4" "attr5" %arg0, i32 "attr6" "attr7" "attr8" %arg1) #0 + ret i32 %r +} + +; CHECK-INTERESTINGNESS: attributes #0 = { +; CHECK-INTERESTINGNESS-SAME: "attr10" + +; CHECK-FINAL: attributes #0 = { "attr10" } + +attributes #0 = { "attr9" "attr10" "attr11" } diff --git a/llvm/test/Reduce/remove-function-attributes.ll b/llvm/test/Reduce/remove-function-attributes.ll new file mode 100644 index 00000000000000..52bbda36f332ac --- /dev/null +++ b/llvm/test/Reduce/remove-function-attributes.ll @@ -0,0 +1,23 @@ +; Test that llvm-reduce can remove uninteresting attributes. 
+; +; RUN: llvm-reduce --test FileCheck --test-arg --check-prefixes=CHECK-ALL,CHECK-INTERESTINGNESS --test-arg %s --test-arg --input-file %s -o %t +; RUN: cat %t | FileCheck --check-prefixes=CHECK-ALL,CHECK-FINAL %s + +; CHECK-INTERESTINGNESS: declare +; CHECK-INTERESTINGNESS-SAME: "attr0" +; CHECK-INTERESTINGNESS-SAME: void @f0 +; CHECK-INTERESTINGNESS-SAME: i32 +; CHECK-INTERESTINGNESS-SAME: i32 +; CHECK-INTERESTINGNESS-SAME: "attr6" +; CHECK-INTERESTINGNESS-SAME: #0 + +; CHECK-FINAL: declare "attr0" void @f0(i32, i32 "attr6") #0 + +declare "attr0" "attr1" "attr2" void @f0(i32 "attr3" "attr4" "attr5", i32 "attr6" "attr7" "attr8") #0 + +; CHECK-INTERESTINGNESS: attributes #0 = { +; CHECK-INTERESTINGNESS-SAME: "attr10" + +; CHECK-FINAL: attributes #0 = { "attr10" } + +attributes #0 = { "attr9" "attr10" "attr11" } diff --git a/llvm/test/Reduce/remove-global-variable-attributes.ll b/llvm/test/Reduce/remove-global-variable-attributes.ll new file mode 100644 index 00000000000000..bec3afd960e903 --- /dev/null +++ b/llvm/test/Reduce/remove-global-variable-attributes.ll @@ -0,0 +1,27 @@ +; Test that llvm-reduce can remove uninteresting attributes. 
+; +; RUN: llvm-reduce --test FileCheck --test-arg --check-prefixes=CHECK-ALL,CHECK-INTERESTINGNESS --test-arg %s --test-arg --input-file %s -o %t +; RUN: cat %t | FileCheck --check-prefixes=CHECK-ALL,CHECK-FINAL %s + +; CHECK-ALL: @gv0 = global i32 0 #0 +; CHECK-ALL-NEXT: @gv1 = global i32 0 #1 +; CHECK-ALL-NEXT: @gv2 = global i32 0 +@gv0 = global i32 0 #0 +@gv1 = global i32 0 #1 +@gv2 = global i32 0 #2 + +; CHECK-INTERESTINGNESS: attributes #0 = { +; CHECK-INTERESTINGNESS-SAME: "attr0" +; CHECK-INTERESTINGNESS-SAME: "attr2" + +; CHECK-INTERESTINGNESS-NEXT: attributes #1 = { +; CHECK-INTERESTINGNESS-SAME: "attr4" + +; CHECK-FINAL: attributes #0 = { "attr0" "attr2" } +; CHECK-FINAL-NEXT: attributes #1 = { "attr4" } + +; CHECK-FINAL-NOT: attributes #2 + +attributes #0 = { "attr0" "attr1" "attr2"} +attributes #1 = { "attr3" "attr4" "attr5"} +attributes #2 = { "attr6" "attr7" "attr8"} diff --git a/llvm/test/Transforms/Util/PredicateInfo/condprop.ll b/llvm/test/Transforms/Util/PredicateInfo/condprop.ll index daf6bb8b40a8ae..756457ab7fa9a9 100644 --- a/llvm/test/Transforms/Util/PredicateInfo/condprop.ll +++ b/llvm/test/Transforms/Util/PredicateInfo/condprop.ll @@ -138,7 +138,7 @@ define void @test4(i1 %b, i32 %x) { ; CHECK-NEXT: i32 2, label [[CASE0]] ; CHECK-NEXT: i32 3, label [[CASE3]] ; CHECK-NEXT: i32 4, label [[DEFAULT:%.*]] -; CHECK-NEXT: ] Edge: [label [[SW]],label %case1] } +; CHECK-NEXT: ] Edge: [label [[SW]],label %case1] ; CHECK-NEXT: [[X_0:%.*]] = call i32 @llvm.ssa.copy.{{.+}}(i32 [[X:%.*]]) ; CHECK-NEXT: switch i32 [[X]], label [[DEFAULT]] [ ; CHECK-NEXT: i32 0, label [[CASE0]] diff --git a/llvm/test/Transforms/Util/PredicateInfo/unnamed-types.ll b/llvm/test/Transforms/Util/PredicateInfo/unnamed-types.ll index 21e702178fd72a..d1e0f358fc9fc1 100644 --- a/llvm/test/Transforms/Util/PredicateInfo/unnamed-types.ll +++ b/llvm/test/Transforms/Util/PredicateInfo/unnamed-types.ll @@ -7,12 +7,12 @@ ; CHECK-LABEL: bb: ; CHECK: Has predicate info -; CHECK: branch 
predicate info { TrueEdge: 1 Comparison: %cmp1 = icmp ne %0* %arg, null Edge: [label %bb,label %bb1] } +; CHECK: branch predicate info { TrueEdge: 1 Comparison: %cmp1 = icmp ne %0* %arg, null Edge: [label %bb,label %bb1], RenamedOp: %arg } ; CHECK-NEXT: %arg.0 = call %0* @llvm.ssa.copy.{{.+}}(%0* %arg) ; CHECK-LABEL: bb1: ; CHECK: Has predicate info -; CHECK-NEXT: branch predicate info { TrueEdge: 0 Comparison: %cmp2 = icmp ne %1* null, %tmp Edge: [label %bb1,label %bb3] } +; CHECK-NEXT: branch predicate info { TrueEdge: 0 Comparison: %cmp2 = icmp ne %1* null, %tmp Edge: [label %bb1,label %bb3], RenamedOp: %tmp } ; CHECK-NEXT: %tmp.0 = call %1* @llvm.ssa.copy.{{.+}}(%1* %tmp) define void @f0(%0* %arg, %1* %tmp) { diff --git a/llvm/tools/llvm-reduce/CMakeLists.txt b/llvm/tools/llvm-reduce/CMakeLists.txt index 24eedac613f544..01b9d0b4afe1ac 100644 --- a/llvm/tools/llvm-reduce/CMakeLists.txt +++ b/llvm/tools/llvm-reduce/CMakeLists.txt @@ -14,6 +14,7 @@ add_llvm_tool(llvm-reduce TestRunner.cpp deltas/Delta.cpp deltas/ReduceArguments.cpp + deltas/ReduceAttributes.cpp deltas/ReduceBasicBlocks.cpp deltas/ReduceFunctions.cpp deltas/ReduceGlobalVars.cpp diff --git a/llvm/tools/llvm-reduce/DeltaManager.h b/llvm/tools/llvm-reduce/DeltaManager.h index 5635352b43d8a1..b1a4ee0df4dbe6 100644 --- a/llvm/tools/llvm-reduce/DeltaManager.h +++ b/llvm/tools/llvm-reduce/DeltaManager.h @@ -14,6 +14,7 @@ #include "TestRunner.h" #include "deltas/Delta.h" #include "deltas/ReduceArguments.h" +#include "deltas/ReduceAttributes.h" #include "deltas/ReduceBasicBlocks.h" #include "deltas/ReduceFunctions.h" #include "deltas/ReduceGlobalVars.h" @@ -32,6 +33,7 @@ inline void runDeltaPasses(TestRunner &Tester) { reduceArgumentsDeltaPass(Tester); reduceInstructionsDeltaPass(Tester); reduceOperandBundesDeltaPass(Tester); + reduceAttributesDeltaPass(Tester); // TODO: Implement the remaining Delta Passes } diff --git a/llvm/tools/llvm-reduce/deltas/ReduceAttributes.cpp 
b/llvm/tools/llvm-reduce/deltas/ReduceAttributes.cpp new file mode 100644 index 00000000000000..cbaf5d5efd346a --- /dev/null +++ b/llvm/tools/llvm-reduce/deltas/ReduceAttributes.cpp @@ -0,0 +1,200 @@ +//===- ReduceAttributes.cpp - Specialized Delta Pass -------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements a function which calls the Generic Delta pass in order +// to reduce uninteresting attributes. +// +//===----------------------------------------------------------------------===// + +#include "ReduceAttributes.h" +#include "Delta.h" +#include "TestRunner.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/Sequence.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/iterator_range.h" +#include "llvm/IR/Attributes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/InstVisitor.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/raw_ostream.h" +#include +#include +#include +#include +#include + +namespace llvm { +class LLVMContext; +} // namespace llvm + +using namespace llvm; + +namespace { + +using AttrPtrVecTy = std::vector; +using AttrPtrIdxVecVecTy = std::pair; +using AttrPtrVecVecTy = SmallVector; + +/// Given ChunksToKeep, produce a map of global variables/functions/calls +/// and indexes of attributes to be preserved for each of them. 
+class AttributeRemapper : public InstVisitor { + Oracle O; + +public: + DenseMap GlobalVariablesToRefine; + DenseMap FunctionsToRefine; + DenseMap CallsToRefine; + + explicit AttributeRemapper(ArrayRef ChunksToKeep) : O(ChunksToKeep) {} + + void visitModule(Module &M) { + for (GlobalVariable &GV : M.getGlobalList()) + visitGlobalVariable(GV); + } + + void visitGlobalVariable(GlobalVariable &GV) { + // Global variables only have one attribute set. + const AttributeSet &AS = GV.getAttributes(); + if (AS.hasAttributes()) + visitAttributeSet(AS, GlobalVariablesToRefine[&GV]); + } + + void visitFunction(Function &F) { + if (F.getIntrinsicID() != Intrinsic::not_intrinsic) + return; // We can neither add nor remove attributes from intrinsics. + visitAttributeList(F.getAttributes(), FunctionsToRefine[&F]); + } + + void visitCallBase(CallBase &I) { + visitAttributeList(I.getAttributes(), CallsToRefine[&I]); + } + + void visitAttributeList(const AttributeList &AL, + AttrPtrVecVecTy &AttributeSetsToPreserve) { + assert(AttributeSetsToPreserve.empty() && "Should not be sharing vectors."); + AttributeSetsToPreserve.reserve(AL.getNumAttrSets()); + for (unsigned SetIdx : seq(AL.index_begin(), AL.index_end())) { + AttrPtrIdxVecVecTy AttributesToPreserve; + AttributesToPreserve.first = SetIdx; + visitAttributeSet(AL.getAttributes(AttributesToPreserve.first), + AttributesToPreserve.second); + if (!AttributesToPreserve.second.empty()) + AttributeSetsToPreserve.emplace_back(std::move(AttributesToPreserve)); + } + } + + void visitAttributeSet(const AttributeSet &AS, + AttrPtrVecTy &AttrsToPreserve) { + assert(AttrsToPreserve.empty() && "Should not be sharing vectors."); + AttrsToPreserve.reserve(AS.getNumAttributes()); + for (const Attribute &A : AS) + if (O.shouldKeep()) + AttrsToPreserve.emplace_back(&A); + } +}; + +struct AttributeCounter : public InstVisitor { + /// How many features (in this case, attributes) did we count, total? 
+ int AttributeCount = 0; + + void visitModule(Module &M) { + for (GlobalVariable &GV : M.getGlobalList()) + visitGlobalVariable(GV); + } + + void visitGlobalVariable(GlobalVariable &GV) { + // Global variables only have one attribute set. + visitAttributeSet(GV.getAttributes()); + } + + void visitFunction(Function &F) { + if (F.getIntrinsicID() != Intrinsic::not_intrinsic) + return; // We can neither add nor remove attributes from intrinsics. + visitAttributeList(F.getAttributes()); + } + + void visitCallBase(CallBase &I) { visitAttributeList(I.getAttributes()); } + + void visitAttributeList(const AttributeList &AL) { + for (const AttributeSet &AS : AL) + visitAttributeSet(AS); + } + + void visitAttributeSet(const AttributeSet &AS) { + AttributeCount += AS.getNumAttributes(); + } +}; + +} // namespace + +AttributeSet +convertAttributeRefToAttributeSet(LLVMContext &C, + ArrayRef Attributes) { + AttrBuilder B; + for (const Attribute *A : Attributes) + B.addAttribute(*A); + return AttributeSet::get(C, B); +} + +AttributeList convertAttributeRefVecToAttributeList( + LLVMContext &C, ArrayRef AttributeSets) { + std::vector> SetVec; + SetVec.reserve(AttributeSets.size()); + + transform(AttributeSets, std::back_inserter(SetVec), + [&C](const AttrPtrIdxVecVecTy &V) { + return std::make_pair( + V.first, convertAttributeRefToAttributeSet(C, V.second)); + }); + + sort(SetVec, [](const std::pair &LHS, + const std::pair &RHS) { + return LHS.first < RHS.first; // All values are unique. + }); + + return AttributeList::get(C, SetVec); +} + +/// Removes out-of-chunk attributes from module. 
+static void extractAttributesFromModule(std::vector ChunksToKeep, + Module *Program) { + AttributeRemapper R(ChunksToKeep); + R.visit(Program); + + LLVMContext &C = Program->getContext(); + for (const auto &I : R.GlobalVariablesToRefine) + I.first->setAttributes(convertAttributeRefToAttributeSet(C, I.second)); + for (const auto &I : R.FunctionsToRefine) + I.first->setAttributes(convertAttributeRefVecToAttributeList(C, I.second)); + for (const auto &I : R.CallsToRefine) + I.first->setAttributes(convertAttributeRefVecToAttributeList(C, I.second)); +} + +/// Counts the amount of attributes. +static int countAttributes(Module *Program) { + AttributeCounter C; + + // TODO: Silence index with --quiet flag + outs() << "----------------------------\n"; + C.visit(Program); + outs() << "Number of attributes: " << C.AttributeCount << "\n"; + + return C.AttributeCount; +} + +void llvm::reduceAttributesDeltaPass(TestRunner &Test) { + outs() << "*** Reducing Attributes...\n"; + int AttributeCount = countAttributes(Test.getProgram()); + runDeltaPass(Test, AttributeCount, extractAttributesFromModule); +} diff --git a/llvm/tools/llvm-reduce/deltas/ReduceAttributes.h b/llvm/tools/llvm-reduce/deltas/ReduceAttributes.h new file mode 100644 index 00000000000000..f8deb045560ff5 --- /dev/null +++ b/llvm/tools/llvm-reduce/deltas/ReduceAttributes.h @@ -0,0 +1,20 @@ +//===- ReduceAttributes.h - Specialized Delta Pass ------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements a function which calls the Generic Delta pass in order +// to reduce uninteresting attributes. 
+// +//===----------------------------------------------------------------------===// + +namespace llvm { + +class TestRunner; + +void reduceAttributesDeltaPass(TestRunner &Test); + +} // namespace llvm diff --git a/llvm/tools/llvm-reduce/deltas/ReduceOperandBundles.cpp b/llvm/tools/llvm-reduce/deltas/ReduceOperandBundles.cpp index 3f1cb374081311..77cb73837c8270 100644 --- a/llvm/tools/llvm-reduce/deltas/ReduceOperandBundles.cpp +++ b/llvm/tools/llvm-reduce/deltas/ReduceOperandBundles.cpp @@ -56,10 +56,9 @@ class OperandBundleRemapper : public InstVisitor { OperandBundlesToKeepIndexes.reserve(Call.getNumOperandBundles()); // Enumerate every operand bundle on this call. - for_each(seq(0U, Call.getNumOperandBundles()), [&](unsigned BundleIndex) { + for (unsigned BundleIndex : seq(0U, Call.getNumOperandBundles())) if (O.shouldKeep()) // Should we keep this one? OperandBundlesToKeepIndexes.emplace_back(BundleIndex); - }); } }; @@ -67,8 +66,6 @@ struct OperandBundleCounter : public InstVisitor { /// How many features (in this case, operand bundles) did we count, total? int OperandBundeCount = 0; - OperandBundleCounter() {} - /// So far only CallBase sub-classes can have operand bundles. void visitCallBase(CallBase &Call) { // Just accumulate the total number of operand bundles. @@ -104,9 +101,8 @@ static void extractOperandBundesFromModule(std::vector ChunksToKeep, OperandBundleRemapper R(ChunksToKeep); R.visit(Program); - for_each(R.CallsToRefine, [](const auto &P) { - return maybeRewriteCallWithDifferentBundles(P.first, P.second); - }); + for (const auto &I : R.CallsToRefine) + maybeRewriteCallWithDifferentBundles(I.first, I.second); } /// Counts the amount of operand bundles. 
diff --git a/llvm/utils/gn/secondary/llvm/tools/llvm-reduce/BUILD.gn b/llvm/utils/gn/secondary/llvm/tools/llvm-reduce/BUILD.gn index efb8e40850c335..a8648d73ca0d14 100644 --- a/llvm/utils/gn/secondary/llvm/tools/llvm-reduce/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/tools/llvm-reduce/BUILD.gn @@ -12,6 +12,7 @@ executable("llvm-reduce") { "TestRunner.cpp", "deltas/Delta.cpp", "deltas/ReduceArguments.cpp", + "deltas/ReduceAttributes.cpp", "deltas/ReduceBasicBlocks.cpp", "deltas/ReduceFunctions.cpp", "deltas/ReduceGlobalVars.cpp",