diff --git a/clang/cmake/caches/Fuchsia-stage2.cmake b/clang/cmake/caches/Fuchsia-stage2.cmake index 9ecf8f300e75e7..4d9b8526d0c4d1 100644 --- a/clang/cmake/caches/Fuchsia-stage2.cmake +++ b/clang/cmake/caches/Fuchsia-stage2.cmake @@ -5,7 +5,6 @@ set(LLVM_TARGETS_TO_BUILD X86;ARM;AArch64;RISCV CACHE STRING "") set(PACKAGE_VENDOR Fuchsia CACHE STRING "") set(LLVM_ENABLE_PROJECTS "clang;clang-tools-extra;lld;llvm" CACHE STRING "") -set(LLVM_ENABLE_RUNTIMES "compiler-rt;libcxx;libcxxabi;libunwind" CACHE STRING "") set(LLVM_ENABLE_BACKTRACES OFF CACHE BOOL "") if(NOT APPLE) @@ -22,6 +21,10 @@ set(LLVM_INCLUDE_GO_TESTS OFF CACHE BOOL "") set(LLVM_USE_RELATIVE_PATHS_IN_FILES ON CACHE BOOL "") set(LLVM_ENABLE_Z3_SOLVER OFF CACHE BOOL "") +if(MSVC) + set(LLVM_USE_CRT_RELEASE "MT" CACHE STRING "") +endif() + set(CLANG_DEFAULT_CXX_STDLIB libc++ CACHE STRING "") if(NOT APPLE) set(CLANG_DEFAULT_LINKER lld CACHE STRING "") @@ -39,6 +42,8 @@ set(ENABLE_X86_RELAX_RELOCATIONS ON CACHE BOOL "") set(CMAKE_BUILD_TYPE Release CACHE STRING "") if (APPLE) set(MACOSX_DEPLOYMENT_TARGET 10.7 CACHE STRING "") +elseif(MSVC) + set(CMAKE_MSVC_RUNTIME_LIBRARY "MultiThreaded" CACHE STRING "") endif() if(APPLE) @@ -65,6 +70,26 @@ if(APPLE) set(DARWIN_iossim_ARCHS i386;x86_64 CACHE STRING "") set(DARWIN_osx_ARCHS x86_64 CACHE STRING "") set(SANITIZER_MIN_OSX_VERSION 10.7 CACHE STRING "") + set(LLVM_ENABLE_RUNTIMES "compiler-rt;libcxx;libcxxabi;libunwind" CACHE STRING "") +endif() + +if(WIN32) + set(target "x86_64-pc-windows-msvc") + + list(APPEND BUILTIN_TARGETS "${target}") + set(BUILTINS_${target}_CMAKE_SYSTEM_NAME Windows CACHE STRING "") + set(BUILTINS_${target}_CMAKE_BUILD_TYPE RelWithDebInfo CACHE STRING "") + + list(APPEND RUNTIME_TARGETS "${target}") + set(RUNTIMES_${target}_CMAKE_SYSTEM_NAME Windows CACHE STRING "") + set(RUNTIMES_${target}_CMAKE_BUILD_TYPE RelWithDebInfo CACHE STRING "") + set(RUNTIMES_${target}_LIBCXX_ABI_VERSION 2 CACHE STRING "") + 
set(RUNTIMES_${target}_LIBCXX_HAS_WIN32_THREAD_API ON CACHE BOOL "") + set(RUNTIMES_${target}_LIBCXX_ENABLE_EXPERIMENTAL_LIBRARY OFF CACHE BOOL "") + set(RUNTIMES_${target}_LIBCXX_ENABLE_FILESYSTEM OFF CACHE BOOL "") + set(RUNTIMES_${target}_LIBCXX_ENABLE_ABI_LINKER_SCRIPT OFF CACHE BOOL "") + set(RUNTIMES_${target}_LIBCXX_ENABLE_SHARED OFF CACHE BOOL "") + set(RUNTIMES_${target}_LLVM_ENABLE_RUNTIMES "compiler-rt;libcxx" CACHE STRING "") endif() foreach(target aarch64-unknown-linux-gnu;armv7-unknown-linux-gnueabihf;i386-unknown-linux-gnu;x86_64-unknown-linux-gnu) @@ -73,6 +98,9 @@ foreach(target aarch64-unknown-linux-gnu;armv7-unknown-linux-gnueabihf;i386-unkn list(APPEND BUILTIN_TARGETS "${target}") set(BUILTINS_${target}_CMAKE_SYSTEM_NAME Linux CACHE STRING "") set(BUILTINS_${target}_CMAKE_BUILD_TYPE RelWithDebInfo CACHE STRING "") + set(BUILTINS_${target}_CMAKE_C_FLAGS "--target=${target}" CACHE STRING "") + set(BUILTINS_${target}_CMAKE_CXX_FLAGS "--target=${target}" CACHE STRING "") + set(BUILTINS_${target}_CMAKE_ASM_FLAGS "--target=${target}" CACHE STRING "") set(BUILTINS_${target}_CMAKE_SYSROOT ${LINUX_${target}_SYSROOT} CACHE STRING "") set(BUILTINS_${target}_CMAKE_SHARED_LINKER_FLAGS "-fuse-ld=lld" CACHE STRING "") set(BUILTINS_${target}_CMAKE_MODULE_LINKER_FLAGS "-fuse-ld=lld" CACHE STRING "") @@ -82,6 +110,9 @@ foreach(target aarch64-unknown-linux-gnu;armv7-unknown-linux-gnueabihf;i386-unkn list(APPEND RUNTIME_TARGETS "${target}") set(RUNTIMES_${target}_CMAKE_SYSTEM_NAME Linux CACHE STRING "") set(RUNTIMES_${target}_CMAKE_BUILD_TYPE RelWithDebInfo CACHE STRING "") + set(RUNTIMES_${target}_CMAKE_C_FLAGS "--target=${target}" CACHE STRING "") + set(RUNTIMES_${target}_CMAKE_CXX_FLAGS "--target=${target}" CACHE STRING "") + set(RUNTIMES_${target}_CMAKE_ASM_FLAGS "--target=${target}" CACHE STRING "") set(RUNTIMES_${target}_CMAKE_SYSROOT ${LINUX_${target}_SYSROOT} CACHE STRING "") set(RUNTIMES_${target}_CMAKE_SHARED_LINKER_FLAGS "-fuse-ld=lld" CACHE STRING "") 
set(RUNTIMES_${target}_CMAKE_MODULE_LINKER_FLAGS "-fuse-ld=lld" CACHE STRING "") @@ -100,9 +131,9 @@ foreach(target aarch64-unknown-linux-gnu;armv7-unknown-linux-gnueabihf;i386-unkn set(RUNTIMES_${target}_LIBCXX_ENABLE_STATIC_ABI_LIBRARY ON CACHE BOOL "") set(RUNTIMES_${target}_LIBCXX_ABI_VERSION 2 CACHE STRING "") set(RUNTIMES_${target}_LLVM_ENABLE_ASSERTIONS ON CACHE BOOL "") - set(RUNTIMES_${target}_LLVM_ENABLE_RUNTIMES "compiler-rt;libcxx;libcxxabi;libunwind" CACHE STRING "") set(RUNTIMES_${target}_SANITIZER_CXX_ABI "libc++" CACHE STRING "") set(RUNTIMES_${target}_SANITIZER_CXX_ABI_INTREE ON CACHE BOOL "") + set(RUNTIMES_${target}_LLVM_ENABLE_RUNTIMES "compiler-rt;libcxx;libcxxabi;libunwind" CACHE STRING "") # Use .build-id link. list(APPEND RUNTIME_BUILD_ID_LINK "${target}") @@ -115,7 +146,7 @@ if(FUCHSIA_SDK) set(FUCHSIA_x86_64_NAME x64) set(FUCHSIA_riscv64_NAME riscv64) foreach(target i386;x86_64;aarch64;riscv64) - set(FUCHSIA_${target}_COMPILER_FLAGS "-I${FUCHSIA_SDK}/pkg/fdio/include") + set(FUCHSIA_${target}_COMPILER_FLAGS "--target=${target}-unknown-fuchsia -I${FUCHSIA_SDK}/pkg/fdio/include") set(FUCHSIA_${target}_LINKER_FLAGS "-L${FUCHSIA_SDK}/arch/${FUCHSIA_${target}_NAME}/lib") set(FUCHSIA_${target}_SYSROOT "${FUCHSIA_SDK}/arch/${FUCHSIA_${target}_NAME}/sysroot") endforeach() diff --git a/clang/cmake/caches/Fuchsia.cmake b/clang/cmake/caches/Fuchsia.cmake index bb22a00fa5c7f9..8688b71ecc7534 100644 --- a/clang/cmake/caches/Fuchsia.cmake +++ b/clang/cmake/caches/Fuchsia.cmake @@ -5,7 +5,6 @@ set(LLVM_TARGETS_TO_BUILD X86;ARM;AArch64;RISCV CACHE STRING "") set(PACKAGE_VENDOR Fuchsia CACHE STRING "") set(LLVM_ENABLE_PROJECTS "clang;clang-tools-extra;lld;llvm" CACHE STRING "") -set(LLVM_ENABLE_RUNTIMES "compiler-rt;libcxx;libcxxabi;libunwind" CACHE STRING "") set(LLVM_ENABLE_BACKTRACES OFF CACHE BOOL "") set(LLVM_ENABLE_PER_TARGET_RUNTIME_DIR ON CACHE BOOL "") @@ -16,6 +15,10 @@ set(LLVM_INCLUDE_DOCS OFF CACHE BOOL "") set(LLVM_INCLUDE_EXAMPLES OFF CACHE 
BOOL "") set(LLVM_INCLUDE_GO_TESTS OFF CACHE BOOL "") +if(MSVC) + set(LLVM_USE_CRT_RELEASE "MT" CACHE STRING "") +endif() + set(CLANG_DEFAULT_CXX_STDLIB libc++ CACHE STRING "") if(NOT APPLE) set(CLANG_DEFAULT_LINKER lld CACHE STRING "") @@ -32,8 +35,10 @@ set(ENABLE_X86_RELAX_RELOCATIONS ON CACHE BOOL "") set(LLVM_ENABLE_ASSERTIONS ON CACHE BOOL "") set(CMAKE_BUILD_TYPE Release CACHE STRING "") -if (APPLE) +if(APPLE) set(MACOSX_DEPLOYMENT_TARGET 10.7 CACHE STRING "") +elseif(MSVC) + set(CMAKE_MSVC_RUNTIME_LIBRARY "MultiThreaded" CACHE STRING "") endif() if(APPLE) @@ -52,8 +57,19 @@ set(LIBCXXABI_USE_COMPILER_RT ON CACHE BOOL "") set(LIBCXXABI_USE_LLVM_UNWINDER ON CACHE BOOL "") set(LIBCXX_ABI_VERSION 2 CACHE STRING "") set(LIBCXX_ENABLE_SHARED OFF CACHE BOOL "") -set(LIBCXX_ENABLE_STATIC_ABI_LIBRARY ON CACHE BOOL "") -set(LIBCXX_USE_COMPILER_RT ON CACHE BOOL "") +if(WIN32) + set(LIBCXX_HAS_WIN32_THREAD_API ON CACHE BOOL "") + set(LIBCXX_ENABLE_EXPERIMENTAL_LIBRARY OFF CACHE BOOL "") + set(LIBCXX_ENABLE_FILESYSTEM OFF CACHE BOOL "") + set(LIBCXX_ENABLE_ABI_LINKER_SCRIPT OFF CACHE BOOL "") + set(LIBCXX_ENABLE_STATIC_ABI_LIBRARY OFF CACHE BOOL "") + set(BUILTINS_CMAKE_ARGS -DCMAKE_SYSTEM_NAME=Windows CACHE STRING "") + set(RUNTIMES_CMAKE_ARGS -DCMAKE_SYSTEM_NAME=Windows CACHE STRING "") + set(LLVM_ENABLE_RUNTIMES "compiler-rt;libcxx" CACHE STRING "") +else() + set(LIBCXX_ENABLE_STATIC_ABI_LIBRARY ON CACHE BOOL "") + set(LLVM_ENABLE_RUNTIMES "compiler-rt;libcxx;libcxxabi;libunwind" CACHE STRING "") +endif() if(BOOTSTRAP_CMAKE_SYSTEM_NAME) set(target "${BOOTSTRAP_CMAKE_CXX_COMPILER_TARGET}") diff --git a/lld/unittests/MachOTests/MachONormalizedFileBinaryReaderTests.cpp b/lld/unittests/MachOTests/MachONormalizedFileBinaryReaderTests.cpp index 07c1d4242e03ba..aad5f8afcfdc39 100644 --- a/lld/unittests/MachOTests/MachONormalizedFileBinaryReaderTests.cpp +++ b/lld/unittests/MachOTests/MachONormalizedFileBinaryReaderTests.cpp @@ -75,7 +75,7 @@ TEST(BinaryReaderTest, 
empty_obj_x86_64) { fromBinary(fileBytes, sizeof(fileBytes), "x86_64"); EXPECT_EQ(f->arch, lld::MachOLinkingContext::arch_x86_64); EXPECT_EQ((int)(f->fileType), MH_OBJECT); - EXPECT_EQ(f->flags, MH_SUBSECTIONS_VIA_SYMBOLS); + EXPECT_EQ((int)(f->flags), MH_SUBSECTIONS_VIA_SYMBOLS); EXPECT_TRUE(f->localSymbols.empty()); EXPECT_TRUE(f->globalSymbols.empty()); EXPECT_TRUE(f->undefinedSymbols.empty()); @@ -106,7 +106,7 @@ TEST(BinaryReaderTest, empty_obj_x86) { fromBinary(fileBytes, sizeof(fileBytes), "i386"); EXPECT_EQ(f->arch, lld::MachOLinkingContext::arch_x86); EXPECT_EQ((int)(f->fileType), MH_OBJECT); - EXPECT_EQ(f->flags, MH_SUBSECTIONS_VIA_SYMBOLS); + EXPECT_EQ((int)(f->flags), MH_SUBSECTIONS_VIA_SYMBOLS); EXPECT_TRUE(f->localSymbols.empty()); EXPECT_TRUE(f->globalSymbols.empty()); EXPECT_TRUE(f->undefinedSymbols.empty()); @@ -137,7 +137,7 @@ TEST(BinaryReaderTest, empty_obj_ppc) { fromBinary(fileBytes, sizeof(fileBytes), "ppc"); EXPECT_EQ(f->arch, lld::MachOLinkingContext::arch_ppc); EXPECT_EQ((int)(f->fileType), MH_OBJECT); - EXPECT_EQ(f->flags, MH_SUBSECTIONS_VIA_SYMBOLS); + EXPECT_EQ((int)(f->flags), MH_SUBSECTIONS_VIA_SYMBOLS); EXPECT_TRUE(f->localSymbols.empty()); EXPECT_TRUE(f->globalSymbols.empty()); EXPECT_TRUE(f->undefinedSymbols.empty()); @@ -168,7 +168,7 @@ TEST(BinaryReaderTest, empty_obj_armv7) { fromBinary(fileBytes, sizeof(fileBytes), "armv7"); EXPECT_EQ(f->arch, lld::MachOLinkingContext::arch_armv7); EXPECT_EQ((int)(f->fileType), MH_OBJECT); - EXPECT_EQ(f->flags, MH_SUBSECTIONS_VIA_SYMBOLS); + EXPECT_EQ((int)(f->flags), MH_SUBSECTIONS_VIA_SYMBOLS); EXPECT_TRUE(f->localSymbols.empty()); EXPECT_TRUE(f->globalSymbols.empty()); EXPECT_TRUE(f->undefinedSymbols.empty()); @@ -182,7 +182,7 @@ TEST(BinaryReaderTest, empty_obj_x86_64_arm7) { fromBinary(fileBytes, sizeof(fileBytes), "x86_64"); EXPECT_EQ(f->arch, lld::MachOLinkingContext::arch_x86_64); EXPECT_EQ((int)(f->fileType), MH_OBJECT); - EXPECT_EQ(f->flags, MH_SUBSECTIONS_VIA_SYMBOLS); + 
EXPECT_EQ((int)(f->flags), MH_SUBSECTIONS_VIA_SYMBOLS); EXPECT_TRUE(f->localSymbols.empty()); EXPECT_TRUE(f->globalSymbols.empty()); EXPECT_TRUE(f->undefinedSymbols.empty()); @@ -191,7 +191,7 @@ TEST(BinaryReaderTest, empty_obj_x86_64_arm7) { fromBinary(fileBytes, sizeof(fileBytes), "armv7"); EXPECT_EQ(f2->arch, lld::MachOLinkingContext::arch_armv7); EXPECT_EQ((int)(f2->fileType), MH_OBJECT); - EXPECT_EQ(f2->flags, MH_SUBSECTIONS_VIA_SYMBOLS); + EXPECT_EQ((int)(f2->flags), MH_SUBSECTIONS_VIA_SYMBOLS); EXPECT_TRUE(f2->localSymbols.empty()); EXPECT_TRUE(f2->globalSymbols.empty()); EXPECT_TRUE(f2->undefinedSymbols.empty()); @@ -268,7 +268,7 @@ TEST(BinaryReaderTest, hello_obj_x86_64) { EXPECT_EQ(f->arch, lld::MachOLinkingContext::arch_x86_64); EXPECT_EQ((int)(f->fileType), MH_OBJECT); - EXPECT_EQ(f->flags, MH_SUBSECTIONS_VIA_SYMBOLS); + EXPECT_EQ((int)(f->flags), MH_SUBSECTIONS_VIA_SYMBOLS); EXPECT_EQ(f->sections.size(), 2UL); const Section& text = f->sections[0]; EXPECT_TRUE(text.segmentName.equals("__TEXT")); @@ -393,7 +393,7 @@ TEST(BinaryReaderTest, hello_obj_x86) { EXPECT_EQ(f->arch, lld::MachOLinkingContext::arch_x86); EXPECT_EQ((int)(f->fileType), MH_OBJECT); - EXPECT_EQ(f->flags, MH_SUBSECTIONS_VIA_SYMBOLS); + EXPECT_EQ((int)(f->flags), MH_SUBSECTIONS_VIA_SYMBOLS); EXPECT_EQ(f->sections.size(), 2UL); const Section& text = f->sections[0]; EXPECT_TRUE(text.segmentName.equals("__TEXT")); @@ -525,7 +525,7 @@ TEST(BinaryReaderTest, hello_obj_armv7) { EXPECT_EQ(f->arch, lld::MachOLinkingContext::arch_armv7); EXPECT_EQ((int)(f->fileType), MH_OBJECT); - EXPECT_EQ(f->flags, MH_SUBSECTIONS_VIA_SYMBOLS); + EXPECT_EQ((int)(f->flags), MH_SUBSECTIONS_VIA_SYMBOLS); EXPECT_EQ(f->sections.size(), 2UL); const Section& text = f->sections[0]; EXPECT_TRUE(text.segmentName.equals("__TEXT")); @@ -669,7 +669,7 @@ TEST(BinaryReaderTest, hello_obj_ppc) { EXPECT_EQ(f->arch, lld::MachOLinkingContext::arch_ppc); EXPECT_EQ((int)(f->fileType), MH_OBJECT); - EXPECT_EQ(f->flags, 
MH_SUBSECTIONS_VIA_SYMBOLS); + EXPECT_EQ((int)(f->flags), MH_SUBSECTIONS_VIA_SYMBOLS); EXPECT_EQ(f->sections.size(), 2UL); const Section& text = f->sections[0]; EXPECT_TRUE(text.segmentName.equals("__TEXT")); diff --git a/lld/unittests/MachOTests/MachONormalizedFileYAMLTests.cpp b/lld/unittests/MachOTests/MachONormalizedFileYAMLTests.cpp index c1445ea7eacd79..6ceb197b4b84a7 100644 --- a/lld/unittests/MachOTests/MachONormalizedFileYAMLTests.cpp +++ b/lld/unittests/MachOTests/MachONormalizedFileYAMLTests.cpp @@ -50,7 +50,7 @@ TEST(ObjectFileYAML, empty_ppc) { "...\n"); EXPECT_EQ(f->arch, lld::MachOLinkingContext::arch_ppc); EXPECT_EQ(f->fileType, llvm::MachO::MH_OBJECT); - EXPECT_EQ(f->flags, llvm::MachO::MH_SUBSECTIONS_VIA_SYMBOLS); + EXPECT_EQ((int)(f->flags), llvm::MachO::MH_SUBSECTIONS_VIA_SYMBOLS); EXPECT_TRUE(f->sections.empty()); EXPECT_TRUE(f->localSymbols.empty()); EXPECT_TRUE(f->globalSymbols.empty()); @@ -66,7 +66,7 @@ TEST(ObjectFileYAML, empty_x86_64) { "...\n"); EXPECT_EQ(f->arch, lld::MachOLinkingContext::arch_x86_64); EXPECT_EQ(f->fileType, llvm::MachO::MH_OBJECT); - EXPECT_EQ(f->flags, llvm::MachO::MH_SUBSECTIONS_VIA_SYMBOLS); + EXPECT_EQ((int)(f->flags), llvm::MachO::MH_SUBSECTIONS_VIA_SYMBOLS); EXPECT_TRUE(f->sections.empty()); EXPECT_TRUE(f->localSymbols.empty()); EXPECT_TRUE(f->globalSymbols.empty()); @@ -82,7 +82,7 @@ TEST(ObjectFileYAML, empty_x86) { "...\n"); EXPECT_EQ(f->arch, lld::MachOLinkingContext::arch_x86); EXPECT_EQ(f->fileType, llvm::MachO::MH_OBJECT); - EXPECT_EQ(f->flags, llvm::MachO::MH_SUBSECTIONS_VIA_SYMBOLS); + EXPECT_EQ((int)(f->flags), llvm::MachO::MH_SUBSECTIONS_VIA_SYMBOLS); EXPECT_TRUE(f->sections.empty()); EXPECT_TRUE(f->localSymbols.empty()); EXPECT_TRUE(f->globalSymbols.empty()); @@ -98,7 +98,7 @@ TEST(ObjectFileYAML, empty_armv6) { "...\n"); EXPECT_EQ(f->arch, lld::MachOLinkingContext::arch_armv6); EXPECT_EQ(f->fileType, llvm::MachO::MH_OBJECT); - EXPECT_EQ(f->flags, llvm::MachO::MH_SUBSECTIONS_VIA_SYMBOLS); + 
EXPECT_EQ((int)(f->flags), llvm::MachO::MH_SUBSECTIONS_VIA_SYMBOLS); EXPECT_TRUE(f->sections.empty()); EXPECT_TRUE(f->localSymbols.empty()); EXPECT_TRUE(f->globalSymbols.empty()); @@ -114,7 +114,7 @@ TEST(ObjectFileYAML, empty_armv7) { "...\n"); EXPECT_EQ(f->arch, lld::MachOLinkingContext::arch_armv7); EXPECT_EQ(f->fileType, llvm::MachO::MH_OBJECT); - EXPECT_EQ(f->flags, llvm::MachO::MH_SUBSECTIONS_VIA_SYMBOLS); + EXPECT_EQ((int)(f->flags), llvm::MachO::MH_SUBSECTIONS_VIA_SYMBOLS); EXPECT_TRUE(f->sections.empty()); EXPECT_TRUE(f->localSymbols.empty()); EXPECT_TRUE(f->globalSymbols.empty()); @@ -130,7 +130,7 @@ TEST(ObjectFileYAML, empty_armv7s) { "...\n"); EXPECT_EQ(f->arch, lld::MachOLinkingContext::arch_armv7s); EXPECT_EQ(f->fileType, llvm::MachO::MH_OBJECT); - EXPECT_EQ(f->flags, llvm::MachO::MH_SUBSECTIONS_VIA_SYMBOLS); + EXPECT_EQ((int)(f->flags), llvm::MachO::MH_SUBSECTIONS_VIA_SYMBOLS); EXPECT_TRUE(f->sections.empty()); EXPECT_TRUE(f->localSymbols.empty()); EXPECT_TRUE(f->globalSymbols.empty()); @@ -151,7 +151,7 @@ TEST(ObjectFileYAML, roundTrip) { std::unique_ptr f2 = fromYAML(intermediate); EXPECT_EQ(f2->arch, lld::MachOLinkingContext::arch_x86_64); EXPECT_EQ((int)(f2->fileType), llvm::MachO::MH_OBJECT); - EXPECT_EQ(f2->flags, llvm::MachO::MH_SUBSECTIONS_VIA_SYMBOLS); + EXPECT_EQ((int)(f2->flags), llvm::MachO::MH_SUBSECTIONS_VIA_SYMBOLS); EXPECT_TRUE(f2->sections.empty()); EXPECT_TRUE(f2->localSymbols.empty()); EXPECT_TRUE(f2->globalSymbols.empty()); @@ -275,7 +275,7 @@ TEST(ObjectFileYAML, hello_x86_64) { "...\n"); EXPECT_EQ(f->arch, lld::MachOLinkingContext::arch_x86_64); EXPECT_EQ(f->fileType, llvm::MachO::MH_OBJECT); - EXPECT_EQ(f->flags, llvm::MachO::MH_SUBSECTIONS_VIA_SYMBOLS); + EXPECT_EQ((int)(f->flags), llvm::MachO::MH_SUBSECTIONS_VIA_SYMBOLS); EXPECT_EQ(f->sections.size(), 2UL); const Section& sect1 = f->sections[0]; @@ -405,7 +405,7 @@ TEST(ObjectFileYAML, hello_x86) { "...\n"); EXPECT_EQ(f->arch, lld::MachOLinkingContext::arch_x86); 
EXPECT_EQ(f->fileType, llvm::MachO::MH_OBJECT); - EXPECT_EQ(f->flags, llvm::MachO::MH_SUBSECTIONS_VIA_SYMBOLS); + EXPECT_EQ((int)(f->flags), llvm::MachO::MH_SUBSECTIONS_VIA_SYMBOLS); EXPECT_EQ(f->sections.size(), 2UL); const Section& sect1 = f->sections[0]; @@ -533,7 +533,7 @@ TEST(ObjectFileYAML, hello_armv6) { "...\n"); EXPECT_EQ(f->arch, lld::MachOLinkingContext::arch_armv6); EXPECT_EQ(f->fileType, llvm::MachO::MH_OBJECT); - EXPECT_EQ(f->flags, llvm::MachO::MH_SUBSECTIONS_VIA_SYMBOLS); + EXPECT_EQ((int)(f->flags), llvm::MachO::MH_SUBSECTIONS_VIA_SYMBOLS); EXPECT_EQ(f->sections.size(), 2UL); const Section& sect1 = f->sections[0]; @@ -673,7 +673,7 @@ TEST(ObjectFileYAML, hello_armv7) { "...\n"); EXPECT_EQ(f->arch, lld::MachOLinkingContext::arch_armv7); EXPECT_EQ(f->fileType, llvm::MachO::MH_OBJECT); - EXPECT_EQ(f->flags, llvm::MachO::MH_SUBSECTIONS_VIA_SYMBOLS); + EXPECT_EQ((int)(f->flags), llvm::MachO::MH_SUBSECTIONS_VIA_SYMBOLS); EXPECT_EQ(f->sections.size(), 2UL); const Section& sect1 = f->sections[0]; diff --git a/llvm/docs/CodingStandards.rst b/llvm/docs/CodingStandards.rst index 861ab05420fb68..99fb6af02a282a 100644 --- a/llvm/docs/CodingStandards.rst +++ b/llvm/docs/CodingStandards.rst @@ -1302,6 +1302,9 @@ loops wherever possible for all newly added code. For example: for (Instruction &I : *BB) ... use I ... +Usage of ``std::for_each()``/``llvm::for_each()`` functions is discouraged, +unless the callable object already exists. + Don't evaluate ``end()`` every time through a loop ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index dd869f98b5bcc0..effd5d6ab7d815 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -707,6 +707,33 @@ namespace { SmallVectorImpl &StoreNodes, unsigned NumStores, SDNode *RootNode); + /// This is a helper function for mergeConsecutiveStores.
Given a list of + /// store candidates, find the first N that are consecutive in memory. + /// Returns 0 if there are not at least 2 consecutive stores to try merging. + unsigned getConsecutiveStores(SmallVectorImpl &StoreNodes, + int64_t ElementSizeBytes) const; + + /// This is a helper function for mergeConsecutiveStores. It is used for + /// store chains that are composed entirely of constant values. + bool tryStoreMergeOfConstants(SmallVectorImpl &StoreNodes, + unsigned NumConsecutiveStores, + EVT MemVT, SDNode *Root, bool AllowVectors); + + /// This is a helper function for mergeConsecutiveStores. It is used for + /// store chains that are composed entirely of extracted vector elements. + /// When extracting multiple vector elements, try to store them in one + /// vector store rather than a sequence of scalar stores. + bool tryStoreMergeOfExtracts(SmallVectorImpl &StoreNodes, + unsigned NumConsecutiveStores, EVT MemVT, + SDNode *Root); + + /// This is a helper function for mergeConsecutiveStores. It is used for + /// store chains that are composed entirely of loaded values. + bool tryStoreMergeOfLoads(SmallVectorImpl &StoreNodes, + unsigned NumConsecutiveStores, EVT MemVT, + SDNode *Root, bool AllowVectors, + bool IsNonTemporalStore, bool IsNonTemporalLoad); + /// Merge consecutive store operations into a wide store. /// This optimization uses wide integers or vectors when possible. /// \return true if stores were merged. @@ -16237,76 +16264,22 @@ bool DAGCombiner::checkMergeStoreCandidatesForDependencies( return true; } -bool DAGCombiner::mergeConsecutiveStores(StoreSDNode *St) { - if (OptLevel == CodeGenOpt::None || !EnableStoreMerging) - return false; - - // TODO: Extend this function to merge stores of scalable vectors. - // (i.e. two stores can be merged to one - // store since we know is exactly twice as large as - // ). Until then, bail out for scalable vectors. 
- EVT MemVT = St->getMemoryVT(); - if (MemVT.isScalableVector()) - return false; - if (!MemVT.isSimple() || MemVT.getSizeInBits() * 2 > MaximumLegalStoreInBits) - return false; - - // This function cannot currently deal with non-byte-sized memory sizes. - int64_t ElementSizeBytes = MemVT.getStoreSize(); - if (ElementSizeBytes * 8 != (int64_t)MemVT.getSizeInBits()) - return false; - - // Do not bother looking at stored values that are not constants, loads, or - // extracted vector elements. - SDValue StoredVal = peekThroughBitcasts(St->getValue()); - StoreSource StoreSrc = getStoreSource(StoredVal); - if (StoreSrc == StoreSource::Unknown) - return false; - - SmallVector StoreNodes; - SDNode *RootNode; - // Find potential store merge candidates by searching through chain sub-DAG - getStoreMergeCandidates(St, StoreNodes, RootNode); - - // Check if there is anything to merge. - if (StoreNodes.size() < 2) - return false; - - // Sort the memory operands according to their distance from the - // base pointer. - llvm::sort(StoreNodes, [](MemOpLink LHS, MemOpLink RHS) { - return LHS.OffsetFromBase < RHS.OffsetFromBase; - }); - - unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1; - bool AllowVectors = !DAG.getMachineFunction().getFunction().hasFnAttribute( - Attribute::NoImplicitFloat); - - bool IsNonTemporalStore = St->isNonTemporal(); - bool IsNonTemporalLoad = StoreSrc == StoreSource::Load && - cast(StoredVal)->isNonTemporal(); - LLVMContext &Context = *DAG.getContext(); - const DataLayout &DL = DAG.getDataLayout(); - - // Store Merge attempts to merge the lowest stores. This generally - // works out as if successful, as the remaining stores are checked - // after the first collection of stores is merged. However, in the - // case that a non-mergeable store is found first, e.g., {p[-2], - // p[0], p[1], p[2], p[3]}, we would fail and miss the subsequent - // mergeable cases. 
To prevent this, we prune such stores from the - // front of StoreNodes here. - bool MadeChange = false; - while (StoreNodes.size() > 1) { +unsigned +DAGCombiner::getConsecutiveStores(SmallVectorImpl &StoreNodes, + int64_t ElementSizeBytes) const { + while (true) { + // Find a store past the width of the first store. size_t StartIdx = 0; while ((StartIdx + 1 < StoreNodes.size()) && StoreNodes[StartIdx].OffsetFromBase + ElementSizeBytes != - StoreNodes[StartIdx + 1].OffsetFromBase) + StoreNodes[StartIdx + 1].OffsetFromBase) ++StartIdx; // Bail if we don't have enough candidates to merge. if (StartIdx + 1 >= StoreNodes.size()) - return MadeChange; + return 0; + // Trim stores that overlapped with the first store. if (StartIdx) StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + StartIdx); @@ -16322,300 +16295,333 @@ bool DAGCombiner::mergeConsecutiveStores(StoreSDNode *St) { break; NumConsecutiveStores = i + 1; } + if (NumConsecutiveStores > 1) + return NumConsecutiveStores; - if (NumConsecutiveStores < 2) { - StoreNodes.erase(StoreNodes.begin(), - StoreNodes.begin() + NumConsecutiveStores); - continue; - } + // There are no consecutive stores at the start of the list. + // Remove the first store and try again. + StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 1); + } +} - if (StoreSrc == StoreSource::Constant) { - // Store the constants into memory as one consecutive store. 
- while (NumConsecutiveStores >= 2) { - LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode; - unsigned FirstStoreAS = FirstInChain->getAddressSpace(); - unsigned FirstStoreAlign = FirstInChain->getAlignment(); - unsigned LastLegalType = 1; - unsigned LastLegalVectorType = 1; - bool LastIntegerTrunc = false; - bool NonZero = false; - unsigned FirstZeroAfterNonZero = NumConsecutiveStores; - for (unsigned i = 0; i < NumConsecutiveStores; ++i) { - StoreSDNode *ST = cast(StoreNodes[i].MemNode); - SDValue StoredVal = ST->getValue(); - bool IsElementZero = false; - if (ConstantSDNode *C = dyn_cast(StoredVal)) - IsElementZero = C->isNullValue(); - else if (ConstantFPSDNode *C = dyn_cast(StoredVal)) - IsElementZero = C->getConstantFPValue()->isNullValue(); - if (IsElementZero) { - if (NonZero && FirstZeroAfterNonZero == NumConsecutiveStores) - FirstZeroAfterNonZero = i; - } - NonZero |= !IsElementZero; +bool DAGCombiner::tryStoreMergeOfConstants( + SmallVectorImpl &StoreNodes, unsigned NumConsecutiveStores, + EVT MemVT, SDNode *RootNode, bool AllowVectors) { + LLVMContext &Context = *DAG.getContext(); + const DataLayout &DL = DAG.getDataLayout(); + int64_t ElementSizeBytes = MemVT.getStoreSize(); + unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1; + bool MadeChange = false; - // Find a legal type for the constant store. - unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8; - EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits); - bool IsFast = false; + // Store the constants into memory as one consecutive store. 
+ while (NumConsecutiveStores >= 2) { + LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode; + unsigned FirstStoreAS = FirstInChain->getAddressSpace(); + unsigned FirstStoreAlign = FirstInChain->getAlignment(); + unsigned LastLegalType = 1; + unsigned LastLegalVectorType = 1; + bool LastIntegerTrunc = false; + bool NonZero = false; + unsigned FirstZeroAfterNonZero = NumConsecutiveStores; + for (unsigned i = 0; i < NumConsecutiveStores; ++i) { + StoreSDNode *ST = cast(StoreNodes[i].MemNode); + SDValue StoredVal = ST->getValue(); + bool IsElementZero = false; + if (ConstantSDNode *C = dyn_cast(StoredVal)) + IsElementZero = C->isNullValue(); + else if (ConstantFPSDNode *C = dyn_cast(StoredVal)) + IsElementZero = C->getConstantFPValue()->isNullValue(); + if (IsElementZero) { + if (NonZero && FirstZeroAfterNonZero == NumConsecutiveStores) + FirstZeroAfterNonZero = i; + } + NonZero |= !IsElementZero; - // Break early when size is too large to be legal. - if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits) - break; + // Find a legal type for the constant store. + unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8; + EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits); + bool IsFast = false; - if (TLI.isTypeLegal(StoreTy) && - TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) && - TLI.allowsMemoryAccess(Context, DL, StoreTy, - *FirstInChain->getMemOperand(), &IsFast) && - IsFast) { - LastIntegerTrunc = false; - LastLegalType = i + 1; - // Or check whether a truncstore is legal. 
- } else if (TLI.getTypeAction(Context, StoreTy) == - TargetLowering::TypePromoteInteger) { - EVT LegalizedStoredValTy = - TLI.getTypeToTransformTo(Context, StoredVal.getValueType()); - if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) && - TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, DAG) && - TLI.allowsMemoryAccess(Context, DL, StoreTy, - *FirstInChain->getMemOperand(), - &IsFast) && - IsFast) { - LastIntegerTrunc = true; - LastLegalType = i + 1; - } - } - - // We only use vectors if the constant is known to be zero or the - // target allows it and the function is not marked with the - // noimplicitfloat attribute. - if ((!NonZero || - TLI.storeOfVectorConstantIsCheap(MemVT, i + 1, FirstStoreAS)) && - AllowVectors) { - // Find a legal type for the vector store. - unsigned Elts = (i + 1) * NumMemElts; - EVT Ty = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts); - if (TLI.isTypeLegal(Ty) && TLI.isTypeLegal(MemVT) && - TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) && - TLI.allowsMemoryAccess( - Context, DL, Ty, *FirstInChain->getMemOperand(), &IsFast) && - IsFast) - LastLegalVectorType = i + 1; - } - } + // Break early when size is too large to be legal. + if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits) + break; - bool UseVector = (LastLegalVectorType > LastLegalType) && AllowVectors; - unsigned NumElem = (UseVector) ? LastLegalVectorType : LastLegalType; - - // Check if we found a legal integer type that creates a meaningful - // merge. - if (NumElem < 2) { - // We know that candidate stores are in order and of correct - // shape. While there is no mergeable sequence from the - // beginning one may start later in the sequence. The only - // reason a merge of size N could have failed where another of - // the same size would not have, is if the alignment has - // improved or we've dropped a non-zero value. Drop as many - // candidates as we can here. 
- unsigned NumSkip = 1; - while ( - (NumSkip < NumConsecutiveStores) && - (NumSkip < FirstZeroAfterNonZero) && - (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign)) - NumSkip++; - - StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip); - NumConsecutiveStores -= NumSkip; - continue; + if (TLI.isTypeLegal(StoreTy) && + TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) && + TLI.allowsMemoryAccess(Context, DL, StoreTy, + *FirstInChain->getMemOperand(), &IsFast) && + IsFast) { + LastIntegerTrunc = false; + LastLegalType = i + 1; + // Or check whether a truncstore is legal. + } else if (TLI.getTypeAction(Context, StoreTy) == + TargetLowering::TypePromoteInteger) { + EVT LegalizedStoredValTy = + TLI.getTypeToTransformTo(Context, StoredVal.getValueType()); + if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) && + TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, DAG) && + TLI.allowsMemoryAccess(Context, DL, StoreTy, + *FirstInChain->getMemOperand(), &IsFast) && + IsFast) { + LastIntegerTrunc = true; + LastLegalType = i + 1; } + } - // Check that we can merge these candidates without causing a cycle. - if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem, - RootNode)) { - StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem); - NumConsecutiveStores -= NumElem; - continue; - } + // We only use vectors if the constant is known to be zero or the + // target allows it and the function is not marked with the + // noimplicitfloat attribute. + if ((!NonZero || + TLI.storeOfVectorConstantIsCheap(MemVT, i + 1, FirstStoreAS)) && + AllowVectors) { + // Find a legal type for the vector store. 
+ unsigned Elts = (i + 1) * NumMemElts; + EVT Ty = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts); + if (TLI.isTypeLegal(Ty) && TLI.isTypeLegal(MemVT) && + TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) && + TLI.allowsMemoryAccess(Context, DL, Ty, + *FirstInChain->getMemOperand(), &IsFast) && + IsFast) + LastLegalVectorType = i + 1; + } + } - MadeChange |= mergeStoresOfConstantsOrVecElts( - StoreNodes, MemVT, NumElem, true, UseVector, LastIntegerTrunc); + bool UseVector = (LastLegalVectorType > LastLegalType) && AllowVectors; + unsigned NumElem = (UseVector) ? LastLegalVectorType : LastLegalType; + + // Check if we found a legal integer type that creates a meaningful + // merge. + if (NumElem < 2) { + // We know that candidate stores are in order and of correct + // shape. While there is no mergeable sequence from the + // beginning one may start later in the sequence. The only + // reason a merge of size N could have failed where another of + // the same size would not have, is if the alignment has + // improved or we've dropped a non-zero value. Drop as many + // candidates as we can here. + unsigned NumSkip = 1; + while ((NumSkip < NumConsecutiveStores) && + (NumSkip < FirstZeroAfterNonZero) && + (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign)) + NumSkip++; + + StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip); + NumConsecutiveStores -= NumSkip; + continue; + } - // Remove merged stores for next iteration. - StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem); - NumConsecutiveStores -= NumElem; - } + // Check that we can merge these candidates without causing a cycle. + if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem, + RootNode)) { + StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem); + NumConsecutiveStores -= NumElem; continue; } - // When extracting multiple vector elements, try to store them - // in one vector store rather than a sequence of scalar stores. 
- if (StoreSrc == StoreSource::Extract) { - // Loop on Consecutive Stores on success. - while (NumConsecutiveStores >= 2) { - LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode; - unsigned FirstStoreAS = FirstInChain->getAddressSpace(); - unsigned FirstStoreAlign = FirstInChain->getAlignment(); - unsigned NumStoresToMerge = 1; - for (unsigned i = 0; i < NumConsecutiveStores; ++i) { - // Find a legal type for the vector store. - unsigned Elts = (i + 1) * NumMemElts; - EVT Ty = - EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts); - bool IsFast = false; - - // Break early when size is too large to be legal. - if (Ty.getSizeInBits() > MaximumLegalStoreInBits) - break; + MadeChange |= mergeStoresOfConstantsOrVecElts( + StoreNodes, MemVT, NumElem, true, UseVector, LastIntegerTrunc); - if (TLI.isTypeLegal(Ty) && - TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) && - TLI.allowsMemoryAccess(Context, DL, Ty, - *FirstInChain->getMemOperand(), &IsFast) && - IsFast) - NumStoresToMerge = i + 1; - } + // Remove merged stores for next iteration. + StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem); + NumConsecutiveStores -= NumElem; + } + return MadeChange; +} - // Check if we found a legal integer type creating a meaningful - // merge. - if (NumStoresToMerge < 2) { - // We know that candidate stores are in order and of correct - // shape. While there is no mergeable sequence from the - // beginning one may start later in the sequence. The only - // reason a merge of size N could have failed where another of - // the same size would not have, is if the alignment has - // improved. Drop as many candidates as we can here. 
- unsigned NumSkip = 1; - while ( - (NumSkip < NumConsecutiveStores) && - (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign)) - NumSkip++; - - StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip); - NumConsecutiveStores -= NumSkip; - continue; - } +bool DAGCombiner::tryStoreMergeOfExtracts( + SmallVectorImpl &StoreNodes, unsigned NumConsecutiveStores, + EVT MemVT, SDNode *RootNode) { + LLVMContext &Context = *DAG.getContext(); + const DataLayout &DL = DAG.getDataLayout(); + unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1; + bool MadeChange = false; - // Check that we can merge these candidates without causing a cycle. - if (!checkMergeStoreCandidatesForDependencies( - StoreNodes, NumStoresToMerge, RootNode)) { - StoreNodes.erase(StoreNodes.begin(), - StoreNodes.begin() + NumStoresToMerge); - NumConsecutiveStores -= NumStoresToMerge; - continue; - } + // Loop on Consecutive Stores on success. + while (NumConsecutiveStores >= 2) { + LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode; + unsigned FirstStoreAS = FirstInChain->getAddressSpace(); + unsigned FirstStoreAlign = FirstInChain->getAlignment(); + unsigned NumStoresToMerge = 1; + for (unsigned i = 0; i < NumConsecutiveStores; ++i) { + // Find a legal type for the vector store. + unsigned Elts = (i + 1) * NumMemElts; + EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts); + bool IsFast = false; - MadeChange |= mergeStoresOfConstantsOrVecElts( - StoreNodes, MemVT, NumStoresToMerge, false, true, false); + // Break early when size is too large to be legal. 
+ if (Ty.getSizeInBits() > MaximumLegalStoreInBits) + break; - StoreNodes.erase(StoreNodes.begin(), - StoreNodes.begin() + NumStoresToMerge); - NumConsecutiveStores -= NumStoresToMerge; - } + if (TLI.isTypeLegal(Ty) && TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) && + TLI.allowsMemoryAccess(Context, DL, Ty, + *FirstInChain->getMemOperand(), &IsFast) && + IsFast) + NumStoresToMerge = i + 1; + } + + // Check if we found a legal integer type creating a meaningful + // merge. + if (NumStoresToMerge < 2) { + // We know that candidate stores are in order and of correct + // shape. While there is no mergeable sequence from the + // beginning one may start later in the sequence. The only + // reason a merge of size N could have failed where another of + // the same size would not have, is if the alignment has + // improved. Drop as many candidates as we can here. + unsigned NumSkip = 1; + while ((NumSkip < NumConsecutiveStores) && + (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign)) + NumSkip++; + + StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip); + NumConsecutiveStores -= NumSkip; continue; } - // Below we handle the case of multiple consecutive stores that - // come from multiple consecutive loads. We merge them into a single - // wide load and a single wide store. - assert(StoreSrc == StoreSource::Load && "Expected load source for store"); + // Check that we can merge these candidates without causing a cycle. + if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumStoresToMerge, + RootNode)) { + StoreNodes.erase(StoreNodes.begin(), + StoreNodes.begin() + NumStoresToMerge); + NumConsecutiveStores -= NumStoresToMerge; + continue; + } - // Look for load nodes which are used by the stored values. - SmallVector LoadNodes; + MadeChange |= mergeStoresOfConstantsOrVecElts( + StoreNodes, MemVT, NumStoresToMerge, false, true, false); - // Find acceptable loads. 
Loads need to have the same chain (token factor), - // must not be zext, volatile, indexed, and they must be consecutive. - BaseIndexOffset LdBasePtr; + StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumStoresToMerge); + NumConsecutiveStores -= NumStoresToMerge; + } + return MadeChange; +} - for (unsigned i = 0; i < NumConsecutiveStores; ++i) { - StoreSDNode *St = cast(StoreNodes[i].MemNode); - SDValue Val = peekThroughBitcasts(St->getValue()); - LoadSDNode *Ld = cast(Val); - - BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld, DAG); - // If this is not the first ptr that we check. - int64_t LdOffset = 0; - if (LdBasePtr.getBase().getNode()) { - // The base ptr must be the same. - if (!LdBasePtr.equalBaseIndex(LdPtr, DAG, LdOffset)) - break; - } else { - // Check that all other base pointers are the same as this one. - LdBasePtr = LdPtr; - } +bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl &StoreNodes, + unsigned NumConsecutiveStores, EVT MemVT, + SDNode *RootNode, bool AllowVectors, + bool IsNonTemporalStore, + bool IsNonTemporalLoad) { + LLVMContext &Context = *DAG.getContext(); + const DataLayout &DL = DAG.getDataLayout(); + int64_t ElementSizeBytes = MemVT.getStoreSize(); + unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1; + bool MadeChange = false; + + int64_t StartAddress = StoreNodes[0].OffsetFromBase; + + // Look for load nodes which are used by the stored values. + SmallVector LoadNodes; - // We found a potential memory operand to merge. - LoadNodes.push_back(MemOpLink(Ld, LdOffset)); + // Find acceptable loads. Loads need to have the same chain (token factor), + // must not be zext, volatile, indexed, and they must be consecutive. 
+ BaseIndexOffset LdBasePtr; + + for (unsigned i = 0; i < NumConsecutiveStores; ++i) { + StoreSDNode *St = cast(StoreNodes[i].MemNode); + SDValue Val = peekThroughBitcasts(St->getValue()); + LoadSDNode *Ld = cast(Val); + + BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld, DAG); + // If this is not the first ptr that we check. + int64_t LdOffset = 0; + if (LdBasePtr.getBase().getNode()) { + // The base ptr must be the same. + if (!LdBasePtr.equalBaseIndex(LdPtr, DAG, LdOffset)) + break; + } else { + // Check that all other base pointers are the same as this one. + LdBasePtr = LdPtr; } - while (NumConsecutiveStores >= 2 && LoadNodes.size() >= 2) { - // If we have load/store pair instructions and we only have two values, - // don't bother merging. - Align RequiredAlignment; - if (LoadNodes.size() == 2 && - TLI.hasPairedLoad(MemVT, RequiredAlignment) && - StoreNodes[0].MemNode->getAlign() >= RequiredAlignment) { - StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 2); - LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + 2); + // We found a potential memory operand to merge. + LoadNodes.push_back(MemOpLink(Ld, LdOffset)); + } + + while (NumConsecutiveStores >= 2 && LoadNodes.size() >= 2) { + // If we have load/store pair instructions and we only have two values, + // don't bother merging. 
+ Align RequiredAlignment; + if (LoadNodes.size() == 2 && TLI.hasPairedLoad(MemVT, RequiredAlignment) && + StoreNodes[0].MemNode->getAlign() >= RequiredAlignment) { + StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 2); + LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + 2); + break; + } + LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode; + unsigned FirstStoreAS = FirstInChain->getAddressSpace(); + unsigned FirstStoreAlign = FirstInChain->getAlignment(); + LoadSDNode *FirstLoad = cast(LoadNodes[0].MemNode); + unsigned FirstLoadAlign = FirstLoad->getAlignment(); + + // Scan the memory operations on the chain and find the first + // non-consecutive load memory address. These variables hold the index in + // the store node array. + + unsigned LastConsecutiveLoad = 1; + + // This variable refers to the size and not index in the array. + unsigned LastLegalVectorType = 1; + unsigned LastLegalIntegerType = 1; + bool isDereferenceable = true; + bool DoIntegerTruncate = false; + StartAddress = LoadNodes[0].OffsetFromBase; + SDValue FirstChain = FirstLoad->getChain(); + for (unsigned i = 1; i < LoadNodes.size(); ++i) { + // All loads must share the same chain. + if (LoadNodes[i].MemNode->getChain() != FirstChain) break; - } - LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode; - unsigned FirstStoreAS = FirstInChain->getAddressSpace(); - unsigned FirstStoreAlign = FirstInChain->getAlignment(); - LoadSDNode *FirstLoad = cast(LoadNodes[0].MemNode); - unsigned FirstLoadAlign = FirstLoad->getAlignment(); - - // Scan the memory operations on the chain and find the first - // non-consecutive load memory address. These variables hold the index in - // the store node array. - - unsigned LastConsecutiveLoad = 1; - - // This variable refers to the size and not index in the array. 
- unsigned LastLegalVectorType = 1; - unsigned LastLegalIntegerType = 1; - bool isDereferenceable = true; - bool DoIntegerTruncate = false; - StartAddress = LoadNodes[0].OffsetFromBase; - SDValue FirstChain = FirstLoad->getChain(); - for (unsigned i = 1; i < LoadNodes.size(); ++i) { - // All loads must share the same chain. - if (LoadNodes[i].MemNode->getChain() != FirstChain) - break; - int64_t CurrAddress = LoadNodes[i].OffsetFromBase; - if (CurrAddress - StartAddress != (ElementSizeBytes * i)) - break; - LastConsecutiveLoad = i; + int64_t CurrAddress = LoadNodes[i].OffsetFromBase; + if (CurrAddress - StartAddress != (ElementSizeBytes * i)) + break; + LastConsecutiveLoad = i; - if (isDereferenceable && !LoadNodes[i].MemNode->isDereferenceable()) - isDereferenceable = false; + if (isDereferenceable && !LoadNodes[i].MemNode->isDereferenceable()) + isDereferenceable = false; - // Find a legal type for the vector store. - unsigned Elts = (i + 1) * NumMemElts; - EVT StoreTy = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts); + // Find a legal type for the vector store. + unsigned Elts = (i + 1) * NumMemElts; + EVT StoreTy = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts); - // Break early when size is too large to be legal. - if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits) - break; + // Break early when size is too large to be legal. 
+ if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits) + break; - bool IsFastSt = false; - bool IsFastLd = false; - if (TLI.isTypeLegal(StoreTy) && - TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) && - TLI.allowsMemoryAccess(Context, DL, StoreTy, - *FirstInChain->getMemOperand(), &IsFastSt) && - IsFastSt && - TLI.allowsMemoryAccess(Context, DL, StoreTy, - *FirstLoad->getMemOperand(), &IsFastLd) && - IsFastLd) { - LastLegalVectorType = i + 1; - } + bool IsFastSt = false; + bool IsFastLd = false; + if (TLI.isTypeLegal(StoreTy) && + TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) && + TLI.allowsMemoryAccess(Context, DL, StoreTy, + *FirstInChain->getMemOperand(), &IsFastSt) && + IsFastSt && + TLI.allowsMemoryAccess(Context, DL, StoreTy, + *FirstLoad->getMemOperand(), &IsFastLd) && + IsFastLd) { + LastLegalVectorType = i + 1; + } - // Find a legal type for the integer store. - unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8; - StoreTy = EVT::getIntegerVT(Context, SizeInBits); - if (TLI.isTypeLegal(StoreTy) && - TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) && + // Find a legal type for the integer store. + unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8; + StoreTy = EVT::getIntegerVT(Context, SizeInBits); + if (TLI.isTypeLegal(StoreTy) && + TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) && + TLI.allowsMemoryAccess(Context, DL, StoreTy, + *FirstInChain->getMemOperand(), &IsFastSt) && + IsFastSt && + TLI.allowsMemoryAccess(Context, DL, StoreTy, + *FirstLoad->getMemOperand(), &IsFastLd) && + IsFastLd) { + LastLegalIntegerType = i + 1; + DoIntegerTruncate = false; + // Or check whether a truncstore and extload is legal. 
+ } else if (TLI.getTypeAction(Context, StoreTy) == + TargetLowering::TypePromoteInteger) { + EVT LegalizedStoredValTy = TLI.getTypeToTransformTo(Context, StoreTy); + if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) && + TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, DAG) && + TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValTy, StoreTy) && + TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValTy, StoreTy) && + TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValTy, StoreTy) && TLI.allowsMemoryAccess(Context, DL, StoreTy, *FirstInChain->getMemOperand(), &IsFastSt) && IsFastSt && @@ -16623,146 +16629,211 @@ bool DAGCombiner::mergeConsecutiveStores(StoreSDNode *St) { *FirstLoad->getMemOperand(), &IsFastLd) && IsFastLd) { LastLegalIntegerType = i + 1; - DoIntegerTruncate = false; - // Or check whether a truncstore and extload is legal. - } else if (TLI.getTypeAction(Context, StoreTy) == - TargetLowering::TypePromoteInteger) { - EVT LegalizedStoredValTy = TLI.getTypeToTransformTo(Context, StoreTy); - if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) && - TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, DAG) && - TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValTy, - StoreTy) && - TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValTy, - StoreTy) && - TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValTy, StoreTy) && - TLI.allowsMemoryAccess(Context, DL, StoreTy, - *FirstInChain->getMemOperand(), - &IsFastSt) && - IsFastSt && - TLI.allowsMemoryAccess(Context, DL, StoreTy, - *FirstLoad->getMemOperand(), &IsFastLd) && - IsFastLd) { - LastLegalIntegerType = i + 1; - DoIntegerTruncate = true; - } + DoIntegerTruncate = true; } } + } - // Only use vector types if the vector type is larger than the integer - // type. If they are the same, use integers. 
- bool UseVectorTy = - LastLegalVectorType > LastLegalIntegerType && AllowVectors; - unsigned LastLegalType = - std::max(LastLegalVectorType, LastLegalIntegerType); - - // We add +1 here because the LastXXX variables refer to location while - // the NumElem refers to array/index size. - unsigned NumElem = - std::min(NumConsecutiveStores, LastConsecutiveLoad + 1); - NumElem = std::min(LastLegalType, NumElem); - - if (NumElem < 2) { - // We know that candidate stores are in order and of correct - // shape. While there is no mergeable sequence from the - // beginning one may start later in the sequence. The only - // reason a merge of size N could have failed where another of - // the same size would not have is if the alignment or either - // the load or store has improved. Drop as many candidates as we - // can here. - unsigned NumSkip = 1; - while ((NumSkip < LoadNodes.size()) && - (LoadNodes[NumSkip].MemNode->getAlignment() <= FirstLoadAlign) && - (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign)) - NumSkip++; - StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip); - LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumSkip); - NumConsecutiveStores -= NumSkip; - continue; - } + // Only use vector types if the vector type is larger than the integer + // type. If they are the same, use integers. + bool UseVectorTy = + LastLegalVectorType > LastLegalIntegerType && AllowVectors; + unsigned LastLegalType = + std::max(LastLegalVectorType, LastLegalIntegerType); + + // We add +1 here because the LastXXX variables refer to location while + // the NumElem refers to array/index size. + unsigned NumElem = std::min(NumConsecutiveStores, LastConsecutiveLoad + 1); + NumElem = std::min(LastLegalType, NumElem); + + if (NumElem < 2) { + // We know that candidate stores are in order and of correct + // shape. While there is no mergeable sequence from the + // beginning one may start later in the sequence. 
The only + // reason a merge of size N could have failed where another of + // the same size would not have is if the alignment or either + // the load or store has improved. Drop as many candidates as we + // can here. + unsigned NumSkip = 1; + while ((NumSkip < LoadNodes.size()) && + (LoadNodes[NumSkip].MemNode->getAlignment() <= FirstLoadAlign) && + (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign)) + NumSkip++; + StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip); + LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumSkip); + NumConsecutiveStores -= NumSkip; + continue; + } - // Check that we can merge these candidates without causing a cycle. - if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem, - RootNode)) { - StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem); - LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem); - NumConsecutiveStores -= NumElem; - continue; - } + // Check that we can merge these candidates without causing a cycle. + if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem, + RootNode)) { + StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem); + LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem); + NumConsecutiveStores -= NumElem; + continue; + } - // Find if it is better to use vectors or integers to load and store - // to memory. - EVT JointMemOpVT; - if (UseVectorTy) { - // Find a legal type for the vector store. - unsigned Elts = NumElem * NumMemElts; - JointMemOpVT = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts); - } else { - unsigned SizeInBits = NumElem * ElementSizeBytes * 8; - JointMemOpVT = EVT::getIntegerVT(Context, SizeInBits); - } + // Find if it is better to use vectors or integers to load and store + // to memory. + EVT JointMemOpVT; + if (UseVectorTy) { + // Find a legal type for the vector store. 
+ unsigned Elts = NumElem * NumMemElts; + JointMemOpVT = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts); + } else { + unsigned SizeInBits = NumElem * ElementSizeBytes * 8; + JointMemOpVT = EVT::getIntegerVT(Context, SizeInBits); + } + + SDLoc LoadDL(LoadNodes[0].MemNode); + SDLoc StoreDL(StoreNodes[0].MemNode); + + // The merged loads are required to have the same incoming chain, so + // using the first's chain is acceptable. + + SDValue NewStoreChain = getMergeStoreChains(StoreNodes, NumElem); + AddToWorklist(NewStoreChain.getNode()); + + MachineMemOperand::Flags LdMMOFlags = + isDereferenceable ? MachineMemOperand::MODereferenceable + : MachineMemOperand::MONone; + if (IsNonTemporalLoad) + LdMMOFlags |= MachineMemOperand::MONonTemporal; + + MachineMemOperand::Flags StMMOFlags = IsNonTemporalStore + ? MachineMemOperand::MONonTemporal + : MachineMemOperand::MONone; + + SDValue NewLoad, NewStore; + if (UseVectorTy || !DoIntegerTruncate) { + NewLoad = DAG.getLoad( + JointMemOpVT, LoadDL, FirstLoad->getChain(), FirstLoad->getBasePtr(), + FirstLoad->getPointerInfo(), FirstLoadAlign, LdMMOFlags); + NewStore = DAG.getStore( + NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(), + FirstInChain->getPointerInfo(), FirstStoreAlign, StMMOFlags); + } else { // This must be the truncstore/extload case + EVT ExtendedTy = + TLI.getTypeToTransformTo(*DAG.getContext(), JointMemOpVT); + NewLoad = DAG.getExtLoad(ISD::EXTLOAD, LoadDL, ExtendedTy, + FirstLoad->getChain(), FirstLoad->getBasePtr(), + FirstLoad->getPointerInfo(), JointMemOpVT, + FirstLoadAlign, LdMMOFlags); + NewStore = DAG.getTruncStore(NewStoreChain, StoreDL, NewLoad, + FirstInChain->getBasePtr(), + FirstInChain->getPointerInfo(), JointMemOpVT, + FirstInChain->getAlignment(), + FirstInChain->getMemOperand()->getFlags()); + } + + // Transfer chain users from old loads to the new load. 
+ for (unsigned i = 0; i < NumElem; ++i) { + LoadSDNode *Ld = cast(LoadNodes[i].MemNode); + DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), + SDValue(NewLoad.getNode(), 1)); + } + + // Replace all stores with the new store. Recursively remove corresponding + // values if they are no longer used. + for (unsigned i = 0; i < NumElem; ++i) { + SDValue Val = StoreNodes[i].MemNode->getOperand(1); + CombineTo(StoreNodes[i].MemNode, NewStore); + if (Val.getNode()->use_empty()) + recursivelyDeleteUnusedNodes(Val.getNode()); + } + + MadeChange = true; + StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem); + LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem); + NumConsecutiveStores -= NumElem; + } + return MadeChange; +} - SDLoc LoadDL(LoadNodes[0].MemNode); - SDLoc StoreDL(StoreNodes[0].MemNode); - - // The merged loads are required to have the same incoming chain, so - // using the first's chain is acceptable. - - SDValue NewStoreChain = getMergeStoreChains(StoreNodes, NumElem); - AddToWorklist(NewStoreChain.getNode()); - - MachineMemOperand::Flags LdMMOFlags = - isDereferenceable ? MachineMemOperand::MODereferenceable - : MachineMemOperand::MONone; - if (IsNonTemporalLoad) - LdMMOFlags |= MachineMemOperand::MONonTemporal; - - MachineMemOperand::Flags StMMOFlags = - IsNonTemporalStore ? 
MachineMemOperand::MONonTemporal - : MachineMemOperand::MONone; - - SDValue NewLoad, NewStore; - if (UseVectorTy || !DoIntegerTruncate) { - NewLoad = - DAG.getLoad(JointMemOpVT, LoadDL, FirstLoad->getChain(), - FirstLoad->getBasePtr(), FirstLoad->getPointerInfo(), - FirstLoadAlign, LdMMOFlags); - NewStore = DAG.getStore( - NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(), - FirstInChain->getPointerInfo(), FirstStoreAlign, StMMOFlags); - } else { // This must be the truncstore/extload case - EVT ExtendedTy = - TLI.getTypeToTransformTo(*DAG.getContext(), JointMemOpVT); - NewLoad = DAG.getExtLoad(ISD::EXTLOAD, LoadDL, ExtendedTy, - FirstLoad->getChain(), FirstLoad->getBasePtr(), - FirstLoad->getPointerInfo(), JointMemOpVT, - FirstLoadAlign, LdMMOFlags); - NewStore = DAG.getTruncStore(NewStoreChain, StoreDL, NewLoad, - FirstInChain->getBasePtr(), - FirstInChain->getPointerInfo(), - JointMemOpVT, FirstInChain->getAlignment(), - FirstInChain->getMemOperand()->getFlags()); - } +bool DAGCombiner::mergeConsecutiveStores(StoreSDNode *St) { + if (OptLevel == CodeGenOpt::None || !EnableStoreMerging) + return false; - // Transfer chain users from old loads to the new load. - for (unsigned i = 0; i < NumElem; ++i) { - LoadSDNode *Ld = cast(LoadNodes[i].MemNode); - DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), - SDValue(NewLoad.getNode(), 1)); - } + // TODO: Extend this function to merge stores of scalable vectors. + // (i.e. two stores can be merged to one + // store since we know is exactly twice as large as + // ). Until then, bail out for scalable vectors. + EVT MemVT = St->getMemoryVT(); + if (MemVT.isScalableVector()) + return false; + if (!MemVT.isSimple() || MemVT.getSizeInBits() * 2 > MaximumLegalStoreInBits) + return false; - // Replace all stores with the new store. Recursively remove corresponding - // values if they are no longer used. 
- for (unsigned i = 0; i < NumElem; ++i) { - SDValue Val = StoreNodes[i].MemNode->getOperand(1); - CombineTo(StoreNodes[i].MemNode, NewStore); - if (Val.getNode()->use_empty()) - recursivelyDeleteUnusedNodes(Val.getNode()); - } + // This function cannot currently deal with non-byte-sized memory sizes. + int64_t ElementSizeBytes = MemVT.getStoreSize(); + if (ElementSizeBytes * 8 != (int64_t)MemVT.getSizeInBits()) + return false; - MadeChange = true; - StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem); - LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem); - NumConsecutiveStores -= NumElem; + // Do not bother looking at stored values that are not constants, loads, or + // extracted vector elements. + SDValue StoredVal = peekThroughBitcasts(St->getValue()); + const StoreSource StoreSrc = getStoreSource(StoredVal); + if (StoreSrc == StoreSource::Unknown) + return false; + + SmallVector StoreNodes; + SDNode *RootNode; + // Find potential store merge candidates by searching through chain sub-DAG + getStoreMergeCandidates(St, StoreNodes, RootNode); + + // Check if there is anything to merge. + if (StoreNodes.size() < 2) + return false; + + // Sort the memory operands according to their distance from the + // base pointer. + llvm::sort(StoreNodes, [](MemOpLink LHS, MemOpLink RHS) { + return LHS.OffsetFromBase < RHS.OffsetFromBase; + }); + + bool AllowVectors = !DAG.getMachineFunction().getFunction().hasFnAttribute( + Attribute::NoImplicitFloat); + bool IsNonTemporalStore = St->isNonTemporal(); + bool IsNonTemporalLoad = StoreSrc == StoreSource::Load && + cast(StoredVal)->isNonTemporal(); + + // Store Merge attempts to merge the lowest stores. This generally + // works out as if successful, as the remaining stores are checked + // after the first collection of stores is merged. 
However, in the + // case that a non-mergeable store is found first, e.g., {p[-2], + // p[0], p[1], p[2], p[3]}, we would fail and miss the subsequent + // mergeable cases. To prevent this, we prune such stores from the + // front of StoreNodes here. + bool MadeChange = false; + while (StoreNodes.size() > 1) { + unsigned NumConsecutiveStores = + getConsecutiveStores(StoreNodes, ElementSizeBytes); + // There are no more stores in the list to examine. + if (NumConsecutiveStores == 0) + return MadeChange; + + // We have at least 2 consecutive stores. Try to merge them. + assert(NumConsecutiveStores >= 2 && "Expected at least 2 stores"); + switch (StoreSrc) { + case StoreSource::Constant: + MadeChange |= tryStoreMergeOfConstants(StoreNodes, NumConsecutiveStores, + MemVT, RootNode, AllowVectors); + break; + + case StoreSource::Extract: + MadeChange |= tryStoreMergeOfExtracts(StoreNodes, NumConsecutiveStores, + MemVT, RootNode); + break; + + case StoreSource::Load: + MadeChange |= tryStoreMergeOfLoads(StoreNodes, NumConsecutiveStores, + MemVT, RootNode, AllowVectors, + IsNonTemporalStore, IsNonTemporalLoad); + break; + + default: + llvm_unreachable("Unhandled store source type"); } } return MadeChange; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp index f6c29ac1d2fa92..755d5ef5cb10c1 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp @@ -239,7 +239,7 @@ void AMDGPUTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE, unsigned GCNTTIImpl::getHardwareNumberOfRegisters(bool Vec) const { // The concept of vector registers doesn't really exist. Some packed vector // operations operate on the normal 32-bit registers. 
- return 256; + return MaxVGPRs; } unsigned GCNTTIImpl::getNumberOfRegisters(bool Vec) const { @@ -248,6 +248,13 @@ unsigned GCNTTIImpl::getNumberOfRegisters(bool Vec) const { return getHardwareNumberOfRegisters(Vec) >> 3; } +unsigned GCNTTIImpl::getNumberOfRegisters(unsigned RCID) const { + const SIRegisterInfo *TRI = ST->getRegisterInfo(); + const TargetRegisterClass *RC = TRI->getRegClass(RCID); + unsigned NumVGPRs = (TRI->getRegSizeInBits(*RC) + 31) / 32; + return getHardwareNumberOfRegisters(false) / NumVGPRs; +} + unsigned GCNTTIImpl::getRegisterBitWidth(bool Vector) const { return 32; } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h index 508ed061e935c0..b8a027c79bfc23 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h @@ -74,6 +74,7 @@ class GCNTTIImpl final : public BasicTTIImplBase { AMDGPUTTIImpl CommonTTI; bool IsGraphicsShader; bool HasFP32Denormals; + unsigned MaxVGPRs; const FeatureBitset InlineFeatureIgnoreList = { // Codegen control options which don't matter. 
@@ -133,7 +134,11 @@ class GCNTTIImpl final : public BasicTTIImplBase { TLI(ST->getTargetLowering()), CommonTTI(TM, F), IsGraphicsShader(AMDGPU::isShader(F.getCallingConv())), - HasFP32Denormals(AMDGPU::SIModeRegisterDefaults(F).allFP32Denormals()) {} + HasFP32Denormals(AMDGPU::SIModeRegisterDefaults(F).allFP32Denormals()), + MaxVGPRs(ST->getMaxNumVGPRs( + std::max(ST->getWavesPerEU(F).first, + ST->getWavesPerEUForWorkGroup( + ST->getFlatWorkGroupSizes(F).second)))) {} bool hasBranchDivergence() { return true; } bool useGPUDivergenceAnalysis() const; @@ -148,6 +153,7 @@ class GCNTTIImpl final : public BasicTTIImplBase { unsigned getHardwareNumberOfRegisters(bool Vector) const; unsigned getNumberOfRegisters(bool Vector) const; + unsigned getNumberOfRegisters(unsigned RCID) const; unsigned getRegisterBitWidth(bool Vector) const; unsigned getMinVectorRegisterBitWidth() const; unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize, diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp index 59cb2b994a4b3b..dbaf221db9fc99 100644 --- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp +++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp @@ -46,7 +46,6 @@ static uint64_t adjustFixupValue(unsigned Kind, uint64_t Value) { case PPC::fixup_ppc_half16ds: return Value & 0xfffc; case PPC::fixup_ppc_pcrel34: - case PPC::fixup_ppc_imm34: return Value & 0x3ffffffff; } } @@ -69,7 +68,6 @@ static unsigned getFixupKindNumBytes(unsigned Kind) { case PPC::fixup_ppc_br24_notoc: return 4; case PPC::fixup_ppc_pcrel34: - case PPC::fixup_ppc_imm34: case FK_Data_8: return 8; case PPC::fixup_ppc_nofixup: @@ -102,7 +100,6 @@ class PPCAsmBackend : public MCAsmBackend { { "fixup_ppc_half16", 0, 16, 0 }, { "fixup_ppc_half16ds", 0, 14, 0 }, { "fixup_ppc_pcrel34", 0, 34, MCFixupKindInfo::FKF_IsPCRel }, - { "fixup_ppc_imm34", 0, 34, 0 }, { "fixup_ppc_nofixup", 0, 0, 0 } }; const static MCFixupKindInfo 
InfosLE[PPC::NumTargetFixupKinds] = { @@ -115,7 +112,6 @@ class PPCAsmBackend : public MCAsmBackend { { "fixup_ppc_half16", 0, 16, 0 }, { "fixup_ppc_half16ds", 2, 14, 0 }, { "fixup_ppc_pcrel34", 0, 34, MCFixupKindInfo::FKF_IsPCRel }, - { "fixup_ppc_imm34", 0, 34, 0 }, { "fixup_ppc_nofixup", 0, 0, 0 } }; diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp index 1af08ec5539d35..d8b3301e97f129 100644 --- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp +++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp @@ -409,9 +409,6 @@ unsigned PPCELFObjectWriter::getRelocType(MCContext &Ctx, const MCValue &Target, break; } break; - case PPC::fixup_ppc_imm34: - llvm_unreachable("Unsupported Modifier for fixup_ppc_imm34."); - break; case FK_Data_8: switch (Modifier) { default: llvm_unreachable("Unsupported Modifier"); diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h index 73292f7b7938f7..2fb8947fd4e0f0 100644 --- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h +++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h @@ -43,9 +43,6 @@ enum Fixups { // A 34-bit fixup corresponding to PC-relative paddi. fixup_ppc_pcrel34, - // A 34-bit fixup corresponding to Non-PC-relative paddi. - fixup_ppc_imm34, - /// Not a true fixup, but ties a symbol to a call to __tls_get_addr for the /// TLS general and local dynamic models, or inserts the thread-pointer /// register number. 
diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp index 8c0e0a80b1e2c7..fb65e7320f2b0c 100644 --- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp +++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp @@ -104,36 +104,20 @@ unsigned PPCMCCodeEmitter::getImm16Encoding(const MCInst &MI, unsigned OpNo, return 0; } -uint64_t PPCMCCodeEmitter::getImm34Encoding(const MCInst &MI, unsigned OpNo, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI, - MCFixupKind Fixup) const { +uint64_t +PPCMCCodeEmitter::getImm34Encoding(const MCInst &MI, unsigned OpNo, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { const MCOperand &MO = MI.getOperand(OpNo); - assert(!MO.isReg() && "Not expecting a register for this operand."); - if (MO.isImm()) + if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO, Fixups, STI); // Add a fixup for the immediate field. - Fixups.push_back(MCFixup::create(0, MO.getExpr(), Fixup)); + Fixups.push_back(MCFixup::create(0, MO.getExpr(), + (MCFixupKind)PPC::fixup_ppc_pcrel34)); return 0; } -uint64_t -PPCMCCodeEmitter::getImm34EncodingNoPCRel(const MCInst &MI, unsigned OpNo, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const { - return getImm34Encoding(MI, OpNo, Fixups, STI, - (MCFixupKind)PPC::fixup_ppc_imm34); -} - -uint64_t -PPCMCCodeEmitter::getImm34EncodingPCRel(const MCInst &MI, unsigned OpNo, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const { - return getImm34Encoding(MI, OpNo, Fixups, STI, - (MCFixupKind)PPC::fixup_ppc_pcrel34); -} - unsigned PPCMCCodeEmitter::getMemRIEncoding(const MCInst &MI, unsigned OpNo, SmallVectorImpl &Fixups, const MCSubtargetInfo &STI) const { diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.h b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.h index 4504cc6a7405ed..588aa76bd80642 100644 --- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.h +++ 
b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.h @@ -52,14 +52,7 @@ class PPCMCCodeEmitter : public MCCodeEmitter { const MCSubtargetInfo &STI) const; uint64_t getImm34Encoding(const MCInst &MI, unsigned OpNo, SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI, - MCFixupKind Fixup) const; - uint64_t getImm34EncodingNoPCRel(const MCInst &MI, unsigned OpNo, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const; - uint64_t getImm34EncodingPCRel(const MCInst &MI, unsigned OpNo, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const; + const MCSubtargetInfo &STI) const; unsigned getMemRIEncoding(const MCInst &MI, unsigned OpNo, SmallVectorImpl &Fixups, const MCSubtargetInfo &STI) const; diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td index 39a90bf9b346ec..673ab63039cf72 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td @@ -757,13 +757,7 @@ def PPCS34ImmAsmOperand : AsmOperandClass { } def s34imm : Operand { let PrintMethod = "printS34ImmOperand"; - let EncoderMethod = "getImm34EncodingNoPCRel"; - let ParserMatchClass = PPCS34ImmAsmOperand; - let DecoderMethod = "decodeSImmOperand<34>"; -} -def s34imm_pcrel : Operand { - let PrintMethod = "printS34ImmOperand"; - let EncoderMethod = "getImm34EncodingPCRel"; + let EncoderMethod = "getImm34Encoding"; let ParserMatchClass = PPCS34ImmAsmOperand; let DecoderMethod = "decodeSImmOperand<34>"; } diff --git a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td index 91bb912e572697..2c21d0a175ad48 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td +++ b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td @@ -432,7 +432,7 @@ let Predicates = [PrefixInstrs] in { let Interpretation64Bit = 1, isCodeGenOnly = 1 in { defm PADDI8 : MLS_DForm_R_SI34_RTA5_p<14, (outs g8rc:$RT), (ins g8rc:$RA, s34imm:$SI), - (ins immZero:$RA, s34imm_pcrel:$SI), + (ins immZero:$RA, s34imm:$SI), "paddi $RT, 
$RA, $SI", IIC_LdStLFD>; let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in { def PLI8 : MLS_DForm_SI34_RT5<14, (outs g8rc:$RT), @@ -442,7 +442,7 @@ let Predicates = [PrefixInstrs] in { } defm PADDI : MLS_DForm_R_SI34_RTA5_p<14, (outs gprc:$RT), (ins gprc:$RA, s34imm:$SI), - (ins immZero:$RA, s34imm_pcrel:$SI), + (ins immZero:$RA, s34imm:$SI), "paddi $RT, $RA, $SI", IIC_LdStLFD>; let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in { def PLI : MLS_DForm_SI34_RT5<14, (outs gprc:$RT), diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp index e810b3de25bc85..d1c1e541882516 100644 --- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -123,13 +123,8 @@ STATISTIC(NumReassoc , "Number of reassociations"); DEBUG_COUNTER(VisitCounter, "instcombine-visit", "Controls which instructions are visited"); -// FIXME: these limits eventually should be as low as 2. 
static constexpr unsigned InstCombineDefaultMaxIterations = 1000; -#ifndef NDEBUG -static constexpr unsigned InstCombineDefaultInfiniteLoopThreshold = 100; -#else static constexpr unsigned InstCombineDefaultInfiniteLoopThreshold = 1000; -#endif static cl::opt EnableCodeSinking("instcombine-code-sinking", cl::desc("Enable code sinking"), diff --git a/llvm/lib/Transforms/Utils/PredicateInfo.cpp b/llvm/lib/Transforms/Utils/PredicateInfo.cpp index d320f488c5c511..c81efd77aa5ff3 100644 --- a/llvm/lib/Transforms/Utils/PredicateInfo.cpp +++ b/llvm/lib/Transforms/Utils/PredicateInfo.cpp @@ -896,18 +896,21 @@ class PredicateInfoAnnotatedWriter : public AssemblyAnnotationWriter { PB->From->printAsOperand(OS); OS << ","; PB->To->printAsOperand(OS); - OS << "] }\n"; + OS << "]"; } else if (const auto *PS = dyn_cast(PI)) { OS << "; switch predicate info { CaseValue: " << *PS->CaseValue << " Switch:" << *PS->Switch << " Edge: ["; PS->From->printAsOperand(OS); OS << ","; PS->To->printAsOperand(OS); - OS << "] }\n"; + OS << "]"; } else if (const auto *PA = dyn_cast(PI)) { OS << "; assume predicate info {" - << " Comparison:" << *PA->Condition << " }\n"; + << " Comparison:" << *PA->Condition; } + OS << ", RenamedOp: "; + PI->RenamedOp->printAsOperand(OS, false); + OS << " }\n"; } } }; diff --git a/llvm/test/MC/PowerPC/ppc64-errors-emit-obj.s b/llvm/test/MC/PowerPC/ppc64-errors-emit-obj.s deleted file mode 100644 index 0d2c879380e0a6..00000000000000 --- a/llvm/test/MC/PowerPC/ppc64-errors-emit-obj.s +++ /dev/null @@ -1,7 +0,0 @@ -# RUN: not --crash llvm-mc -triple powerpc64-- --filetype=obj < %s 2> %t -# RUN: FileCheck < %t %s -# RUN: not --crash llvm-mc -triple powerpc64le-- --filetype=obj < %s 2> %t -# RUN: FileCheck < %t %s - -# CHECK: Unsupported Modifier for fixup_ppc_imm34. 
-paddi 3, 13, symbol@toc, 0 diff --git a/llvm/test/Reduce/remove-attributes-from-intrinsic-like-functions.ll b/llvm/test/Reduce/remove-attributes-from-intrinsic-like-functions.ll new file mode 100644 index 00000000000000..60df12e94feb26 --- /dev/null +++ b/llvm/test/Reduce/remove-attributes-from-intrinsic-like-functions.ll @@ -0,0 +1,40 @@ +; Just because a function is named like an intrinsic does not mean we should skip it's attributes. +; +; RUN: llvm-reduce --test FileCheck --test-arg --check-prefixes=CHECK-ALL,CHECK-INTERESTINGNESS --test-arg %s --test-arg --input-file %s -o %t +; RUN: cat %t | FileCheck --check-prefixes=CHECK-ALL,CHECK-FINAL %s + +; CHECK-ALL: declare i32 @llvm.not.really.an.intrinsic(i32, i32) #0 +declare i32 @llvm.not.really.an.intrinsic(i32, i32) #0 + +define i32 @t(i32 %a) { +; CHECK-ALL-LABEL: @t( + +; CHECK-INTERESTINGNESS: %r = +; CHECK-INTERESTINGNESS-SAME: call +; CHECK-INTERESTINGNESS-SAME: "arg0" +; CHECK-INTERESTINGNESS-SAME: i32 @llvm.not.really.an.intrinsic(i32 +; CHECK-INTERESTINGNESS-SAME: "arg3" +; CHECK-INTERESTINGNESS-SAME: %a +; CHECK-INTERESTINGNESS-SAME: i32 +; CHECK-INTERESTINGNESS-SAME: %a +; CHECK-INTERESTINGNESS-SAME: #1 + +; CHECK-FINAL: %r = call "arg0" i32 @llvm.not.really.an.intrinsic(i32 "arg3" %a, i32 %a) #1 +; CHECK-ALL: ret i32 %r + + %r = call "arg0" "arg1" i32 @llvm.not.really.an.intrinsic(i32 "arg2" "arg3" %a, i32 %a) "arg4" "arg5" + ret i32 %r +} + +; CHECK-INTERESTINGNESS: attributes #0 = { +; CHECK-INTERESTINGNESS-SAME: "arg6" + +; CHECK-INTERESTINGNESS: attributes #1 = { +; CHECK-INTERESTINGNESS-SAME: "arg4" + +; CHECK-FINAL: attributes #0 = { "arg6" } +; CHECK-FINAL: attributes #1 = { "arg4" } + +; CHECK-ALL-NOT: attributes # + +attributes #0 = { "arg6" "arg7" } diff --git a/llvm/test/Reduce/remove-attributes-from-intrinsics.ll b/llvm/test/Reduce/remove-attributes-from-intrinsics.ll new file mode 100644 index 00000000000000..7a8a8f0eb1146c --- /dev/null +++ 
b/llvm/test/Reduce/remove-attributes-from-intrinsics.ll @@ -0,0 +1,38 @@ +; We can't actually put attributes on intrinsic declarations, only on call sites. +; +; RUN: llvm-reduce --test FileCheck --test-arg --check-prefixes=CHECK-ALL,CHECK-INTERESTINGNESS --test-arg %s --test-arg --input-file %s -o %t +; RUN: cat %t | FileCheck --check-prefixes=CHECK-ALL,CHECK-FINAL %s + +define i32 @t(i32 %a) { +; CHECK-ALL-LABEL: @t( + +; CHECK-INTERESTINGNESS: %r = +; CHECK-INTERESTINGNESS-SAME: call +; CHECK-INTERESTINGNESS-SAME: "arg0" +; CHECK-INTERESTINGNESS-SAME: i32 @llvm.uadd.sat.i32(i32 +; CHECK-INTERESTINGNESS-SAME: "arg3" +; CHECK-INTERESTINGNESS-SAME: %a +; CHECK-INTERESTINGNESS-SAME: i32 +; CHECK-INTERESTINGNESS-SAME: %a +; CHECK-INTERESTINGNESS-SAME: #1 + +; CHECK-FINAL: %r = call "arg0" i32 @llvm.uadd.sat.i32(i32 "arg3" %a, i32 %a) #1 +; CHECK-ALL: ret i32 %r + + %r = call "arg0" "arg1" i32 @llvm.uadd.sat.i32(i32 "arg2" "arg3" %a, i32 %a) "arg4" "arg5" + ret i32 %r +} + +; CHECK-ALL: declare i32 @llvm.uadd.sat.i32(i32, i32) #0 +declare i32 @llvm.uadd.sat.i32(i32, i32) #0 + +; CHECK-ALL: attributes #0 = { nounwind readnone speculatable willreturn } + +; CHECK-INTERESTINGNESS: attributes #1 = { +; CHECK-INTERESTINGNESS-SAME: "arg4" + +; CHECK-FINAL: attributes #1 = { "arg4" } + +; CHECK-ALL-NOT: attributes # + +attributes #0 = { "arg6" "arg7" } diff --git a/llvm/test/Reduce/remove-call-site-attributes.ll b/llvm/test/Reduce/remove-call-site-attributes.ll new file mode 100644 index 00000000000000..e8f50355812abc --- /dev/null +++ b/llvm/test/Reduce/remove-call-site-attributes.ll @@ -0,0 +1,38 @@ +; Test that llvm-reduce can remove uninteresting operand bundles from calls. 
+; +; RUN: llvm-reduce --test FileCheck --test-arg --check-prefixes=CHECK-ALL,CHECK-INTERESTINGNESS --test-arg %s --test-arg --input-file %s -o %t +; RUN: cat %t | FileCheck --check-prefixes=CHECK-ALL,CHECK-FINAL %s + +; CHECK-ALL: declare i32 @f1(i32, i32) +declare i32 @f1(i32, i32) + +; CHECK-FINAL-LABEL: define i32 @interesting(i32 %arg0, i32 %arg1) { +; CHECK-FINAL-NEXT: entry: +; CHECK-FINAL-NEXT: %r = call "attr0" i32 @f1(i32 "attr4" %arg0, i32 %arg1) #0 +; CHECK-FINAL-NEXT: ret i32 %r +; CHECK-FINAL-NEXT: } +define i32 @interesting(i32 %arg0, i32 %arg1) { +entry: +; CHECK-INTERESTINGNESS-LABEL: @interesting( + +; CHECK-INTERESTINGNESS: %r = call +; CHECK-INTERESTINGNESS-SAME: "attr0" +; CHECK-INTERESTINGNESS-SAME: i32 @f1( +; CHECK-INTERESTINGNESS-SAME: i32 +; CHECK-INTERESTINGNESS-SAME: "attr4" +; CHECK-INTERESTINGNESS-SAME: %arg0 +; CHECK-INTERESTINGNESS-SAME: i32 +; CHECK-INTERESTINGNESS-SAME: %arg1 +; CHECK-INTERESTINGNESS-SAME: #0 +; CHECK-INTERESTINGNESS: ret i32 %r + + %r = call "attr0" "attr1" "attr2" i32 @f1(i32 "attr3" "attr4" "attr5" %arg0, i32 "attr6" "attr7" "attr8" %arg1) #0 + ret i32 %r +} + +; CHECK-INTERESTINGNESS: attributes #0 = { +; CHECK-INTERESTINGNESS-SAME: "attr10" + +; CHECK-FINAL: attributes #0 = { "attr10" } + +attributes #0 = { "attr9" "attr10" "attr11" } diff --git a/llvm/test/Reduce/remove-function-attributes.ll b/llvm/test/Reduce/remove-function-attributes.ll new file mode 100644 index 00000000000000..52bbda36f332ac --- /dev/null +++ b/llvm/test/Reduce/remove-function-attributes.ll @@ -0,0 +1,23 @@ +; Test that llvm-reduce can remove uninteresting attributes. 
+; +; RUN: llvm-reduce --test FileCheck --test-arg --check-prefixes=CHECK-ALL,CHECK-INTERESTINGNESS --test-arg %s --test-arg --input-file %s -o %t +; RUN: cat %t | FileCheck --check-prefixes=CHECK-ALL,CHECK-FINAL %s + +; CHECK-INTERESTINGNESS: declare +; CHECK-INTERESTINGNESS-SAME: "attr0" +; CHECK-INTERESTINGNESS-SAME: void @f0 +; CHECK-INTERESTINGNESS-SAME: i32 +; CHECK-INTERESTINGNESS-SAME: i32 +; CHECK-INTERESTINGNESS-SAME: "attr6" +; CHECK-INTERESTINGNESS-SAME: #0 + +; CHECK-FINAL: declare "attr0" void @f0(i32, i32 "attr6") #0 + +declare "attr0" "attr1" "attr2" void @f0(i32 "attr3" "attr4" "attr5", i32 "attr6" "attr7" "attr8") #0 + +; CHECK-INTERESTINGNESS: attributes #0 = { +; CHECK-INTERESTINGNESS-SAME: "attr10" + +; CHECK-FINAL: attributes #0 = { "attr10" } + +attributes #0 = { "attr9" "attr10" "attr11" } diff --git a/llvm/test/Reduce/remove-global-variable-attributes.ll b/llvm/test/Reduce/remove-global-variable-attributes.ll new file mode 100644 index 00000000000000..bec3afd960e903 --- /dev/null +++ b/llvm/test/Reduce/remove-global-variable-attributes.ll @@ -0,0 +1,27 @@ +; Test that llvm-reduce can remove uninteresting attributes. 
+; +; RUN: llvm-reduce --test FileCheck --test-arg --check-prefixes=CHECK-ALL,CHECK-INTERESTINGNESS --test-arg %s --test-arg --input-file %s -o %t +; RUN: cat %t | FileCheck --check-prefixes=CHECK-ALL,CHECK-FINAL %s + +; CHECK-ALL: @gv0 = global i32 0 #0 +; CHECK-ALL-NEXT: @gv1 = global i32 0 #1 +; CHECK-ALL-NEXT: @gv2 = global i32 0 +@gv0 = global i32 0 #0 +@gv1 = global i32 0 #1 +@gv2 = global i32 0 #2 + +; CHECK-INTERESTINGNESS: attributes #0 = { +; CHECK-INTERESTINGNESS-SAME: "attr0" +; CHECK-INTERESTINGNESS-SAME: "attr2" + +; CHECK-INTERESTINGNESS-NEXT: attributes #1 = { +; CHECK-INTERESTINGNESS-SAME: "attr4" + +; CHECK-FINAL: attributes #0 = { "attr0" "attr2" } +; CHECK-FINAL-NEXT: attributes #1 = { "attr4" } + +; CHECK-FINAL-NOT: attributes #2 + +attributes #0 = { "attr0" "attr1" "attr2"} +attributes #1 = { "attr3" "attr4" "attr5"} +attributes #2 = { "attr6" "attr7" "attr8"} diff --git a/llvm/test/Transforms/Util/PredicateInfo/condprop.ll b/llvm/test/Transforms/Util/PredicateInfo/condprop.ll index daf6bb8b40a8ae..756457ab7fa9a9 100644 --- a/llvm/test/Transforms/Util/PredicateInfo/condprop.ll +++ b/llvm/test/Transforms/Util/PredicateInfo/condprop.ll @@ -138,7 +138,7 @@ define void @test4(i1 %b, i32 %x) { ; CHECK-NEXT: i32 2, label [[CASE0]] ; CHECK-NEXT: i32 3, label [[CASE3]] ; CHECK-NEXT: i32 4, label [[DEFAULT:%.*]] -; CHECK-NEXT: ] Edge: [label [[SW]],label %case1] } +; CHECK-NEXT: ] Edge: [label [[SW]],label %case1] ; CHECK-NEXT: [[X_0:%.*]] = call i32 @llvm.ssa.copy.{{.+}}(i32 [[X:%.*]]) ; CHECK-NEXT: switch i32 [[X]], label [[DEFAULT]] [ ; CHECK-NEXT: i32 0, label [[CASE0]] diff --git a/llvm/test/Transforms/Util/PredicateInfo/unnamed-types.ll b/llvm/test/Transforms/Util/PredicateInfo/unnamed-types.ll index 21e702178fd72a..d1e0f358fc9fc1 100644 --- a/llvm/test/Transforms/Util/PredicateInfo/unnamed-types.ll +++ b/llvm/test/Transforms/Util/PredicateInfo/unnamed-types.ll @@ -7,12 +7,12 @@ ; CHECK-LABEL: bb: ; CHECK: Has predicate info -; CHECK: branch 
predicate info { TrueEdge: 1 Comparison: %cmp1 = icmp ne %0* %arg, null Edge: [label %bb,label %bb1] } +; CHECK: branch predicate info { TrueEdge: 1 Comparison: %cmp1 = icmp ne %0* %arg, null Edge: [label %bb,label %bb1], RenamedOp: %arg } ; CHECK-NEXT: %arg.0 = call %0* @llvm.ssa.copy.{{.+}}(%0* %arg) ; CHECK-LABEL: bb1: ; CHECK: Has predicate info -; CHECK-NEXT: branch predicate info { TrueEdge: 0 Comparison: %cmp2 = icmp ne %1* null, %tmp Edge: [label %bb1,label %bb3] } +; CHECK-NEXT: branch predicate info { TrueEdge: 0 Comparison: %cmp2 = icmp ne %1* null, %tmp Edge: [label %bb1,label %bb3], RenamedOp: %tmp } ; CHECK-NEXT: %tmp.0 = call %1* @llvm.ssa.copy.{{.+}}(%1* %tmp) define void @f0(%0* %arg, %1* %tmp) { diff --git a/llvm/tools/llvm-reduce/CMakeLists.txt b/llvm/tools/llvm-reduce/CMakeLists.txt index 24eedac613f544..01b9d0b4afe1ac 100644 --- a/llvm/tools/llvm-reduce/CMakeLists.txt +++ b/llvm/tools/llvm-reduce/CMakeLists.txt @@ -14,6 +14,7 @@ add_llvm_tool(llvm-reduce TestRunner.cpp deltas/Delta.cpp deltas/ReduceArguments.cpp + deltas/ReduceAttributes.cpp deltas/ReduceBasicBlocks.cpp deltas/ReduceFunctions.cpp deltas/ReduceGlobalVars.cpp diff --git a/llvm/tools/llvm-reduce/DeltaManager.h b/llvm/tools/llvm-reduce/DeltaManager.h index 5635352b43d8a1..b1a4ee0df4dbe6 100644 --- a/llvm/tools/llvm-reduce/DeltaManager.h +++ b/llvm/tools/llvm-reduce/DeltaManager.h @@ -14,6 +14,7 @@ #include "TestRunner.h" #include "deltas/Delta.h" #include "deltas/ReduceArguments.h" +#include "deltas/ReduceAttributes.h" #include "deltas/ReduceBasicBlocks.h" #include "deltas/ReduceFunctions.h" #include "deltas/ReduceGlobalVars.h" @@ -32,6 +33,7 @@ inline void runDeltaPasses(TestRunner &Tester) { reduceArgumentsDeltaPass(Tester); reduceInstructionsDeltaPass(Tester); reduceOperandBundesDeltaPass(Tester); + reduceAttributesDeltaPass(Tester); // TODO: Implement the remaining Delta Passes } diff --git a/llvm/tools/llvm-reduce/deltas/ReduceAttributes.cpp 
b/llvm/tools/llvm-reduce/deltas/ReduceAttributes.cpp new file mode 100644 index 00000000000000..cbaf5d5efd346a --- /dev/null +++ b/llvm/tools/llvm-reduce/deltas/ReduceAttributes.cpp @@ -0,0 +1,200 @@ +//===- ReduceAttributes.cpp - Specialized Delta Pass -------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements a function which calls the Generic Delta pass in order +// to reduce uninteresting attributes. +// +//===----------------------------------------------------------------------===// + +#include "ReduceAttributes.h" +#include "Delta.h" +#include "TestRunner.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/Sequence.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/iterator_range.h" +#include "llvm/IR/Attributes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/InstVisitor.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/raw_ostream.h" +#include +#include +#include +#include +#include + +namespace llvm { +class LLVMContext; +} // namespace llvm + +using namespace llvm; + +namespace { + +using AttrPtrVecTy = std::vector; +using AttrPtrIdxVecVecTy = std::pair; +using AttrPtrVecVecTy = SmallVector; + +/// Given ChunksToKeep, produce a map of global variables/functions/calls +/// and indexes of attributes to be preserved for each of them. 
+class AttributeRemapper : public InstVisitor { + Oracle O; + +public: + DenseMap GlobalVariablesToRefine; + DenseMap FunctionsToRefine; + DenseMap CallsToRefine; + + explicit AttributeRemapper(ArrayRef ChunksToKeep) : O(ChunksToKeep) {} + + void visitModule(Module &M) { + for (GlobalVariable &GV : M.getGlobalList()) + visitGlobalVariable(GV); + } + + void visitGlobalVariable(GlobalVariable &GV) { + // Global variables only have one attribute set. + const AttributeSet &AS = GV.getAttributes(); + if (AS.hasAttributes()) + visitAttributeSet(AS, GlobalVariablesToRefine[&GV]); + } + + void visitFunction(Function &F) { + if (F.getIntrinsicID() != Intrinsic::not_intrinsic) + return; // We can neither add nor remove attributes from intrinsics. + visitAttributeList(F.getAttributes(), FunctionsToRefine[&F]); + } + + void visitCallBase(CallBase &I) { + visitAttributeList(I.getAttributes(), CallsToRefine[&I]); + } + + void visitAttributeList(const AttributeList &AL, + AttrPtrVecVecTy &AttributeSetsToPreserve) { + assert(AttributeSetsToPreserve.empty() && "Should not be sharing vectors."); + AttributeSetsToPreserve.reserve(AL.getNumAttrSets()); + for (unsigned SetIdx : seq(AL.index_begin(), AL.index_end())) { + AttrPtrIdxVecVecTy AttributesToPreserve; + AttributesToPreserve.first = SetIdx; + visitAttributeSet(AL.getAttributes(AttributesToPreserve.first), + AttributesToPreserve.second); + if (!AttributesToPreserve.second.empty()) + AttributeSetsToPreserve.emplace_back(std::move(AttributesToPreserve)); + } + } + + void visitAttributeSet(const AttributeSet &AS, + AttrPtrVecTy &AttrsToPreserve) { + assert(AttrsToPreserve.empty() && "Should not be sharing vectors."); + AttrsToPreserve.reserve(AS.getNumAttributes()); + for (const Attribute &A : AS) + if (O.shouldKeep()) + AttrsToPreserve.emplace_back(&A); + } +}; + +struct AttributeCounter : public InstVisitor { + /// How many features (in this case, attributes) did we count, total? 
+ int AttributeCount = 0; + + void visitModule(Module &M) { + for (GlobalVariable &GV : M.getGlobalList()) + visitGlobalVariable(GV); + } + + void visitGlobalVariable(GlobalVariable &GV) { + // Global variables only have one attribute set. + visitAttributeSet(GV.getAttributes()); + } + + void visitFunction(Function &F) { + if (F.getIntrinsicID() != Intrinsic::not_intrinsic) + return; // We can neither add nor remove attributes from intrinsics. + visitAttributeList(F.getAttributes()); + } + + void visitCallBase(CallBase &I) { visitAttributeList(I.getAttributes()); } + + void visitAttributeList(const AttributeList &AL) { + for (const AttributeSet &AS : AL) + visitAttributeSet(AS); + } + + void visitAttributeSet(const AttributeSet &AS) { + AttributeCount += AS.getNumAttributes(); + } +}; + +} // namespace + +AttributeSet +convertAttributeRefToAttributeSet(LLVMContext &C, + ArrayRef Attributes) { + AttrBuilder B; + for (const Attribute *A : Attributes) + B.addAttribute(*A); + return AttributeSet::get(C, B); +} + +AttributeList convertAttributeRefVecToAttributeList( + LLVMContext &C, ArrayRef AttributeSets) { + std::vector> SetVec; + SetVec.reserve(AttributeSets.size()); + + transform(AttributeSets, std::back_inserter(SetVec), + [&C](const AttrPtrIdxVecVecTy &V) { + return std::make_pair( + V.first, convertAttributeRefToAttributeSet(C, V.second)); + }); + + sort(SetVec, [](const std::pair &LHS, + const std::pair &RHS) { + return LHS.first < RHS.first; // All values are unique. + }); + + return AttributeList::get(C, SetVec); +} + +/// Removes out-of-chunk attributes from module. 
+static void extractAttributesFromModule(std::vector ChunksToKeep, + Module *Program) { + AttributeRemapper R(ChunksToKeep); + R.visit(Program); + + LLVMContext &C = Program->getContext(); + for (const auto &I : R.GlobalVariablesToRefine) + I.first->setAttributes(convertAttributeRefToAttributeSet(C, I.second)); + for (const auto &I : R.FunctionsToRefine) + I.first->setAttributes(convertAttributeRefVecToAttributeList(C, I.second)); + for (const auto &I : R.CallsToRefine) + I.first->setAttributes(convertAttributeRefVecToAttributeList(C, I.second)); +} + +/// Counts the amount of attributes. +static int countAttributes(Module *Program) { + AttributeCounter C; + + // TODO: Silence index with --quiet flag + outs() << "----------------------------\n"; + C.visit(Program); + outs() << "Number of attributes: " << C.AttributeCount << "\n"; + + return C.AttributeCount; +} + +void llvm::reduceAttributesDeltaPass(TestRunner &Test) { + outs() << "*** Reducing Attributes...\n"; + int AttributeCount = countAttributes(Test.getProgram()); + runDeltaPass(Test, AttributeCount, extractAttributesFromModule); +} diff --git a/llvm/tools/llvm-reduce/deltas/ReduceAttributes.h b/llvm/tools/llvm-reduce/deltas/ReduceAttributes.h new file mode 100644 index 00000000000000..f8deb045560ff5 --- /dev/null +++ b/llvm/tools/llvm-reduce/deltas/ReduceAttributes.h @@ -0,0 +1,20 @@ +//===- ReduceAttributes.h - Specialized Delta Pass ------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements a function which calls the Generic Delta pass in order +// to reduce uninteresting attributes. 
+// +//===----------------------------------------------------------------------===// + +namespace llvm { + +class TestRunner; + +void reduceAttributesDeltaPass(TestRunner &Test); + +} // namespace llvm diff --git a/llvm/tools/llvm-reduce/deltas/ReduceOperandBundles.cpp b/llvm/tools/llvm-reduce/deltas/ReduceOperandBundles.cpp index 3f1cb374081311..77cb73837c8270 100644 --- a/llvm/tools/llvm-reduce/deltas/ReduceOperandBundles.cpp +++ b/llvm/tools/llvm-reduce/deltas/ReduceOperandBundles.cpp @@ -56,10 +56,9 @@ class OperandBundleRemapper : public InstVisitor { OperandBundlesToKeepIndexes.reserve(Call.getNumOperandBundles()); // Enumerate every operand bundle on this call. - for_each(seq(0U, Call.getNumOperandBundles()), [&](unsigned BundleIndex) { + for (unsigned BundleIndex : seq(0U, Call.getNumOperandBundles())) if (O.shouldKeep()) // Should we keep this one? OperandBundlesToKeepIndexes.emplace_back(BundleIndex); - }); } }; @@ -67,8 +66,6 @@ struct OperandBundleCounter : public InstVisitor { /// How many features (in this case, operand bundles) did we count, total? int OperandBundeCount = 0; - OperandBundleCounter() {} - /// So far only CallBase sub-classes can have operand bundles. void visitCallBase(CallBase &Call) { // Just accumulate the total number of operand bundles. @@ -104,9 +101,8 @@ static void extractOperandBundesFromModule(std::vector ChunksToKeep, OperandBundleRemapper R(ChunksToKeep); R.visit(Program); - for_each(R.CallsToRefine, [](const auto &P) { - return maybeRewriteCallWithDifferentBundles(P.first, P.second); - }); + for (const auto &I : R.CallsToRefine) + maybeRewriteCallWithDifferentBundles(I.first, I.second); } /// Counts the amount of operand bundles. 
diff --git a/llvm/utils/gn/secondary/llvm/tools/llvm-reduce/BUILD.gn b/llvm/utils/gn/secondary/llvm/tools/llvm-reduce/BUILD.gn index efb8e40850c335..a8648d73ca0d14 100644 --- a/llvm/utils/gn/secondary/llvm/tools/llvm-reduce/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/tools/llvm-reduce/BUILD.gn @@ -12,6 +12,7 @@ executable("llvm-reduce") { "TestRunner.cpp", "deltas/Delta.cpp", "deltas/ReduceArguments.cpp", + "deltas/ReduceAttributes.cpp", "deltas/ReduceBasicBlocks.cpp", "deltas/ReduceFunctions.cpp", "deltas/ReduceGlobalVars.cpp",