diff --git a/clang-tools-extra/clangd/IncludeFixer.cpp b/clang-tools-extra/clangd/IncludeFixer.cpp index 7704ccb82c0f02..0fd8db0d116796 100644 --- a/clang-tools-extra/clangd/IncludeFixer.cpp +++ b/clang-tools-extra/clangd/IncludeFixer.cpp @@ -68,10 +68,10 @@ class VisitedContextCollector : public VisibleDeclConsumer { std::vector IncludeFixer::fix(DiagnosticsEngine::Level DiagLevel, const clang::Diagnostic &Info) const { switch (Info.getID()) { - case diag::err_incomplete_type: - case diag::err_incomplete_member_access: - case diag::err_incomplete_base_class: case diag::err_incomplete_nested_name_spec: + case diag::err_incomplete_base_class: + case diag::err_incomplete_member_access: + case diag::err_incomplete_type: // Incomplete type diagnostics should have a QualType argument for the // incomplete type. for (unsigned Idx = 0; Idx < Info.getNumArgs(); ++Idx) { diff --git a/clang-tools-extra/clangd/unittests/DiagnosticsTests.cpp b/clang-tools-extra/clangd/unittests/DiagnosticsTests.cpp index 8fd1fe385a1c8d..e80a135c0a999f 100644 --- a/clang-tools-extra/clangd/unittests/DiagnosticsTests.cpp +++ b/clang-tools-extra/clangd/unittests/DiagnosticsTests.cpp @@ -708,9 +708,11 @@ class Y : $base[[public ns::X]] {}; int main() { ns::X *x; x$access[[->]]f(); + auto& $type[[[]]a] = *x; } )cpp"); auto TU = TestTU::withCode(Test.code()); + TU.ExtraArgs.push_back("-std=c++17"); auto Index = buildIndexWithSymbol( {SymbolWithHeader{"ns::X", "unittest:///x.h", "\"x.h\""}}); TU.ExternalIndex = Index.get(); @@ -731,7 +733,13 @@ int main() { "member access into incomplete type 'ns::X'"), DiagName("incomplete_member_access"), WithFix(Fix(Test.range("insert"), "#include \"x.h\"\n", - "Add include \"x.h\" for symbol ns::X"))))); + "Add include \"x.h\" for symbol ns::X"))), + AllOf( + Diag(Test.range("type"), + "incomplete type 'ns::X' where a complete type is required"), + DiagName("incomplete_type"), + WithFix(Fix(Test.range("insert"), "#include \"x.h\"\n", + "Add include \"x.h\" for 
symbol ns::X"))))); } TEST(IncludeFixerTest, NoSuggestIncludeWhenNoDefinitionInHeader) { diff --git a/clang/include/clang/Driver/Job.h b/clang/include/clang/Driver/Job.h index 6173b9d314b4df..8a348c8048d066 100644 --- a/clang/include/clang/Driver/Job.h +++ b/clang/include/clang/Driver/Job.h @@ -122,6 +122,9 @@ class Command { /// The list of program arguments which are inputs. llvm::opt::ArgStringList InputFilenames; + /// The list of program arguments which are outputs. May be empty. + std::vector OutputFilenames; + /// Response file name, if this command is set to use one, or nullptr /// otherwise const char *ResponseFile = nullptr; @@ -158,8 +161,8 @@ class Command { Command(const Action &Source, const Tool &Creator, ResponseFileSupport ResponseSupport, const char *Executable, - const llvm::opt::ArgStringList &Arguments, - ArrayRef Inputs); + const llvm::opt::ArgStringList &Arguments, ArrayRef Inputs, + ArrayRef Outputs = None); // FIXME: This really shouldn't be copyable, but is currently copied in some // error handling in Driver::generateCompilationDiagnostics. 
Command(const Command &) = default; @@ -201,6 +204,14 @@ class Command { const llvm::opt::ArgStringList &getArguments() const { return Arguments; } + const llvm::opt::ArgStringList &getInputFilenames() const { + return InputFilenames; + } + + const std::vector &getOutputFilenames() const { + return OutputFilenames; + } + protected: /// Optionally print the filenames to be compiled void PrintFileNames() const; @@ -212,7 +223,7 @@ class CC1Command : public Command { CC1Command(const Action &Source, const Tool &Creator, ResponseFileSupport ResponseSupport, const char *Executable, const llvm::opt::ArgStringList &Arguments, - ArrayRef Inputs); + ArrayRef Inputs, ArrayRef Outputs = None); void Print(llvm::raw_ostream &OS, const char *Terminator, bool Quote, CrashReportInfo *CrashInfo = nullptr) const override; @@ -230,7 +241,7 @@ class FallbackCommand : public Command { FallbackCommand(const Action &Source_, const Tool &Creator_, ResponseFileSupport ResponseSupport, const char *Executable_, const llvm::opt::ArgStringList &Arguments_, - ArrayRef Inputs, + ArrayRef Inputs, ArrayRef Outputs, std::unique_ptr Fallback_); void Print(llvm::raw_ostream &OS, const char *Terminator, bool Quote, @@ -250,7 +261,8 @@ class ForceSuccessCommand : public Command { ResponseFileSupport ResponseSupport, const char *Executable_, const llvm::opt::ArgStringList &Arguments_, - ArrayRef Inputs); + ArrayRef Inputs, + ArrayRef Outputs = None); void Print(llvm::raw_ostream &OS, const char *Terminator, bool Quote, CrashReportInfo *CrashInfo = nullptr) const override; diff --git a/clang/lib/Driver/Job.cpp b/clang/lib/Driver/Job.cpp index 4808a9f4628d5e..de2c2350f8d1d2 100644 --- a/clang/lib/Driver/Job.cpp +++ b/clang/lib/Driver/Job.cpp @@ -38,12 +38,15 @@ using namespace driver; Command::Command(const Action &Source, const Tool &Creator, ResponseFileSupport ResponseSupport, const char *Executable, const llvm::opt::ArgStringList &Arguments, - ArrayRef Inputs) + ArrayRef Inputs, ArrayRef Outputs) : 
Source(Source), Creator(Creator), ResponseSupport(ResponseSupport), Executable(Executable), Arguments(Arguments) { for (const auto &II : Inputs) if (II.isFilename()) InputFilenames.push_back(II.getFilename()); + for (const auto &II : Outputs) + if (II.isFilename()) + OutputFilenames.push_back(II.getFilename()); } /// Check if the compiler flag in question should be skipped when @@ -357,8 +360,9 @@ CC1Command::CC1Command(const Action &Source, const Tool &Creator, ResponseFileSupport ResponseSupport, const char *Executable, const llvm::opt::ArgStringList &Arguments, - ArrayRef Inputs) - : Command(Source, Creator, ResponseSupport, Executable, Arguments, Inputs) { + ArrayRef Inputs, ArrayRef Outputs) + : Command(Source, Creator, ResponseSupport, Executable, Arguments, Inputs, + Outputs) { InProcess = true; } @@ -415,9 +419,10 @@ FallbackCommand::FallbackCommand(const Action &Source_, const Tool &Creator_, const char *Executable_, const llvm::opt::ArgStringList &Arguments_, ArrayRef Inputs, + ArrayRef Outputs, std::unique_ptr Fallback_) : Command(Source_, Creator_, ResponseSupport, Executable_, Arguments_, - Inputs), + Inputs, Outputs), Fallback(std::move(Fallback_)) {} void FallbackCommand::Print(raw_ostream &OS, const char *Terminator, @@ -456,9 +461,10 @@ int FallbackCommand::Execute(ArrayRef> Redirects, ForceSuccessCommand::ForceSuccessCommand( const Action &Source_, const Tool &Creator_, ResponseFileSupport ResponseSupport, const char *Executable_, - const llvm::opt::ArgStringList &Arguments_, ArrayRef Inputs) + const llvm::opt::ArgStringList &Arguments_, ArrayRef Inputs, + ArrayRef Outputs) : Command(Source_, Creator_, ResponseSupport, Executable_, Arguments_, - Inputs) {} + Inputs, Outputs) {} void ForceSuccessCommand::Print(raw_ostream &OS, const char *Terminator, bool Quote, CrashReportInfo *CrashInfo) const { diff --git a/clang/lib/Driver/ToolChains/AIX.cpp b/clang/lib/Driver/ToolChains/AIX.cpp index 351b34e8bf90fe..b833ebaebf9257 100644 --- 
a/clang/lib/Driver/ToolChains/AIX.cpp +++ b/clang/lib/Driver/ToolChains/AIX.cpp @@ -71,7 +71,7 @@ void aix::Assembler::ConstructJob(Compilation &C, const JobAction &JA, const char *Exec = Args.MakeArgString(getToolChain().GetProgramPath("as")); C.addCommand(std::make_unique(JA, *this, ResponseFileSupport::None(), - Exec, CmdArgs, Inputs)); + Exec, CmdArgs, Inputs, Output)); } void aix::Linker::ConstructJob(Compilation &C, const JobAction &JA, @@ -170,7 +170,7 @@ void aix::Linker::ConstructJob(Compilation &C, const JobAction &JA, const char *Exec = Args.MakeArgString(ToolChain.GetLinkerPath()); C.addCommand(std::make_unique(JA, *this, ResponseFileSupport::None(), - Exec, CmdArgs, Inputs)); + Exec, CmdArgs, Inputs, Output)); } /// AIX - AIX tool chain which can call as(1) and ld(1) directly. diff --git a/clang/lib/Driver/ToolChains/AMDGPU.cpp b/clang/lib/Driver/ToolChains/AMDGPU.cpp index c6087156642b27..6781045886f20b 100644 --- a/clang/lib/Driver/ToolChains/AMDGPU.cpp +++ b/clang/lib/Driver/ToolChains/AMDGPU.cpp @@ -356,9 +356,9 @@ void amdgpu::Linker::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back("-shared"); CmdArgs.push_back("-o"); CmdArgs.push_back(Output.getFilename()); - C.addCommand( - std::make_unique(JA, *this, ResponseFileSupport::AtFileCurCP(), - Args.MakeArgString(Linker), CmdArgs, Inputs)); + C.addCommand(std::make_unique( + JA, *this, ResponseFileSupport::AtFileCurCP(), Args.MakeArgString(Linker), + CmdArgs, Inputs, Output)); } void amdgpu::getAMDGPUTargetFeatures(const Driver &D, diff --git a/clang/lib/Driver/ToolChains/AVR.cpp b/clang/lib/Driver/ToolChains/AVR.cpp index 092bade53c6352..02b745c6a2056b 100644 --- a/clang/lib/Driver/ToolChains/AVR.cpp +++ b/clang/lib/Driver/ToolChains/AVR.cpp @@ -142,9 +142,9 @@ void AVR::Linker::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back(Args.MakeArgString(std::string("-m") + *FamilyName)); } - C.addCommand( - std::make_unique(JA, *this, ResponseFileSupport::AtFileCurCP(), 
- Args.MakeArgString(Linker), CmdArgs, Inputs)); + C.addCommand(std::make_unique( + JA, *this, ResponseFileSupport::AtFileCurCP(), Args.MakeArgString(Linker), + CmdArgs, Inputs, Output)); } llvm::Optional AVRToolChain::findAVRLibcInstallation() const { diff --git a/clang/lib/Driver/ToolChains/Ananas.cpp b/clang/lib/Driver/ToolChains/Ananas.cpp index a4141a57acccb1..e5e33fe24874eb 100644 --- a/clang/lib/Driver/ToolChains/Ananas.cpp +++ b/clang/lib/Driver/ToolChains/Ananas.cpp @@ -39,8 +39,9 @@ void ananas::Assembler::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back(II.getFilename()); const char *Exec = Args.MakeArgString(getToolChain().GetProgramPath("as")); - C.addCommand(std::make_unique( - JA, *this, ResponseFileSupport::AtFileCurCP(), Exec, CmdArgs, Inputs)); + C.addCommand(std::make_unique(JA, *this, + ResponseFileSupport::AtFileCurCP(), + Exec, CmdArgs, Inputs, Output)); } void ananas::Linker::ConstructJob(Compilation &C, const JobAction &JA, @@ -124,8 +125,9 @@ void ananas::Linker::ConstructJob(Compilation &C, const JobAction &JA, } const char *Exec = Args.MakeArgString(ToolChain.GetLinkerPath()); - C.addCommand(std::make_unique( - JA, *this, ResponseFileSupport::AtFileCurCP(), Exec, CmdArgs, Inputs)); + C.addCommand(std::make_unique(JA, *this, + ResponseFileSupport::AtFileCurCP(), + Exec, CmdArgs, Inputs, Output)); } // Ananas - Ananas tool chain which can call as(1) and ld(1) directly. 
diff --git a/clang/lib/Driver/ToolChains/BareMetal.cpp b/clang/lib/Driver/ToolChains/BareMetal.cpp index 61839a9e31b0b5..6ed81c1e34a120 100644 --- a/clang/lib/Driver/ToolChains/BareMetal.cpp +++ b/clang/lib/Driver/ToolChains/BareMetal.cpp @@ -202,5 +202,5 @@ void baremetal::Linker::ConstructJob(Compilation &C, const JobAction &JA, C.addCommand(std::make_unique(JA, *this, ResponseFileSupport::None(), Args.MakeArgString(TC.GetLinkerPath()), - CmdArgs, Inputs)); + CmdArgs, Inputs, Output)); } diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 630b39d1e769ed..b37dcfee1a3ec5 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -4356,9 +4356,9 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, II.getInputArg().renderAsInput(Args, CmdArgs); } - C.addCommand( - std::make_unique(JA, *this, ResponseFileSupport::AtFileUTF8(), - D.getClangProgramPath(), CmdArgs, Inputs)); + C.addCommand(std::make_unique( + JA, *this, ResponseFileSupport::AtFileUTF8(), D.getClangProgramPath(), + CmdArgs, Inputs, Output)); return; } @@ -6314,20 +6314,23 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, getCLFallback()->GetCommand(C, JA, Output, Inputs, Args, LinkingOutput); C.addCommand(std::make_unique( JA, *this, ResponseFileSupport::AtFileUTF8(), Exec, CmdArgs, Inputs, - std::move(CLCommand))); + Output, std::move(CLCommand))); } else if (Args.hasArg(options::OPT__SLASH_fallback) && isa(JA)) { // In /fallback builds, run the main compilation even if the pch generation // fails, so that the main compilation's fallback to cl.exe runs. 
C.addCommand(std::make_unique( - JA, *this, ResponseFileSupport::AtFileUTF8(), Exec, CmdArgs, Inputs)); + JA, *this, ResponseFileSupport::AtFileUTF8(), Exec, CmdArgs, Inputs, + Output)); } else if (D.CC1Main && !D.CCGenDiagnostics) { // Invoke the CC1 directly in this process - C.addCommand(std::make_unique( - JA, *this, ResponseFileSupport::AtFileUTF8(), Exec, CmdArgs, Inputs)); + C.addCommand(std::make_unique(JA, *this, + ResponseFileSupport::AtFileUTF8(), + Exec, CmdArgs, Inputs, Output)); } else { - C.addCommand(std::make_unique( - JA, *this, ResponseFileSupport::AtFileUTF8(), Exec, CmdArgs, Inputs)); + C.addCommand(std::make_unique(JA, *this, + ResponseFileSupport::AtFileUTF8(), + Exec, CmdArgs, Inputs, Output)); } // Make the compile command echo its inputs for /showFilenames. @@ -7074,8 +7077,9 @@ void ClangAs::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back(Input.getFilename()); const char *Exec = getToolChain().getDriver().getClangProgramPath(); - C.addCommand(std::make_unique( - JA, *this, ResponseFileSupport::AtFileUTF8(), Exec, CmdArgs, Inputs)); + C.addCommand(std::make_unique(JA, *this, + ResponseFileSupport::AtFileUTF8(), + Exec, CmdArgs, Inputs, Output)); } // Begin OffloadBundler @@ -7161,7 +7165,7 @@ void OffloadBundler::ConstructJob(Compilation &C, const JobAction &JA, C.addCommand(std::make_unique( JA, *this, ResponseFileSupport::None(), TCArgs.MakeArgString(getToolChain().GetProgramPath(getShortName())), - CmdArgs, None)); + CmdArgs, None, Output)); } void OffloadBundler::ConstructJobMultipleOutputs( @@ -7227,7 +7231,7 @@ void OffloadBundler::ConstructJobMultipleOutputs( C.addCommand(std::make_unique( JA, *this, ResponseFileSupport::None(), TCArgs.MakeArgString(getToolChain().GetProgramPath(getShortName())), - CmdArgs, None)); + CmdArgs, None, Outputs)); } void OffloadWrapper::ConstructJob(Compilation &C, const JobAction &JA, @@ -7257,5 +7261,5 @@ void OffloadWrapper::ConstructJob(Compilation &C, const JobAction &JA, 
C.addCommand(std::make_unique( JA, *this, ResponseFileSupport::None(), Args.MakeArgString(getToolChain().GetProgramPath(getShortName())), - CmdArgs, Inputs)); + CmdArgs, Inputs, Output)); } diff --git a/clang/lib/Driver/ToolChains/CloudABI.cpp b/clang/lib/Driver/ToolChains/CloudABI.cpp index 8dcfd4951bbfe4..3efca8776260a6 100644 --- a/clang/lib/Driver/ToolChains/CloudABI.cpp +++ b/clang/lib/Driver/ToolChains/CloudABI.cpp @@ -92,8 +92,9 @@ void cloudabi::Linker::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath("crtend.o"))); const char *Exec = Args.MakeArgString(ToolChain.GetLinkerPath()); - C.addCommand(std::make_unique( - JA, *this, ResponseFileSupport::AtFileCurCP(), Exec, CmdArgs, Inputs)); + C.addCommand(std::make_unique(JA, *this, + ResponseFileSupport::AtFileCurCP(), + Exec, CmdArgs, Inputs, Output)); } // CloudABI - CloudABI tool chain which can call ld(1) directly. diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp index e3723e213c52f6..692d0600bad35e 100644 --- a/clang/lib/Driver/ToolChains/CommonArgs.cpp +++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp @@ -951,12 +951,13 @@ void tools::SplitDebugInfo(const ToolChain &TC, Compilation &C, const Tool &T, InputInfo II(types::TY_Object, Output.getFilename(), Output.getFilename()); // First extract the dwo sections. - C.addCommand(std::make_unique( - JA, T, ResponseFileSupport::AtFileCurCP(), Exec, ExtractArgs, II)); + C.addCommand(std::make_unique(JA, T, + ResponseFileSupport::AtFileCurCP(), + Exec, ExtractArgs, II, Output)); // Then remove them from the original .o file. C.addCommand(std::make_unique( - JA, T, ResponseFileSupport::AtFileCurCP(), Exec, StripArgs, II)); + JA, T, ResponseFileSupport::AtFileCurCP(), Exec, StripArgs, II, Output)); } // Claim options we don't want to warn if they are unused. 
We do this for diff --git a/clang/lib/Driver/ToolChains/CrossWindows.cpp b/clang/lib/Driver/ToolChains/CrossWindows.cpp index 127a8a5f24cce5..28ad6c59c655cd 100644 --- a/clang/lib/Driver/ToolChains/CrossWindows.cpp +++ b/clang/lib/Driver/ToolChains/CrossWindows.cpp @@ -58,7 +58,7 @@ void tools::CrossWindows::Assembler::ConstructJob( Exec = Args.MakeArgString(Assembler); C.addCommand(std::make_unique(JA, *this, ResponseFileSupport::None(), - Exec, CmdArgs, Inputs)); + Exec, CmdArgs, Inputs, Output)); } void tools::CrossWindows::Linker::ConstructJob( @@ -203,8 +203,9 @@ void tools::CrossWindows::Linker::ConstructJob( Exec = Args.MakeArgString(TC.GetLinkerPath()); - C.addCommand(std::make_unique( - JA, *this, ResponseFileSupport::AtFileUTF8(), Exec, CmdArgs, Inputs)); + C.addCommand(std::make_unique(JA, *this, + ResponseFileSupport::AtFileUTF8(), + Exec, CmdArgs, Inputs, Output)); } CrossWindowsToolChain::CrossWindowsToolChain(const Driver &D, diff --git a/clang/lib/Driver/ToolChains/Cuda.cpp b/clang/lib/Driver/ToolChains/Cuda.cpp index d7933534a5d3d1..217a0155a52d3c 100644 --- a/clang/lib/Driver/ToolChains/Cuda.cpp +++ b/clang/lib/Driver/ToolChains/Cuda.cpp @@ -427,7 +427,7 @@ void NVPTX::Assembler::ConstructJob(Compilation &C, const JobAction &JA, JA, *this, ResponseFileSupport{ResponseFileSupport::RF_Full, llvm::sys::WEM_UTF8, "--options-file"}, - Exec, CmdArgs, Inputs)); + Exec, CmdArgs, Inputs, Output)); } static bool shouldIncludePTX(const ArgList &Args, const char *gpu_arch) { @@ -496,7 +496,7 @@ void NVPTX::Linker::ConstructJob(Compilation &C, const JobAction &JA, JA, *this, ResponseFileSupport{ResponseFileSupport::RF_Full, llvm::sys::WEM_UTF8, "--options-file"}, - Exec, CmdArgs, Inputs)); + Exec, CmdArgs, Inputs, Output)); } void NVPTX::OpenMPLinker::ConstructJob(Compilation &C, const JobAction &JA, @@ -577,7 +577,7 @@ void NVPTX::OpenMPLinker::ConstructJob(Compilation &C, const JobAction &JA, JA, *this, ResponseFileSupport{ResponseFileSupport::RF_Full, 
llvm::sys::WEM_UTF8, "--options-file"}, - Exec, CmdArgs, Inputs)); + Exec, CmdArgs, Inputs, Output)); } /// CUDA toolchain. Our assembler is ptxas, and our "linker" is fatbinary, diff --git a/clang/lib/Driver/ToolChains/Darwin.cpp b/clang/lib/Driver/ToolChains/Darwin.cpp index 8f2be2a343cc5e..0d9e471ec07096 100644 --- a/clang/lib/Driver/ToolChains/Darwin.cpp +++ b/clang/lib/Driver/ToolChains/Darwin.cpp @@ -149,7 +149,7 @@ void darwin::Assembler::ConstructJob(Compilation &C, const JobAction &JA, const char *Exec = Args.MakeArgString(getToolChain().GetProgramPath("as")); C.addCommand(std::make_unique(JA, *this, ResponseFileSupport::None(), - Exec, CmdArgs, Inputs)); + Exec, CmdArgs, Inputs, Output)); } void darwin::MachOTool::anchor() {} @@ -522,7 +522,7 @@ void darwin::Linker::ConstructJob(Compilation &C, const JobAction &JA, Args.MakeArgString(getToolChain().GetProgramPath("touch")); CmdArgs.push_back(Output.getFilename()); C.addCommand(std::make_unique( - JA, *this, ResponseFileSupport::None(), Exec, CmdArgs, None)); + JA, *this, ResponseFileSupport::None(), Exec, CmdArgs, None, Output)); return; } @@ -695,7 +695,7 @@ void darwin::Linker::ConstructJob(Compilation &C, const JobAction &JA, const char *Exec = Args.MakeArgString(getToolChain().GetLinkerPath()); std::unique_ptr Cmd = std::make_unique( - JA, *this, ResponseSupport, Exec, CmdArgs, Inputs); + JA, *this, ResponseSupport, Exec, CmdArgs, Inputs, Output); Cmd->setInputFileList(std::move(InputFileList)); C.addCommand(std::move(Cmd)); } @@ -720,7 +720,7 @@ void darwin::Lipo::ConstructJob(Compilation &C, const JobAction &JA, const char *Exec = Args.MakeArgString(getToolChain().GetProgramPath("lipo")); C.addCommand(std::make_unique(JA, *this, ResponseFileSupport::None(), - Exec, CmdArgs, Inputs)); + Exec, CmdArgs, Inputs, Output)); } void darwin::Dsymutil::ConstructJob(Compilation &C, const JobAction &JA, @@ -741,7 +741,7 @@ void darwin::Dsymutil::ConstructJob(Compilation &C, const JobAction &JA, const char *Exec 
= Args.MakeArgString(getToolChain().GetProgramPath("dsymutil")); C.addCommand(std::make_unique(JA, *this, ResponseFileSupport::None(), - Exec, CmdArgs, Inputs)); + Exec, CmdArgs, Inputs, Output)); } void darwin::VerifyDebug::ConstructJob(Compilation &C, const JobAction &JA, @@ -765,7 +765,7 @@ void darwin::VerifyDebug::ConstructJob(Compilation &C, const JobAction &JA, const char *Exec = Args.MakeArgString(getToolChain().GetProgramPath("dwarfdump")); C.addCommand(std::make_unique(JA, *this, ResponseFileSupport::None(), - Exec, CmdArgs, Inputs)); + Exec, CmdArgs, Inputs, Output)); } MachO::MachO(const Driver &D, const llvm::Triple &Triple, const ArgList &Args) diff --git a/clang/lib/Driver/ToolChains/DragonFly.cpp b/clang/lib/Driver/ToolChains/DragonFly.cpp index 88dd0c899d8a89..08176e507eed07 100644 --- a/clang/lib/Driver/ToolChains/DragonFly.cpp +++ b/clang/lib/Driver/ToolChains/DragonFly.cpp @@ -45,8 +45,9 @@ void dragonfly::Assembler::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back(II.getFilename()); const char *Exec = Args.MakeArgString(getToolChain().GetProgramPath("as")); - C.addCommand(std::make_unique( - JA, *this, ResponseFileSupport::AtFileCurCP(), Exec, CmdArgs, Inputs)); + C.addCommand(std::make_unique(JA, *this, + ResponseFileSupport::AtFileCurCP(), + Exec, CmdArgs, Inputs, Output)); } void dragonfly::Linker::ConstructJob(Compilation &C, const JobAction &JA, @@ -170,8 +171,9 @@ void dragonfly::Linker::ConstructJob(Compilation &C, const JobAction &JA, getToolChain().addProfileRTLibs(Args, CmdArgs); const char *Exec = Args.MakeArgString(getToolChain().GetLinkerPath()); - C.addCommand(std::make_unique( - JA, *this, ResponseFileSupport::AtFileCurCP(), Exec, CmdArgs, Inputs)); + C.addCommand(std::make_unique(JA, *this, + ResponseFileSupport::AtFileCurCP(), + Exec, CmdArgs, Inputs, Output)); } /// DragonFly - DragonFly tool chain which can call as(1) and ld(1) directly. 
diff --git a/clang/lib/Driver/ToolChains/Flang.cpp b/clang/lib/Driver/ToolChains/Flang.cpp index 93401c66266304..f8633b988faa50 100644 --- a/clang/lib/Driver/ToolChains/Flang.cpp +++ b/clang/lib/Driver/ToolChains/Flang.cpp @@ -72,8 +72,9 @@ void Flang::ConstructJob(Compilation &C, const JobAction &JA, // TODO: Replace flang-new with flang once the new driver replaces the // throwaway driver const char *Exec = Args.MakeArgString(D.GetProgramPath("flang-new", TC)); - C.addCommand(std::make_unique( - JA, *this, ResponseFileSupport::AtFileUTF8(), Exec, CmdArgs, Inputs)); + C.addCommand(std::make_unique(JA, *this, + ResponseFileSupport::AtFileUTF8(), + Exec, CmdArgs, Inputs, Output)); } Flang::Flang(const ToolChain &TC) : Tool("flang-new", "flang frontend", TC) {} diff --git a/clang/lib/Driver/ToolChains/FreeBSD.cpp b/clang/lib/Driver/ToolChains/FreeBSD.cpp index 909ac5e992129c..5854defca48819 100644 --- a/clang/lib/Driver/ToolChains/FreeBSD.cpp +++ b/clang/lib/Driver/ToolChains/FreeBSD.cpp @@ -128,8 +128,9 @@ void freebsd::Assembler::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back(II.getFilename()); const char *Exec = Args.MakeArgString(getToolChain().GetProgramPath("as")); - C.addCommand(std::make_unique( - JA, *this, ResponseFileSupport::AtFileCurCP(), Exec, CmdArgs, Inputs)); + C.addCommand(std::make_unique(JA, *this, + ResponseFileSupport::AtFileCurCP(), + Exec, CmdArgs, Inputs, Output)); } void freebsd::Linker::ConstructJob(Compilation &C, const JobAction &JA, @@ -359,8 +360,9 @@ void freebsd::Linker::ConstructJob(Compilation &C, const JobAction &JA, ToolChain.addProfileRTLibs(Args, CmdArgs); const char *Exec = Args.MakeArgString(getToolChain().GetLinkerPath()); - C.addCommand(std::make_unique( - JA, *this, ResponseFileSupport::AtFileCurCP(), Exec, CmdArgs, Inputs)); + C.addCommand(std::make_unique(JA, *this, + ResponseFileSupport::AtFileCurCP(), + Exec, CmdArgs, Inputs, Output)); } /// FreeBSD - FreeBSD tool chain which can call as(1) and 
ld(1) directly. diff --git a/clang/lib/Driver/ToolChains/Fuchsia.cpp b/clang/lib/Driver/ToolChains/Fuchsia.cpp index 781179be39a364..79d3a8d554ded2 100644 --- a/clang/lib/Driver/ToolChains/Fuchsia.cpp +++ b/clang/lib/Driver/ToolChains/Fuchsia.cpp @@ -167,7 +167,7 @@ void fuchsia::Linker::ConstructJob(Compilation &C, const JobAction &JA, } C.addCommand(std::make_unique(JA, *this, ResponseFileSupport::None(), - Exec, CmdArgs, Inputs)); + Exec, CmdArgs, Inputs, Output)); } /// Fuchsia - Fuchsia tool chain which can call as(1) and ld(1) directly. diff --git a/clang/lib/Driver/ToolChains/Gnu.cpp b/clang/lib/Driver/ToolChains/Gnu.cpp index f3843685a522be..7d75e90c6092f5 100644 --- a/clang/lib/Driver/ToolChains/Gnu.cpp +++ b/clang/lib/Driver/ToolChains/Gnu.cpp @@ -171,8 +171,9 @@ void tools::gcc::Common::ConstructJob(Compilation &C, const JobAction &JA, GCCName = "gcc"; const char *Exec = Args.MakeArgString(getToolChain().GetProgramPath(GCCName)); - C.addCommand(std::make_unique( - JA, *this, ResponseFileSupport::AtFileCurCP(), Exec, CmdArgs, Inputs)); + C.addCommand(std::make_unique(JA, *this, + ResponseFileSupport::AtFileCurCP(), + Exec, CmdArgs, Inputs, Output)); } void tools::gcc::Preprocessor::RenderExtraToolArgs( @@ -364,8 +365,9 @@ void tools::gnutools::StaticLibTool::ConstructJob( } const char *Exec = Args.MakeArgString(getToolChain().GetStaticLibToolPath()); - C.addCommand(std::make_unique( - JA, *this, ResponseFileSupport::AtFileCurCP(), Exec, CmdArgs, Inputs)); + C.addCommand(std::make_unique(JA, *this, + ResponseFileSupport::AtFileCurCP(), + Exec, CmdArgs, Inputs, Output)); } void tools::gnutools::Linker::ConstructJob(Compilation &C, const JobAction &JA, @@ -662,8 +664,9 @@ void tools::gnutools::Linker::ConstructJob(Compilation &C, const JobAction &JA, Args.AddAllArgs(CmdArgs, options::OPT_T); const char *Exec = Args.MakeArgString(ToolChain.GetLinkerPath()); - C.addCommand(std::make_unique( - JA, *this, ResponseFileSupport::AtFileCurCP(), Exec, CmdArgs, 
Inputs)); + C.addCommand(std::make_unique(JA, *this, + ResponseFileSupport::AtFileCurCP(), + Exec, CmdArgs, Inputs, Output)); } void tools::gnutools::Assembler::ConstructJob(Compilation &C, @@ -930,8 +933,9 @@ void tools::gnutools::Assembler::ConstructJob(Compilation &C, const char *Exec = Args.MakeArgString(getToolChain().GetProgramPath(DefaultAssembler)); - C.addCommand(std::make_unique( - JA, *this, ResponseFileSupport::AtFileCurCP(), Exec, CmdArgs, Inputs)); + C.addCommand(std::make_unique(JA, *this, + ResponseFileSupport::AtFileCurCP(), + Exec, CmdArgs, Inputs, Output)); // Handle the debug info splitting at object creation time if we're // creating an object. diff --git a/clang/lib/Driver/ToolChains/HIP.cpp b/clang/lib/Driver/ToolChains/HIP.cpp index 4d1e0f9f2fdfc7..a06835eee0243e 100644 --- a/clang/lib/Driver/ToolChains/HIP.cpp +++ b/clang/lib/Driver/ToolChains/HIP.cpp @@ -98,7 +98,7 @@ void AMDGCN::Linker::constructLldCommand(Compilation &C, const JobAction &JA, LldArgs.push_back(Input.getFilename()); const char *Lld = Args.MakeArgString(getToolChain().GetProgramPath("lld")); C.addCommand(std::make_unique(JA, *this, ResponseFileSupport::None(), - Lld, LldArgs, Inputs)); + Lld, LldArgs, Inputs, Output)); } // Construct a clang-offload-bundler command to bundle code objects for @@ -127,14 +127,16 @@ void AMDGCN::constructHIPFatbinCommand(Compilation &C, const JobAction &JA, BundlerArgs.push_back(Args.MakeArgString(BundlerTargetArg)); BundlerArgs.push_back(Args.MakeArgString(BundlerInputArg)); - auto BundlerOutputArg = Args.MakeArgString( - std::string("-outputs=").append(std::string(OutputFileName))); + std::string Output = std::string(OutputFileName); + auto BundlerOutputArg = + Args.MakeArgString(std::string("-outputs=").append(Output)); BundlerArgs.push_back(BundlerOutputArg); const char *Bundler = Args.MakeArgString( T.getToolChain().GetProgramPath("clang-offload-bundler")); - C.addCommand(std::make_unique(JA, T, ResponseFileSupport::None(), - Bundler, 
BundlerArgs, Inputs)); + C.addCommand(std::make_unique( + JA, T, ResponseFileSupport::None(), Bundler, BundlerArgs, Inputs, + InputInfo(&JA, Args.MakeArgString(Output)))); } /// Add Generated HIP Object File which has device images embedded into the @@ -205,7 +207,7 @@ void AMDGCN::Linker::constructGenerateObjFileFromHIPFatBinary( McinFile, "--filetype=obj"}; const char *Mc = Args.MakeArgString(TC.GetProgramPath("llvm-mc")); C.addCommand(std::make_unique(JA, *this, ResponseFileSupport::None(), - Mc, McArgs, Inputs)); + Mc, McArgs, Inputs, Output)); } // For amdgcn the inputs of the linker job are device bitcode and output is diff --git a/clang/lib/Driver/ToolChains/Hexagon.cpp b/clang/lib/Driver/ToolChains/Hexagon.cpp index 775f6e1094fa66..fb54f73bcd4c89 100644 --- a/clang/lib/Driver/ToolChains/Hexagon.cpp +++ b/clang/lib/Driver/ToolChains/Hexagon.cpp @@ -189,8 +189,9 @@ void hexagon::Assembler::ConstructJob(Compilation &C, const JobAction &JA, } auto *Exec = Args.MakeArgString(HTC.GetProgramPath(AsName)); - C.addCommand(std::make_unique( - JA, *this, ResponseFileSupport::AtFileCurCP(), Exec, CmdArgs, Inputs)); + C.addCommand(std::make_unique(JA, *this, + ResponseFileSupport::AtFileCurCP(), + Exec, CmdArgs, Inputs, Output)); } void hexagon::Linker::RenderExtraToolArgs(const JobAction &JA, @@ -407,8 +408,9 @@ void hexagon::Linker::ConstructJob(Compilation &C, const JobAction &JA, LinkingOutput); const char *Exec = Args.MakeArgString(HTC.GetLinkerPath()); - C.addCommand(std::make_unique( - JA, *this, ResponseFileSupport::AtFileCurCP(), Exec, CmdArgs, Inputs)); + C.addCommand(std::make_unique(JA, *this, + ResponseFileSupport::AtFileCurCP(), + Exec, CmdArgs, Inputs, Output)); } // Hexagon tools end. 
diff --git a/clang/lib/Driver/ToolChains/InterfaceStubs.cpp b/clang/lib/Driver/ToolChains/InterfaceStubs.cpp index f7c11421e80945..57acf338df5c42 100644 --- a/clang/lib/Driver/ToolChains/InterfaceStubs.cpp +++ b/clang/lib/Driver/ToolChains/InterfaceStubs.cpp @@ -56,7 +56,7 @@ void Merger::ConstructJob(Compilation &C, const JobAction &JA, C.addCommand(std::make_unique(JA, *this, ResponseFileSupport::None(), Args.MakeArgString(Merger), CmdArgs, - Inputs)); + Inputs, Output)); } } // namespace ifstool } // namespace tools diff --git a/clang/lib/Driver/ToolChains/MSP430.cpp b/clang/lib/Driver/ToolChains/MSP430.cpp index 6d663e4909e596..f3ed9967a81a1d 100644 --- a/clang/lib/Driver/ToolChains/MSP430.cpp +++ b/clang/lib/Driver/ToolChains/MSP430.cpp @@ -312,7 +312,7 @@ void msp430::Linker::ConstructJob(Compilation &C, const JobAction &JA, Args.AddAllArgs(CmdArgs, options::OPT_T); - C.addCommand( - std::make_unique(JA, *this, ResponseFileSupport::AtFileCurCP(), - Args.MakeArgString(Linker), CmdArgs, Inputs)); + C.addCommand(std::make_unique( + JA, *this, ResponseFileSupport::AtFileCurCP(), Args.MakeArgString(Linker), + CmdArgs, Inputs, Output)); } diff --git a/clang/lib/Driver/ToolChains/MSVC.cpp b/clang/lib/Driver/ToolChains/MSVC.cpp index 7faccdff6beed0..ba2c7146b924eb 100644 --- a/clang/lib/Driver/ToolChains/MSVC.cpp +++ b/clang/lib/Driver/ToolChains/MSVC.cpp @@ -606,9 +606,9 @@ void visualstudio::Linker::ConstructJob(Compilation &C, const JobAction &JA, linkPath = TC.GetProgramPath(Linker.str().c_str()); } - auto LinkCmd = - std::make_unique(JA, *this, ResponseFileSupport::AtFileUTF16(), - Args.MakeArgString(linkPath), CmdArgs, Inputs); + auto LinkCmd = std::make_unique( + JA, *this, ResponseFileSupport::AtFileUTF16(), + Args.MakeArgString(linkPath), CmdArgs, Inputs, Output); if (!Environment.empty()) LinkCmd->setEnvironment(Environment); C.addCommand(std::move(LinkCmd)); @@ -748,9 +748,9 @@ std::unique_ptr visualstudio::Compiler::GetCommand( CmdArgs.push_back(Fo); 
std::string Exec = FindVisualStudioExecutable(getToolChain(), "cl.exe"); - return std::make_unique(JA, *this, - ResponseFileSupport::AtFileUTF16(), - Args.MakeArgString(Exec), CmdArgs, Inputs); + return std::make_unique( + JA, *this, ResponseFileSupport::AtFileUTF16(), Args.MakeArgString(Exec), + CmdArgs, Inputs, Output); } MSVCToolChain::MSVCToolChain(const Driver &D, const llvm::Triple &Triple, diff --git a/clang/lib/Driver/ToolChains/MinGW.cpp b/clang/lib/Driver/ToolChains/MinGW.cpp index 4267af60bf031e..49fef4298bfecd 100644 --- a/clang/lib/Driver/ToolChains/MinGW.cpp +++ b/clang/lib/Driver/ToolChains/MinGW.cpp @@ -51,7 +51,7 @@ void tools::MinGW::Assembler::ConstructJob(Compilation &C, const JobAction &JA, const char *Exec = Args.MakeArgString(getToolChain().GetProgramPath("as")); C.addCommand(std::make_unique(JA, *this, ResponseFileSupport::None(), - Exec, CmdArgs, Inputs)); + Exec, CmdArgs, Inputs, Output)); if (Args.hasArg(options::OPT_gsplit_dwarf)) SplitDebugInfo(getToolChain(), C, *this, JA, Args, Output, @@ -167,9 +167,10 @@ void tools::MinGW::Linker::ConstructJob(Compilation &C, const JobAction &JA, // that lacks an extension. // GCC used to do this only when the compiler itself runs on windows, but // since GCC 8 it does the same when cross compiling as well. 
- if (!llvm::sys::path::has_extension(OutputFile)) + if (!llvm::sys::path::has_extension(OutputFile)) { CmdArgs.push_back(Args.MakeArgString(Twine(OutputFile) + ".exe")); - else + OutputFile = CmdArgs.back(); + } else CmdArgs.push_back(OutputFile); Args.AddAllArgs(CmdArgs, options::OPT_e); @@ -318,8 +319,9 @@ void tools::MinGW::Linker::ConstructJob(Compilation &C, const JobAction &JA, } } const char *Exec = Args.MakeArgString(TC.GetLinkerPath()); - C.addCommand(std::make_unique( - JA, *this, ResponseFileSupport::AtFileUTF8(), Exec, CmdArgs, Inputs)); + C.addCommand(std::make_unique(JA, *this, + ResponseFileSupport::AtFileUTF8(), + Exec, CmdArgs, Inputs, Output)); } // Simplified from Generic_GCC::GCCInstallationDetector::ScanLibDirForGCCTriple. diff --git a/clang/lib/Driver/ToolChains/Minix.cpp b/clang/lib/Driver/ToolChains/Minix.cpp index d0314795620ce1..44479a24ebe78b 100644 --- a/clang/lib/Driver/ToolChains/Minix.cpp +++ b/clang/lib/Driver/ToolChains/Minix.cpp @@ -36,8 +36,9 @@ void tools::minix::Assembler::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back(II.getFilename()); const char *Exec = Args.MakeArgString(getToolChain().GetProgramPath("as")); - C.addCommand(std::make_unique( - JA, *this, ResponseFileSupport::AtFileCurCP(), Exec, CmdArgs, Inputs)); + C.addCommand(std::make_unique(JA, *this, + ResponseFileSupport::AtFileCurCP(), + Exec, CmdArgs, Inputs, Output)); } void tools::minix::Linker::ConstructJob(Compilation &C, const JobAction &JA, @@ -89,8 +90,9 @@ void tools::minix::Linker::ConstructJob(Compilation &C, const JobAction &JA, } const char *Exec = Args.MakeArgString(getToolChain().GetLinkerPath()); - C.addCommand(std::make_unique( - JA, *this, ResponseFileSupport::AtFileCurCP(), Exec, CmdArgs, Inputs)); + C.addCommand(std::make_unique(JA, *this, + ResponseFileSupport::AtFileCurCP(), + Exec, CmdArgs, Inputs, Output)); } /// Minix - Minix tool chain which can call as(1) and ld(1) directly. 
diff --git a/clang/lib/Driver/ToolChains/Myriad.cpp b/clang/lib/Driver/ToolChains/Myriad.cpp index 84fe4748b6faff..ab0df5d8f1683e 100644 --- a/clang/lib/Driver/ToolChains/Myriad.cpp +++ b/clang/lib/Driver/ToolChains/Myriad.cpp @@ -79,7 +79,7 @@ void tools::SHAVE::Compiler::ConstructJob(Compilation &C, const JobAction &JA, Args.MakeArgString(getToolChain().GetProgramPath("moviCompile")); C.addCommand(std::make_unique(JA, *this, ResponseFileSupport::None(), Args.MakeArgString(Exec), CmdArgs, - Inputs)); + Inputs, Output)); } void tools::SHAVE::Assembler::ConstructJob(Compilation &C, const JobAction &JA, @@ -115,7 +115,7 @@ void tools::SHAVE::Assembler::ConstructJob(Compilation &C, const JobAction &JA, Args.MakeArgString(getToolChain().GetProgramPath("moviAsm")); C.addCommand(std::make_unique(JA, *this, ResponseFileSupport::None(), Args.MakeArgString(Exec), CmdArgs, - Inputs)); + Inputs, Output)); } void tools::Myriad::Linker::ConstructJob(Compilation &C, const JobAction &JA, @@ -200,9 +200,9 @@ void tools::Myriad::Linker::ConstructJob(Compilation &C, const JobAction &JA, std::string Exec = Args.MakeArgString(TC.GetProgramPath("sparc-myriad-rtems-ld")); - C.addCommand( - std::make_unique(JA, *this, ResponseFileSupport::AtFileCurCP(), - Args.MakeArgString(Exec), CmdArgs, Inputs)); + C.addCommand(std::make_unique( + JA, *this, ResponseFileSupport::AtFileCurCP(), Args.MakeArgString(Exec), + CmdArgs, Inputs, Output)); } MyriadToolChain::MyriadToolChain(const Driver &D, const llvm::Triple &Triple, diff --git a/clang/lib/Driver/ToolChains/NaCl.cpp b/clang/lib/Driver/ToolChains/NaCl.cpp index 15a773675299a1..8a150c39475320 100644 --- a/clang/lib/Driver/ToolChains/NaCl.cpp +++ b/clang/lib/Driver/ToolChains/NaCl.cpp @@ -193,8 +193,9 @@ void nacltools::Linker::ConstructJob(Compilation &C, const JobAction &JA, } const char *Exec = Args.MakeArgString(ToolChain.GetLinkerPath()); - C.addCommand(std::make_unique( - JA, *this, ResponseFileSupport::AtFileCurCP(), Exec, CmdArgs, 
Inputs)); + C.addCommand(std::make_unique(JA, *this, + ResponseFileSupport::AtFileCurCP(), + Exec, CmdArgs, Inputs, Output)); } /// NaCl Toolchain diff --git a/clang/lib/Driver/ToolChains/NetBSD.cpp b/clang/lib/Driver/ToolChains/NetBSD.cpp index 253ee6ce0f7210..48bf061c6650d9 100644 --- a/clang/lib/Driver/ToolChains/NetBSD.cpp +++ b/clang/lib/Driver/ToolChains/NetBSD.cpp @@ -103,8 +103,9 @@ void netbsd::Assembler::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back(II.getFilename()); const char *Exec = Args.MakeArgString((getToolChain().GetProgramPath("as"))); - C.addCommand(std::make_unique( - JA, *this, ResponseFileSupport::AtFileCurCP(), Exec, CmdArgs, Inputs)); + C.addCommand(std::make_unique(JA, *this, + ResponseFileSupport::AtFileCurCP(), + Exec, CmdArgs, Inputs, Output)); } void netbsd::Linker::ConstructJob(Compilation &C, const JobAction &JA, @@ -338,8 +339,9 @@ void netbsd::Linker::ConstructJob(Compilation &C, const JobAction &JA, ToolChain.addProfileRTLibs(Args, CmdArgs); const char *Exec = Args.MakeArgString(ToolChain.GetLinkerPath()); - C.addCommand(std::make_unique( - JA, *this, ResponseFileSupport::AtFileCurCP(), Exec, CmdArgs, Inputs)); + C.addCommand(std::make_unique(JA, *this, + ResponseFileSupport::AtFileCurCP(), + Exec, CmdArgs, Inputs, Output)); } /// NetBSD - NetBSD tool chain which can call as(1) and ld(1) directly. 
diff --git a/clang/lib/Driver/ToolChains/OpenBSD.cpp b/clang/lib/Driver/ToolChains/OpenBSD.cpp index 5ca2fa0850e632..f155d74632f93c 100644 --- a/clang/lib/Driver/ToolChains/OpenBSD.cpp +++ b/clang/lib/Driver/ToolChains/OpenBSD.cpp @@ -82,8 +82,9 @@ void openbsd::Assembler::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back(II.getFilename()); const char *Exec = Args.MakeArgString(getToolChain().GetProgramPath("as")); - C.addCommand(std::make_unique( - JA, *this, ResponseFileSupport::AtFileCurCP(), Exec, CmdArgs, Inputs)); + C.addCommand(std::make_unique(JA, *this, + ResponseFileSupport::AtFileCurCP(), + Exec, CmdArgs, Inputs, Output)); } void openbsd::Linker::ConstructJob(Compilation &C, const JobAction &JA, @@ -221,8 +222,9 @@ void openbsd::Linker::ConstructJob(Compilation &C, const JobAction &JA, } const char *Exec = Args.MakeArgString(ToolChain.GetLinkerPath()); - C.addCommand(std::make_unique( - JA, *this, ResponseFileSupport::AtFileCurCP(), Exec, CmdArgs, Inputs)); + C.addCommand(std::make_unique(JA, *this, + ResponseFileSupport::AtFileCurCP(), + Exec, CmdArgs, Inputs, Output)); } SanitizerMask OpenBSD::getSupportedSanitizers() const { diff --git a/clang/lib/Driver/ToolChains/PS4CPU.cpp b/clang/lib/Driver/ToolChains/PS4CPU.cpp index 6dc81899cbaacb..fab1b2ac62dfdb 100644 --- a/clang/lib/Driver/ToolChains/PS4CPU.cpp +++ b/clang/lib/Driver/ToolChains/PS4CPU.cpp @@ -66,8 +66,9 @@ void tools::PS4cpu::Assemble::ConstructJob(Compilation &C, const JobAction &JA, const char *Exec = Args.MakeArgString(getToolChain().GetProgramPath("orbis-as")); - C.addCommand(std::make_unique( - JA, *this, ResponseFileSupport::AtFileUTF8(), Exec, CmdArgs, Inputs)); + C.addCommand(std::make_unique(JA, *this, + ResponseFileSupport::AtFileUTF8(), + Exec, CmdArgs, Inputs, Output)); } static void AddPS4SanitizerArgs(const ToolChain &TC, ArgStringList &CmdArgs) { @@ -152,8 +153,9 @@ void tools::PS4cpu::Link::ConstructJob(Compilation &C, const JobAction &JA, const char *Exec = 
Args.MakeArgString(ToolChain.GetProgramPath("orbis-ld")); - C.addCommand(std::make_unique( - JA, *this, ResponseFileSupport::AtFileUTF8(), Exec, CmdArgs, Inputs)); + C.addCommand(std::make_unique(JA, *this, + ResponseFileSupport::AtFileUTF8(), + Exec, CmdArgs, Inputs, Output)); } toolchains::PS4CPU::PS4CPU(const Driver &D, const llvm::Triple &Triple, diff --git a/clang/lib/Driver/ToolChains/RISCVToolchain.cpp b/clang/lib/Driver/ToolChains/RISCVToolchain.cpp index cc912d94cb92f3..312c8b52c5e834 100644 --- a/clang/lib/Driver/ToolChains/RISCVToolchain.cpp +++ b/clang/lib/Driver/ToolChains/RISCVToolchain.cpp @@ -191,8 +191,8 @@ void RISCV::Linker::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back("-o"); CmdArgs.push_back(Output.getFilename()); - C.addCommand( - std::make_unique(JA, *this, ResponseFileSupport::AtFileCurCP(), - Args.MakeArgString(Linker), CmdArgs, Inputs)); + C.addCommand(std::make_unique( + JA, *this, ResponseFileSupport::AtFileCurCP(), Args.MakeArgString(Linker), + CmdArgs, Inputs, Output)); } // RISCV tools end. 
diff --git a/clang/lib/Driver/ToolChains/Solaris.cpp b/clang/lib/Driver/ToolChains/Solaris.cpp index b8fdc87478bc66..4ed4d839ad106c 100644 --- a/clang/lib/Driver/ToolChains/Solaris.cpp +++ b/clang/lib/Driver/ToolChains/Solaris.cpp @@ -42,7 +42,7 @@ void solaris::Assembler::ConstructJob(Compilation &C, const JobAction &JA, const char *Exec = Args.MakeArgString(getToolChain().GetProgramPath("as")); C.addCommand(std::make_unique(JA, *this, ResponseFileSupport::None(), - Exec, CmdArgs, Inputs)); + Exec, CmdArgs, Inputs, Output)); } void solaris::Linker::ConstructJob(Compilation &C, const JobAction &JA, @@ -152,7 +152,7 @@ void solaris::Linker::ConstructJob(Compilation &C, const JobAction &JA, const char *Exec = Args.MakeArgString(getToolChain().GetLinkerPath()); C.addCommand(std::make_unique(JA, *this, ResponseFileSupport::None(), - Exec, CmdArgs, Inputs)); + Exec, CmdArgs, Inputs, Output)); } static StringRef getSolarisLibSuffix(const llvm::Triple &Triple) { diff --git a/clang/lib/Driver/ToolChains/WebAssembly.cpp b/clang/lib/Driver/ToolChains/WebAssembly.cpp index d953082470aab6..6b654886e7746c 100644 --- a/clang/lib/Driver/ToolChains/WebAssembly.cpp +++ b/clang/lib/Driver/ToolChains/WebAssembly.cpp @@ -114,8 +114,9 @@ void wasm::Linker::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back("-o"); CmdArgs.push_back(Output.getFilename()); - C.addCommand(std::make_unique( - JA, *this, ResponseFileSupport::AtFileCurCP(), Linker, CmdArgs, Inputs)); + C.addCommand(std::make_unique(JA, *this, + ResponseFileSupport::AtFileCurCP(), + Linker, CmdArgs, Inputs, Output)); // When optimizing, if wasm-opt is available, run it. 
if (Arg *A = Args.getLastArg(options::OPT_O_Group)) { @@ -139,7 +140,7 @@ void wasm::Linker::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back(Output.getFilename()); C.addCommand(std::make_unique( JA, *this, ResponseFileSupport::AtFileCurCP(), WasmOpt, CmdArgs, - Inputs)); + Inputs, Output)); } } } diff --git a/clang/lib/Driver/ToolChains/XCore.cpp b/clang/lib/Driver/ToolChains/XCore.cpp index 5030c73c7d8251..5f94f83d36919d 100644 --- a/clang/lib/Driver/ToolChains/XCore.cpp +++ b/clang/lib/Driver/ToolChains/XCore.cpp @@ -53,7 +53,7 @@ void tools::XCore::Assembler::ConstructJob(Compilation &C, const JobAction &JA, const char *Exec = Args.MakeArgString(getToolChain().GetProgramPath("xcc")); C.addCommand(std::make_unique(JA, *this, ResponseFileSupport::None(), - Exec, CmdArgs, Inputs)); + Exec, CmdArgs, Inputs, Output)); } void tools::XCore::Linker::ConstructJob(Compilation &C, const JobAction &JA, @@ -82,7 +82,7 @@ void tools::XCore::Linker::ConstructJob(Compilation &C, const JobAction &JA, const char *Exec = Args.MakeArgString(getToolChain().GetProgramPath("xcc")); C.addCommand(std::make_unique(JA, *this, ResponseFileSupport::None(), - Exec, CmdArgs, Inputs)); + Exec, CmdArgs, Inputs, Output)); } /// XCore tool chain diff --git a/clang/unittests/Driver/ToolChainTest.cpp b/clang/unittests/Driver/ToolChainTest.cpp index 67bf545b14e4b3..227f7c76b8a1ec 100644 --- a/clang/unittests/Driver/ToolChainTest.cpp +++ b/clang/unittests/Driver/ToolChainTest.cpp @@ -259,4 +259,34 @@ TEST(ToolChainTest, GetTargetAndMode) { EXPECT_STREQ(Res.DriverMode, "--driver-mode=cl"); EXPECT_FALSE(Res.TargetIsValid); } + +TEST(ToolChainTest, CommandOutput) { + IntrusiveRefCntPtr DiagOpts = new DiagnosticOptions(); + + IntrusiveRefCntPtr DiagID(new DiagnosticIDs()); + struct TestDiagnosticConsumer : public DiagnosticConsumer {}; + DiagnosticsEngine Diags(DiagID, &*DiagOpts, new TestDiagnosticConsumer); + IntrusiveRefCntPtr InMemoryFileSystem( + new 
llvm::vfs::InMemoryFileSystem); + + Driver CCDriver("/home/test/bin/clang", "arm-linux-gnueabi", Diags, + "clang LLVM compiler", InMemoryFileSystem); + CCDriver.setCheckInputsExist(false); + std::unique_ptr CC( + CCDriver.BuildCompilation({"/home/test/bin/clang", "foo.cpp"})); + const JobList &Jobs = CC->getJobs(); + + const auto &CmdCompile = Jobs.getJobs().front(); + const auto &InFile = CmdCompile->getInputFilenames().front(); + EXPECT_STREQ(InFile, "foo.cpp"); + auto ObjFile = CmdCompile->getOutputFilenames().front(); + EXPECT_TRUE(StringRef(ObjFile).endswith(".o")); + + const auto &CmdLink = Jobs.getJobs().back(); + const auto LinkInFile = CmdLink->getInputFilenames().front(); + EXPECT_EQ(ObjFile, LinkInFile); + auto ExeFile = CmdLink->getOutputFilenames().front(); + EXPECT_EQ("a.out", ExeFile); +} + } // end anonymous namespace. diff --git a/llvm/cmake/modules/AddLLVM.cmake b/llvm/cmake/modules/AddLLVM.cmake index e57abea427530f..4e9b1f6c233223 100644 --- a/llvm/cmake/modules/AddLLVM.cmake +++ b/llvm/cmake/modules/AddLLVM.cmake @@ -1494,6 +1494,7 @@ def relpath(p):\n if os.path.splitdrive(p)[0] != os.path.splitdrive(base)[0]: return p\n if haslink(p) or haslink(base): return p\n return os.path.relpath(p, base)\n +if len(sys.argv) < 3: sys.exit(0)\n sys.stdout.write(';'.join(relpath(p) for p in sys.argv[2].split(';')))" ${basedir} ${pathlist_escaped} diff --git a/llvm/docs/CommandGuide/llvm-readobj.rst b/llvm/docs/CommandGuide/llvm-readobj.rst index 9b1b5ba92bc071..ba5511bb765a64 100644 --- a/llvm/docs/CommandGuide/llvm-readobj.rst +++ b/llvm/docs/CommandGuide/llvm-readobj.rst @@ -286,6 +286,10 @@ The following options are implemented only for the PE/COFF file format. Display the debug directory. +.. option:: --coff-tls-directory + + Display the TLS directory. + .. option:: --coff-directives Display the .drectve section. 
diff --git a/llvm/include/llvm/Analysis/ScalarEvolution.h b/llvm/include/llvm/Analysis/ScalarEvolution.h index 158257a5aa9a10..ac6090a30d2ff5 100644 --- a/llvm/include/llvm/Analysis/ScalarEvolution.h +++ b/llvm/include/llvm/Analysis/ScalarEvolution.h @@ -1117,6 +1117,15 @@ class ScalarEvolution { const SCEV *S, const Loop *L, SmallPtrSetImpl &Preds); + /// Compute \p LHS - \p RHS and returns the result as an APInt if it is a + /// constant, and None if it isn't. + /// + /// This is intended to be a cheaper version of getMinusSCEV. We can be + /// frugal here since we just bail out of actually constructing and + /// canonicalizing an expression in the cases where the result isn't going + /// to be a constant. + Optional computeConstantDifference(const SCEV *LHS, const SCEV *RHS); + private: /// A CallbackVH to arrange for ScalarEvolution to be notified whenever a /// Value is deleted. @@ -1799,15 +1808,6 @@ class ScalarEvolution { bool splitBinaryAdd(const SCEV *Expr, const SCEV *&L, const SCEV *&R, SCEV::NoWrapFlags &Flags); - /// Compute \p LHS - \p RHS and returns the result as an APInt if it is a - /// constant, and None if it isn't. - /// - /// This is intended to be a cheaper version of getMinusSCEV. We can be - /// frugal here since we just bail out of actually constructing and - /// canonicalizing an expression in the cases where the result isn't going - /// to be a constant. - Optional computeConstantDifference(const SCEV *LHS, const SCEV *RHS); - /// Drop memoized information computed for S. 
void forgetMemoizedResults(const SCEV *S); diff --git a/llvm/include/llvm/BinaryFormat/COFF.h b/llvm/include/llvm/BinaryFormat/COFF.h index 1919d7f0dece96..716d649f7c51c5 100644 --- a/llvm/include/llvm/BinaryFormat/COFF.h +++ b/llvm/include/llvm/BinaryFormat/COFF.h @@ -311,6 +311,7 @@ enum SectionCharacteristics : uint32_t { IMAGE_SCN_ALIGN_2048BYTES = 0x00C00000, IMAGE_SCN_ALIGN_4096BYTES = 0x00D00000, IMAGE_SCN_ALIGN_8192BYTES = 0x00E00000, + IMAGE_SCN_ALIGN_MASK = 0x00F00000, IMAGE_SCN_LNK_NRELOC_OVFL = 0x01000000, IMAGE_SCN_MEM_DISCARDABLE = 0x02000000, IMAGE_SCN_MEM_NOT_CACHED = 0x04000000, diff --git a/llvm/include/llvm/Object/COFF.h b/llvm/include/llvm/Object/COFF.h index 8aef00a8809dcd..505aab8bff5b39 100644 --- a/llvm/include/llvm/Object/COFF.h +++ b/llvm/include/llvm/Object/COFF.h @@ -786,6 +786,8 @@ class COFFObjectFile : public ObjectFile { const coff_base_reloc_block_header *BaseRelocEnd; const debug_directory *DebugDirectoryBegin; const debug_directory *DebugDirectoryEnd; + const coff_tls_directory32 *TLSDirectory32; + const coff_tls_directory64 *TLSDirectory64; // Either coff_load_configuration32 or coff_load_configuration64. 
const void *LoadConfig = nullptr; @@ -805,6 +807,7 @@ class COFFObjectFile : public ObjectFile { Error initExportTablePtr(); Error initBaseRelocPtr(); Error initDebugDirectoryPtr(); + Error initTLSDirectoryPtr(); Error initLoadConfigPtr(); public: @@ -976,6 +979,13 @@ class COFFObjectFile : public ObjectFile { return make_range(debug_directory_begin(), debug_directory_end()); } + const coff_tls_directory32 *getTLSDirectory32() const { + return TLSDirectory32; + } + const coff_tls_directory64 *getTLSDirectory64() const { + return TLSDirectory64; + } + const dos_header *getDOSHeader() const { if (!PE32Header && !PE32PlusHeader) return nullptr; diff --git a/llvm/lib/Object/COFFObjectFile.cpp b/llvm/lib/Object/COFFObjectFile.cpp index c26d7721b3fe9c..cd10e67af239e8 100644 --- a/llvm/lib/Object/COFFObjectFile.cpp +++ b/llvm/lib/Object/COFFObjectFile.cpp @@ -649,6 +649,38 @@ Error COFFObjectFile::initDebugDirectoryPtr() { return Error::success(); } +Error COFFObjectFile::initTLSDirectoryPtr() { + // Get the RVA of the TLS directory. Do nothing if it does not exist. + const data_directory *DataEntry = getDataDirectory(COFF::TLS_TABLE); + if (!DataEntry) + return Error::success(); + + // Do nothing if the RVA is NULL. + if (DataEntry->RelativeVirtualAddress == 0) + return Error::success(); + + uint64_t DirSize = + is64() ? sizeof(coff_tls_directory64) : sizeof(coff_tls_directory32); + + // Check that the size is correct. + if (DataEntry->Size != DirSize) + return createStringError( + object_error::parse_failed, + "TLS Directory size (%u) is not the expected size (%u).", + static_cast(DataEntry->Size), DirSize); + + uintptr_t IntPtr = 0; + if (Error E = getRvaPtr(DataEntry->RelativeVirtualAddress, IntPtr)) + return E; + + if (is64()) + TLSDirectory64 = reinterpret_cast(IntPtr); + else + TLSDirectory32 = reinterpret_cast(IntPtr); + + return Error::success(); +} + Error COFFObjectFile::initLoadConfigPtr() { // Get the RVA of the debug directory. 
Do nothing if it does not exist. const data_directory *DataEntry = getDataDirectory(COFF::LOAD_CONFIG_TABLE); @@ -682,7 +714,8 @@ COFFObjectFile::COFFObjectFile(MemoryBufferRef Object) ImportDirectory(nullptr), DelayImportDirectory(nullptr), NumberOfDelayImportDirectory(0), ExportDirectory(nullptr), BaseRelocHeader(nullptr), BaseRelocEnd(nullptr), - DebugDirectoryBegin(nullptr), DebugDirectoryEnd(nullptr) {} + DebugDirectoryBegin(nullptr), DebugDirectoryEnd(nullptr), + TLSDirectory32(nullptr), TLSDirectory64(nullptr) {} Error COFFObjectFile::initialize() { // Check that we at least have enough room for a header. @@ -809,10 +842,14 @@ Error COFFObjectFile::initialize() { if (Error E = initBaseRelocPtr()) return E; - // Initialize the pointer to the export table. + // Initialize the pointer to the debug directory. if (Error E = initDebugDirectoryPtr()) return E; + // Initialize the pointer to the TLS directory. + if (Error E = initTLSDirectoryPtr()) + return E; + if (Error E = initLoadConfigPtr()) return E; diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp index dc44980ce218c8..a355cbf30d315a 100644 --- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp +++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp @@ -154,19 +154,6 @@ static unsigned AdrImmBits(unsigned Value) { return (hi19 << 5) | (lo2 << 29); } -static bool valueFitsIntoFixupKind(unsigned Kind, uint64_t Value) { - unsigned NumBits; - switch(Kind) { - case FK_Data_1: NumBits = 8; break; - case FK_Data_2: NumBits = 16; break; - case FK_Data_4: NumBits = 32; break; - case FK_Data_8: NumBits = 64; break; - default: return true; - } - return isUIntN(NumBits, Value) || - isIntN(NumBits, static_cast(Value)); -} - static uint64_t adjustFixupValue(const MCFixup &Fixup, const MCValue &Target, uint64_t Value, MCContext &Ctx, const Triple &TheTriple, bool IsResolved) { @@ -341,9 +328,6 @@ static uint64_t 
adjustFixupValue(const MCFixup &Fixup, const MCValue &Target, case FK_Data_2: case FK_Data_4: case FK_Data_8: - if (!valueFitsIntoFixupKind(Fixup.getTargetKind(), Value)) - Ctx.reportError(Fixup.getLoc(), "fixup value too large for data type!"); - LLVM_FALLTHROUGH; case FK_SecRel_2: case FK_SecRel_4: return Value; diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index d1182015d581b8..822b9556eb4677 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -8028,13 +8028,6 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { assert(Op.getValueType().getVectorElementType() == MVT::i32 && "Custom lowering for non-i32 vectors hasn't been implemented."); - if (!allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(), - MemVT, *Load->getMemOperand())) { - SDValue Ops[2]; - std::tie(Ops[0], Ops[1]) = expandUnalignedLoad(Load, DAG); - return DAG.getMergeValues(Ops, DL); - } - unsigned Alignment = Load->getAlignment(); unsigned AS = Load->getAddressSpace(); if (Subtarget->hasLDSMisalignedBug() && @@ -8146,6 +8139,14 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { return SplitVectorLoad(Op, DAG); } } + + if (!allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(), + MemVT, *Load->getMemOperand())) { + SDValue Ops[2]; + std::tie(Ops[0], Ops[1]) = expandUnalignedLoad(Load, DAG); + return DAG.getMergeValues(Ops, DL); + } + return SDValue(); } @@ -8551,11 +8552,6 @@ SDValue SITargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { assert(VT.isVector() && Store->getValue().getValueType().getScalarType() == MVT::i32); - if (!allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(), - VT, *Store->getMemOperand())) { - return expandUnalignedStore(Store, DAG); - } - unsigned AS = Store->getAddressSpace(); if (Subtarget->hasLDSMisalignedBug() && AS == AMDGPUAS::FLAT_ADDRESS && @@ 
-8580,6 +8576,11 @@ SDValue SITargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { // v3 stores not supported on SI. if (NumElements == 3 && !Subtarget->hasDwordx3LoadStores()) return SplitVectorStore(Op, DAG); + + if (!allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(), + VT, *Store->getMemOperand())) + return expandUnalignedStore(Store, DAG); + return SDValue(); } else if (AS == AMDGPUAS::PRIVATE_ADDRESS) { switch (Subtarget->getMaxPrivateElementSize()) { @@ -8619,6 +8620,13 @@ SDValue SITargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { return SplitVectorStore(Op, DAG); } + if (!allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(), + VT, *Store->getMemOperand())) { + if (VT.isVector()) + return SplitVectorStore(Op, DAG); + return expandUnalignedStore(Store, DAG); + } + return SDValue(); } else { llvm_unreachable("unhandled address space"); diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td index 3451c238918116..4c263de673d671 100644 --- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td @@ -92,6 +92,7 @@ class VOP2_Real : // copy relevant pseudo op flags let SubtargetPredicate = ps.SubtargetPredicate; + let OtherPredicates = ps.OtherPredicates; let AsmMatchConverter = ps.AsmMatchConverter; let AsmVariantName = ps.AsmVariantName; let Constraints = ps.Constraints; @@ -494,14 +495,14 @@ defm V_OR_B32 : VOP2Inst <"v_or_b32", VOP_PAT_GEN, or>; defm V_XOR_B32 : VOP2Inst <"v_xor_b32", VOP_PAT_GEN, xor>; let mayRaiseFPException = 0 in { -let SubtargetPredicate = HasMadMacF32Insts in { +let OtherPredicates = [HasMadMacF32Insts] in { let Constraints = "$vdst = $src2", DisableEncoding="$src2", isConvertibleToThreeAddress = 1 in { defm V_MAC_F32 : VOP2Inst <"v_mac_f32", VOP_MAC_F32>; } def V_MADAK_F32 : VOP2_Pseudo <"v_madak_f32", VOP_MADAK_F32, []>; -} // End SubtargetPredicate = HasMadMacF32Insts +} // End 
OtherPredicates = [HasMadMacF32Insts] } // No patterns so that the scalar instructions are always selected. @@ -873,6 +874,7 @@ class Base_VOP2_DPP16 op, VOP2_DPP_Pseudo ps, VOP2_DPP { let AssemblerPredicate = HasDPP16; let SubtargetPredicate = HasDPP16; + let OtherPredicates = ps.OtherPredicates; } class VOP2_DPP16 op, VOP2_DPP_Pseudo ps, @@ -899,6 +901,7 @@ class VOP2_DPP8 op, VOP2_Pseudo ps, let AssemblerPredicate = HasDPP8; let SubtargetPredicate = HasDPP8; + let OtherPredicates = ps.OtherPredicates; } //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index edb2dc8881c7b9..52b81ad2164a62 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -1701,27 +1701,8 @@ static Instruction *foldOrToXor(BinaryOperator &I, /// Return true if a constant shift amount is always less than the specified /// bit-width. If not, the shift could create poison in the narrower type. static bool canNarrowShiftAmt(Constant *C, unsigned BitWidth) { - if (auto *ScalarC = dyn_cast(C)) - return ScalarC->getZExtValue() < BitWidth; - - if (C->getType()->isVectorTy()) { - // Check each element of a constant vector. - unsigned NumElts = cast(C->getType())->getNumElements(); - for (unsigned i = 0; i != NumElts; ++i) { - Constant *Elt = C->getAggregateElement(i); - if (!Elt) - return false; - if (isa(Elt)) - continue; - auto *CI = dyn_cast(Elt); - if (!CI || CI->getZExtValue() >= BitWidth) - return false; - } - return true; - } - - // The constant is a constant expression or unknown. 
- return false; + APInt Threshold(C->getType()->getScalarSizeInBits(), BitWidth); + return match(C, m_SpecificInt_ICMP(ICmpInst::ICMP_ULT, Threshold)); } /// Try to use narrower ops (sink zext ops) for an 'and' with binop operand and @@ -2071,22 +2052,22 @@ Instruction *InstCombinerImpl::matchBSwap(BinaryOperator &Or) { return LastInst; } -/// Transform UB-safe variants of bitwise rotate to the funnel shift intrinsic. -static Instruction *matchRotate(Instruction &Or) { +/// Match UB-safe variants of the funnel shift intrinsic. +static Instruction *matchFunnelShift(Instruction &Or) { // TODO: Can we reduce the code duplication between this and the related // rotate matching code under visitSelect and visitTrunc? unsigned Width = Or.getType()->getScalarSizeInBits(); - // First, find an or'd pair of opposite shifts with the same shifted operand: - // or (lshr ShVal, ShAmt0), (shl ShVal, ShAmt1) + // First, find an or'd pair of opposite shifts: + // or (lshr ShVal0, ShAmt0), (shl ShVal1, ShAmt1) BinaryOperator *Or0, *Or1; if (!match(Or.getOperand(0), m_BinOp(Or0)) || !match(Or.getOperand(1), m_BinOp(Or1))) return nullptr; - Value *ShVal, *ShAmt0, *ShAmt1; - if (!match(Or0, m_OneUse(m_LogicalShift(m_Value(ShVal), m_Value(ShAmt0)))) || - !match(Or1, m_OneUse(m_LogicalShift(m_Specific(ShVal), m_Value(ShAmt1))))) + Value *ShVal0, *ShVal1, *ShAmt0, *ShAmt1; + if (!match(Or0, m_OneUse(m_LogicalShift(m_Value(ShVal0), m_Value(ShAmt0)))) || + !match(Or1, m_OneUse(m_LogicalShift(m_Value(ShVal1), m_Value(ShAmt1))))) return nullptr; BinaryOperator::BinaryOps ShiftOpcode0 = Or0->getOpcode(); @@ -2094,15 +2075,30 @@ static Instruction *matchRotate(Instruction &Or) { if (ShiftOpcode0 == ShiftOpcode1) return nullptr; - // Match the shift amount operands for a rotate pattern. This always matches - // a subtraction on the R operand. - auto matchShiftAmount = [](Value *L, Value *R, unsigned Width) -> Value * { + // Match the shift amount operands for a funnel shift pattern. 
This always + // matches a subtraction on the R operand. + auto matchShiftAmount = [&](Value *L, Value *R, unsigned Width) -> Value * { // Check for constant shift amounts that sum to the bitwidth. - // TODO: Support non-uniform shift amounts. - const APInt *LC, *RC; - if (match(L, m_APIntAllowUndef(LC)) && match(R, m_APIntAllowUndef(RC))) - if (LC->ult(Width) && RC->ult(Width) && (*LC + *RC) == Width) - return ConstantInt::get(L->getType(), *LC); + const APInt *LI, *RI; + if (match(L, m_APIntAllowUndef(LI)) && match(R, m_APIntAllowUndef(RI))) + if (LI->ult(Width) && RI->ult(Width) && (*LI + *RI) == Width) + return ConstantInt::get(L->getType(), *LI); + + // TODO: Support undefs in non-uniform shift amounts. + Constant *LC, *RC; + if (match(L, m_Constant(LC)) && !LC->containsUndefElement() && + match(R, m_Constant(RC)) && !RC->containsUndefElement() && + match(L, m_SpecificInt_ICMP(ICmpInst::ICMP_ULT, APInt(Width, Width))) && + match(R, m_SpecificInt_ICMP(ICmpInst::ICMP_ULT, APInt(Width, Width)))) { + if (match(ConstantExpr::getAdd(LC, RC), m_SpecificInt(Width))) + return L; + } + + // For non-constant cases, the following patterns currently only work for + // rotation patterns. + // TODO: Add general funnel-shift compatible patterns. + if (ShVal0 != ShVal1) + return nullptr; // For non-constant cases we don't support non-pow2 shift masks. // TODO: Is it worth matching urem as well? @@ -2140,7 +2136,8 @@ static Instruction *matchRotate(Instruction &Or) { (SubIsOnLHS && ShiftOpcode1 == BinaryOperator::Shl); Intrinsic::ID IID = IsFshl ? Intrinsic::fshl : Intrinsic::fshr; Function *F = Intrinsic::getDeclaration(Or.getModule(), IID, Or.getType()); - return IntrinsicInst::Create(F, { ShVal, ShVal, ShAmt }); + return IntrinsicInst::Create( + F, {IsFshl ? ShVal0 : ShVal1, IsFshl ? ShVal1 : ShVal0, ShAmt}); } /// Attempt to combine or(zext(x),shl(zext(y),bw/2) concat packing patterns. 
@@ -2593,8 +2590,8 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) { if (Instruction *BSwap = matchBSwap(I)) return BSwap; - if (Instruction *Rotate = matchRotate(I)) - return Rotate; + if (Instruction *Funnel = matchFunnelShift(I)) + return Funnel; if (Instruction *Concat = matchOrConcat(I, Builder)) return replaceInstUsesWith(I, Concat); diff --git a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp index 537838e2bdc19f..93b9917b5972b1 100644 --- a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -59,6 +59,7 @@ #include "llvm/ADT/Hashing.h" #include "llvm/ADT/PointerIntPair.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SetOperations.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallBitVector.h" #include "llvm/ADT/SmallPtrSet.h" @@ -80,6 +81,7 @@ #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/GlobalValue.h" @@ -5776,6 +5778,27 @@ static bool ReduceLoopStrength(Loop *L, IVUsers &IU, ScalarEvolution &SE, if (MSSA) MSSAU = std::make_unique<MemorySSAUpdater>(MSSA); + // Debug preservation - record all llvm.dbg.value from the loop as well as + // the SCEV of their variable location. Since salvageDebugInfo may change the + // DIExpression we need to store the original here as well (i.e. it needs to + // be in sync with the SCEV). + SmallVector< + std::tuple<DbgValueInst *, Type *, const SCEV *, DIExpression *>, + 32> + DbgValues; + for (auto &B : L->getBlocks()) { + for (auto &I : *B) { + if (DbgValueInst *D = dyn_cast<DbgValueInst>(&I)) { + auto V = D->getVariableLocation(); + if (!V || !SE.isSCEVable(V->getType())) + continue; + auto DS = SE.getSCEV(V); + DbgValues.push_back( + std::make_tuple(D, V->getType(), DS, D->getExpression())); + } + } + } + // Run the main LSR transformation. 
Changed |= LSRInstance(L, IU, SE, DT, LI, TTI, AC, TLI, MSSAU.get()).getChanged(); @@ -5797,6 +5820,40 @@ static bool ReduceLoopStrength(Loop *L, IVUsers &IU, ScalarEvolution &SE, DeleteDeadPHIs(L->getHeader(), &TLI, MSSAU.get()); } } + // Debug preservation - go through all recorded llvm.dbg.value and for those + // that now have an undef variable location use the recorded SCEV to try and + // update it. Compare with SCEV of Phi-nodes of loop header to find a + // suitable update candidate. SCEV match with constant offset is allowed and + // will be compensated for in the DIExpression. + if (Changed) { + for (auto &D : DbgValues) { + auto DbgValue = std::get<0>(D); + auto DbgValueType = std::get<1>(D); + auto DbgValueSCEV = std::get<2>(D); + auto DbgDIExpr = std::get<3>(D); + if (!isa<UndefValue>(DbgValue->getVariableLocation())) + continue; + for (PHINode &Phi : L->getHeader()->phis()) { + if (DbgValueType != Phi.getType()) + continue; + if (!SE.isSCEVable(Phi.getType())) + continue; + auto PhiSCEV = SE.getSCEV(&Phi); + if (Optional<APInt> Offset = + SE.computeConstantDifference(DbgValueSCEV, PhiSCEV)) { + auto &Ctx = DbgValue->getContext(); + DbgValue->setOperand( + 0, MetadataAsValue::get(Ctx, ValueAsMetadata::get(&Phi))); + if (Offset.getValue().getSExtValue()) { + SmallVector<uint64_t, 2> Ops; + DIExpression::appendOffset(Ops, Offset.getValue().getSExtValue()); + DbgDIExpr = DIExpression::prependOpcodes(DbgDIExpr, Ops, true); + } + DbgValue->setOperand(2, MetadataAsValue::get(Ctx, DbgDIExpr)); + } + } + } + } return Changed; } diff --git a/llvm/test/CodeGen/AArch64/2s-complement-asm.ll b/llvm/test/CodeGen/AArch64/2s-complement-asm.ll new file mode 100644 index 00000000000000..cf646d13602042 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/2s-complement-asm.ll @@ -0,0 +1,9 @@ +; RUN: llc -mtriple=arm64-apple-ios %s -filetype=obj -o - | llvm-objdump --macho --section __DATA,__data - | FileCheck %s + +; CHECK: Contents of (__DATA,__data) section +; CHECK: 0000002a 59ed145d +@other = global i32 42 +@var = 
global i32 sub(i32 646102975, + i32 add (i32 trunc(i64 sub(i64 ptrtoint(i32* @var to i64), + i64 ptrtoint(i32* @other to i64)) to i32), + i32 3432360802)) diff --git a/llvm/test/CodeGen/AMDGPU/load-local-redundant-copies.ll b/llvm/test/CodeGen/AMDGPU/load-local-redundant-copies.ll new file mode 100644 index 00000000000000..2de1423e5eea5b --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/load-local-redundant-copies.ll @@ -0,0 +1,124 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck %s + +; Test that checks for redundant copies to temporary stack slot produced by +; expandUnalignedLoad. + +define amdgpu_vs void @test(<4 x i32> inreg %arg1, <6 x float> addrspace(3)* %arg2) { +; CHECK-LABEL: test: +; CHECK: ; %bb.0: +; CHECK-NEXT: v_add_i32_e32 v3, vcc, 12, v0 +; CHECK-NEXT: v_add_i32_e32 v1, vcc, 8, v0 +; CHECK-NEXT: v_add_i32_e32 v4, vcc, 4, v0 +; CHECK-NEXT: s_mov_b32 m0, -1 +; CHECK-NEXT: ds_read_b32 v2, v1 +; CHECK-NEXT: ds_read_b32 v1, v4 +; CHECK-NEXT: ds_read_b32 v3, v3 +; CHECK-NEXT: ds_read_b32 v0, v0 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: exp mrt0 off, off, off, off +; CHECK-NEXT: v_mov_b32_e32 v4, 0 +; CHECK-NEXT: tbuffer_store_format_xyzw v[0:3], v4, s[0:3], 0 format:[BUF_DATA_FORMAT_32_32_32_32,BUF_NUM_FORMAT_FLOAT] idxen +; CHECK-NEXT: s_endpgm + call void @llvm.amdgcn.exp.f32(i32 immarg 0, i32 immarg 0, float undef, float undef, float undef, float undef, i1 immarg false, i1 immarg false) + %var1 = load <6 x float>, <6 x float> addrspace(3)* %arg2, align 4 + %var2 = shufflevector <6 x float> %var1, <6 x float> undef, <4 x i32> + call void @llvm.amdgcn.struct.tbuffer.store.v4f32(<4 x float> %var2, <4 x i32> %arg1, i32 0, i32 0, i32 0, i32 immarg 126, i32 immarg 0) + ret void +} + +define amdgpu_vs void @test_2(<4 x i32> inreg %arg1, i32 %arg2, i32 inreg %arg3, <8 x float> addrspace(3)* %arg4) { +; CHECK-LABEL: test_2: +; CHECK: ; %bb.0: +; CHECK-NEXT: 
v_add_i32_e32 v5, vcc, 28, v1 +; CHECK-NEXT: v_add_i32_e32 v2, vcc, 24, v1 +; CHECK-NEXT: v_add_i32_e32 v3, vcc, 20, v1 +; CHECK-NEXT: v_add_i32_e32 v6, vcc, 16, v1 +; CHECK-NEXT: v_add_i32_e32 v7, vcc, 12, v1 +; CHECK-NEXT: v_add_i32_e32 v8, vcc, 8, v1 +; CHECK-NEXT: v_add_i32_e32 v10, vcc, 4, v1 +; CHECK-NEXT: s_mov_b32 m0, -1 +; CHECK-NEXT: ds_read_b32 v4, v2 +; CHECK-NEXT: ds_read_b32 v3, v3 +; CHECK-NEXT: ds_read_b32 v2, v6 +; CHECK-NEXT: ds_read_b32 v9, v7 +; CHECK-NEXT: ds_read_b32 v8, v8 +; CHECK-NEXT: ds_read_b32 v7, v10 +; CHECK-NEXT: ds_read_b32 v6, v1 +; CHECK-NEXT: ds_read_b32 v5, v5 +; CHECK-NEXT: s_waitcnt lgkmcnt(1) +; CHECK-NEXT: tbuffer_store_format_xyzw v[6:9], v0, s[0:3], s4 format:[BUF_DATA_FORMAT_32_32_32,BUF_NUM_FORMAT_UINT] idxen glc slc +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: tbuffer_store_format_xyzw v[2:5], v0, s[0:3], s4 format:[BUF_DATA_FORMAT_32_32_32,BUF_NUM_FORMAT_UINT] idxen offset:16 glc slc +; CHECK-NEXT: s_endpgm + %load = load <8 x float>, <8 x float> addrspace(3)* %arg4, align 4 + %vec1 = shufflevector <8 x float> %load, <8 x float> undef, <4 x i32> + call void @llvm.amdgcn.struct.tbuffer.store.v4f32(<4 x float> %vec1, <4 x i32> %arg1, i32 %arg2, i32 0, i32 %arg3, i32 immarg 77, i32 immarg 3) + %vec2 = shufflevector <8 x float> %load, <8 x float> undef, <4 x i32> + call void @llvm.amdgcn.struct.tbuffer.store.v4f32(<4 x float> %vec2, <4 x i32> %arg1, i32 %arg2, i32 16, i32 %arg3, i32 immarg 77, i32 immarg 3) + ret void +} + +define amdgpu_vs void @test_3(i32 inreg %arg1, i32 inreg %arg2, <4 x i32> inreg %arg3, i32 %arg4, <6 x float> addrspace(3)* %arg5, <6 x float> addrspace(3)* %arg6) { +; CHECK-LABEL: test_3: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_mov_b32 s7, s5 +; CHECK-NEXT: s_mov_b32 s6, s4 +; CHECK-NEXT: s_mov_b32 s5, s3 +; CHECK-NEXT: s_mov_b32 s4, s2 +; CHECK-NEXT: v_add_i32_e32 v0, vcc, 16, v1 +; CHECK-NEXT: v_add_i32_e32 v6, vcc, 12, v1 +; CHECK-NEXT: v_add_i32_e32 v4, vcc, 8, v1 +; CHECK-NEXT: v_add_i32_e32 
v7, vcc, 4, v1 +; CHECK-NEXT: v_add_i32_e32 v8, vcc, 20, v1 +; CHECK-NEXT: v_mov_b32_e32 v9, s0 +; CHECK-NEXT: v_add_i32_e32 v10, vcc, 16, v2 +; CHECK-NEXT: v_add_i32_e32 v11, vcc, 12, v2 +; CHECK-NEXT: v_add_i32_e32 v12, vcc, 8, v2 +; CHECK-NEXT: s_mov_b32 m0, -1 +; CHECK-NEXT: ds_read_b32 v3, v1 +; CHECK-NEXT: ds_read_b32 v5, v4 +; CHECK-NEXT: ds_read_b32 v4, v7 +; CHECK-NEXT: ds_read_b32 v1, v8 +; CHECK-NEXT: ds_read_b32 v6, v6 +; CHECK-NEXT: ds_read_b32 v0, v0 +; CHECK-NEXT: v_add_i32_e32 v7, vcc, 4, v2 +; CHECK-NEXT: v_add_i32_e32 v8, vcc, 20, v2 +; CHECK-NEXT: s_waitcnt lgkmcnt(1) +; CHECK-NEXT: tbuffer_store_format_xyzw v[3:6], v9, s[4:7], s1 format:[BUF_DATA_FORMAT_32_32_32,BUF_NUM_FORMAT_UINT] idxen offset:264 glc slc +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: tbuffer_store_format_xy v[0:1], v9, s[4:7], s1 format:[BUF_DATA_FORMAT_INVALID,BUF_NUM_FORMAT_UINT] idxen offset:280 glc slc +; CHECK-NEXT: s_waitcnt expcnt(0) +; CHECK-NEXT: ds_read_b32 v0, v2 +; CHECK-NEXT: ds_read_b32 v2, v12 +; CHECK-NEXT: ds_read_b32 v1, v7 +; CHECK-NEXT: ds_read_b32 v5, v8 +; CHECK-NEXT: ds_read_b32 v3, v11 +; CHECK-NEXT: ds_read_b32 v4, v10 +; CHECK-NEXT: s_waitcnt lgkmcnt(5) +; CHECK-NEXT: exp mrt0 off, off, off, off +; CHECK-NEXT: s_waitcnt lgkmcnt(1) +; CHECK-NEXT: tbuffer_store_format_xyzw v[0:3], v9, s[4:7], s1 format:[BUF_DATA_FORMAT_32_32_32,BUF_NUM_FORMAT_UINT] idxen offset:240 glc slc +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: tbuffer_store_format_xy v[4:5], v9, s[4:7], s1 format:[BUF_DATA_FORMAT_INVALID,BUF_NUM_FORMAT_UINT] idxen offset:256 glc slc +; CHECK-NEXT: s_endpgm + %load1 = load <6 x float>, <6 x float> addrspace(3)* %arg5, align 4 + %vec11 = shufflevector <6 x float> %load1, <6 x float> undef, <4 x i32> + call void @llvm.amdgcn.struct.tbuffer.store.v4f32(<4 x float> %vec11, <4 x i32> %arg3, i32 %arg1, i32 264, i32 %arg2, i32 immarg 77, i32 immarg 3) + %vec12 = shufflevector <6 x float> %load1, <6 x float> undef, <2 x i32> + call void 
@llvm.amdgcn.struct.tbuffer.store.v2f32(<2 x float> %vec12, <4 x i32> %arg3, i32 %arg1, i32 280, i32 %arg2, i32 immarg 64, i32 immarg 3) + + call void @llvm.amdgcn.exp.f32(i32 immarg 0, i32 immarg 0, float undef, float undef, float undef, float undef, i1 immarg false, i1 immarg false) + + %load2 = load <6 x float>, <6 x float> addrspace(3)* %arg6, align 4 + %vec21 = shufflevector <6 x float> %load2, <6 x float> undef, <4 x i32> + call void @llvm.amdgcn.struct.tbuffer.store.v4f32(<4 x float> %vec21, <4 x i32> %arg3, i32 %arg1, i32 240, i32 %arg2, i32 immarg 77, i32 immarg 3) + %vec22 = shufflevector <6 x float> %load2, <6 x float> undef, <2 x i32> + call void @llvm.amdgcn.struct.tbuffer.store.v2f32(<2 x float> %vec22, <4 x i32> %arg3, i32 %arg1, i32 256, i32 %arg2, i32 immarg 64, i32 immarg 3) + + ret void +} + +declare void @llvm.amdgcn.struct.tbuffer.store.v4f32(<4 x float>, <4 x i32>, i32, i32, i32, i32 immarg, i32 immarg) +declare void @llvm.amdgcn.struct.tbuffer.store.v2f32(<2 x float>, <4 x i32>, i32, i32, i32, i32 immarg, i32 immarg) +declare void @llvm.amdgcn.exp.f32(i32 immarg, i32 immarg, float, float, float, float, i1 immarg, i1 immarg) diff --git a/llvm/test/DebugInfo/COFF/fpo-shrink-wrap.ll b/llvm/test/DebugInfo/COFF/fpo-shrink-wrap.ll index 08aecdac5b794c..e8f37a370666c9 100644 --- a/llvm/test/DebugInfo/COFF/fpo-shrink-wrap.ll +++ b/llvm/test/DebugInfo/COFF/fpo-shrink-wrap.ll @@ -33,7 +33,7 @@ ; ASM: popl %ebx ; ASM: [[EPILOGUE]]: # %return ; ASM: retl $8 -; ASM: Ltmp10: +; ASM: Ltmp11: ; ASM: .cv_fpo_endproc ; Note how RvaStart advances 7 bytes to skip the shrink-wrapped portion. 
diff --git a/llvm/test/MC/AMDGPU/gfx1030_err.s b/llvm/test/MC/AMDGPU/gfx1030_err.s index 246548f1668398..c6af1736371aee 100644 --- a/llvm/test/MC/AMDGPU/gfx1030_err.s +++ b/llvm/test/MC/AMDGPU/gfx1030_err.s @@ -26,7 +26,7 @@ s_getreg_b32 s2, hwreg(HW_REG_XNACK_MASK) // GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: specified hardware register is not supported on this GPU v_mac_f32 v0, v1, v2 -// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU v_mad_f32 v0, v1, v2, v3 // GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU diff --git a/llvm/test/Transforms/IndVarSimplify/eliminate-comparison.ll b/llvm/test/Transforms/IndVarSimplify/eliminate-comparison.ll index 085d9ee20e1c0c..00e655942c3635 100644 --- a/llvm/test/Transforms/IndVarSimplify/eliminate-comparison.ll +++ b/llvm/test/Transforms/IndVarSimplify/eliminate-comparison.ll @@ -942,4 +942,111 @@ leave: ret void } +declare i1 @cond_func() + +define i32 @func_25(i32 %start) { +; CHECK-LABEL: @func_25( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[START:%.*]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ] +; CHECK-NEXT: [[C1:%.*]] = icmp ne i32 [[IV]], 0 +; CHECK-NEXT: br i1 [[C1]], label [[CHECKED_1:%.*]], label [[FAIL:%.*]] +; CHECK: checked.1: +; CHECK-NEXT: [[C2:%.*]] = icmp ne i32 [[IV]], 0 +; CHECK-NEXT: br i1 [[C2]], label [[CHECKED_2:%.*]], label [[FAIL]] +; CHECK: checked.2: +; CHECK-NEXT: [[C3:%.*]] = icmp ne i32 [[IV]], 0 +; CHECK-NEXT: br i1 [[C3]], label [[BACKEDGE]], label [[FAIL]] +; CHECK: backedge: +; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 758394 +; CHECK-NEXT: [[LOOP_COND:%.*]] = call i1 @cond_func() +; CHECK-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK: fail: +; CHECK-NEXT: unreachable +; CHECK: exit: +; CHECK-NEXT: [[IV_LCSSA1:%.*]] = phi i32 [ [[IV]], 
[[BACKEDGE]] ] +; CHECK-NEXT: ret i32 [[IV_LCSSA1]] +; +entry: + br label %loop + +loop: + %iv = phi i32 [%start, %entry], [%iv.next, %backedge] + %c1 = icmp ne i32 %iv, 0 + br i1 %c1, label %checked.1, label %fail + +checked.1: + %c2 = icmp ne i32 %iv, 0 + br i1 %c2, label %checked.2, label %fail + +checked.2: + %c3 = icmp ne i32 %iv, 0 + br i1 %c3, label %backedge, label %fail + +backedge: + %iv.next = add i32 %iv, 758394 + %loop.cond = call i1 @cond_func() + br i1 %loop.cond, label %loop, label %exit + +fail: + unreachable + +exit: + ret i32 %iv +} + +define i32 @func_26(i32 %start) { +; CHECK-LABEL: @func_26( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[START:%.*]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ] +; CHECK-NEXT: [[C1:%.*]] = icmp slt i32 [[IV]], 0 +; CHECK-NEXT: br i1 [[C1]], label [[CHECKED_1:%.*]], label [[FAIL:%.*]] +; CHECK: checked.1: +; CHECK-NEXT: [[C2:%.*]] = icmp slt i32 [[IV]], 1 +; CHECK-NEXT: br i1 [[C2]], label [[CHECKED_2:%.*]], label [[FAIL]] +; CHECK: checked.2: +; CHECK-NEXT: [[C3:%.*]] = icmp slt i32 [[IV]], 2 +; CHECK-NEXT: br i1 [[C3]], label [[BACKEDGE]], label [[FAIL]] +; CHECK: backedge: +; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 758394 +; CHECK-NEXT: [[LOOP_COND:%.*]] = call i1 @cond_func() +; CHECK-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK: fail: +; CHECK-NEXT: unreachable +; CHECK: exit: +; CHECK-NEXT: [[IV_LCSSA1:%.*]] = phi i32 [ [[IV]], [[BACKEDGE]] ] +; CHECK-NEXT: ret i32 [[IV_LCSSA1]] +; +entry: + br label %loop + +loop: + %iv = phi i32 [%start, %entry], [%iv.next, %backedge] + %c1 = icmp slt i32 %iv, 0 + br i1 %c1, label %checked.1, label %fail + +checked.1: + %c2 = icmp slt i32 %iv, 1 + br i1 %c2, label %checked.2, label %fail + +checked.2: + %c3 = icmp slt i32 %iv, 2 + br i1 %c3, label %backedge, label %fail + +backedge: + %iv.next = add i32 %iv, 758394 + %loop.cond = call i1 @cond_func() + br i1 
%loop.cond, label %loop, label %exit + +fail: + unreachable + +exit: + ret i32 %iv +} + + !0 = !{i32 0, i32 2147483647} diff --git a/llvm/test/Transforms/InstCombine/funnel.ll b/llvm/test/Transforms/InstCombine/funnel.ll index fca73a4ffb884f..d56bb74119102b 100644 --- a/llvm/test/Transforms/InstCombine/funnel.ll +++ b/llvm/test/Transforms/InstCombine/funnel.ll @@ -3,16 +3,14 @@ target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" -; TODO: Canonicalize or(shl,lshr) by constant to funnel shift intrinsics. +; Canonicalize or(shl,lshr) by constant to funnel shift intrinsics. ; This should help cost modeling for vectorization, inlining, etc. ; If a target does not have a fshl instruction, the expansion will ; be exactly these same 3 basic ops (shl/lshr/or). define i32 @fshl_i32_constant(i32 %x, i32 %y) { ; CHECK-LABEL: @fshl_i32_constant( -; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[X:%.*]], 11 -; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[Y:%.*]], 21 -; CHECK-NEXT: [[R:%.*]] = or i32 [[SHR]], [[SHL]] +; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.fshl.i32(i32 [[X:%.*]], i32 [[Y:%.*]], i32 11) ; CHECK-NEXT: ret i32 [[R]] ; %shl = shl i32 %x, 11 @@ -23,9 +21,7 @@ define i32 @fshl_i32_constant(i32 %x, i32 %y) { define i42 @fshr_i42_constant(i42 %x, i42 %y) { ; CHECK-LABEL: @fshr_i42_constant( -; CHECK-NEXT: [[SHR:%.*]] = lshr i42 [[X:%.*]], 31 -; CHECK-NEXT: [[SHL:%.*]] = shl i42 [[Y:%.*]], 11 -; CHECK-NEXT: [[R:%.*]] = or i42 [[SHR]], [[SHL]] +; CHECK-NEXT: [[R:%.*]] = call i42 @llvm.fshl.i42(i42 [[Y:%.*]], i42 [[X:%.*]], i42 11) ; CHECK-NEXT: ret i42 [[R]] ; %shr = lshr i42 %x, 31 @@ -34,13 +30,11 @@ define i42 @fshr_i42_constant(i42 %x, i42 %y) { ret i42 %r } -; TODO: Vector types are allowed. +; Vector types are allowed. 
define <2 x i16> @fshl_v2i16_constant_splat(<2 x i16> %x, <2 x i16> %y) { ; CHECK-LABEL: @fshl_v2i16_constant_splat( -; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i16> [[X:%.*]], -; CHECK-NEXT: [[SHR:%.*]] = lshr <2 x i16> [[Y:%.*]], -; CHECK-NEXT: [[R:%.*]] = or <2 x i16> [[SHL]], [[SHR]] +; CHECK-NEXT: [[R:%.*]] = call <2 x i16> @llvm.fshl.v2i16(<2 x i16> [[X:%.*]], <2 x i16> [[Y:%.*]], <2 x i16> ) ; CHECK-NEXT: ret <2 x i16> [[R]] ; %shl = shl <2 x i16> %x, @@ -51,9 +45,7 @@ define <2 x i16> @fshl_v2i16_constant_splat(<2 x i16> %x, <2 x i16> %y) { define <2 x i16> @fshl_v2i16_constant_splat_undef0(<2 x i16> %x, <2 x i16> %y) { ; CHECK-LABEL: @fshl_v2i16_constant_splat_undef0( -; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i16> [[X:%.*]], -; CHECK-NEXT: [[SHR:%.*]] = lshr <2 x i16> [[Y:%.*]], -; CHECK-NEXT: [[R:%.*]] = or <2 x i16> [[SHL]], [[SHR]] +; CHECK-NEXT: [[R:%.*]] = call <2 x i16> @llvm.fshl.v2i16(<2 x i16> [[X:%.*]], <2 x i16> [[Y:%.*]], <2 x i16> ) ; CHECK-NEXT: ret <2 x i16> [[R]] ; %shl = shl <2 x i16> %x, @@ -64,9 +56,7 @@ define <2 x i16> @fshl_v2i16_constant_splat_undef0(<2 x i16> %x, <2 x i16> %y) { define <2 x i16> @fshl_v2i16_constant_splat_undef1(<2 x i16> %x, <2 x i16> %y) { ; CHECK-LABEL: @fshl_v2i16_constant_splat_undef1( -; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i16> [[X:%.*]], -; CHECK-NEXT: [[SHR:%.*]] = lshr <2 x i16> [[Y:%.*]], -; CHECK-NEXT: [[R:%.*]] = or <2 x i16> [[SHL]], [[SHR]] +; CHECK-NEXT: [[R:%.*]] = call <2 x i16> @llvm.fshl.v2i16(<2 x i16> [[X:%.*]], <2 x i16> [[Y:%.*]], <2 x i16> ) ; CHECK-NEXT: ret <2 x i16> [[R]] ; %shl = shl <2 x i16> %x, @@ -75,13 +65,11 @@ define <2 x i16> @fshl_v2i16_constant_splat_undef1(<2 x i16> %x, <2 x i16> %y) { ret <2 x i16> %r } -; TODO: Non-power-of-2 vector types are allowed. +; Non-power-of-2 vector types are allowed. 
define <2 x i17> @fshr_v2i17_constant_splat(<2 x i17> %x, <2 x i17> %y) { ; CHECK-LABEL: @fshr_v2i17_constant_splat( -; CHECK-NEXT: [[SHR:%.*]] = lshr <2 x i17> [[X:%.*]], -; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i17> [[Y:%.*]], -; CHECK-NEXT: [[R:%.*]] = or <2 x i17> [[SHR]], [[SHL]] +; CHECK-NEXT: [[R:%.*]] = call <2 x i17> @llvm.fshl.v2i17(<2 x i17> [[Y:%.*]], <2 x i17> [[X:%.*]], <2 x i17> ) ; CHECK-NEXT: ret <2 x i17> [[R]] ; %shr = lshr <2 x i17> %x, @@ -92,9 +80,7 @@ define <2 x i17> @fshr_v2i17_constant_splat(<2 x i17> %x, <2 x i17> %y) { define <2 x i17> @fshr_v2i17_constant_splat_undef0(<2 x i17> %x, <2 x i17> %y) { ; CHECK-LABEL: @fshr_v2i17_constant_splat_undef0( -; CHECK-NEXT: [[SHR:%.*]] = lshr <2 x i17> [[X:%.*]], -; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i17> [[Y:%.*]], -; CHECK-NEXT: [[R:%.*]] = or <2 x i17> [[SHR]], [[SHL]] +; CHECK-NEXT: [[R:%.*]] = call <2 x i17> @llvm.fshl.v2i17(<2 x i17> [[Y:%.*]], <2 x i17> [[X:%.*]], <2 x i17> ) ; CHECK-NEXT: ret <2 x i17> [[R]] ; %shr = lshr <2 x i17> %x, @@ -105,9 +91,7 @@ define <2 x i17> @fshr_v2i17_constant_splat_undef0(<2 x i17> %x, <2 x i17> %y) { define <2 x i17> @fshr_v2i17_constant_splat_undef1(<2 x i17> %x, <2 x i17> %y) { ; CHECK-LABEL: @fshr_v2i17_constant_splat_undef1( -; CHECK-NEXT: [[SHR:%.*]] = lshr <2 x i17> [[X:%.*]], -; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i17> [[Y:%.*]], -; CHECK-NEXT: [[R:%.*]] = or <2 x i17> [[SHR]], [[SHL]] +; CHECK-NEXT: [[R:%.*]] = call <2 x i17> @llvm.fshl.v2i17(<2 x i17> [[Y:%.*]], <2 x i17> [[X:%.*]], <2 x i17> ) ; CHECK-NEXT: ret <2 x i17> [[R]] ; %shr = lshr <2 x i17> %x, @@ -116,13 +100,12 @@ define <2 x i17> @fshr_v2i17_constant_splat_undef1(<2 x i17> %x, <2 x i17> %y) { ret <2 x i17> %r } -; TODO: Allow arbitrary shift constants. +; Allow arbitrary shift constants. +; TODO: Support undef elements. 
define <2 x i32> @fshr_v2i32_constant_nonsplat(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @fshr_v2i32_constant_nonsplat( -; CHECK-NEXT: [[SHR:%.*]] = lshr <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i32> [[Y:%.*]], -; CHECK-NEXT: [[R:%.*]] = or <2 x i32> [[SHL]], [[SHR]] +; CHECK-NEXT: [[R:%.*]] = call <2 x i32> @llvm.fshl.v2i32(<2 x i32> [[Y:%.*]], <2 x i32> [[X:%.*]], <2 x i32> ) ; CHECK-NEXT: ret <2 x i32> [[R]] ; %shr = lshr <2 x i32> %x, @@ -159,9 +142,7 @@ define <2 x i32> @fshr_v2i32_constant_nonsplat_undef1(<2 x i32> %x, <2 x i32> %y define <2 x i36> @fshl_v2i36_constant_nonsplat(<2 x i36> %x, <2 x i36> %y) { ; CHECK-LABEL: @fshl_v2i36_constant_nonsplat( -; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i36> [[X:%.*]], -; CHECK-NEXT: [[SHR:%.*]] = lshr <2 x i36> [[Y:%.*]], -; CHECK-NEXT: [[R:%.*]] = or <2 x i36> [[SHL]], [[SHR]] +; CHECK-NEXT: [[R:%.*]] = call <2 x i36> @llvm.fshl.v2i36(<2 x i36> [[X:%.*]], <2 x i36> [[Y:%.*]], <2 x i36> ) ; CHECK-NEXT: ret <2 x i36> [[R]] ; %shl = shl <2 x i36> %x, diff --git a/llvm/test/Transforms/InstCombine/rotate.ll b/llvm/test/Transforms/InstCombine/rotate.ll index d08fe07784224f..667b5f087c8b53 100644 --- a/llvm/test/Transforms/InstCombine/rotate.ll +++ b/llvm/test/Transforms/InstCombine/rotate.ll @@ -122,13 +122,12 @@ define <2 x i17> @rotr_v2i17_constant_splat_undef1(<2 x i17> %x) { ret <2 x i17> %r } -; TODO: Allow arbitrary shift constants. +; Allow arbitrary shift constants. +; TODO: Support undef elements. 
define <2 x i32> @rotr_v2i32_constant_nonsplat(<2 x i32> %x) { ; CHECK-LABEL: @rotr_v2i32_constant_nonsplat( -; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[SHR:%.*]] = lshr <2 x i32> [[X]], -; CHECK-NEXT: [[R:%.*]] = or <2 x i32> [[SHL]], [[SHR]] +; CHECK-NEXT: [[R:%.*]] = call <2 x i32> @llvm.fshl.v2i32(<2 x i32> [[X:%.*]], <2 x i32> [[X]], <2 x i32> ) ; CHECK-NEXT: ret <2 x i32> [[R]] ; %shl = shl <2 x i32> %x, @@ -165,9 +164,7 @@ define <2 x i32> @rotr_v2i32_constant_nonsplat_undef1(<2 x i32> %x) { define <2 x i36> @rotl_v2i36_constant_nonsplat(<2 x i36> %x) { ; CHECK-LABEL: @rotl_v2i36_constant_nonsplat( -; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i36> [[X:%.*]], -; CHECK-NEXT: [[SHR:%.*]] = lshr <2 x i36> [[X]], -; CHECK-NEXT: [[R:%.*]] = or <2 x i36> [[SHL]], [[SHR]] +; CHECK-NEXT: [[R:%.*]] = call <2 x i36> @llvm.fshl.v2i36(<2 x i36> [[X:%.*]], <2 x i36> [[X]], <2 x i36> ) ; CHECK-NEXT: ret <2 x i36> [[R]] ; %shl = shl <2 x i36> %x, diff --git a/llvm/test/Transforms/LoopStrengthReduce/dbg-preserve-0.ll b/llvm/test/Transforms/LoopStrengthReduce/dbg-preserve-0.ll new file mode 100644 index 00000000000000..71031aabb95b74 --- /dev/null +++ b/llvm/test/Transforms/LoopStrengthReduce/dbg-preserve-0.ll @@ -0,0 +1,74 @@ +; RUN: opt < %s -loop-reduce -S | FileCheck %s + +; Test that LSR preserves debug-info for induction variables. 
+ +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" + +define dso_local void @foo(i8* nocapture %p) local_unnamed_addr !dbg !7 { +; CHECK-LABEL: @foo( +entry: + call void @llvm.dbg.value(metadata i8* %p, metadata !13, metadata !DIExpression()), !dbg !16 + call void @llvm.dbg.value(metadata i8 0, metadata !14, metadata !DIExpression()), !dbg !17 + br label %for.body, !dbg !18 + +for.cond.cleanup: ; preds = %for.body + ret void, !dbg !19 + +for.body: ; preds = %entry, %for.body +; CHECK-LABEL: for.body: + %i.06 = phi i8 [ 0, %entry ], [ %inc, %for.body ] + %p.addr.05 = phi i8* [ %p, %entry ], [ %add.ptr, %for.body ] + call void @llvm.dbg.value(metadata i8 %i.06, metadata !14, metadata !DIExpression()), !dbg !17 + call void @llvm.dbg.value(metadata i8* %p.addr.05, metadata !13, metadata !DIExpression()), !dbg !16 +; CHECK-NOT: call void @llvm.dbg.value(metadata i8* undef +; CHECK: call void @llvm.dbg.value(metadata i8* %lsr.iv, metadata ![[MID_p:[0-9]+]], metadata !DIExpression(DW_OP_constu, 3, DW_OP_minus, DW_OP_stack_value)), !dbg !16 + %add.ptr = getelementptr inbounds i8, i8* %p.addr.05, i64 3, !dbg !20 + call void @llvm.dbg.value(metadata i8* %add.ptr, metadata !13, metadata !DIExpression()), !dbg !16 +; CHECK-NOT: call void @llvm.dbg.value(metadata i8* undef +; CHECK: call void @llvm.dbg.value(metadata i8* %lsr.iv, metadata ![[MID_p]], metadata !DIExpression()), !dbg !16 + store i8 %i.06, i8* %add.ptr, align 1, !dbg !23, !tbaa !24 + %inc = add nuw nsw i8 %i.06, 1, !dbg !27 + call void @llvm.dbg.value(metadata i8 %inc, metadata !14, metadata !DIExpression()), !dbg !17 + %exitcond.not = icmp eq i8 %inc, 32, !dbg !28 + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !dbg !18, !llvm.loop !29 +} + +declare void @llvm.dbg.value(metadata, metadata, metadata) + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4, !5} +!llvm.ident = !{!6} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: 
"clang version 12.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, splitDebugInlining: false, nameTableKind: None) +!1 = !DIFile(filename: "lsrdbg.c", directory: "/") +!2 = !{} +!3 = !{i32 7, !"Dwarf Version", i32 4} +!4 = !{i32 2, !"Debug Info Version", i32 3} +!5 = !{i32 1, !"wchar_size", i32 4} +!6 = !{!"clang version 12.0.0"} +!7 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 2, type: !8, scopeLine: 2, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !12) +!8 = !DISubroutineType(types: !9) +!9 = !{null, !10} +!10 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !11, size: 64) +!11 = !DIBasicType(name: "unsigned char", size: 8, encoding: DW_ATE_unsigned_char) +!12 = !{!13, !14} +!13 = !DILocalVariable(name: "p", arg: 1, scope: !7, file: !1, line: 2, type: !10) +; CHECK: ![[MID_p]] = !DILocalVariable(name: "p", arg: 1, scope: !7, file: !1, line: 2, type: !10) +!14 = !DILocalVariable(name: "i", scope: !15, file: !1, line: 4, type: !11) +!15 = distinct !DILexicalBlock(scope: !7, file: !1, line: 4, column: 3) +!16 = !DILocation(line: 0, scope: !7) +!17 = !DILocation(line: 0, scope: !15) +!18 = !DILocation(line: 4, column: 3, scope: !15) +!19 = !DILocation(line: 8, column: 1, scope: !7) +!20 = !DILocation(line: 5, column: 7, scope: !21) +!21 = distinct !DILexicalBlock(scope: !22, file: !1, line: 4, column: 42) +!22 = distinct !DILexicalBlock(scope: !15, file: !1, line: 4, column: 3) +!23 = !DILocation(line: 6, column: 8, scope: !21) +!24 = !{!25, !25, i64 0} +!25 = !{!"omnipotent char", !26, i64 0} +!26 = !{!"Simple C/C++ TBAA"} +!27 = !DILocation(line: 4, column: 38, scope: !22) +!28 = !DILocation(line: 4, column: 31, scope: !22) +!29 = distinct !{!29, !18, !30, !31} +!30 = !DILocation(line: 7, column: 3, scope: !15) +!31 = !{!"llvm.loop.unroll.disable"} diff --git a/llvm/test/tools/llvm-readobj/COFF/tls-directory.test 
b/llvm/test/tools/llvm-readobj/COFF/tls-directory.test new file mode 100644 index 00000000000000..d553130e0a017d --- /dev/null +++ b/llvm/test/tools/llvm-readobj/COFF/tls-directory.test @@ -0,0 +1,162 @@ +## Tests for the --coff-tls-directory flag. + +## Test that the output of --coff-tls-directory works on x86. +## The binary created from this yaml definition is such that .rdata contains +## only the IMAGE_TLS_DIRECTORY structure and hence we should have that +## TlsTable.RelativeVirtualAddress == .rdata section VirtualAddress. +## Also note that the .rdata section VirtualSize == sizeof(coff_tls_directory32) == sizeof(IMAGE_TLS_DIRECTORY32) == 24 + +# RUN: yaml2obj %s --docnum=1 -o %t.32.exe -DTLSRVA=10000 -DTLSSIZE=24 +# RUN: llvm-readobj --coff-tls-directory %t.32.exe | FileCheck %s --check-prefix I386 + +# I386: Arch: i386 +# I386-NEXT: AddressSize: 32bit +# I386-NEXT: TLSDirectory { +# I386-NEXT: StartAddressOfRawData: 0x404000 +# I386-NEXT: EndAddressOfRawData: 0x404008 +# I386-NEXT: AddressOfIndex: 0x402000 +# I386-NEXT: AddressOfCallBacks: 0x0 +# I386-NEXT: SizeOfZeroFill: 0x0 +# I386-NEXT: Characteristics [ (0x300000) +# I386-NEXT: IMAGE_SCN_ALIGN_4BYTES (0x300000) +# I386-NEXT: ] +# I386-NEXT: } + + +## Test that the output of --coff-tls-directory errors on malformed input. +## On x86, the TLS directory should be 24 bytes. +## This test has a truncated TLS directory. + +# RUN: yaml2obj %s --docnum=1 -o %t.wrong-size.32.exe -DTLSRVA=10000 -DTLSSIZE=10 +# RUN: not llvm-readobj --coff-tls-directory %t.wrong-size.32.exe 2>&1 | FileCheck %s --check-prefix I386-WRONG-SIZE-ERR + +# I386-WRONG-SIZE-ERR: error: '{{.*}}': TLS Directory size (10) is not the expected size (24). 
+ +--- !COFF +OptionalHeader: + AddressOfEntryPoint: 0 + ImageBase: 0 + SectionAlignment: 4096 + FileAlignment: 512 + MajorOperatingSystemVersion: 0 + MinorOperatingSystemVersion: 0 + MajorImageVersion: 0 + MinorImageVersion: 0 + MajorSubsystemVersion: 0 + MinorSubsystemVersion: 0 + Subsystem: IMAGE_SUBSYSTEM_WINDOWS_CUI + DLLCharacteristics: [] + SizeOfStackReserve: 0 + SizeOfStackCommit: 0 + SizeOfHeapReserve: 0 + SizeOfHeapCommit: 0 + TlsTable: + RelativeVirtualAddress: [[TLSRVA]] + Size: [[TLSSIZE]] +header: + Machine: IMAGE_FILE_MACHINE_I386 + Characteristics: [ IMAGE_FILE_EXECUTABLE_IMAGE, IMAGE_FILE_32BIT_MACHINE ] +sections: + - Name: .rdata + Characteristics: [ IMAGE_SCN_CNT_INITIALIZED_DATA, IMAGE_SCN_MEM_READ, IMAGE_SCN_MEM_WRITE ] + VirtualAddress: 10000 + VirtualSize: 24 + SectionData: '004040000840400000204000000000000000000000003000' +symbols: [] + + +## Test that the output of --coff-tls-directory works on x86_64. +## The binary created from this yaml definition is such that .rdata contains +## only the IMAGE_TLS_DIRECTORY structure and hence we should have that +## TlsTable.RelativeVirtualAddress == .rdata section VirtualAddress. +## Also note that the .rdata section VirtualSize == sizeof(coff_tls_directory64) == sizeof(IMAGE_TLS_DIRECTORY64) == 40 + +# RUN: yaml2obj %s --docnum=2 -o %t.64.exe -DTLSRVA=10000 -DTLSSIZE=40 +# RUN: llvm-readobj --coff-tls-directory %t.64.exe | FileCheck %s --check-prefix X86-64 + +# X86-64: Arch: x86_64 +# X86-64-NEXT: AddressSize: 64bit +# X86-64-NEXT: TLSDirectory { +# X86-64-NEXT: StartAddressOfRawData: 0x140004000 +# X86-64-NEXT: EndAddressOfRawData: 0x140004008 +# X86-64-NEXT: AddressOfIndex: 0x140002000 +# X86-64-NEXT: AddressOfCallBacks: 0x0 +# X86-64-NEXT: SizeOfZeroFill: 0x0 +# X86-64-NEXT: Characteristics [ (0x300000) +# X86-64-NEXT: IMAGE_SCN_ALIGN_4BYTES (0x300000) +# X86-64-NEXT: ] +# X86-64-NEXT: } + + +## Test that the output of --coff-tls-directory errors on malformed input. 
+ +## On x86-64, the TLS directory should be 40 bytes. +## This test has an erroneously lengthened TLS directory. + +# RUN: yaml2obj %s --docnum=2 -o %t.wrong-size.64.exe -DTLSRVA=10000 -DTLSSIZE=80 +# RUN: not llvm-readobj --coff-tls-directory %t.wrong-size.64.exe 2>&1 | FileCheck %s --check-prefix X86-64-WRONG-SIZE-ERR + +# X86-64-WRONG-SIZE-ERR: error: '{{.*}}': TLS Directory size (80) is not the expected size (40). + + +## This test has a correct TLS Directory size but the RVA is invalid. + +# RUN: yaml2obj %s --docnum=2 -o %t.bad-tls-rva.exe -DTLSRVA=999999 -DTLSSIZE=40 +# RUN: not llvm-readobj --coff-tls-directory %t.bad-tls-rva.exe 2>&1 | FileCheck %s --check-prefix BAD-TLS-RVA-ERR + +# BAD-TLS-RVA-ERR: error: '{{.*}}': Invalid data was encountered while parsing the file + +--- !COFF +OptionalHeader: + AddressOfEntryPoint: 0 + ImageBase: 0 + SectionAlignment: 4096 + FileAlignment: 512 + MajorOperatingSystemVersion: 0 + MinorOperatingSystemVersion: 0 + MajorImageVersion: 0 + MinorImageVersion: 0 + MajorSubsystemVersion: 0 + MinorSubsystemVersion: 0 + Subsystem: IMAGE_SUBSYSTEM_WINDOWS_CUI + DLLCharacteristics: [] + SizeOfStackReserve: 0 + SizeOfStackCommit: 0 + SizeOfHeapReserve: 0 + SizeOfHeapCommit: 0 + TlsTable: + RelativeVirtualAddress: [[TLSRVA]] + Size: [[TLSSIZE]] +header: + Machine: IMAGE_FILE_MACHINE_AMD64 + Characteristics: [ IMAGE_FILE_EXECUTABLE_IMAGE, IMAGE_FILE_LARGE_ADDRESS_AWARE ] +sections: + - Name: .rdata + Characteristics: [ IMAGE_SCN_CNT_INITIALIZED_DATA, IMAGE_SCN_MEM_READ, IMAGE_SCN_MEM_WRITE ] + VirtualAddress: 10000 + VirtualSize: 40 + SectionData: '00400040010000000840004001000000002000400100000000000000000000000000000000003000' +symbols: [] + + +## Test that --coff-tls-directory doesn't output anything if there's no TLS directory. + +## Case 1: TlsTable.RelativeVirtualAddress/Size = 0. 
+ +# RUN: yaml2obj %s --docnum=2 -o %t.no-tls1.exe -DTLSRVA=0 -DTLSSIZE=0 +# RUN: llvm-readobj --coff-tls-directory %t.no-tls1.exe | FileCheck %s --check-prefix NO-TLS + +## Case 2: There's no TlsTable listed in the COFF header. + +# RUN: yaml2obj %s --docnum=3 -o %t.no-tls2.exe +# RUN: llvm-readobj --coff-tls-directory %t.no-tls2.exe | FileCheck %s --check-prefix NO-TLS + +# NO-TLS: TLSDirectory { +# NO-TLS-NEXT: } + +--- !COFF +header: + Machine: IMAGE_FILE_MACHINE_AMD64 + Characteristics: [ IMAGE_FILE_EXECUTABLE_IMAGE, IMAGE_FILE_LARGE_ADDRESS_AWARE ] +sections: [] +symbols: [] diff --git a/llvm/test/tools/llvm-readobj/ELF/hash-table.test b/llvm/test/tools/llvm-readobj/ELF/hash-table.test index 1102d848f03e46..b8d44e3cdf7191 100644 --- a/llvm/test/tools/llvm-readobj/ELF/hash-table.test +++ b/llvm/test/tools/llvm-readobj/ELF/hash-table.test @@ -45,9 +45,13 @@ ProgramHeaders: ## Check we can dump the SHT_HASH section even when an object ## does not have the section header table. -# RUN: yaml2obj --docnum=2 %s -o %t.noshdr -# RUN: llvm-readobj --hash-table %t.noshdr | FileCheck %s --check-prefix=NOSHDR -# RUN: llvm-readelf --hash-table %t.noshdr | FileCheck %s --check-prefix=NOSHDR +# RUN: yaml2obj --docnum=2 -DNOHEADERS=true %s -o %t.noshdr +# RUN: llvm-readobj --hash-table %t.noshdr 2>&1 | \ +# RUN: FileCheck %s -DFILE=%t.noshdr --check-prefix=NOSHDR --implicit-check-not=warning: +# RUN: llvm-readelf --hash-table %t.noshdr 2>&1 | \ +# RUN: FileCheck %s -DFILE=%t.noshdr --check-prefix=NOSHDR --implicit-check-not=warning: + +# NOSHDR: warning: '[[FILE]]': string table was not found # NOSHDR: HashTable { # NOSHDR-NEXT: Num Buckets: 1 @@ -58,37 +62,57 @@ ProgramHeaders: --- !ELF FileHeader: - Class: ELFCLASS64 - Data: ELFDATA2LSB - Type: ET_DYN -## We simulate no section header table by -## overriding the ELF header properties. 
- EShOff: 0x0 - EShNum: 0x0 + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_DYN Sections: - Name: .hash Type: SHT_HASH Flags: [ SHF_ALLOC ] Bucket: [ 0 ] Chain: [ 1 ] + EntSize: [[ENTSIZE=4]] - Name: .dynamic Type: SHT_DYNAMIC Flags: [ SHF_ALLOC ] Entries: - - Tag: DT_HASH + - Tag: [[DYNTAG=DT_HASH]] Value: 0x0 - Tag: DT_NULL Value: 0x0 +SectionHeaderTable: + NoHeaders: [[NOHEADERS=false]] ProgramHeaders: - Type: PT_LOAD Sections: - Section: .hash - Section: .dynamic - Type: PT_DYNAMIC - VAddr: 0x1010 + VAddr: 0x10 Sections: - Section: .dynamic +## Document we don't report a warning when the value of the sh_entsize field of the SHT_HASH section is not 4. + +# RUN: yaml2obj --docnum=2 -DENTSIZE=0xff %s -o %t.ent.size +# RUN: llvm-readobj --hash-table %t.ent.size 2>&1 | \ +# RUN: FileCheck %s -DFILE=%t.ent.size --check-prefix=NOSHDR --implicit-check-not=warning: +# RUN: llvm-readelf --hash-table %t.ent.size 2>&1 | \ +# RUN: FileCheck %s -DFILE=%t.ent.size --check-prefix=NOSHDR --implicit-check-not=warning: + +## Document we need the DT_HASH dynamic tag to locate the hash table. + +# RUN: yaml2obj --docnum=2 -DDYNTAG=DT_NULL %s -o %t.no.dyntag +# RUN: llvm-readobj --hash-table %t.no.dyntag 2>&1 | \ +# RUN: FileCheck %s -DFILE=%t.no.dyntag --check-prefix=NODYNTAG --implicit-check-not=warning: +# RUN: llvm-readelf --hash-table %t.no.dyntag 2>&1 | \ +# RUN: FileCheck %s -DFILE=%t.no.dyntag --check-prefix=NODYNTAG --implicit-check-not=warning: + +# NODYNTAG: warning: '[[FILE]]': string table was not found + +# NODYNTAG: HashTable { +# NODYNTAG-NEXT: } + ## Each SHT_HASH section starts with two 32-bit fields: nbucket and nchain. ## Check we report an error when a DT_HASH value points to data that has size less than 8 bytes. 
diff --git a/llvm/tools/llvm-objdump/llvm-objdump.cpp b/llvm/tools/llvm-objdump/llvm-objdump.cpp index 46ed7414dbb31e..d57ea8ef94e788 100644 --- a/llvm/tools/llvm-objdump/llvm-objdump.cpp +++ b/llvm/tools/llvm-objdump/llvm-objdump.cpp @@ -1737,8 +1737,8 @@ static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj, // the output. StringSet<> FoundDisasmSymbolSet; for (std::pair &SecSyms : AllSymbols) - stable_sort(SecSyms.second); - stable_sort(AbsoluteSymbols); + llvm::stable_sort(SecSyms.second); + llvm::stable_sort(AbsoluteSymbols); std::unique_ptr DICtx; LiveVariablePrinter LVP(*Ctx.getRegisterInfo(), *STI); diff --git a/llvm/tools/llvm-readobj/COFFDumper.cpp b/llvm/tools/llvm-readobj/COFFDumper.cpp index 22e27b3e5a29e1..f59bfd8b7cb6e3 100644 --- a/llvm/tools/llvm-readobj/COFFDumper.cpp +++ b/llvm/tools/llvm-readobj/COFFDumper.cpp @@ -89,6 +89,7 @@ class COFFDumper : public ObjDumper { void printCOFFDirectives() override; void printCOFFBaseReloc() override; void printCOFFDebugDirectory() override; + void printCOFFTLSDirectory() override; void printCOFFResources() override; void printCOFFLoadConfig() override; void printCodeViewDebugInfo() override; @@ -116,6 +117,8 @@ class COFFDumper : public ObjDumper { void printBaseOfDataField(const pe32plus_header *Hdr); template void printCOFFLoadConfig(const T *Conf, LoadConfigTables &Tables); + template + void printCOFFTLSDirectory(const coff_tls_directory *TlsTable); typedef void (*PrintExtraCB)(raw_ostream &, const uint8_t *); void printRVATable(uint64_t TableVA, uint64_t Count, uint64_t EntrySize, PrintExtraCB PrintExtra = 0); @@ -2018,3 +2021,27 @@ void llvm::dumpCodeViewMergedTypes(ScopedPrinter &Writer, Writer.flush(); } } + +void COFFDumper::printCOFFTLSDirectory() { + if (Obj->is64()) + printCOFFTLSDirectory(Obj->getTLSDirectory64()); + else + printCOFFTLSDirectory(Obj->getTLSDirectory32()); +} + +template +void COFFDumper::printCOFFTLSDirectory( + const coff_tls_directory *TlsTable) { + 
DictScope D(W, "TLSDirectory"); + if (!TlsTable) + return; + + W.printHex("StartAddressOfRawData", TlsTable->StartAddressOfRawData); + W.printHex("EndAddressOfRawData", TlsTable->EndAddressOfRawData); + W.printHex("AddressOfIndex", TlsTable->AddressOfIndex); + W.printHex("AddressOfCallBacks", TlsTable->AddressOfCallBacks); + W.printHex("SizeOfZeroFill", TlsTable->SizeOfZeroFill); + W.printFlags("Characteristics", TlsTable->Characteristics, + makeArrayRef(ImageSectionCharacteristics), + COFF::SectionCharacteristics(COFF::IMAGE_SCN_ALIGN_MASK)); +} diff --git a/llvm/tools/llvm-readobj/ObjDumper.h b/llvm/tools/llvm-readobj/ObjDumper.h index 9e45062ccda8d1..943299a121fc53 100644 --- a/llvm/tools/llvm-readobj/ObjDumper.h +++ b/llvm/tools/llvm-readobj/ObjDumper.h @@ -80,6 +80,7 @@ class ObjDumper { virtual void printCOFFDirectives() { } virtual void printCOFFBaseReloc() { } virtual void printCOFFDebugDirectory() { } + virtual void printCOFFTLSDirectory() {} virtual void printCOFFResources() {} virtual void printCOFFLoadConfig() { } virtual void printCodeViewDebugInfo() { } diff --git a/llvm/tools/llvm-readobj/llvm-readobj.cpp b/llvm/tools/llvm-readobj/llvm-readobj.cpp index 173ee3a7f140d5..1546ce7926a401 100644 --- a/llvm/tools/llvm-readobj/llvm-readobj.cpp +++ b/llvm/tools/llvm-readobj/llvm-readobj.cpp @@ -272,6 +272,10 @@ namespace opts { COFFDebugDirectory("coff-debug-directory", cl::desc("Display the PE/COFF debug directory")); + // --coff-tls-directory + cl::opt COFFTLSDirectory("coff-tls-directory", + cl::desc("Display the PE/COFF TLS directory")); + // --coff-resources cl::opt COFFResources("coff-resources", cl::desc("Display the PE/COFF .rsrc section")); @@ -533,6 +537,8 @@ static void dumpObject(const ObjectFile &Obj, ScopedPrinter &Writer, Dumper->printCOFFBaseReloc(); if (opts::COFFDebugDirectory) Dumper->printCOFFDebugDirectory(); + if (opts::COFFTLSDirectory) + Dumper->printCOFFTLSDirectory(); if (opts::COFFResources) Dumper->printCOFFResources(); if 
(opts::COFFLoadConfig) diff --git a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h index 2e566c941894f9..395db396dadca1 100644 --- a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h +++ b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h @@ -16,6 +16,9 @@ #include "llvm/ADT/SmallBitVector.h" namespace mlir { + +class BufferAssignmentTypeConverter; + namespace linalg { struct LinalgFusionOptions; @@ -45,6 +48,12 @@ void populateConvVectorizationPatterns( MLIRContext *context, SmallVectorImpl &patterns, ArrayRef tileSizes); +/// Populates the given list with patterns to convert Linalg operations on +/// tensors to buffers. +void populateConvertLinalgOnTensorsToBuffersPatterns( + MLIRContext *context, BufferAssignmentTypeConverter *converter, + OwningRewritePatternList *patterns); + /// Performs standalone tiling of a single LinalgOp by `tileSizes`. /// and permute the loop nest according to `interchangeVector` /// The permutation is expressed as a list of integers that specify @@ -246,6 +255,16 @@ Optional promoteSubViews(OpBuilder &b, LinalgOp op, LinalgPromotionOptions options, OperationFolder *folder = nullptr); +/// Creates a number of ranges equal to the number of dimensions in the `map`. +/// The returned ranges correspond to the loop ranges, in the proper order, for +/// which new loops will be created. +/// The function supports only maps that are invertible and have results of type +/// DimExpr or (DimExpr + DimExpr - SymbolExpr floordiv ConstExpr). +/// It expects a non-inverted, concatenated map and last values in +/// allViewSizes will be applied to the symbols in the map if it contains any. +SmallVector emitLoopRanges(OpBuilder &b, Location loc, AffineMap map, + ValueRange viewSizes); + /// Emit a suitable vector form for a Linalg op with fully static shape. 
void vectorizeLinalgOp(OpBuilder &builder, Operation *op); diff --git a/mlir/include/mlir/Support/LLVM.h b/mlir/include/mlir/Support/LLVM.h index 17e020442eb484..e8595ae29ed748 100644 --- a/mlir/include/mlir/Support/LLVM.h +++ b/mlir/include/mlir/Support/LLVM.h @@ -60,6 +60,8 @@ template class SmallVectorImpl; template class StringSet; +template +class StringSwitch; template class TinyPtrVector; template @@ -111,6 +113,8 @@ using llvm::SmallPtrSet; using llvm::SmallPtrSetImpl; using llvm::SmallVector; using llvm::SmallVectorImpl; +template +using StringSwitch = llvm::StringSwitch; using llvm::TinyPtrVector; template using TypeSwitch = llvm::TypeSwitch; diff --git a/mlir/lib/Conversion/GPUCommon/IndexIntrinsicsOpLowering.h b/mlir/lib/Conversion/GPUCommon/IndexIntrinsicsOpLowering.h index 93381054dd213d..f4b7cedeb0e120 100644 --- a/mlir/lib/Conversion/GPUCommon/IndexIntrinsicsOpLowering.h +++ b/mlir/lib/Conversion/GPUCommon/IndexIntrinsicsOpLowering.h @@ -27,7 +27,7 @@ struct GPUIndexIntrinsicOpLowering : public ConvertToLLVMPattern { unsigned indexBitwidth; static dimension dimensionToIndex(Op op) { - return llvm::StringSwitch(op.dimension()) + return StringSwitch(op.dimension()) .Case("x", X) .Case("y", Y) .Case("z", Z) diff --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMTypeSyntax.cpp b/mlir/lib/Dialect/LLVMIR/IR/LLVMTypeSyntax.cpp index aa611d76a67abf..574d0aa8c37f0e 100644 --- a/mlir/lib/Dialect/LLVMIR/IR/LLVMTypeSyntax.cpp +++ b/mlir/lib/Dialect/LLVMIR/IR/LLVMTypeSyntax.cpp @@ -440,7 +440,7 @@ static LLVMType parseTypeImpl(DialectAsmParser &parser, if (failed(parser.parseKeyword(&key))) return LLVMType(); - return llvm::StringSwitch>(key) + return StringSwitch>(key) .Case("void", [&] { return LLVMVoidType::get(ctx); }) .Case("half", [&] { return LLVMHalfType::get(ctx); }) .Case("bfloat", [&] { return LLVMBFloatType::get(ctx); }) diff --git a/mlir/lib/Dialect/Linalg/Transforms/Loops.cpp b/mlir/lib/Dialect/Linalg/Transforms/Loops.cpp index 9e96c8cdc69194..b95469d8a95549 
100644 --- a/mlir/lib/Dialect/Linalg/Transforms/Loops.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Loops.cpp @@ -58,77 +58,6 @@ static SmallVector permuteIvs(ArrayRef ivs, : SmallVector(ivs.begin(), ivs.end()); } -/// Creates a number of ranges equal to the number of dimensions in the `map`. -/// The returned ranges correspond to the loop ranges, in the proper order, for -/// which new loops will be created. -/// The function supports only maps that are invertible and have results of type -/// DimExpr or (DimExpr + DimExpr - SymbolExpr floordiv ConstExpr). -/// It expects a non-inverted, concatenated map and last values in -/// allViewSizes will be applied to the symbols in the map if it contains any. -static SmallVector emitLoopRanges(OpBuilder &b, Location loc, - AffineMap map, - ValueRange viewSizes) { - unsigned numDims = map.getNumDims(), numRes = map.getNumResults(); - unsigned numSym = map.getNumSymbols(); - assert(viewSizes.size() == numRes + numSym && - "viewSizes must contain sizes of all views and values for symbols"); - SmallVector res(numDims); - for (unsigned idx = 0; idx < numRes; ++idx) { - auto result = map.getResult(idx); - if (auto d = result.dyn_cast()) { - if (res[d.getPosition()].offset) - continue; - res[d.getPosition()] = - Range{std_constant_index(0), viewSizes[idx], std_constant_index(1)}; - } - - // If the access pattern is of form (m, n)[s] -> (m + n - s floordiv 2), - // then the bounds are: - // (s floordiv 2) <= m <= (size(m) + s floordiv 2 - s + 1). - // where size(n) is applied to the symbol s. - // This is done statically now. 
- if (auto binOp = result.dyn_cast()) { - auto lhs = binOp.getLHS().dyn_cast(); - auto rhs = binOp.getRHS().dyn_cast(); - if (!lhs || !rhs || binOp.getKind() != AffineExprKind::Add || - lhs.getKind() != AffineExprKind::Add || - rhs.getKind() != mlir::AffineExprKind::Mul) - continue; - - auto m = lhs.getLHS().dyn_cast(); - auto n = lhs.getRHS().dyn_cast(); - auto fDiv = rhs.getLHS().dyn_cast(); - auto minusOne = rhs.getRHS().dyn_cast(); - if (!m || !n || !fDiv || !minusOne || - fDiv.getKind() != AffineExprKind::FloorDiv || - fDiv.getLHS().getKind() != AffineExprKind::SymbolId || - fDiv.getRHS().getKind() != AffineExprKind::Constant) - continue; - - auto s = fDiv.getLHS().dyn_cast(); - if (minusOne.getValue() != -1) - continue; - - int mPos = m.getPosition(); - AffineExpr one = getAffineConstantExpr(1, s.getContext()); - AffineExpr sizeOfM = getAffineSymbolExpr(numSym, s.getContext()); - // Construction of upper bound (size(m) + s floordiv 2 - s + 1). - AffineExpr upperOffsetExpr = sizeOfM + fDiv + one - s; - AffineMap fromMap = AffineMap::get(numDims, numSym + 1, fDiv); - AffineMap toMap = AffineMap::get(numDims, numSym + 1, upperOffsetExpr); - SmallVector values(viewSizes.begin(), - viewSizes.begin() + numDims); - values.insert(values.end(), viewSizes.begin() + numRes, viewSizes.end()); - values.push_back(viewSizes[mPos]); - // Construction of the lower bound (s floordiv 2). 
- Value from = applyMapToValues(b, loc, fromMap, values).front(); - Value to = applyMapToValues(b, loc, toMap, values).front(); - res[mPos] = Range{from, to, std_constant_index(1)}; - } - } - return res; -} - template static void inlineRegionAndEmitStore(OpType op, ArrayRef indexedValues, ArrayRef> indexing, @@ -708,6 +637,70 @@ static Optional linalgOpToLoopsImplSwitch(Operation *op, llvm_unreachable("Unexpected op in linalgOpToLoopsImpl"); } +SmallVector mlir::linalg::emitLoopRanges(OpBuilder &b, Location loc, + AffineMap map, + ValueRange viewSizes) { + unsigned numDims = map.getNumDims(), numRes = map.getNumResults(); + unsigned numSym = map.getNumSymbols(); + assert(viewSizes.size() == numRes + numSym && + "viewSizes must contain sizes of all views and values for symbols"); + SmallVector res(numDims); + for (unsigned idx = 0; idx < numRes; ++idx) { + auto result = map.getResult(idx); + if (auto d = result.dyn_cast()) { + if (res[d.getPosition()].offset) + continue; + res[d.getPosition()] = + Range{std_constant_index(0), viewSizes[idx], std_constant_index(1)}; + } + + // If the access pattern is of form (m, n)[s] -> (m + n - s floordiv 2), + // then the bounds are: + // (s floordiv 2) <= m <= (size(m) + s floordiv 2 - s + 1). + // where size(n) is applied to the symbol s. + // This is done statically now. 
+ if (auto binOp = result.dyn_cast()) { + auto lhs = binOp.getLHS().dyn_cast(); + auto rhs = binOp.getRHS().dyn_cast(); + if (!lhs || !rhs || binOp.getKind() != AffineExprKind::Add || + lhs.getKind() != AffineExprKind::Add || + rhs.getKind() != mlir::AffineExprKind::Mul) + continue; + + auto m = lhs.getLHS().dyn_cast(); + auto n = lhs.getRHS().dyn_cast(); + auto fDiv = rhs.getLHS().dyn_cast(); + auto minusOne = rhs.getRHS().dyn_cast(); + if (!m || !n || !fDiv || !minusOne || + fDiv.getKind() != AffineExprKind::FloorDiv || + fDiv.getLHS().getKind() != AffineExprKind::SymbolId || + fDiv.getRHS().getKind() != AffineExprKind::Constant) + continue; + + auto s = fDiv.getLHS().dyn_cast(); + if (minusOne.getValue() != -1) + continue; + + int mPos = m.getPosition(); + AffineExpr one = getAffineConstantExpr(1, s.getContext()); + AffineExpr sizeOfM = getAffineSymbolExpr(numSym, s.getContext()); + // Construction of upper bound (size(m) + s floordiv 2 - s + 1). + AffineExpr upperOffsetExpr = sizeOfM + fDiv + one - s; + AffineMap fromMap = AffineMap::get(numDims, numSym + 1, fDiv); + AffineMap toMap = AffineMap::get(numDims, numSym + 1, upperOffsetExpr); + SmallVector values(viewSizes.begin(), + viewSizes.begin() + numDims); + values.insert(values.end(), viewSizes.begin() + numRes, viewSizes.end()); + values.push_back(viewSizes[mPos]); + // Construction of the lower bound (s floordiv 2). + Value from = applyMapToValues(b, loc, fromMap, values).front(); + Value to = applyMapToValues(b, loc, toMap, values).front(); + res[mPos] = Range{from, to, std_constant_index(1)}; + } + } + return res; +} + /// Emits a loop nest with the proper body for `op`. 
template Optional mlir::linalg::linalgLowerOpToLoops(OpBuilder &builder, diff --git a/mlir/lib/Dialect/Linalg/Transforms/TensorsToBuffers.cpp b/mlir/lib/Dialect/Linalg/Transforms/TensorsToBuffers.cpp index b714a1f6c64288..3282358f5f4149 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/TensorsToBuffers.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/TensorsToBuffers.cpp @@ -14,14 +14,119 @@ #include "PassDetail.h" #include "mlir/Dialect/Linalg/IR/LinalgOps.h" #include "mlir/Dialect/Linalg/Passes.h" +#include "mlir/Dialect/Linalg/Transforms/Transforms.h" +#include "mlir/Dialect/Linalg/Utils/Utils.h" #include "mlir/IR/Function.h" #include "mlir/IR/Operation.h" #include "mlir/Pass/Pass.h" #include "mlir/Transforms/BufferPlacement.h" -using namespace mlir; - namespace { + +using namespace ::mlir; +using namespace ::mlir::linalg; + +SmallVector +computeLoopRanges(Location loc, linalg::GenericOp linalgOp, OpBuilder *b) { + auto indexingMaps = llvm::to_vector<4>( + linalgOp.indexing_maps().getAsValueRange()); + auto inputIndexingMaps = + llvm::makeArrayRef(indexingMaps).take_front(linalgOp.getNumInputs()); + + mlir::edsc::ScopedContext scope(*b, loc); + return emitLoopRanges(scope.getBuilderRef(), loc, + concatAffineMaps(inputIndexingMaps), + getShape(*b, linalgOp)); +} + +Value maybeConvertToIndex(Location loc, Value val, OpBuilder *b) { + if (val.getType().isIndex()) + return val; + return b->create(loc, val, b->getIndexType()); +} + +LogicalResult allocateBuffersForResults(Location loc, + linalg::GenericOp linalgOp, + linalg::GenericOpAdaptor &adaptor, + SmallVectorImpl *resultBuffers, + OpBuilder *b) { + // Lazily compute loopRanges. + SmallVector loopRanges; + + // Allocate a buffer for every tensor result. 
+ for (auto en : llvm::enumerate(linalgOp.getResultTypes())) { + size_t resultIndex = en.index(); + Type resultType = en.value(); + + auto tensorType = resultType.dyn_cast(); + if (tensorType == nullptr) { + linalgOp.emitOpError() + << "tensor to buffer conversion expects ranked tensor results"; + return failure(); + } + auto tensorShape = tensorType.getShape(); + auto memrefType = MemRefType::get(tensorShape, tensorType.getElementType()); + + // Allocate buffers for init tensors that are assumed to fold onto the first + // results. + // TODO: update this assumption because the reality is more complex + // under linalg on tensor based transformations. + bool foldedInitTensor = resultIndex < linalgOp.getNumInitTensors(); + if (foldedInitTensor) { + // Dealing with an init tensor requires distinguishing between 1-use + // and many-use cases which would create aliasing and WAR hazards. + Value initTensor = linalgOp.getInitTensor(resultIndex); + Value initBuffer = adaptor.init_tensors()[resultIndex]; + if (initTensor.hasOneUse()) { + resultBuffers->push_back(initBuffer); + continue; + } + SmallVector dynOperands; + for (auto dim : llvm::enumerate(tensorShape)) { + if (dim.value() == TensorType::kDynamicSize) { + dynOperands.push_back(b->create(loc, initTensor, dim.index())); + } + } + auto alloc = b->create(loc, memrefType, dynOperands); + b->create(loc, initBuffer, alloc); + resultBuffers->push_back(alloc); + continue; + } + + // Allocate buffers for statically-shaped results. + if (memrefType.hasStaticShape()) { + resultBuffers->push_back(b->create(loc, memrefType)); + continue; + } + + // Perform a naive shape inference for the dynamically-shaped results. + // Extract the required element out of the vector. 
+ SmallVector dynOperands; + auto resultIndexingMap = linalgOp.getOutputIndexingMap(resultIndex); + for (auto shapeElement : llvm::enumerate(tensorType.getShape())) { + if (loopRanges.empty()) + loopRanges = computeLoopRanges(loc, linalgOp, b); + + if (shapeElement.value() != ShapedType::kDynamicSize) + continue; + + AffineExpr expr = resultIndexingMap.getResult(shapeElement.index()); + switch (expr.getKind()) { + case AffineExprKind::DimId: { + int64_t loopIndex = expr.cast().getPosition(); + Value size = maybeConvertToIndex(loc, loopRanges[loopIndex].size, b); + dynOperands.push_back(size); + break; + } + default: + return failure(); + } + } + resultBuffers->push_back(b->create(loc, memrefType, dynOperands)); + } + return success(); +} + /// A pattern to convert Generic Linalg operations which work on tensors to /// use buffers. A buffer is allocated using BufferAssignmentPlacer for /// each operation result. BufferPlacement pass should be later used to move @@ -34,10 +139,10 @@ class GenericOpConverter linalg::GenericOp>::BufferAssignmentOpConversionPattern; LogicalResult - matchAndRewrite(linalg::GenericOp op, ArrayRef operands, + matchAndRewrite(linalg::GenericOp linalgOp, ArrayRef operands, ConversionPatternRewriter &rewriter) const final { - linalg::GenericOpAdaptor adaptor(operands, - op.getOperation()->getAttrDictionary()); + linalg::GenericOpAdaptor adaptor( + operands, linalgOp.getOperation()->getAttrDictionary()); // All inputs need to be turned into buffers first. Until then, bail out. if (llvm::any_of(adaptor.inputs(), @@ -50,93 +155,54 @@ class GenericOpConverter [](Value in) { return !in.getType().isa(); })) return failure(); - Location loc = op.getLoc(); - SmallVector newOutputBuffers; - newOutputBuffers.reserve(op.getNumOutputs()); - newOutputBuffers.append(adaptor.output_buffers().begin(), - adaptor.output_buffers().end()); - - // Update all types to memref types. - // Assume the init tensors fold onto the first results. 
- // TODO: update this assumption because the reality is more complex under - // linalg on tensor based transformations. - for (auto en : llvm::enumerate(op.getResultTypes())) { - auto type = en.value().cast(); - if (!type.hasStaticShape()) - return rewriter.notifyMatchFailure( - op, "dynamic shapes not currently supported"); - auto memrefType = MemRefType::get(type.getShape(), type.getElementType()); - bool foldedInitTensor = en.index() < op.getNumInitTensors(); - if (foldedInitTensor) { - // Dealing with an init tensor requires distinguishing between 1-use - // and many-use cases which would create aliasing and WAR hazards. - Value initTensor = op.getInitTensor(en.index()); - Value initBuffer = adaptor.init_tensors()[en.index()]; - if (initTensor.hasOneUse()) { - newOutputBuffers.push_back(initBuffer); - continue; - } - auto alloc = rewriter.create(loc, memrefType); - rewriter.create(loc, initBuffer, alloc); - newOutputBuffers.push_back(alloc); - } else { - auto alloc = rewriter.create(loc, memrefType); - newOutputBuffers.push_back(alloc); - } + Location loc = linalgOp.getLoc(); + SmallVector newOutputBuffers(adaptor.output_buffers().begin(), + adaptor.output_buffers().end()); + + if (failed(allocateBuffersForResults(loc, linalgOp, adaptor, + &newOutputBuffers, &rewriter))) { + linalgOp.emitOpError() + << "Failed to allocate buffers for tensor results."; + return failure(); } // Generate a new linalg operation that works on buffers. 
- auto linalgOp = rewriter.create( + auto newLinalgOp = rewriter.create( loc, - /*resultTensorTypes=*/ArrayRef{}, + /*resultTensorTypes=*/llvm::None, /*inputs=*/adaptor.inputs(), /*outputBuffers=*/newOutputBuffers, - /*initTensors=*/ValueRange{}, op.indexing_maps(), op.iterator_types(), - op.docAttr(), op.library_callAttr(), op.symbol_sourceAttr()); + /*initTensors=*/llvm::None, linalgOp.indexing_maps(), + linalgOp.iterator_types(), linalgOp.docAttr(), + linalgOp.library_callAttr(), linalgOp.symbol_sourceAttr()); // Create a new block in the region of the new Generic Op. - Block &oldBlock = op.getRegion().front(); - Region &newRegion = linalgOp.region(); + Block *oldBlock = linalgOp.getBody(); + Region &newRegion = newLinalgOp.region(); Block *newBlock = rewriter.createBlock(&newRegion, newRegion.begin(), - oldBlock.getArgumentTypes()); - - // Add the result arguments that do not come from init_tensors to the new - // block. - // TODO: update this assumption because the reality is more complex under - // linalg on tensor based transformations. - for (Value v : - ValueRange(newOutputBuffers).drop_front(adaptor.init_tensors().size())) + oldBlock->getArgumentTypes()); + + // Add the result arguments to the new block. + for (Value v : newOutputBuffers) newBlock->addArgument(v.getType().cast().getElementType()); // Clone the body of the old block to the new block. BlockAndValueMapping mapping; - for (unsigned i = 0; i < oldBlock.getNumArguments(); i++) - mapping.map(oldBlock.getArgument(i), newBlock->getArgument(i)); + mapping.map(oldBlock->getArguments(), newBlock->getArguments()); OpBuilder::InsertionGuard guard(rewriter); rewriter.setInsertionPointToEnd(newBlock); - for (auto &op : oldBlock.getOperations()) { + for (auto &op : oldBlock->getOperations()) { Operation *clonedOp = rewriter.clone(op, mapping); mapping.map(op.getResults(), clonedOp->getResults()); } // Replace the results of the old op with the new output buffers. 
- rewriter.replaceOp(op, newOutputBuffers); + rewriter.replaceOp(linalgOp, newOutputBuffers); return success(); } }; -/// Populate the given list with patterns to convert Linalg operations on -/// tensors to buffers. -static void populateConvertLinalgOnTensorsToBuffersPattern( - MLIRContext *context, BufferAssignmentTypeConverter *converter, - OwningRewritePatternList *patterns) { - populateWithBufferAssignmentOpConversionPatterns< - mlir::ReturnOp, mlir::ReturnOp, linalg::CopyOp>(context, converter, - patterns); - patterns->insert(context, converter); -} - /// Converts Linalg operations that work on tensor-type operands or results to /// work on buffers. struct ConvertLinalgOnTensorsToBuffers @@ -176,8 +242,11 @@ struct ConvertLinalgOnTensorsToBuffers BufferAssignmentTypeConverter::AppendToArgumentsList); OwningRewritePatternList patterns; - populateConvertLinalgOnTensorsToBuffersPattern(&context, &converter, - &patterns); + populateConvertLinalgOnTensorsToBuffersPatterns(&context, &converter, + &patterns); + populateWithBufferAssignmentOpConversionPatterns< + mlir::ReturnOp, mlir::ReturnOp, linalg::CopyOp>(&context, &converter, + &patterns); if (failed(applyFullConversion(this->getOperation(), target, patterns))) this->signalPassFailure(); } @@ -188,3 +257,9 @@ std::unique_ptr> mlir::createConvertLinalgOnTensorsToBuffersPass() { return std::make_unique(); } + +void mlir::linalg::populateConvertLinalgOnTensorsToBuffersPatterns( + MLIRContext *context, BufferAssignmentTypeConverter *converter, + OwningRewritePatternList *patterns) { + patterns->insert(context, converter); +} diff --git a/mlir/lib/Dialect/PDL/IR/PDL.cpp b/mlir/lib/Dialect/PDL/IR/PDL.cpp index a0b9c969becf61..ba1eb7b9957494 100644 --- a/mlir/lib/Dialect/PDL/IR/PDL.cpp +++ b/mlir/lib/Dialect/PDL/IR/PDL.cpp @@ -34,7 +34,7 @@ Type PDLDialect::parseType(DialectAsmParser &parser) const { return Type(); Builder &builder = parser.getBuilder(); - Type result = llvm::StringSwitch(keyword) + Type result = 
StringSwitch(keyword) .Case("attribute", builder.getType()) .Case("operation", builder.getType()) .Case("type", builder.getType()) diff --git a/mlir/lib/Dialect/StandardOps/IR/Ops.cpp b/mlir/lib/Dialect/StandardOps/IR/Ops.cpp index f2823c564ccef6..f445a0cce242cd 100644 --- a/mlir/lib/Dialect/StandardOps/IR/Ops.cpp +++ b/mlir/lib/Dialect/StandardOps/IR/Ops.cpp @@ -2823,19 +2823,30 @@ static SmallVector extractFromI64ArrayAttr(Attribute attr) { })); } +enum SubViewVerificationResult { + Success, + RankTooLarge, + SizeMismatch, + StrideMismatch, + ElemTypeMismatch, + MemSpaceMismatch, + AffineMapMismatch +}; + /// Checks if `original` Type type can be rank reduced to `reduced` type. /// This function is slight variant of `is subsequence` algorithm where /// not matching dimension must be 1. -static bool isRankReducedType(Type originalType, Type reducedType) { +static SubViewVerificationResult isRankReducedType(Type originalType, + Type reducedType) { if (originalType == reducedType) - return true; + return SubViewVerificationResult::Success; if (!originalType.isa() && !originalType.isa()) - return true; + return SubViewVerificationResult::Success; if (originalType.isa() && !reducedType.isa()) - return true; + return SubViewVerificationResult::Success; if (originalType.isa() && !reducedType.isa()) - return true; + return SubViewVerificationResult::Success; ShapedType originalShapedType = originalType.cast(); ShapedType reducedShapedType = reducedType.cast(); @@ -2846,7 +2857,7 @@ static bool isRankReducedType(Type originalType, Type reducedType) { unsigned originalRank = originalShape.size(), reducedRank = reducedShape.size(); if (reducedRank > originalRank) - return false; + return SubViewVerificationResult::RankTooLarge; unsigned reducedIdx = 0; SmallVector keepMask(originalRank); @@ -2858,41 +2869,78 @@ static bool isRankReducedType(Type originalType, Type reducedType) { reducedIdx++; // 1 is the only non-matching allowed. 
else if (originalShape[originalIdx] != 1) - return false; + return SubViewVerificationResult::SizeMismatch; } // Must match the reduced rank. if (reducedIdx != reducedRank) - return false; + return SubViewVerificationResult::SizeMismatch; // We are done for the tensor case. if (originalType.isa()) - return true; + return SubViewVerificationResult::Success; // Strided layout logic is relevant for MemRefType only. MemRefType original = originalType.cast(); MemRefType reduced = reducedType.cast(); MLIRContext *c = original.getContext(); - int64_t originalOffset, symCounter = 0, dimCounter = 0; - SmallVector originalStrides; + int64_t originalOffset, reducedOffset; + SmallVector originalStrides, reducedStrides, keepStrides; getStridesAndOffset(original, originalStrides, originalOffset); - auto getSymbolOrConstant = [&](int64_t offset) { - return offset == ShapedType::kDynamicStrideOrOffset - ? getAffineSymbolExpr(symCounter++, c) - : getAffineConstantExpr(offset, c); - }; - - AffineExpr expr = getSymbolOrConstant(originalOffset); - for (unsigned i = 0, e = originalStrides.size(); i < e; i++) { - if (keepMask[i]) - expr = expr + getSymbolOrConstant(originalStrides[i]) * - getAffineDimExpr(dimCounter++, c); + getStridesAndOffset(reduced, reducedStrides, reducedOffset); + + // Filter strides based on the mask and check that they are the same + // as reduced ones. 
+ reducedIdx = 0; + for (unsigned originalIdx = 0; originalIdx < originalRank; ++originalIdx) { + if (keepMask[originalIdx]) { + if (originalStrides[originalIdx] != reducedStrides[reducedIdx++]) + return SubViewVerificationResult::StrideMismatch; + keepStrides.push_back(originalStrides[originalIdx]); + } } - auto reducedMap = AffineMap::get(dimCounter, symCounter, expr, c); - return original.getElementType() == reduced.getElementType() && - original.getMemorySpace() == reduced.getMemorySpace() && - (reduced.getAffineMaps().empty() || - reducedMap == reduced.getAffineMaps().front()); + if (original.getElementType() != reduced.getElementType()) + return SubViewVerificationResult::ElemTypeMismatch; + + if (original.getMemorySpace() != reduced.getMemorySpace()) + return SubViewVerificationResult::MemSpaceMismatch; + + auto reducedMap = makeStridedLinearLayoutMap(keepStrides, originalOffset, c); + if (!reduced.getAffineMaps().empty() && + reducedMap != reduced.getAffineMaps().front()) + return SubViewVerificationResult::AffineMapMismatch; + + return SubViewVerificationResult::Success; +} + +template +static LogicalResult produceSubViewErrorMsg(SubViewVerificationResult result, + OpTy op, Type expectedType) { + auto memrefType = expectedType.cast(); + switch (result) { + case SubViewVerificationResult::Success: + return success(); + case SubViewVerificationResult::RankTooLarge: + return op.emitError("expected result rank to be smaller or equal to ") + << "the source rank."; + case SubViewVerificationResult::SizeMismatch: + return op.emitError("expected result type to be ") + << expectedType + << " or a rank-reduced version. (mismatch of result sizes)"; + case SubViewVerificationResult::StrideMismatch: + return op.emitError("expected result type to be ") + << expectedType + << " or a rank-reduced version. 
(mismatch of result strides)"; + case SubViewVerificationResult::ElemTypeMismatch: + return op.emitError("expected result element type to be ") + << memrefType.getElementType(); + case SubViewVerificationResult::MemSpaceMismatch: + return op.emitError("expected result and source memory spaces to match."); + case SubViewVerificationResult::AffineMapMismatch: + return op.emitError("expected result type to be ") + << expectedType + << " or a rank-reduced version. (mismatch of result affine map)"; + } } template @@ -2937,11 +2985,9 @@ static LogicalResult verify(SubViewOp op) { baseType, extractFromI64ArrayAttr(op.static_offsets()), extractFromI64ArrayAttr(op.static_sizes()), extractFromI64ArrayAttr(op.static_strides())); - if (!isRankReducedType(expectedType, subViewType)) - return op.emitError("expected result type to be ") - << expectedType << " or a rank-reduced version."; - return success(); + auto result = isRankReducedType(expectedType, subViewType); + return produceSubViewErrorMsg(result, op, expectedType); } raw_ostream &mlir::operator<<(raw_ostream &os, Range &range) { @@ -3352,11 +3398,8 @@ static LogicalResult verify(SubTensorOp op) { op.getSourceType(), extractFromI64ArrayAttr(op.static_offsets()), extractFromI64ArrayAttr(op.static_sizes()), extractFromI64ArrayAttr(op.static_strides())); - if (!isRankReducedType(expectedType, op.getType())) - return op.emitError("expected result type to be ") - << expectedType << " or a rank-reduced version."; - - return success(); + auto result = isRankReducedType(expectedType, op.getType()); + return produceSubViewErrorMsg(result, op, expectedType); } void SubTensorOp::getCanonicalizationPatterns(OwningRewritePatternList &results, diff --git a/mlir/lib/ExecutionEngine/JitRunner.cpp b/mlir/lib/ExecutionEngine/JitRunner.cpp index 2b18adb3734713..7d141e90edda3b 100644 --- a/mlir/lib/ExecutionEngine/JitRunner.cpp +++ b/mlir/lib/ExecutionEngine/JitRunner.cpp @@ -291,7 +291,7 @@ int mlir::JitRunnerMain( Error (*)(Options &, 
ModuleOp, StringRef, std::function); auto compileAndExecuteFn = - llvm::StringSwitch(options.mainFuncType.getValue()) + StringSwitch(options.mainFuncType.getValue()) .Case("i32", compileAndExecuteSingleReturnFunction) .Case("i64", compileAndExecuteSingleReturnFunction) .Case("f32", compileAndExecuteSingleReturnFunction) diff --git a/mlir/lib/IR/SymbolTable.cpp b/mlir/lib/IR/SymbolTable.cpp index b064d83b5faadc..e18e691f8cc808 100644 --- a/mlir/lib/IR/SymbolTable.cpp +++ b/mlir/lib/IR/SymbolTable.cpp @@ -166,7 +166,7 @@ SymbolTable::Visibility SymbolTable::getSymbolVisibility(Operation *symbol) { return Visibility::Public; // Otherwise, switch on the string value. - return llvm::StringSwitch(vis.getValue()) + return StringSwitch(vis.getValue()) .Case("private", Visibility::Private) .Case("nested", Visibility::Nested) .Case("public", Visibility::Public); diff --git a/mlir/lib/Parser/Lexer.cpp b/mlir/lib/Parser/Lexer.cpp index 9a3418eaf83275..ee31ff0cf9e4cd 100644 --- a/mlir/lib/Parser/Lexer.cpp +++ b/mlir/lib/Parser/Lexer.cpp @@ -212,7 +212,7 @@ Token Lexer::lexBareIdentifierOrKeyword(const char *tokStart) { isAllDigit(spelling.drop_front(2)))) return Token(Token::inttype, spelling); - Token::Kind kind = llvm::StringSwitch(spelling) + Token::Kind kind = StringSwitch(spelling) #define TOK_KEYWORD(SPELLING) .Case(#SPELLING, Token::kw_##SPELLING) #include "TokenKinds.def" .Default(Token::bare_identifier); diff --git a/mlir/lib/TableGen/Format.cpp b/mlir/lib/TableGen/Format.cpp index 12735875c1c199..7d17a0aef3f978 100644 --- a/mlir/lib/TableGen/Format.cpp +++ b/mlir/lib/TableGen/Format.cpp @@ -60,7 +60,7 @@ Optional FmtContext::getSubstFor(StringRef placeholder) const { } FmtContext::PHKind FmtContext::getPlaceHolderKind(StringRef str) { - return llvm::StringSwitch(str) + return StringSwitch(str) .Case("_builder", FmtContext::PHKind::Builder) .Case("_op", FmtContext::PHKind::Op) .Case("_self", FmtContext::PHKind::Self) diff --git a/mlir/lib/TableGen/Predicate.cpp 
b/mlir/lib/TableGen/Predicate.cpp index 8927296af223bd..a37847f0d48930 100644 --- a/mlir/lib/TableGen/Predicate.cpp +++ b/mlir/lib/TableGen/Predicate.cpp @@ -119,7 +119,7 @@ static PredCombinerKind getPredCombinerKind(const Pred &pred) { return PredCombinerKind::Leaf; const auto &combinedPred = static_cast(pred); - return llvm::StringSwitch( + return StringSwitch( combinedPred.getCombinerDef()->getName()) .Case("PredCombinerAnd", PredCombinerKind::And) .Case("PredCombinerOr", PredCombinerKind::Or) diff --git a/mlir/test/Dialect/Linalg/tensors-to-buffers.mlir b/mlir/test/Dialect/Linalg/tensors-to-buffers.mlir index 654a13fca743f3..4339b33a237920 100644 --- a/mlir/test/Dialect/Linalg/tensors-to-buffers.mlir +++ b/mlir/test/Dialect/Linalg/tensors-to-buffers.mlir @@ -2,11 +2,13 @@ #map0 = affine_map<(d0) -> (d0)> -// CHECK-LABEL: func @multiple_results_generic_op -func @multiple_results_generic_op(%arg0: tensor<4xf32>) -> (tensor<4xf32>, tensor<4xf32>) { - %0, %1 = linalg.generic {indexing_maps = [#map0, #map0, #map0], iterator_types = ["parallel"]} - ins(%arg0 : tensor<4xf32>) { - ^bb0(%gen_arg1: f32): +// CHECK-LABEL: func @multiple_results +func @multiple_results(%arg0: tensor<4xf32>) -> (tensor<4xf32>, tensor<4xf32>) { + %0, %1 = linalg.generic { + indexing_maps = [#map0, #map0, #map0], + iterator_types = ["parallel"] + } ins(%arg0 : tensor<4xf32>) { + ^bb0(%gen_arg1: f32): %tmp1 = exp %gen_arg1 : f32 linalg.yield %tmp1, %tmp1 : f32, f32 } -> tensor<4xf32>, tensor<4xf32> @@ -34,15 +36,20 @@ func @multiple_results_generic_op(%arg0: tensor<4xf32>) -> (tensor<4xf32>, tenso // CHECK-LABEL: func @chained_operations func @chained_operations(%arg0: tensor<4xf32>) -> tensor<4xf32> { - %0 = linalg.generic {indexing_maps = [#map0, #map0], iterator_types = ["parallel"]} - ins(%arg0 : tensor<4xf32>) { - ^bb0(%gen_arg1: f32): + %0 = linalg.generic { + indexing_maps = [#map0, #map0], + iterator_types = ["parallel"] + } ins(%arg0 : tensor<4xf32>) { + ^bb0(%gen_arg1: f32): %tmp1 = 
exp %gen_arg1 : f32 linalg.yield %tmp1 : f32 } -> tensor<4xf32> - %1 = linalg.generic {args_in = 1 : i64, args_out = 1 : i64, indexing_maps = [#map0, #map0], iterator_types = ["parallel"]} - ins(%0 : tensor<4xf32>) { - ^bb0(%gen_arg2: f32): + + %1 = linalg.generic { + indexing_maps = [#map0, #map0], + iterator_types = ["parallel"] + } ins(%0 : tensor<4xf32>) { + ^bb0(%gen_arg2: f32): %tmp2 = exp %gen_arg2 : f32 linalg.yield %tmp2 : f32 } -> tensor<4xf32> @@ -73,6 +80,46 @@ func @no_linalg_op(%arg0: f32) -> (f32, f32) { %0 = mulf %arg0, %arg0 : f32 return %0, %0 : f32, f32 } -// CHECK: (%[[NEW_ARG0:.*]]: [[TYPE:.*]]) -> ([[TYPE]], [[TYPE]]) -// CHECK: %[[RESULT:.*]] = mulf %[[NEW_ARG0]], %[[NEW_ARG0]] : [[TYPE]] -// CHECK: return %[[RESULT]], %[[RESULT]] : [[TYPE]], [[TYPE]] +// CHECK: (%[[NEW_ARG0:.*]]: [[TYPE:.*]]) -> ([[TYPE]], [[TYPE]]) +// CHECK: %[[RESULT:.*]] = mulf %[[NEW_ARG0]], %[[NEW_ARG0]] : [[TYPE]] +// CHECK: return %[[RESULT]], %[[RESULT]] : [[TYPE]], [[TYPE]] + +// ----- + +#map_2d = affine_map<(d0, d1) -> (d0, d1)> +#map_2d_inv = affine_map<(d0, d1) -> (d1, d0)> + +func @dynamic_results(%arg0: tensor) + -> (tensor, tensor) { + %0, %1 = linalg.generic { + indexing_maps = [#map_2d, #map_2d, #map_2d_inv], + iterator_types = ["parallel", "parallel"] + } ins(%arg0 : tensor) { + ^bb0(%gen_arg1: f32): + %tmp1 = exp %gen_arg1 : f32 + linalg.yield %tmp1, %tmp1 : f32, f32 + } -> tensor, tensor + return %0, %1 : tensor, tensor +} + +// CHECK: #map0 = affine_map<(d0, d1) -> (d0, d1)> +// CHECK: #map1 = affine_map<(d0, d1) -> (d1, d0)> + +// CHECK-LABEL: func @dynamic_results +// CHECK-SAME: (%[[INPUT:.*]]: [[TYPE:.*]], %[[OUT_1:.*]]: [[TYPE]], %[[OUT_2:.*]]: [[TYPE]]) { +// CHECK: %[[C0:.*]] = constant 0 : index +// CHECK: %[[DIM_0:.*]] = dim %[[INPUT]], %[[C0]] : [[TYPE]] +// CHECK: %[[C1:.*]] = constant 1 : index +// CHECK: %[[DIM_1:.*]] = dim %[[INPUT]], %[[C1]] : [[TYPE]] +// CHECK: %[[OUT_BUF_1:.*]] = alloc(%[[DIM_0]], %[[DIM_1]]) : [[TYPE]] +// CHECK: 
%[[OUT_BUF_2:.*]] = alloc(%[[DIM_1]], %[[DIM_0]]) : [[TYPE]] + +// CHECK: linalg.generic {indexing_maps = [#map0, #map0, #map1], {{.*}}} +// CHECK-SAME: ins(%[[INPUT]] : [[TYPE]]) +// CHECK-SAME: outs(%[[OUT_BUF_1]], %[[OUT_BUF_2]] : [[TYPE]], [[TYPE]]) { + +// CHECK: linalg.copy(%[[OUT_BUF_1]], %[[OUT_1]]) : [[TYPE]], [[TYPE]] +// CHECK: dealloc %[[OUT_BUF_1]] : [[TYPE]] +// CHECK: linalg.copy(%[[OUT_BUF_2]], %[[OUT_2]]) : [[TYPE]], [[TYPE]] +// CHECK: dealloc %[[OUT_BUF_2]] : [[TYPE]] +// CHECK: return diff --git a/mlir/test/IR/core-ops.mlir b/mlir/test/IR/core-ops.mlir index 2590dc0105c4ea..219c3bc84d5706 100644 --- a/mlir/test/IR/core-ops.mlir +++ b/mlir/test/IR/core-ops.mlir @@ -21,6 +21,7 @@ // CHECK-DAG: #[[$SUBVIEW_MAP5:map[0-9]+]] = affine_map<(d0, d1)[s0] -> (d0 * 8 + s0 + d1 * 2)> // CHECK-DAG: #[[$SUBVIEW_MAP6:map[0-9]+]] = affine_map<(d0, d1, d2, d3, d4) -> (d0 * 36 + d1 * 36 + d2 * 4 + d3 * 4 + d4)> // CHECK-DAG: #[[$SUBVIEW_MAP7:map[0-9]+]] = affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3 + d3 * s4 + d4 * s5 + d5 * s6)> +// CHECK-DAG: #[[$SUBVIEW_MAP8:map[0-9]+]] = affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3 + d3 * s4)> // CHECK-LABEL: func @func_with_ops // CHECK-SAME: %[[ARG:.*]]: f32 @@ -811,11 +812,11 @@ func @memref_subview(%arg0 : index, %arg1 : index, %arg2 : index) { %15 = alloc(%arg1, %arg2)[%c0, %c1, %arg1, %arg0, %arg0, %arg2, %arg2] : memref<1x?x5x1x?x1xf32, affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6] -> (s0 + s1 * d0 + s2 * d1 + s3 * d2 + s4 * d3 + s5 * d4 + s6 * d5)>> // CHECK: subview %15[0, 0, 0, 0, 0, 0] [1, %arg1, 5, 1, %arg2, 1] [1, 1, 1, 1, 1, 1] : - // CHECK-SAME: memref<1x?x5x1x?x1xf32, #[[$SUBVIEW_MAP7]]> to memref - %16 = subview %15[0, 0, 0, 0, 0, 0][1, %arg1, 5, 1, %arg2, 1][1, 1, 1, 1, 1, 1] : memref<1x?x5x1x?x1xf32, offset: ?, strides: [?, ?, ?, ?, ?, ?]> to memref + // CHECK-SAME: memref<1x?x5x1x?x1xf32, 
#[[$SUBVIEW_MAP7]]> to memref + %16 = subview %15[0, 0, 0, 0, 0, 0][1, %arg1, 5, 1, %arg2, 1][1, 1, 1, 1, 1, 1] : memref<1x?x5x1x?x1xf32, offset: ?, strides: [?, ?, ?, ?, ?, ?]> to memref // CHECK: subview %15[%arg1, %arg1, %arg1, %arg1, %arg1, %arg1] [1, %arg1, 5, 1, %arg2, 1] [1, 1, 1, 1, 1, 1] : - // CHECK-SAME: memref<1x?x5x1x?x1xf32, #[[$SUBVIEW_MAP7]]> to memref - %17 = subview %15[%arg1, %arg1, %arg1, %arg1, %arg1, %arg1][1, %arg1, 5, 1, %arg2, 1][1, 1, 1, 1, 1, 1] : memref<1x?x5x1x?x1xf32, offset: ?, strides: [?, ?, ?, ?, ?, ?]> to memref + // CHECK-SAME: memref<1x?x5x1x?x1xf32, #[[$SUBVIEW_MAP7]]> to memref + %17 = subview %15[%arg1, %arg1, %arg1, %arg1, %arg1, %arg1][1, %arg1, 5, 1, %arg2, 1][1, 1, 1, 1, 1, 1] : memref<1x?x5x1x?x1xf32, offset: ?, strides: [?, ?, ?, ?, ?, ?]> to memref %18 = alloc() : memref<1x8xf32> // CHECK: subview %18[0, 0] [1, 8] [1, 1] : memref<1x8xf32> to memref<8xf32> diff --git a/mlir/test/IR/invalid-ops.mlir b/mlir/test/IR/invalid-ops.mlir index 7356c07577dbaf..b59353aa2f7c51 100644 --- a/mlir/test/IR/invalid-ops.mlir +++ b/mlir/test/IR/invalid-ops.mlir @@ -1011,7 +1011,7 @@ func @invalid_subview(%arg0 : index, %arg1 : index, %arg2 : index) { func @invalid_subview(%arg0 : index, %arg1 : index, %arg2 : index) { %0 = alloc() : memref<8x16x4xf32> - // expected-error@+1 {{expected result type to be 'memref (d0 * s1 + s0 + d1 * s2 + d2 * s3)>>'}} + // expected-error@+1 {{expected result type to be 'memref (d0 * s1 + s0 + d1 * s2 + d2 * s3)>>' or a rank-reduced version. 
(mismatch of result strides)}} %1 = subview %0[%arg0, %arg1, %arg2][%arg0, %arg1, %arg2][%arg0, %arg1, %arg2] : memref<8x16x4xf32> to memref @@ -1020,9 +1020,31 @@ func @invalid_subview(%arg0 : index, %arg1 : index, %arg2 : index) { // ----- +func @invalid_subview(%arg0 : index, %arg1 : index, %arg2 : index) { + %0 = alloc() : memref<8x16x4xf32> + // expected-error@+1 {{expected result element type to be 'f32'}} + %1 = subview %0[0, 0, 0][8, 16, 4][1, 1, 1] + : memref<8x16x4xf32> to + memref<8x16x4xi32> + return +} + +// ----- + +func @invalid_subview(%arg0 : index, %arg1 : index, %arg2 : index) { + %0 = alloc() : memref<8x16x4xf32> + // expected-error@+1 {{expected result rank to be smaller or equal to the source rank.}} + %1 = subview %0[0, 0, 0][8, 16, 4][1, 1, 1] + : memref<8x16x4xf32> to + memref<8x16x4x3xi32> + return +} + +// ----- + func @invalid_rank_reducing_subview(%arg0 : index, %arg1 : index, %arg2 : index) { %0 = alloc() : memref<8x16x4xf32> - // expected-error@+1 {{expected result type to be 'memref<8x16x4xf32, affine_map<(d0, d1, d2) -> (d0 * 64 + d1 * 4 + d2)>>'}} + // expected-error@+1 {{expected result type to be 'memref<8x16x4xf32, affine_map<(d0, d1, d2) -> (d0 * 64 + d1 * 4 + d2)>>' or a rank-reduced version. (mismatch of result sizes)}} %1 = subview %0[0, 0, 0][8, 16, 4][1, 1, 1] : memref<8x16x4xf32> to memref<16x4xf32> return @@ -1030,6 +1052,14 @@ func @invalid_rank_reducing_subview(%arg0 : index, %arg1 : index, %arg2 : index) // ----- +func @invalid_rank_reducing_subview(%arg0 : memref, %arg1 : index, %arg2 : index) { + // expected-error@+1 {{expected result type to be 'memref (d0 * s1 + s0 + d1)>>' or a rank-reduced version. 
(mismatch of result strides)}} + %0 = subview %arg0[0, %arg1][%arg2, 1][1, 1] : memref to memref + return +} + +// ----- + func @invalid_memref_cast(%arg0 : memref<12x4x16xf32, offset:0, strides:[64, 16, 1]>) { // expected-error@+1{{operand type 'memref<12x4x16xf32, affine_map<(d0, d1, d2) -> (d0 * 64 + d1 * 16 + d2)>>' and result type 'memref<12x4x16xf32, affine_map<(d0, d1, d2) -> (d0 * 128 + d1 * 32 + d2 * 2)>>' are cast incompatible}} %0 = memref_cast %arg0 : memref<12x4x16xf32, offset:0, strides:[64, 16, 1]> to memref<12x4x16xf32, offset:0, strides:[128, 32, 2]> @@ -1259,7 +1289,7 @@ func @imaginary_part_from_incompatible_complex_type(%cplx: complex) { // ----- func @subtensor_wrong_dynamic_type(%t: tensor<8x16x4xf32>, %idx : index) { - // expected-error @+1 {{expected result type to be 'tensor<4x4x4xf32>'}} + // expected-error @+1 {{expected result type to be 'tensor<4x4x4xf32>' or a rank-reduced version. (mismatch of result sizes)}} %0 = subtensor %t[0, 2, 0][4, 4, 4][1, 1, 1] : tensor<8x16x4xf32> to tensor @@ -1269,7 +1299,7 @@ func @subtensor_wrong_dynamic_type(%t: tensor<8x16x4xf32>, %idx : index) { // ----- func @subtensor_wrong_static_type(%t: tensor<8x16x4xf32>, %idx : index) { - // expected-error @+1 {{expected result type to be 'tensor'}} + // expected-error @+1 {{expected result type to be 'tensor' or a rank-reduced version. (mismatch of result sizes)}} %0 = subtensor %t[0, 0, 0][%idx, 3, %idx][1, 1, 1] : tensor<8x16x4xf32> to tensor<4x4x4xf32> diff --git a/mlir/test/lib/Dialect/Test/TestDialect.cpp b/mlir/test/lib/Dialect/Test/TestDialect.cpp index c84a7717abe784..4ca89bced5eb93 100644 --- a/mlir/test/lib/Dialect/Test/TestDialect.cpp +++ b/mlir/test/lib/Dialect/Test/TestDialect.cpp @@ -685,7 +685,7 @@ void SideEffectOp::getEffects( // Get the specific memory effect. 
MemoryEffects::Effect *effect = - llvm::StringSwitch( + StringSwitch( effectElement.get("effect").cast().getValue()) .Case("allocate", MemoryEffects::Allocate::get()) .Case("free", MemoryEffects::Free::get()) diff --git a/mlir/tools/mlir-linalg-ods-gen/mlir-linalg-ods-gen.cpp b/mlir/tools/mlir-linalg-ods-gen/mlir-linalg-ods-gen.cpp index 4fe3cd1ee174b9..64424b4ac3d2f8 100644 --- a/mlir/tools/mlir-linalg-ods-gen/mlir-linalg-ods-gen.cpp +++ b/mlir/tools/mlir-linalg-ods-gen/mlir-linalg-ods-gen.cpp @@ -288,7 +288,7 @@ Token Lexer::lexIdentifier(const char *tokStart) { // Check to see if this identifier is a keyword. StringRef str(tokStart, curPtr - tokStart); - Token::Kind kind = llvm::StringSwitch(str) + Token::Kind kind = StringSwitch(str) .Case("def", Token::Kind::kw_def) .Case("ods_def", Token::Kind::kw_ods_def) .Case("floordiv", Token::Kind::kw_floordiv) diff --git a/mlir/tools/mlir-tblgen/OpFormatGen.cpp b/mlir/tools/mlir-tblgen/OpFormatGen.cpp index 336c9111677b4e..9b8f249232401f 100644 --- a/mlir/tools/mlir-tblgen/OpFormatGen.cpp +++ b/mlir/tools/mlir-tblgen/OpFormatGen.cpp @@ -719,7 +719,7 @@ static void genLiteralParser(StringRef value, OpMethodBody &body) { body << "Keyword(\"" << value << "\")"; return; } - body << (StringRef)llvm::StringSwitch(value) + body << (StringRef)StringSwitch(value) .Case("->", "Arrow()") .Case(":", "Colon()") .Case(",", "Comma()") @@ -1936,7 +1936,7 @@ Token FormatLexer::lexIdentifier(const char *tokStart) { // Check to see if this identifier is a keyword. StringRef str(tokStart, curPtr - tokStart); Token::Kind kind = - llvm::StringSwitch(str) + StringSwitch(str) .Case("attr-dict", Token::kw_attr_dict) .Case("attr-dict-with-keyword", Token::kw_attr_dict_w_keyword) .Case("custom", Token::kw_custom)