diff --git a/.github/workflows/containers/github-action-ci/stage1.Dockerfile b/.github/workflows/containers/github-action-ci/stage1.Dockerfile
index 73828cc05736e6..3e2c1ab11d58bf 100644
--- a/.github/workflows/containers/github-action-ci/stage1.Dockerfile
+++ b/.github/workflows/containers/github-action-ci/stage1.Dockerfile
@@ -2,7 +2,7 @@ FROM docker.io/library/ubuntu:22.04 as base
 ENV LLVM_SYSROOT=/opt/llvm
 
 FROM base as stage1-toolchain
-ENV LLVM_VERSION=18.1.8
+ENV LLVM_VERSION=19.1.2
 
 RUN apt-get update && \
     apt-get install -y \
diff --git a/clang/test/CodeGenCUDA/bf16.cu b/clang/test/CodeGenCUDA/bf16.cu
index 3c443420dbd36a..f794b83239f14a 100644
--- a/clang/test/CodeGenCUDA/bf16.cu
+++ b/clang/test/CodeGenCUDA/bf16.cu
@@ -25,7 +25,7 @@ __device__ void test_arg(__bf16 *out, __bf16 in) {
 __device__ __bf16 test_ret( __bf16 in) {
 // CHECK: ld.param.b16 %[[R:rs[0-9]+]], [_Z8test_retDF16b_param_0];
   return in;
-// CHECK: st.param.b16 [func_retval0+0], %[[R]]
+// CHECK: st.param.b16 [func_retval0], %[[R]]
 // CHECK: ret;
 }
 
@@ -35,15 +35,15 @@ __device__ __bf16 external_func( __bf16 in);
 // CHECK: .param .align 2 .b8 _Z9test_callDF16b_param_0[2]
 __device__ __bf16 test_call( __bf16 in) {
 // CHECK: ld.param.b16 %[[R:rs[0-9]+]], [_Z9test_callDF16b_param_0];
-// CHECK: st.param.b16 [param0+0], %[[R]];
+// CHECK: st.param.b16 [param0], %[[R]];
 // CHECK: .param .align 2 .b8 retval0[2];
 // CHECK: call.uni (retval0),
 // CHECK-NEXT: _Z13external_funcDF16b,
 // CHECK-NEXT: (
 // CHECK-NEXT: param0
 // CHECK-NEXT );
-// CHECK: ld.param.b16 %[[RET:rs[0-9]+]], [retval0+0];
+// CHECK: ld.param.b16 %[[RET:rs[0-9]+]], [retval0];
   return external_func(in);
-// CHECK: st.param.b16 [func_retval0+0], %[[RET]]
+// CHECK: st.param.b16 [func_retval0], %[[RET]]
 // CHECK: ret;
 }
diff --git a/compiler-rt/lib/hwasan/CMakeLists.txt b/compiler-rt/lib/hwasan/CMakeLists.txt
index 086079c7536e5d..afafa0c4a92761 100644
--- a/compiler-rt/lib/hwasan/CMakeLists.txt
+++ b/compiler-rt/lib/hwasan/CMakeLists.txt
@@ -24,16 +24,19 @@ foreach(arch ${HWASAN_SUPPORTED_ARCH})
   if(${arch} MATCHES "aarch64")
     list(APPEND HWASAN_RTL_SOURCES
       hwasan_setjmp_aarch64.S
-      hwasan_tag_mismatch_aarch64.S)
+      hwasan_tag_mismatch_aarch64.S
+      )
   endif()
   if(${arch} MATCHES "riscv64")
     list(APPEND HWASAN_RTL_SOURCES
       hwasan_setjmp_riscv64.S
-      hwasan_tag_mismatch_riscv64.S)
+      hwasan_tag_mismatch_riscv64.S
+      )
   endif()
   if(${arch} MATCHES "x86_64")
     list(APPEND HWASAN_RTL_SOURCES
-      hwasan_setjmp_x86_64.S)
+      hwasan_setjmp_x86_64.S
+      )
   endif()
 endforeach()
diff --git a/lldb/include/lldb/Core/Module.h b/lldb/include/lldb/Core/Module.h
index 5589c1c9a350dc..23257e429ad0d6 100644
--- a/lldb/include/lldb/Core/Module.h
+++ b/lldb/include/lldb/Core/Module.h
@@ -30,6 +30,7 @@
 
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/STLFunctionalExtras.h"
+#include "llvm/ADT/StableHashing.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Support/Chrono.h"
 
@@ -1057,8 +1058,11 @@ class Module : public std::enable_shared_from_this<Module>,
   /// time for the symbol tables can be aggregated here.
   StatsDuration m_symtab_index_time;
 
-  std::once_flag m_optimization_warning;
-  std::once_flag m_language_warning;
+  /// A set of hashes of all warnings and errors, to avoid reporting them
+  /// multiple times to the same Debugger.
+  llvm::DenseMap<llvm::stable_hash, std::unique_ptr<std::once_flag>>
+      m_shown_diagnostics;
+  std::recursive_mutex m_diagnostic_mutex;
 
   void SymbolIndicesToSymbolContextList(Symtab *symtab,
                                         std::vector<uint32_t> &symbol_indexes,
@@ -1086,6 +1090,7 @@ class Module : public std::enable_shared_from_this<Module>,
   void ReportWarning(const llvm::formatv_object_base &payload);
   void ReportError(const llvm::formatv_object_base &payload);
   void ReportErrorIfModifyDetected(const llvm::formatv_object_base &payload);
+  std::once_flag *GetDiagnosticOnceFlag(llvm::StringRef msg);
 };
 
 } // namespace lldb_private
diff --git a/lldb/source/Core/Module.cpp b/lldb/source/Core/Module.cpp
index 88cc957e91fac4..03eb81459b29bc 100644
--- a/lldb/source/Core/Module.cpp
+++ b/lldb/source/Core/Module.cpp
@@ -1093,8 +1093,8 @@ void Module::ReportWarningOptimization(
   ss << file_name
      << " was compiled with optimization - stepping may behave "
         "oddly; variables may not be available.";
-  Debugger::ReportWarning(std::string(ss.GetString()), debugger_id,
-                          &m_optimization_warning);
+  llvm::StringRef msg = ss.GetString();
+  Debugger::ReportWarning(msg.str(), debugger_id, GetDiagnosticOnceFlag(msg));
 }
 
 void Module::ReportWarningUnsupportedLanguage(
@@ -1104,8 +1104,8 @@ void Module::ReportWarningUnsupportedLanguage(
      << Language::GetNameForLanguageType(language)
      << "\". "
         "Inspection of frame variables will be limited.";
-  Debugger::ReportWarning(std::string(ss.GetString()), debugger_id,
-                          &m_language_warning);
+  llvm::StringRef msg = ss.GetString();
+  Debugger::ReportWarning(msg.str(), debugger_id, GetDiagnosticOnceFlag(msg));
 }
 
 void Module::ReportErrorIfModifyDetected(
@@ -1125,20 +1125,29 @@ void Module::ReportErrorIfModifyDetected(
   }
 }
 
+std::once_flag *Module::GetDiagnosticOnceFlag(llvm::StringRef msg) {
+  std::lock_guard<std::recursive_mutex> guard(m_diagnostic_mutex);
+  auto &once_ptr = m_shown_diagnostics[llvm::stable_hash_name(msg)];
+  if (!once_ptr)
+    once_ptr = std::make_unique<std::once_flag>();
+  return once_ptr.get();
+}
+
 void Module::ReportError(const llvm::formatv_object_base &payload) {
   StreamString strm;
   GetDescription(strm.AsRawOstream(), lldb::eDescriptionLevelBrief);
-  strm.PutChar(' ');
-  strm.PutCString(payload.str());
-  Debugger::ReportError(strm.GetString().str());
+  std::string msg = payload.str();
+  strm << ' ' << msg;
+  Debugger::ReportError(strm.GetString().str(), {}, GetDiagnosticOnceFlag(msg));
 }
 
 void Module::ReportWarning(const llvm::formatv_object_base &payload) {
   StreamString strm;
   GetDescription(strm.AsRawOstream(), lldb::eDescriptionLevelFull);
-  strm.PutChar(' ');
-  strm.PutCString(payload.str());
-  Debugger::ReportWarning(std::string(strm.GetString()));
+  std::string msg = payload.str();
+  strm << ' ' << msg;
+  Debugger::ReportWarning(strm.GetString().str(), {},
+                          GetDiagnosticOnceFlag(msg));
 }
 
 void Module::LogMessage(Log *log, const llvm::formatv_object_base &payload) {
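
The Module changes above all funnel through one pattern: hash the rendered message, keep one std::once_flag per hash, and let std::call_once suppress repeats of the same text. A minimal self-contained sketch of that pattern (illustrative names only; std::hash and std::map stand in for LLVM's stable_hash_name and DenseMap):

    #include <cstdio>
    #include <functional>
    #include <map>
    #include <memory>
    #include <mutex>
    #include <string>

    class DiagnosticLimiter {
      std::map<std::size_t, std::unique_ptr<std::once_flag>> m_shown;
      std::recursive_mutex m_mutex;

    public:
      // Hand out a stable once_flag per distinct message text, creating it
      // lazily on first use.
      std::once_flag *OnceFlagFor(const std::string &msg) {
        std::lock_guard<std::recursive_mutex> guard(m_mutex);
        auto &flag = m_shown[std::hash<std::string>{}(msg)];
        if (!flag)
          flag = std::make_unique<std::once_flag>();
        return flag.get();
      }

      // Identical messages share a once_flag, so only the first call prints.
      void Report(const std::string &msg) {
        std::call_once(*OnceFlagFor(msg), [&] {
          std::fprintf(stderr, "warning: %s\n", msg.c_str());
        });
      }
    };

The recursive mutex in the sketch mirrors the new m_diagnostic_mutex member, which tolerates a report re-entering the module's diagnostics on the same thread.
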
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp
index 9287d4baf19e9c..e5b8eee8d08c24 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp
+++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp
@@ -2069,13 +2069,15 @@ void SymbolFileDWARF::UpdateExternalModuleListIfNeeded() {
     Status error = ModuleList::GetSharedModule(dwo_module_spec, module_sp,
                                                nullptr, nullptr, nullptr);
     if (!module_sp) {
+      // ReportWarning also rate-limits based on the warning string,
+      // but in a -gmodules build, each object file has a similar DAG
+      // of module dependencies that would all be listed here.
       GetObjectFile()->GetModule()->ReportWarning(
-          "{0:x16}: unable to locate module needed for external types: "
-          "{1}\nerror: {2}\nDebugging will be degraded due to missing "
-          "types. Rebuilding the project will regenerate the needed "
-          "module files.",
-          die.GetOffset(), dwo_module_spec.GetFileSpec().GetPath().c_str(),
-          error.AsCString("unknown error"));
+          "{0}", error.AsCString("unknown error"));
+      GetObjectFile()->GetModule()->ReportWarning(
+          "Unable to locate module needed for external types.\n"
+          "Debugging will be degraded due to missing types. Rebuilding the "
+          "project will regenerate the needed module files.");
       continue;
     }
 
@@ -2095,12 +2097,11 @@
     if (dwo_id != dwo_dwo_id) {
       GetObjectFile()->GetModule()->ReportWarning(
-          "{0:x16}: Module {1} is out-of-date (hash mismatch). Type "
-          "information "
-          "from this module may be incomplete or inconsistent with the rest of "
-          "the program. Rebuilding the project will regenerate the needed "
-          "module files.",
-          die.GetOffset(), dwo_module_spec.GetFileSpec().GetPath().c_str());
+          "Module {0} is out-of-date (hash mismatch).\n"
+          "Type information from this module may be incomplete or inconsistent "
+          "with the rest of the program. Rebuilding the project will "
+          "regenerate the needed module files.",
+          dwo_module_spec.GetFileSpec().GetPath());
     }
   }
 }
diff --git a/lldb/test/Shell/SymbolFile/DWARF/TestDedupWarnings.test b/lldb/test/Shell/SymbolFile/DWARF/TestDedupWarnings.test
new file mode 100644
index 00000000000000..d4fcf78d01b81c
--- /dev/null
+++ b/lldb/test/Shell/SymbolFile/DWARF/TestDedupWarnings.test
@@ -0,0 +1,22 @@
+# REQUIRES: system-darwin
+# Test the rate-limiting of "module not found" warnings.
+# RUN: rm -rf %t
+# RUN: mkdir -p %t
+
+# RUN: echo 'module "C" { header "c.h" }' >%t/module.modulemap
+# RUN: echo 'struct c {};' >>%t/c.h
+# RUN: echo '@import C;' >%t/a.m
+# RUN: echo 'struct a { struct c c; } a;' >>%t/a.m
+# RUN: echo '@import C;' >%t/b.m
+# RUN: echo 'struct b { struct c c; } b;' >>%t/b.m
+# RUN: echo 'int main() {}' >>%t/b.m
+
+# RUN: %clang_host -fmodules -Xclang -fmodules-cache-path=%t/cache -I%t -g -gmodules %t/a.m -o %t/a.o -c
+# RUN: %clang_host -fmodules -Xclang -fmodules-cache-path=%t/cache -I%t -g -gmodules %t/b.m -o %t/b.o -c
+# RUN: %clang_host %t/a.o %t/b.o -o %t/a.out
+# RUN: rm -rf %t/cache
+# RUN: %lldb %t/a.out -o "b main" -o run -o "p a" -o "p b" -o q 2>&1 | FileCheck %s
+# CHECK: {{[ab]}}.o{{.*}}/cache/{{.*}}/C-{{.*}}.pcm' does not exist
+# CHECK-NOT: /cache/{{.*}}/C-{{.*}}.pcm' does not exist
+# CHECK: {{[ab]}}.o{{.*}}/cache/{{.*}}/C-{{.*}}.pcm' does not exist
+# CHECK-NOT: /cache/{{.*}}/C-{{.*}}.pcm' does not exist
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index 61615cb0f7b301..8e0cdc6f1a5e77 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -3223,6 +3223,9 @@ class TargetLoweringBase {
   /// not legal, but should return true if those types will eventually legalize
   /// to types that support FMAs. After legalization, it will only be called on
   /// types that support FMAs (via Legal or Custom actions)
+  ///
+  /// Targets that care about soft float support should return false when soft
+  /// float code is being generated (i.e. use-soft-float).
   virtual bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
                                           EVT) const {
     return false;
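
Each backend change that follows implements this new documentation the same way: an early soft-float bail-out ahead of the existing feature checks, so the combiner never forms FMA nodes in functions that must lower floating point through libcalls. A schematic, self-contained version of that decision (MySubtargetInfo and its fields are hypothetical stand-ins, not LLVM API):

    #include <cstdio>

    // Hypothetical stand-in for a target's subtarget feature queries.
    struct MySubtargetInfo {
      bool UseSoftFloat; // the function carries "use-soft-float"="true"
      bool HasFMA;       // the target has a fused multiply-add instruction
    };

    // Sketch of the documented contract: under soft float, never report that
    // a fused multiply-add is faster, or fmul+fadd would be contracted into
    // an FMA node with no instruction to lower it to.
    bool isFMAFasterThanFMulAndFAdd(const MySubtargetInfo &ST) {
      if (ST.UseSoftFloat)
        return false;
      return ST.HasFMA;
    }

    int main() {
      MySubtargetInfo SoftFloat{true, true}, HardFloat{false, true};
      std::printf("%d %d\n", isFMAFasterThanFMulAndFAdd(SoftFloat),
                  isFMAFasterThanFMulAndFAdd(HardFloat)); // prints "0 1"
    }

The fmuladd-soft-float.ll tests added at the end of this patch pin down the resulting behavior: every contracted fmul/fadd pair still lowers to __mulsf3/__addsf3 (or their double equivalents) instead of being fused.
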
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 5d679a1a916dc4..a4f01e55f53c1c 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -19354,6 +19354,9 @@ bool ARMTargetLowering::allowTruncateForTailCall(Type *Ty1, Type *Ty2) const {
 /// patterns (and we don't have the non-fused floating point instruction).
 bool ARMTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
                                                    EVT VT) const {
+  if (Subtarget->useSoftFloat())
+    return false;
+
   if (!VT.isSimple())
     return false;
diff --git a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp
index 7d6442a611125f..9b589284463294 100644
--- a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp
+++ b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp
@@ -363,6 +363,16 @@ void NVPTXInstPrinter::printMemOperand(const MCInst *MI, int OpNum,
   }
 }
 
+void NVPTXInstPrinter::printOffseti32imm(const MCInst *MI, int OpNum,
+                                         raw_ostream &O, const char *Modifier) {
+  auto &Op = MI->getOperand(OpNum);
+  assert(Op.isImm() && "Invalid operand");
+  if (Op.getImm() != 0) {
+    O << "+";
+    printOperand(MI, OpNum, O);
+  }
+}
+
 void NVPTXInstPrinter::printProtoIdent(const MCInst *MI, int OpNum,
                                        raw_ostream &O, const char *Modifier) {
   const MCOperand &Op = MI->getOperand(OpNum);
diff --git a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.h b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.h
index e6954f861cd10e..e8a4a6dbdd5324 100644
--- a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.h
+++ b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.h
@@ -45,6 +45,8 @@ class NVPTXInstPrinter : public MCInstPrinter {
                        const char *Modifier = nullptr);
   void printMemOperand(const MCInst *MI, int OpNum, raw_ostream &O,
                        const char *Modifier = nullptr);
+  void printOffseti32imm(const MCInst *MI, int OpNum, raw_ostream &O,
+                         const char *Modifier = nullptr);
   void printProtoIdent(const MCInst *MI, int OpNum, raw_ostream &O,
                        const char *Modifier = nullptr);
   void printPrmtMode(const MCInst *MI, int OpNum, raw_ostream &O,
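
The printer method above is the whole trick behind the .td changes that follow: an Offseti32imm operand renders as "+imm" only when the immediate is nonzero, so an address like [retval0+0] now prints as [retval0] while [addr+4] keeps its suffix. A standalone approximation of that rule (plain strings instead of MCInst/raw_ostream; the helper name is made up):

    #include <iostream>
    #include <string>

    // Mirror of the printer's rule: append "+<imm>" only for a nonzero
    // (in practice non-negative) offset.
    std::string memOperand(const std::string &base, long long offset) {
      std::string s = "[" + base;
      if (offset != 0)
        s += "+" + std::to_string(offset);
      return s + "]";
    }

    int main() {
      std::cout << memOperand("retval0", 0) << "\n"; // prints "[retval0]"
      std::cout << memOperand("retval0", 4) << "\n"; // prints "[retval0+4]"
    }

This is also why the bf16.cu CHECK lines at the top of the patch dropped their "+0" suffixes.
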
diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
index 8b34ce4f1001c1..b5478b8f09ceb4 100644
--- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -1934,6 +1934,10 @@ def MmaCode : Operand<i32> {
   let PrintMethod = "printMmaCode";
 }
 
+def Offseti32imm : Operand<i32> {
+  let PrintMethod = "printOffseti32imm";
+}
+
 def SDTWrapper : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>, SDTCisPtrTy<0>]>;
 def Wrapper : SDNode<"NVPTXISD::Wrapper", SDTWrapper>;
 
@@ -2482,21 +2486,21 @@ def ProxyReg :
 let mayLoad = true in {
   class LoadParamMemInst<NVPTXRegClass regclass, string opstr> :
-        NVPTXInst<(outs regclass:$dst), (ins i32imm:$b),
-                  !strconcat("ld.param", opstr, " \t$dst, [retval0+$b];"),
+        NVPTXInst<(outs regclass:$dst), (ins Offseti32imm:$b),
+                  !strconcat("ld.param", opstr, " \t$dst, [retval0$b];"),
                   []>;
 
   class LoadParamV2MemInst<NVPTXRegClass regclass, string opstr> :
-        NVPTXInst<(outs regclass:$dst, regclass:$dst2), (ins i32imm:$b),
+        NVPTXInst<(outs regclass:$dst, regclass:$dst2), (ins Offseti32imm:$b),
                   !strconcat("ld.param.v2", opstr,
-                             " \t{{$dst, $dst2}}, [retval0+$b];"), []>;
+                             " \t{{$dst, $dst2}}, [retval0$b];"), []>;
 
   class LoadParamV4MemInst<NVPTXRegClass regclass, string opstr> :
         NVPTXInst<(outs regclass:$dst, regclass:$dst2, regclass:$dst3,
                         regclass:$dst4),
-                  (ins i32imm:$b),
+                  (ins Offseti32imm:$b),
                   !strconcat("ld.param.v4", opstr,
-                             " \t{{$dst, $dst2, $dst3, $dst4}}, [retval0+$b];"),
+                             " \t{{$dst, $dst2, $dst3, $dst4}}, [retval0$b];"),
                   []>;
 }
 
@@ -2512,8 +2516,8 @@ let mayStore = true in {
     if !or(support_imm, !isa<NVPTXRegClass>(op)) then
       def _ # !if(!isa<NVPTXRegClass>(op), "r", "i") :
         NVPTXInst<(outs),
-                  (ins op:$val, i32imm:$a, i32imm:$b),
-                  "st.param" # opstr # " \t[param$a+$b], $val;",
+                  (ins op:$val, i32imm:$a, Offseti32imm:$b),
+                  "st.param" # opstr # " \t[param$a$b], $val;",
                   []>;
 }
 
@@ -2524,8 +2528,8 @@ let mayStore = true in {
             # !if(!isa<NVPTXRegClass>(op2), "r", "i") :
         NVPTXInst<(outs),
                   (ins op1:$val1, op2:$val2,
-                       i32imm:$a, i32imm:$b),
-                  "st.param.v2" # opstr # " \t[param$a+$b], {{$val1, $val2}};",
+                       i32imm:$a, Offseti32imm:$b),
+                  "st.param.v2" # opstr # " \t[param$a$b], {{$val1, $val2}};",
                   []>;
 }
 
@@ -2541,29 +2545,29 @@ let mayStore = true in {
         : NVPTXInst<(outs),
                     (ins op1:$val1, op2:$val2, op3:$val3, op4:$val4,
-                         i32imm:$a, i32imm:$b),
+                         i32imm:$a, Offseti32imm:$b),
                     "st.param.v4" # opstr #
-                    " \t[param$a+$b], {{$val1, $val2, $val3, $val4}};",
+                    " \t[param$a$b], {{$val1, $val2, $val3, $val4}};",
                     []>;
 }
 
 class StoreRetvalInst<NVPTXRegClass regclass, string opstr> :
-      NVPTXInst<(outs), (ins regclass:$val, i32imm:$a),
-                !strconcat("st.param", opstr, " \t[func_retval0+$a], $val;"),
+      NVPTXInst<(outs), (ins regclass:$val, Offseti32imm:$a),
+                !strconcat("st.param", opstr, " \t[func_retval0$a], $val;"),
                 []>;
 
 class StoreRetvalV2Inst<NVPTXRegClass regclass, string opstr> :
-      NVPTXInst<(outs), (ins regclass:$val, regclass:$val2, i32imm:$a),
+      NVPTXInst<(outs), (ins regclass:$val, regclass:$val2, Offseti32imm:$a),
                 !strconcat("st.param.v2", opstr,
-                           " \t[func_retval0+$a], {{$val, $val2}};"),
+                           " \t[func_retval0$a], {{$val, $val2}};"),
                 []>;
 
 class StoreRetvalV4Inst<NVPTXRegClass regclass, string opstr> :
       NVPTXInst<(outs), (ins regclass:$val, regclass:$val2, regclass:$val3,
-                             regclass:$val4, i32imm:$a),
+                             regclass:$val4, Offseti32imm:$a),
                 !strconcat("st.param.v4", opstr,
-                           " \t[func_retval0+$a], {{$val, $val2, $val3, $val4}};"),
+                           " \t[func_retval0$a], {{$val, $val2, $val3, $val4}};"),
                 []>;
 }
@@ -2827,21 +2831,21 @@ multiclass LD<NVPTXRegClass regclass> {
   def _ari : NVPTXInst<
     (outs regclass:$dst),
     (ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
-         i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset),
+         i32imm:$fromWidth, Int32Regs:$addr, Offseti32imm:$offset),
     "ld${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
-    "\t$dst, [$addr+$offset];", []>;
+    "\t$dst, [$addr$offset];", []>;
   def _ari_64 : NVPTXInst<
     (outs regclass:$dst),
     (ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec,
-         LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset),
+         LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr, Offseti32imm:$offset),
     "ld${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
-    "\t$dst, [$addr+$offset];", []>;
+    "\t$dst, [$addr$offset];", []>;
   def _asi : NVPTXInst<
     (outs regclass:$dst),
     (ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec,
-         LdStCode:$Sign, i32imm:$fromWidth, imem:$addr, i32imm:$offset),
+         LdStCode:$Sign, i32imm:$fromWidth, imem:$addr, Offseti32imm:$offset),
     "ld${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
-    "\t$dst, [$addr+$offset];", []>;
+    "\t$dst, [$addr$offset];", []>;
 }
 
 let mayLoad=1, hasSideEffects=0 in {
@@ -2876,23 +2880,23 @@ multiclass ST<NVPTXRegClass regclass> {
   def _ari : NVPTXInst<
     (outs),
     (ins regclass:$src, LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp,
          LdStCode:$Vec, LdStCode:$Sign, i32imm:$toWidth, Int32Regs:$addr,
-         i32imm:$offset),
+         Offseti32imm:$offset),
     "st${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth"
-    " \t[$addr+$offset], $src;", []>;
+    " \t[$addr$offset], $src;", []>;
   def _ari_64 : NVPTXInst<
     (outs),
     (ins regclass:$src, LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp,
          LdStCode:$Vec, LdStCode:$Sign, i32imm:$toWidth, Int64Regs:$addr,
-         i32imm:$offset),
+         Offseti32imm:$offset),
     "st${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth"
-    " \t[$addr+$offset], $src;", []>;
+    " \t[$addr$offset], $src;", []>;
   def _asi : NVPTXInst<
     (outs),
     (ins regclass:$src, LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp,
          LdStCode:$Vec, LdStCode:$Sign, i32imm:$toWidth, imem:$addr,
-         i32imm:$offset),
+         Offseti32imm:$offset),
     "st${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth"
-    " \t[$addr+$offset], $src;", []>;
+    " \t[$addr$offset], $src;", []>;
 }
 
 let mayStore=1, hasSideEffects=0 in {
@@ -2929,21 +2933,21 @@ multiclass LD_VEC<NVPTXRegClass regclass> {
   def _v2_ari : NVPTXInst<
     (outs regclass:$dst1, regclass:$dst2),
     (ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec,
-         LdStCode:$Sign, i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset),
+         LdStCode:$Sign, i32imm:$fromWidth, Int32Regs:$addr, Offseti32imm:$offset),
     "ld${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
-    "\t{{$dst1, $dst2}}, [$addr+$offset];", []>;
+    "\t{{$dst1, $dst2}}, [$addr$offset];", []>;
   def _v2_ari_64 : NVPTXInst<
     (outs regclass:$dst1, regclass:$dst2),
     (ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec,
-         LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset),
+         LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr, Offseti32imm:$offset),
     "ld${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
-    "\t{{$dst1, $dst2}}, [$addr+$offset];", []>;
+    "\t{{$dst1, $dst2}}, [$addr$offset];", []>;
   def _v2_asi : NVPTXInst<
     (outs regclass:$dst1, regclass:$dst2),
     (ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec,
-         LdStCode:$Sign, i32imm:$fromWidth, imem:$addr, i32imm:$offset),
+         LdStCode:$Sign, i32imm:$fromWidth, imem:$addr, Offseti32imm:$offset),
     "ld${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
-    "\t{{$dst1, $dst2}}, [$addr+$offset];", []>;
+    "\t{{$dst1, $dst2}}, [$addr$offset];", []>;
   def _v4_avar : NVPTXInst<
     (outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4),
     (ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec,
@@ -2965,21 +2969,21 @@
   def _v4_ari : NVPTXInst<
     (outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4),
     (ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec,
-         LdStCode:$Sign, i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset),
+         LdStCode:$Sign, i32imm:$fromWidth, Int32Regs:$addr, Offseti32imm:$offset),
     "ld${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
-    "\t{{$dst1, $dst2, $dst3, $dst4}}, [$addr+$offset];", []>;
+    "\t{{$dst1, $dst2, $dst3, $dst4}}, [$addr$offset];", []>;
   def _v4_ari_64 : NVPTXInst<
     (outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4),
     (ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec,
-         LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset),
+         LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr, Offseti32imm:$offset),
     "ld${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
-    "\t{{$dst1, $dst2, $dst3, $dst4}}, [$addr+$offset];", []>;
+    "\t{{$dst1, $dst2, $dst3, $dst4}}, [$addr$offset];", []>;
   def _v4_asi : NVPTXInst<
     (outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4),
     (ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec,
-         LdStCode:$Sign, i32imm:$fromWidth, imem:$addr, i32imm:$offset),
+         LdStCode:$Sign, i32imm:$fromWidth, imem:$addr, Offseti32imm:$offset),
     "ld${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
-    "\t{{$dst1, $dst2, $dst3, $dst4}}, [$addr+$offset];", []>;
+    "\t{{$dst1, $dst2, $dst3, $dst4}}, [$addr$offset];", []>;
 }
 
 let mayLoad=1, hasSideEffects=0 in {
   defm LDV_i8 : LD_VEC<Int16Regs>;
@@ -3016,23 +3020,23 @@ multiclass ST_VEC<NVPTXRegClass regclass> {
   def _v2_ari : NVPTXInst<
     (outs),
     (ins regclass:$src1, regclass:$src2, LdStCode:$sem, LdStCode:$scope,
          LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth,
-         Int32Regs:$addr, i32imm:$offset),
+         Int32Regs:$addr, Offseti32imm:$offset),
     "st${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
-    "\t[$addr+$offset], {{$src1, $src2}};", []>;
+    "\t[$addr$offset], {{$src1, $src2}};", []>;
   def _v2_ari_64 : NVPTXInst<
     (outs),
     (ins regclass:$src1, regclass:$src2, LdStCode:$sem, LdStCode:$scope,
          LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth,
-         Int64Regs:$addr, i32imm:$offset),
+         Int64Regs:$addr, Offseti32imm:$offset),
     "st${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
-    "\t[$addr+$offset], {{$src1, $src2}};", []>;
+    "\t[$addr$offset], {{$src1, $src2}};", []>;
   def _v2_asi : NVPTXInst<
     (outs),
     (ins regclass:$src1, regclass:$src2, LdStCode:$sem, LdStCode:$scope,
          LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth,
-         imem:$addr, i32imm:$offset),
+         imem:$addr, Offseti32imm:$offset),
     "st${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
-    "\t[$addr+$offset], {{$src1, $src2}};", []>;
+    "\t[$addr$offset], {{$src1, $src2}};", []>;
   def _v4_avar : NVPTXInst<
     (outs),
     (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
@@ -3058,23 +3062,23 @@ def _v4_ari : NVPTXInst<
     (outs),
     (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
          LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec,
-         LdStCode:$Sign, i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset),
+         LdStCode:$Sign, i32imm:$fromWidth, Int32Regs:$addr, Offseti32imm:$offset),
     "st${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
-    "\t[$addr+$offset], {{$src1, $src2, $src3, $src4}};", []>;
+    "\t[$addr$offset], {{$src1, $src2, $src3, $src4}};", []>;
   def _v4_ari_64 : NVPTXInst<
     (outs),
     (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
          LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec,
-         LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset),
+         LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr, Offseti32imm:$offset),
     "st${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
-    "\t[$addr+$offset], {{$src1, $src2, $src3, $src4}};", []>;
+    "\t[$addr$offset], {{$src1, $src2, $src3, $src4}};", []>;
   def _v4_asi : NVPTXInst<
     (outs),
     (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
          LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec,
-         LdStCode:$Sign, i32imm:$fromWidth, imem:$addr, i32imm:$offset),
+         LdStCode:$Sign, i32imm:$fromWidth, imem:$addr, Offseti32imm:$offset),
     "st${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}"
-    "$fromWidth \t[$addr+$offset], {{$src1, $src2, $src3, $src4}};", []>;
+    "$fromWidth \t[$addr$offset], {{$src1, $src2, $src3, $src4}};", []>;
 }
 
 let mayStore=1, hasSideEffects=0 in {
@@ -3903,4 +3907,4 @@ def atomic_thread_fence_seq_cst_cta :
                                       Requires<[hasPTX<60>, hasSM<70>]>;
 def atomic_thread_fence_acq_rel_cta : NVPTXInst<(outs), (ins),
"fence.acq_rel.cta;", []>, - Requires<[hasPTX<60>, hasSM<70>]>; \ No newline at end of file + Requires<[hasPTX<60>, hasSM<70>]>; diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp index 83417e570dabf7..3e05f3b0180a78 100644 --- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -793,6 +793,9 @@ EVT SystemZTargetLowering::getSetCCResultType(const DataLayout &DL, bool SystemZTargetLowering::isFMAFasterThanFMulAndFAdd( const MachineFunction &MF, EVT VT) const { + if (useSoftFloat()) + return false; + VT = VT.getScalarType(); if (!VT.isSimple()) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index c4e87fdebcd1eb..88356c393f6699 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -34833,6 +34833,9 @@ bool X86TargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const { bool X86TargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const { + if (Subtarget.useSoftFloat()) + return false; + if (!Subtarget.hasAnyFMA()) return false; diff --git a/llvm/test/CodeGen/ARM/fmuladd-soft-float.ll b/llvm/test/CodeGen/ARM/fmuladd-soft-float.ll new file mode 100644 index 00000000000000..88c31325b64b76 --- /dev/null +++ b/llvm/test/CodeGen/ARM/fmuladd-soft-float.ll @@ -0,0 +1,406 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=arm < %s | FileCheck %s -check-prefix=SOFT-FLOAT +; RUN: llc -mtriple=arm -mattr=+vfp4d16sp < %s | FileCheck %s -check-prefix=SOFT-FLOAT-VFP32 +; RUN: llc -mtriple=arm -mattr=+vfp4d16sp,+fp64 < %s | FileCheck %s -check-prefix=SOFT-FLOAT-VFP64 + +define float @fmuladd_intrinsic_f32(float %a, float %b, float %c) #0 { +; SOFT-FLOAT-LABEL: fmuladd_intrinsic_f32: +; SOFT-FLOAT: @ %bb.0: +; SOFT-FLOAT-NEXT: push {r4, lr} +; SOFT-FLOAT-NEXT: mov r4, r2 +; SOFT-FLOAT-NEXT: bl __mulsf3 +; SOFT-FLOAT-NEXT: mov r1, r4 +; SOFT-FLOAT-NEXT: bl __addsf3 +; SOFT-FLOAT-NEXT: pop {r4, lr} +; SOFT-FLOAT-NEXT: mov pc, lr +; +; SOFT-FLOAT-VFP32-LABEL: fmuladd_intrinsic_f32: +; SOFT-FLOAT-VFP32: @ %bb.0: +; SOFT-FLOAT-VFP32-NEXT: push {r4, lr} +; SOFT-FLOAT-VFP32-NEXT: mov r4, r2 +; SOFT-FLOAT-VFP32-NEXT: bl __mulsf3 +; SOFT-FLOAT-VFP32-NEXT: mov r1, r4 +; SOFT-FLOAT-VFP32-NEXT: bl __addsf3 +; SOFT-FLOAT-VFP32-NEXT: pop {r4, lr} +; SOFT-FLOAT-VFP32-NEXT: mov pc, lr +; +; SOFT-FLOAT-VFP64-LABEL: fmuladd_intrinsic_f32: +; SOFT-FLOAT-VFP64: @ %bb.0: +; SOFT-FLOAT-VFP64-NEXT: push {r4, lr} +; SOFT-FLOAT-VFP64-NEXT: mov r4, r2 +; SOFT-FLOAT-VFP64-NEXT: bl __mulsf3 +; SOFT-FLOAT-VFP64-NEXT: mov r1, r4 +; SOFT-FLOAT-VFP64-NEXT: bl __addsf3 +; SOFT-FLOAT-VFP64-NEXT: pop {r4, lr} +; SOFT-FLOAT-VFP64-NEXT: mov pc, lr + %result = call float @llvm.fmuladd.f32(float %a, float %b, float %c) + ret float %result +} + +define double @fmuladd_intrinsic_f64(double %a, double %b, double %c) #0 { +; SOFT-FLOAT-LABEL: fmuladd_intrinsic_f64: +; SOFT-FLOAT: @ %bb.0: +; SOFT-FLOAT-NEXT: push {r11, lr} +; SOFT-FLOAT-NEXT: bl __muldf3 +; SOFT-FLOAT-NEXT: ldr r2, [sp, #8] +; SOFT-FLOAT-NEXT: ldr r3, [sp, #12] +; SOFT-FLOAT-NEXT: bl __adddf3 +; SOFT-FLOAT-NEXT: pop {r11, lr} +; SOFT-FLOAT-NEXT: mov pc, lr +; +; SOFT-FLOAT-VFP32-LABEL: fmuladd_intrinsic_f64: +; SOFT-FLOAT-VFP32: @ %bb.0: +; SOFT-FLOAT-VFP32-NEXT: push {r11, lr} +; SOFT-FLOAT-VFP32-NEXT: bl __muldf3 +; SOFT-FLOAT-VFP32-NEXT: ldr r2, [sp, #8] +; 
SOFT-FLOAT-VFP32-NEXT: ldr r3, [sp, #12] +; SOFT-FLOAT-VFP32-NEXT: bl __adddf3 +; SOFT-FLOAT-VFP32-NEXT: pop {r11, lr} +; SOFT-FLOAT-VFP32-NEXT: mov pc, lr +; +; SOFT-FLOAT-VFP64-LABEL: fmuladd_intrinsic_f64: +; SOFT-FLOAT-VFP64: @ %bb.0: +; SOFT-FLOAT-VFP64-NEXT: push {r11, lr} +; SOFT-FLOAT-VFP64-NEXT: bl __muldf3 +; SOFT-FLOAT-VFP64-NEXT: ldr r2, [sp, #8] +; SOFT-FLOAT-VFP64-NEXT: ldr r3, [sp, #12] +; SOFT-FLOAT-VFP64-NEXT: bl __adddf3 +; SOFT-FLOAT-VFP64-NEXT: pop {r11, lr} +; SOFT-FLOAT-VFP64-NEXT: mov pc, lr + %result = call double @llvm.fmuladd.f64(double %a, double %b, double %c) + ret double %result +} + +define float @fmuladd_contract_f32(float %a, float %b, float %c) #0 { +; SOFT-FLOAT-LABEL: fmuladd_contract_f32: +; SOFT-FLOAT: @ %bb.0: +; SOFT-FLOAT-NEXT: push {r4, lr} +; SOFT-FLOAT-NEXT: mov r4, r2 +; SOFT-FLOAT-NEXT: bl __mulsf3 +; SOFT-FLOAT-NEXT: mov r1, r4 +; SOFT-FLOAT-NEXT: bl __addsf3 +; SOFT-FLOAT-NEXT: pop {r4, lr} +; SOFT-FLOAT-NEXT: mov pc, lr +; +; SOFT-FLOAT-VFP32-LABEL: fmuladd_contract_f32: +; SOFT-FLOAT-VFP32: @ %bb.0: +; SOFT-FLOAT-VFP32-NEXT: push {r4, lr} +; SOFT-FLOAT-VFP32-NEXT: mov r4, r2 +; SOFT-FLOAT-VFP32-NEXT: bl __mulsf3 +; SOFT-FLOAT-VFP32-NEXT: mov r1, r4 +; SOFT-FLOAT-VFP32-NEXT: bl __addsf3 +; SOFT-FLOAT-VFP32-NEXT: pop {r4, lr} +; SOFT-FLOAT-VFP32-NEXT: mov pc, lr +; +; SOFT-FLOAT-VFP64-LABEL: fmuladd_contract_f32: +; SOFT-FLOAT-VFP64: @ %bb.0: +; SOFT-FLOAT-VFP64-NEXT: push {r4, lr} +; SOFT-FLOAT-VFP64-NEXT: mov r4, r2 +; SOFT-FLOAT-VFP64-NEXT: bl __mulsf3 +; SOFT-FLOAT-VFP64-NEXT: mov r1, r4 +; SOFT-FLOAT-VFP64-NEXT: bl __addsf3 +; SOFT-FLOAT-VFP64-NEXT: pop {r4, lr} +; SOFT-FLOAT-VFP64-NEXT: mov pc, lr + %product = fmul contract float %a, %b + %result = fadd contract float %product, %c + ret float %result +} + +define double @fmuladd_contract_f64(double %a, double %b, double %c) #0 { +; SOFT-FLOAT-LABEL: fmuladd_contract_f64: +; SOFT-FLOAT: @ %bb.0: +; SOFT-FLOAT-NEXT: push {r11, lr} +; SOFT-FLOAT-NEXT: bl __muldf3 +; SOFT-FLOAT-NEXT: ldr r2, [sp, #8] +; SOFT-FLOAT-NEXT: ldr r3, [sp, #12] +; SOFT-FLOAT-NEXT: bl __adddf3 +; SOFT-FLOAT-NEXT: pop {r11, lr} +; SOFT-FLOAT-NEXT: mov pc, lr +; +; SOFT-FLOAT-VFP32-LABEL: fmuladd_contract_f64: +; SOFT-FLOAT-VFP32: @ %bb.0: +; SOFT-FLOAT-VFP32-NEXT: push {r11, lr} +; SOFT-FLOAT-VFP32-NEXT: bl __muldf3 +; SOFT-FLOAT-VFP32-NEXT: ldr r2, [sp, #8] +; SOFT-FLOAT-VFP32-NEXT: ldr r3, [sp, #12] +; SOFT-FLOAT-VFP32-NEXT: bl __adddf3 +; SOFT-FLOAT-VFP32-NEXT: pop {r11, lr} +; SOFT-FLOAT-VFP32-NEXT: mov pc, lr +; +; SOFT-FLOAT-VFP64-LABEL: fmuladd_contract_f64: +; SOFT-FLOAT-VFP64: @ %bb.0: +; SOFT-FLOAT-VFP64-NEXT: push {r11, lr} +; SOFT-FLOAT-VFP64-NEXT: bl __muldf3 +; SOFT-FLOAT-VFP64-NEXT: ldr r2, [sp, #8] +; SOFT-FLOAT-VFP64-NEXT: ldr r3, [sp, #12] +; SOFT-FLOAT-VFP64-NEXT: bl __adddf3 +; SOFT-FLOAT-VFP64-NEXT: pop {r11, lr} +; SOFT-FLOAT-VFP64-NEXT: mov pc, lr + %product = fmul contract double %a, %b + %result = fadd contract double %product, %c + ret double %result +} + +define <4 x float> @fmuladd_contract_v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c) #0 { +; SOFT-FLOAT-LABEL: fmuladd_contract_v4f32: +; SOFT-FLOAT: @ %bb.0: +; SOFT-FLOAT-NEXT: push {r4, r5, r6, r7, r11, lr} +; SOFT-FLOAT-NEXT: mov r7, r1 +; SOFT-FLOAT-NEXT: ldr r1, [sp, #24] +; SOFT-FLOAT-NEXT: mov r4, r3 +; SOFT-FLOAT-NEXT: mov r6, r2 +; SOFT-FLOAT-NEXT: bl __mulsf3 +; SOFT-FLOAT-NEXT: ldr r1, [sp, #40] +; SOFT-FLOAT-NEXT: bl __addsf3 +; SOFT-FLOAT-NEXT: ldr r1, [sp, #28] +; SOFT-FLOAT-NEXT: mov r5, r0 +; SOFT-FLOAT-NEXT: mov 
r0, r7 +; SOFT-FLOAT-NEXT: bl __mulsf3 +; SOFT-FLOAT-NEXT: ldr r1, [sp, #44] +; SOFT-FLOAT-NEXT: bl __addsf3 +; SOFT-FLOAT-NEXT: ldr r1, [sp, #32] +; SOFT-FLOAT-NEXT: mov r7, r0 +; SOFT-FLOAT-NEXT: mov r0, r6 +; SOFT-FLOAT-NEXT: bl __mulsf3 +; SOFT-FLOAT-NEXT: ldr r1, [sp, #48] +; SOFT-FLOAT-NEXT: bl __addsf3 +; SOFT-FLOAT-NEXT: ldr r1, [sp, #36] +; SOFT-FLOAT-NEXT: mov r6, r0 +; SOFT-FLOAT-NEXT: mov r0, r4 +; SOFT-FLOAT-NEXT: bl __mulsf3 +; SOFT-FLOAT-NEXT: ldr r1, [sp, #52] +; SOFT-FLOAT-NEXT: bl __addsf3 +; SOFT-FLOAT-NEXT: mov r3, r0 +; SOFT-FLOAT-NEXT: mov r0, r5 +; SOFT-FLOAT-NEXT: mov r1, r7 +; SOFT-FLOAT-NEXT: mov r2, r6 +; SOFT-FLOAT-NEXT: pop {r4, r5, r6, r7, r11, lr} +; SOFT-FLOAT-NEXT: mov pc, lr +; +; SOFT-FLOAT-VFP32-LABEL: fmuladd_contract_v4f32: +; SOFT-FLOAT-VFP32: @ %bb.0: +; SOFT-FLOAT-VFP32-NEXT: push {r4, r5, r6, r7, r11, lr} +; SOFT-FLOAT-VFP32-NEXT: mov r7, r1 +; SOFT-FLOAT-VFP32-NEXT: ldr r1, [sp, #24] +; SOFT-FLOAT-VFP32-NEXT: mov r4, r3 +; SOFT-FLOAT-VFP32-NEXT: mov r6, r2 +; SOFT-FLOAT-VFP32-NEXT: bl __mulsf3 +; SOFT-FLOAT-VFP32-NEXT: ldr r1, [sp, #40] +; SOFT-FLOAT-VFP32-NEXT: bl __addsf3 +; SOFT-FLOAT-VFP32-NEXT: ldr r1, [sp, #28] +; SOFT-FLOAT-VFP32-NEXT: mov r5, r0 +; SOFT-FLOAT-VFP32-NEXT: mov r0, r7 +; SOFT-FLOAT-VFP32-NEXT: bl __mulsf3 +; SOFT-FLOAT-VFP32-NEXT: ldr r1, [sp, #44] +; SOFT-FLOAT-VFP32-NEXT: bl __addsf3 +; SOFT-FLOAT-VFP32-NEXT: ldr r1, [sp, #32] +; SOFT-FLOAT-VFP32-NEXT: mov r7, r0 +; SOFT-FLOAT-VFP32-NEXT: mov r0, r6 +; SOFT-FLOAT-VFP32-NEXT: bl __mulsf3 +; SOFT-FLOAT-VFP32-NEXT: ldr r1, [sp, #48] +; SOFT-FLOAT-VFP32-NEXT: bl __addsf3 +; SOFT-FLOAT-VFP32-NEXT: ldr r1, [sp, #36] +; SOFT-FLOAT-VFP32-NEXT: mov r6, r0 +; SOFT-FLOAT-VFP32-NEXT: mov r0, r4 +; SOFT-FLOAT-VFP32-NEXT: bl __mulsf3 +; SOFT-FLOAT-VFP32-NEXT: ldr r1, [sp, #52] +; SOFT-FLOAT-VFP32-NEXT: bl __addsf3 +; SOFT-FLOAT-VFP32-NEXT: mov r3, r0 +; SOFT-FLOAT-VFP32-NEXT: mov r0, r5 +; SOFT-FLOAT-VFP32-NEXT: mov r1, r7 +; SOFT-FLOAT-VFP32-NEXT: mov r2, r6 +; SOFT-FLOAT-VFP32-NEXT: pop {r4, r5, r6, r7, r11, lr} +; SOFT-FLOAT-VFP32-NEXT: mov pc, lr +; +; SOFT-FLOAT-VFP64-LABEL: fmuladd_contract_v4f32: +; SOFT-FLOAT-VFP64: @ %bb.0: +; SOFT-FLOAT-VFP64-NEXT: push {r4, r5, r6, r7, r11, lr} +; SOFT-FLOAT-VFP64-NEXT: mov r7, r1 +; SOFT-FLOAT-VFP64-NEXT: ldr r1, [sp, #24] +; SOFT-FLOAT-VFP64-NEXT: mov r4, r3 +; SOFT-FLOAT-VFP64-NEXT: mov r6, r2 +; SOFT-FLOAT-VFP64-NEXT: bl __mulsf3 +; SOFT-FLOAT-VFP64-NEXT: ldr r1, [sp, #40] +; SOFT-FLOAT-VFP64-NEXT: bl __addsf3 +; SOFT-FLOAT-VFP64-NEXT: ldr r1, [sp, #28] +; SOFT-FLOAT-VFP64-NEXT: mov r5, r0 +; SOFT-FLOAT-VFP64-NEXT: mov r0, r7 +; SOFT-FLOAT-VFP64-NEXT: bl __mulsf3 +; SOFT-FLOAT-VFP64-NEXT: ldr r1, [sp, #44] +; SOFT-FLOAT-VFP64-NEXT: bl __addsf3 +; SOFT-FLOAT-VFP64-NEXT: ldr r1, [sp, #32] +; SOFT-FLOAT-VFP64-NEXT: mov r7, r0 +; SOFT-FLOAT-VFP64-NEXT: mov r0, r6 +; SOFT-FLOAT-VFP64-NEXT: bl __mulsf3 +; SOFT-FLOAT-VFP64-NEXT: ldr r1, [sp, #48] +; SOFT-FLOAT-VFP64-NEXT: bl __addsf3 +; SOFT-FLOAT-VFP64-NEXT: ldr r1, [sp, #36] +; SOFT-FLOAT-VFP64-NEXT: mov r6, r0 +; SOFT-FLOAT-VFP64-NEXT: mov r0, r4 +; SOFT-FLOAT-VFP64-NEXT: bl __mulsf3 +; SOFT-FLOAT-VFP64-NEXT: ldr r1, [sp, #52] +; SOFT-FLOAT-VFP64-NEXT: bl __addsf3 +; SOFT-FLOAT-VFP64-NEXT: mov r3, r0 +; SOFT-FLOAT-VFP64-NEXT: mov r0, r5 +; SOFT-FLOAT-VFP64-NEXT: mov r1, r7 +; SOFT-FLOAT-VFP64-NEXT: mov r2, r6 +; SOFT-FLOAT-VFP64-NEXT: pop {r4, r5, r6, r7, r11, lr} +; SOFT-FLOAT-VFP64-NEXT: mov pc, lr + %product = fmul contract <4 x float> %a, %b + %result = fadd contract <4 x float> %product, 
%c + ret <4 x float> %result +} + +define <4 x double> @fmuladd_contract_v4f64(<4 x double> %a, <4 x double> %b, <4 x double> %c) #0 { +; SOFT-FLOAT-LABEL: fmuladd_contract_v4f64: +; SOFT-FLOAT: @ %bb.0: +; SOFT-FLOAT-NEXT: push {r4, r5, r6, lr} +; SOFT-FLOAT-NEXT: mov r5, r3 +; SOFT-FLOAT-NEXT: mov r6, r2 +; SOFT-FLOAT-NEXT: mov r4, r0 +; SOFT-FLOAT-NEXT: ldr r0, [sp, #32] +; SOFT-FLOAT-NEXT: ldr r1, [sp, #36] +; SOFT-FLOAT-NEXT: ldr r2, [sp, #64] +; SOFT-FLOAT-NEXT: ldr r3, [sp, #68] +; SOFT-FLOAT-NEXT: bl __muldf3 +; SOFT-FLOAT-NEXT: ldr r2, [sp, #96] +; SOFT-FLOAT-NEXT: ldr r3, [sp, #100] +; SOFT-FLOAT-NEXT: bl __adddf3 +; SOFT-FLOAT-NEXT: str r0, [r4, #24] +; SOFT-FLOAT-NEXT: str r1, [r4, #28] +; SOFT-FLOAT-NEXT: ldr r0, [sp, #24] +; SOFT-FLOAT-NEXT: ldr r1, [sp, #28] +; SOFT-FLOAT-NEXT: ldr r2, [sp, #56] +; SOFT-FLOAT-NEXT: ldr r3, [sp, #60] +; SOFT-FLOAT-NEXT: bl __muldf3 +; SOFT-FLOAT-NEXT: ldr r2, [sp, #88] +; SOFT-FLOAT-NEXT: ldr r3, [sp, #92] +; SOFT-FLOAT-NEXT: bl __adddf3 +; SOFT-FLOAT-NEXT: str r0, [r4, #16] +; SOFT-FLOAT-NEXT: str r1, [r4, #20] +; SOFT-FLOAT-NEXT: ldr r0, [sp, #16] +; SOFT-FLOAT-NEXT: ldr r1, [sp, #20] +; SOFT-FLOAT-NEXT: ldr r2, [sp, #48] +; SOFT-FLOAT-NEXT: ldr r3, [sp, #52] +; SOFT-FLOAT-NEXT: bl __muldf3 +; SOFT-FLOAT-NEXT: ldr r2, [sp, #80] +; SOFT-FLOAT-NEXT: ldr r3, [sp, #84] +; SOFT-FLOAT-NEXT: bl __adddf3 +; SOFT-FLOAT-NEXT: ldr r2, [sp, #40] +; SOFT-FLOAT-NEXT: ldr r3, [sp, #44] +; SOFT-FLOAT-NEXT: str r0, [r4, #8] +; SOFT-FLOAT-NEXT: mov r0, r6 +; SOFT-FLOAT-NEXT: str r1, [r4, #12] +; SOFT-FLOAT-NEXT: mov r1, r5 +; SOFT-FLOAT-NEXT: bl __muldf3 +; SOFT-FLOAT-NEXT: ldr r2, [sp, #72] +; SOFT-FLOAT-NEXT: ldr r3, [sp, #76] +; SOFT-FLOAT-NEXT: bl __adddf3 +; SOFT-FLOAT-NEXT: stm r4, {r0, r1} +; SOFT-FLOAT-NEXT: pop {r4, r5, r6, lr} +; SOFT-FLOAT-NEXT: mov pc, lr +; +; SOFT-FLOAT-VFP32-LABEL: fmuladd_contract_v4f64: +; SOFT-FLOAT-VFP32: @ %bb.0: +; SOFT-FLOAT-VFP32-NEXT: push {r4, r5, r6, lr} +; SOFT-FLOAT-VFP32-NEXT: mov r5, r3 +; SOFT-FLOAT-VFP32-NEXT: mov r6, r2 +; SOFT-FLOAT-VFP32-NEXT: mov r4, r0 +; SOFT-FLOAT-VFP32-NEXT: ldr r0, [sp, #32] +; SOFT-FLOAT-VFP32-NEXT: ldr r1, [sp, #36] +; SOFT-FLOAT-VFP32-NEXT: ldr r2, [sp, #64] +; SOFT-FLOAT-VFP32-NEXT: ldr r3, [sp, #68] +; SOFT-FLOAT-VFP32-NEXT: bl __muldf3 +; SOFT-FLOAT-VFP32-NEXT: ldr r2, [sp, #96] +; SOFT-FLOAT-VFP32-NEXT: ldr r3, [sp, #100] +; SOFT-FLOAT-VFP32-NEXT: bl __adddf3 +; SOFT-FLOAT-VFP32-NEXT: str r0, [r4, #24] +; SOFT-FLOAT-VFP32-NEXT: str r1, [r4, #28] +; SOFT-FLOAT-VFP32-NEXT: ldr r0, [sp, #24] +; SOFT-FLOAT-VFP32-NEXT: ldr r1, [sp, #28] +; SOFT-FLOAT-VFP32-NEXT: ldr r2, [sp, #56] +; SOFT-FLOAT-VFP32-NEXT: ldr r3, [sp, #60] +; SOFT-FLOAT-VFP32-NEXT: bl __muldf3 +; SOFT-FLOAT-VFP32-NEXT: ldr r2, [sp, #88] +; SOFT-FLOAT-VFP32-NEXT: ldr r3, [sp, #92] +; SOFT-FLOAT-VFP32-NEXT: bl __adddf3 +; SOFT-FLOAT-VFP32-NEXT: str r0, [r4, #16] +; SOFT-FLOAT-VFP32-NEXT: str r1, [r4, #20] +; SOFT-FLOAT-VFP32-NEXT: ldr r0, [sp, #16] +; SOFT-FLOAT-VFP32-NEXT: ldr r1, [sp, #20] +; SOFT-FLOAT-VFP32-NEXT: ldr r2, [sp, #48] +; SOFT-FLOAT-VFP32-NEXT: ldr r3, [sp, #52] +; SOFT-FLOAT-VFP32-NEXT: bl __muldf3 +; SOFT-FLOAT-VFP32-NEXT: ldr r2, [sp, #80] +; SOFT-FLOAT-VFP32-NEXT: ldr r3, [sp, #84] +; SOFT-FLOAT-VFP32-NEXT: bl __adddf3 +; SOFT-FLOAT-VFP32-NEXT: ldr r2, [sp, #40] +; SOFT-FLOAT-VFP32-NEXT: ldr r3, [sp, #44] +; SOFT-FLOAT-VFP32-NEXT: str r0, [r4, #8] +; SOFT-FLOAT-VFP32-NEXT: mov r0, r6 +; SOFT-FLOAT-VFP32-NEXT: str r1, [r4, #12] +; SOFT-FLOAT-VFP32-NEXT: mov r1, r5 +; SOFT-FLOAT-VFP32-NEXT: bl 
__muldf3 +; SOFT-FLOAT-VFP32-NEXT: ldr r2, [sp, #72] +; SOFT-FLOAT-VFP32-NEXT: ldr r3, [sp, #76] +; SOFT-FLOAT-VFP32-NEXT: bl __adddf3 +; SOFT-FLOAT-VFP32-NEXT: stm r4, {r0, r1} +; SOFT-FLOAT-VFP32-NEXT: pop {r4, r5, r6, lr} +; SOFT-FLOAT-VFP32-NEXT: mov pc, lr +; +; SOFT-FLOAT-VFP64-LABEL: fmuladd_contract_v4f64: +; SOFT-FLOAT-VFP64: @ %bb.0: +; SOFT-FLOAT-VFP64-NEXT: push {r4, r5, r6, lr} +; SOFT-FLOAT-VFP64-NEXT: mov r5, r3 +; SOFT-FLOAT-VFP64-NEXT: mov r6, r2 +; SOFT-FLOAT-VFP64-NEXT: mov r4, r0 +; SOFT-FLOAT-VFP64-NEXT: ldr r0, [sp, #32] +; SOFT-FLOAT-VFP64-NEXT: ldr r1, [sp, #36] +; SOFT-FLOAT-VFP64-NEXT: ldr r2, [sp, #64] +; SOFT-FLOAT-VFP64-NEXT: ldr r3, [sp, #68] +; SOFT-FLOAT-VFP64-NEXT: bl __muldf3 +; SOFT-FLOAT-VFP64-NEXT: ldr r2, [sp, #96] +; SOFT-FLOAT-VFP64-NEXT: ldr r3, [sp, #100] +; SOFT-FLOAT-VFP64-NEXT: bl __adddf3 +; SOFT-FLOAT-VFP64-NEXT: str r0, [r4, #24] +; SOFT-FLOAT-VFP64-NEXT: str r1, [r4, #28] +; SOFT-FLOAT-VFP64-NEXT: ldr r0, [sp, #24] +; SOFT-FLOAT-VFP64-NEXT: ldr r1, [sp, #28] +; SOFT-FLOAT-VFP64-NEXT: ldr r2, [sp, #56] +; SOFT-FLOAT-VFP64-NEXT: ldr r3, [sp, #60] +; SOFT-FLOAT-VFP64-NEXT: bl __muldf3 +; SOFT-FLOAT-VFP64-NEXT: ldr r2, [sp, #88] +; SOFT-FLOAT-VFP64-NEXT: ldr r3, [sp, #92] +; SOFT-FLOAT-VFP64-NEXT: bl __adddf3 +; SOFT-FLOAT-VFP64-NEXT: str r0, [r4, #16] +; SOFT-FLOAT-VFP64-NEXT: str r1, [r4, #20] +; SOFT-FLOAT-VFP64-NEXT: ldr r0, [sp, #16] +; SOFT-FLOAT-VFP64-NEXT: ldr r1, [sp, #20] +; SOFT-FLOAT-VFP64-NEXT: ldr r2, [sp, #48] +; SOFT-FLOAT-VFP64-NEXT: ldr r3, [sp, #52] +; SOFT-FLOAT-VFP64-NEXT: bl __muldf3 +; SOFT-FLOAT-VFP64-NEXT: ldr r2, [sp, #80] +; SOFT-FLOAT-VFP64-NEXT: ldr r3, [sp, #84] +; SOFT-FLOAT-VFP64-NEXT: bl __adddf3 +; SOFT-FLOAT-VFP64-NEXT: ldr r2, [sp, #40] +; SOFT-FLOAT-VFP64-NEXT: ldr r3, [sp, #44] +; SOFT-FLOAT-VFP64-NEXT: str r0, [r4, #8] +; SOFT-FLOAT-VFP64-NEXT: mov r0, r6 +; SOFT-FLOAT-VFP64-NEXT: str r1, [r4, #12] +; SOFT-FLOAT-VFP64-NEXT: mov r1, r5 +; SOFT-FLOAT-VFP64-NEXT: bl __muldf3 +; SOFT-FLOAT-VFP64-NEXT: ldr r2, [sp, #72] +; SOFT-FLOAT-VFP64-NEXT: ldr r3, [sp, #76] +; SOFT-FLOAT-VFP64-NEXT: bl __adddf3 +; SOFT-FLOAT-VFP64-NEXT: stm r4, {r0, r1} +; SOFT-FLOAT-VFP64-NEXT: pop {r4, r5, r6, lr} +; SOFT-FLOAT-VFP64-NEXT: mov pc, lr + %product = fmul contract <4 x double> %a, %b + %result = fadd contract <4 x double> %product, %c + ret <4 x double> %result +} + +attributes #0 = { "use-soft-float"="true" } + +declare float @llvm.fmuladd.f32(float %a, float %b, float %c) +declare double @llvm.fmuladd.f64(double %a, double %b, double %c) diff --git a/llvm/test/CodeGen/Mips/fmuladd-soft-float.ll b/llvm/test/CodeGen/Mips/fmuladd-soft-float.ll new file mode 100644 index 00000000000000..bbfb7cf9ca907a --- /dev/null +++ b/llvm/test/CodeGen/Mips/fmuladd-soft-float.ll @@ -0,0 +1,932 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=mips < %s | FileCheck %s -check-prefix=SOFT-FLOAT-32 +; RUN: llc -mtriple=mips -mcpu mips32r2 < %s | FileCheck %s -check-prefix=SOFT-FLOAT-32R2 +; RUN: llc -mtriple=mips64 < %s | FileCheck %s -check-prefix=SOFT-FLOAT-64 +; RUN: llc -mtriple=mips64 -mcpu mips64r2 < %s | FileCheck %s -check-prefix=SOFT-FLOAT-64R2 + +define float @fmuladd_intrinsic_f32(float %a, float %b, float %c) #0 { +; SOFT-FLOAT-32-LABEL: fmuladd_intrinsic_f32: +; SOFT-FLOAT-32: # %bb.0: +; SOFT-FLOAT-32-NEXT: addiu $sp, $sp, -24 +; SOFT-FLOAT-32-NEXT: .cfi_def_cfa_offset 24 +; SOFT-FLOAT-32-NEXT: sw $ra, 20($sp) # 4-byte Folded Spill +; SOFT-FLOAT-32-NEXT: sw 
$16, 16($sp) # 4-byte Folded Spill +; SOFT-FLOAT-32-NEXT: .cfi_offset 31, -4 +; SOFT-FLOAT-32-NEXT: .cfi_offset 16, -8 +; SOFT-FLOAT-32-NEXT: jal __mulsf3 +; SOFT-FLOAT-32-NEXT: move $16, $6 +; SOFT-FLOAT-32-NEXT: move $4, $2 +; SOFT-FLOAT-32-NEXT: jal __addsf3 +; SOFT-FLOAT-32-NEXT: move $5, $16 +; SOFT-FLOAT-32-NEXT: lw $16, 16($sp) # 4-byte Folded Reload +; SOFT-FLOAT-32-NEXT: lw $ra, 20($sp) # 4-byte Folded Reload +; SOFT-FLOAT-32-NEXT: jr $ra +; SOFT-FLOAT-32-NEXT: addiu $sp, $sp, 24 +; +; SOFT-FLOAT-32R2-LABEL: fmuladd_intrinsic_f32: +; SOFT-FLOAT-32R2: # %bb.0: +; SOFT-FLOAT-32R2-NEXT: addiu $sp, $sp, -24 +; SOFT-FLOAT-32R2-NEXT: .cfi_def_cfa_offset 24 +; SOFT-FLOAT-32R2-NEXT: sw $ra, 20($sp) # 4-byte Folded Spill +; SOFT-FLOAT-32R2-NEXT: sw $16, 16($sp) # 4-byte Folded Spill +; SOFT-FLOAT-32R2-NEXT: .cfi_offset 31, -4 +; SOFT-FLOAT-32R2-NEXT: .cfi_offset 16, -8 +; SOFT-FLOAT-32R2-NEXT: jal __mulsf3 +; SOFT-FLOAT-32R2-NEXT: move $16, $6 +; SOFT-FLOAT-32R2-NEXT: move $4, $2 +; SOFT-FLOAT-32R2-NEXT: jal __addsf3 +; SOFT-FLOAT-32R2-NEXT: move $5, $16 +; SOFT-FLOAT-32R2-NEXT: lw $16, 16($sp) # 4-byte Folded Reload +; SOFT-FLOAT-32R2-NEXT: lw $ra, 20($sp) # 4-byte Folded Reload +; SOFT-FLOAT-32R2-NEXT: jr $ra +; SOFT-FLOAT-32R2-NEXT: addiu $sp, $sp, 24 +; +; SOFT-FLOAT-64-LABEL: fmuladd_intrinsic_f32: +; SOFT-FLOAT-64: # %bb.0: +; SOFT-FLOAT-64-NEXT: daddiu $sp, $sp, -16 +; SOFT-FLOAT-64-NEXT: .cfi_def_cfa_offset 16 +; SOFT-FLOAT-64-NEXT: sd $ra, 8($sp) # 8-byte Folded Spill +; SOFT-FLOAT-64-NEXT: sd $16, 0($sp) # 8-byte Folded Spill +; SOFT-FLOAT-64-NEXT: .cfi_offset 31, -8 +; SOFT-FLOAT-64-NEXT: .cfi_offset 16, -16 +; SOFT-FLOAT-64-NEXT: move $16, $6 +; SOFT-FLOAT-64-NEXT: sll $4, $4, 0 +; SOFT-FLOAT-64-NEXT: jal __mulsf3 +; SOFT-FLOAT-64-NEXT: sll $5, $5, 0 +; SOFT-FLOAT-64-NEXT: sll $4, $2, 0 +; SOFT-FLOAT-64-NEXT: jal __addsf3 +; SOFT-FLOAT-64-NEXT: sll $5, $16, 0 +; SOFT-FLOAT-64-NEXT: ld $16, 0($sp) # 8-byte Folded Reload +; SOFT-FLOAT-64-NEXT: ld $ra, 8($sp) # 8-byte Folded Reload +; SOFT-FLOAT-64-NEXT: jr $ra +; SOFT-FLOAT-64-NEXT: daddiu $sp, $sp, 16 +; +; SOFT-FLOAT-64R2-LABEL: fmuladd_intrinsic_f32: +; SOFT-FLOAT-64R2: # %bb.0: +; SOFT-FLOAT-64R2-NEXT: daddiu $sp, $sp, -16 +; SOFT-FLOAT-64R2-NEXT: .cfi_def_cfa_offset 16 +; SOFT-FLOAT-64R2-NEXT: sd $ra, 8($sp) # 8-byte Folded Spill +; SOFT-FLOAT-64R2-NEXT: sd $16, 0($sp) # 8-byte Folded Spill +; SOFT-FLOAT-64R2-NEXT: .cfi_offset 31, -8 +; SOFT-FLOAT-64R2-NEXT: .cfi_offset 16, -16 +; SOFT-FLOAT-64R2-NEXT: move $16, $6 +; SOFT-FLOAT-64R2-NEXT: sll $4, $4, 0 +; SOFT-FLOAT-64R2-NEXT: jal __mulsf3 +; SOFT-FLOAT-64R2-NEXT: sll $5, $5, 0 +; SOFT-FLOAT-64R2-NEXT: sll $4, $2, 0 +; SOFT-FLOAT-64R2-NEXT: jal __addsf3 +; SOFT-FLOAT-64R2-NEXT: sll $5, $16, 0 +; SOFT-FLOAT-64R2-NEXT: ld $16, 0($sp) # 8-byte Folded Reload +; SOFT-FLOAT-64R2-NEXT: ld $ra, 8($sp) # 8-byte Folded Reload +; SOFT-FLOAT-64R2-NEXT: jr $ra +; SOFT-FLOAT-64R2-NEXT: daddiu $sp, $sp, 16 + %result = call float @llvm.fmuladd.f32(float %a, float %b, float %c) + ret float %result +} + +define double @fmuladd_intrinsic_f64(double %a, double %b, double %c) #0 { +; SOFT-FLOAT-32-LABEL: fmuladd_intrinsic_f64: +; SOFT-FLOAT-32: # %bb.0: +; SOFT-FLOAT-32-NEXT: addiu $sp, $sp, -24 +; SOFT-FLOAT-32-NEXT: .cfi_def_cfa_offset 24 +; SOFT-FLOAT-32-NEXT: sw $ra, 20($sp) # 4-byte Folded Spill +; SOFT-FLOAT-32-NEXT: .cfi_offset 31, -4 +; SOFT-FLOAT-32-NEXT: jal __muldf3 +; SOFT-FLOAT-32-NEXT: nop +; SOFT-FLOAT-32-NEXT: move $4, $2 +; SOFT-FLOAT-32-NEXT: lw $6, 40($sp) +; 
SOFT-FLOAT-32-NEXT: lw $7, 44($sp) +; SOFT-FLOAT-32-NEXT: jal __adddf3 +; SOFT-FLOAT-32-NEXT: move $5, $3 +; SOFT-FLOAT-32-NEXT: lw $ra, 20($sp) # 4-byte Folded Reload +; SOFT-FLOAT-32-NEXT: jr $ra +; SOFT-FLOAT-32-NEXT: addiu $sp, $sp, 24 +; +; SOFT-FLOAT-32R2-LABEL: fmuladd_intrinsic_f64: +; SOFT-FLOAT-32R2: # %bb.0: +; SOFT-FLOAT-32R2-NEXT: addiu $sp, $sp, -24 +; SOFT-FLOAT-32R2-NEXT: .cfi_def_cfa_offset 24 +; SOFT-FLOAT-32R2-NEXT: sw $ra, 20($sp) # 4-byte Folded Spill +; SOFT-FLOAT-32R2-NEXT: .cfi_offset 31, -4 +; SOFT-FLOAT-32R2-NEXT: jal __muldf3 +; SOFT-FLOAT-32R2-NEXT: nop +; SOFT-FLOAT-32R2-NEXT: move $4, $2 +; SOFT-FLOAT-32R2-NEXT: lw $6, 40($sp) +; SOFT-FLOAT-32R2-NEXT: lw $7, 44($sp) +; SOFT-FLOAT-32R2-NEXT: jal __adddf3 +; SOFT-FLOAT-32R2-NEXT: move $5, $3 +; SOFT-FLOAT-32R2-NEXT: lw $ra, 20($sp) # 4-byte Folded Reload +; SOFT-FLOAT-32R2-NEXT: jr $ra +; SOFT-FLOAT-32R2-NEXT: addiu $sp, $sp, 24 +; +; SOFT-FLOAT-64-LABEL: fmuladd_intrinsic_f64: +; SOFT-FLOAT-64: # %bb.0: +; SOFT-FLOAT-64-NEXT: daddiu $sp, $sp, -16 +; SOFT-FLOAT-64-NEXT: .cfi_def_cfa_offset 16 +; SOFT-FLOAT-64-NEXT: sd $ra, 8($sp) # 8-byte Folded Spill +; SOFT-FLOAT-64-NEXT: sd $16, 0($sp) # 8-byte Folded Spill +; SOFT-FLOAT-64-NEXT: .cfi_offset 31, -8 +; SOFT-FLOAT-64-NEXT: .cfi_offset 16, -16 +; SOFT-FLOAT-64-NEXT: jal __muldf3 +; SOFT-FLOAT-64-NEXT: move $16, $6 +; SOFT-FLOAT-64-NEXT: move $4, $2 +; SOFT-FLOAT-64-NEXT: jal __adddf3 +; SOFT-FLOAT-64-NEXT: move $5, $16 +; SOFT-FLOAT-64-NEXT: ld $16, 0($sp) # 8-byte Folded Reload +; SOFT-FLOAT-64-NEXT: ld $ra, 8($sp) # 8-byte Folded Reload +; SOFT-FLOAT-64-NEXT: jr $ra +; SOFT-FLOAT-64-NEXT: daddiu $sp, $sp, 16 +; +; SOFT-FLOAT-64R2-LABEL: fmuladd_intrinsic_f64: +; SOFT-FLOAT-64R2: # %bb.0: +; SOFT-FLOAT-64R2-NEXT: daddiu $sp, $sp, -16 +; SOFT-FLOAT-64R2-NEXT: .cfi_def_cfa_offset 16 +; SOFT-FLOAT-64R2-NEXT: sd $ra, 8($sp) # 8-byte Folded Spill +; SOFT-FLOAT-64R2-NEXT: sd $16, 0($sp) # 8-byte Folded Spill +; SOFT-FLOAT-64R2-NEXT: .cfi_offset 31, -8 +; SOFT-FLOAT-64R2-NEXT: .cfi_offset 16, -16 +; SOFT-FLOAT-64R2-NEXT: jal __muldf3 +; SOFT-FLOAT-64R2-NEXT: move $16, $6 +; SOFT-FLOAT-64R2-NEXT: move $4, $2 +; SOFT-FLOAT-64R2-NEXT: jal __adddf3 +; SOFT-FLOAT-64R2-NEXT: move $5, $16 +; SOFT-FLOAT-64R2-NEXT: ld $16, 0($sp) # 8-byte Folded Reload +; SOFT-FLOAT-64R2-NEXT: ld $ra, 8($sp) # 8-byte Folded Reload +; SOFT-FLOAT-64R2-NEXT: jr $ra +; SOFT-FLOAT-64R2-NEXT: daddiu $sp, $sp, 16 + %result = call double @llvm.fmuladd.f64(double %a, double %b, double %c) + ret double %result +} + +define float @fmuladd_contract_f32(float %a, float %b, float %c) #0 { +; SOFT-FLOAT-32-LABEL: fmuladd_contract_f32: +; SOFT-FLOAT-32: # %bb.0: +; SOFT-FLOAT-32-NEXT: addiu $sp, $sp, -24 +; SOFT-FLOAT-32-NEXT: .cfi_def_cfa_offset 24 +; SOFT-FLOAT-32-NEXT: sw $ra, 20($sp) # 4-byte Folded Spill +; SOFT-FLOAT-32-NEXT: sw $16, 16($sp) # 4-byte Folded Spill +; SOFT-FLOAT-32-NEXT: .cfi_offset 31, -4 +; SOFT-FLOAT-32-NEXT: .cfi_offset 16, -8 +; SOFT-FLOAT-32-NEXT: jal __mulsf3 +; SOFT-FLOAT-32-NEXT: move $16, $6 +; SOFT-FLOAT-32-NEXT: move $4, $2 +; SOFT-FLOAT-32-NEXT: jal __addsf3 +; SOFT-FLOAT-32-NEXT: move $5, $16 +; SOFT-FLOAT-32-NEXT: lw $16, 16($sp) # 4-byte Folded Reload +; SOFT-FLOAT-32-NEXT: lw $ra, 20($sp) # 4-byte Folded Reload +; SOFT-FLOAT-32-NEXT: jr $ra +; SOFT-FLOAT-32-NEXT: addiu $sp, $sp, 24 +; +; SOFT-FLOAT-32R2-LABEL: fmuladd_contract_f32: +; SOFT-FLOAT-32R2: # %bb.0: +; SOFT-FLOAT-32R2-NEXT: addiu $sp, $sp, -24 +; SOFT-FLOAT-32R2-NEXT: .cfi_def_cfa_offset 24 +; 
SOFT-FLOAT-32R2-NEXT: sw $ra, 20($sp) # 4-byte Folded Spill +; SOFT-FLOAT-32R2-NEXT: sw $16, 16($sp) # 4-byte Folded Spill +; SOFT-FLOAT-32R2-NEXT: .cfi_offset 31, -4 +; SOFT-FLOAT-32R2-NEXT: .cfi_offset 16, -8 +; SOFT-FLOAT-32R2-NEXT: jal __mulsf3 +; SOFT-FLOAT-32R2-NEXT: move $16, $6 +; SOFT-FLOAT-32R2-NEXT: move $4, $2 +; SOFT-FLOAT-32R2-NEXT: jal __addsf3 +; SOFT-FLOAT-32R2-NEXT: move $5, $16 +; SOFT-FLOAT-32R2-NEXT: lw $16, 16($sp) # 4-byte Folded Reload +; SOFT-FLOAT-32R2-NEXT: lw $ra, 20($sp) # 4-byte Folded Reload +; SOFT-FLOAT-32R2-NEXT: jr $ra +; SOFT-FLOAT-32R2-NEXT: addiu $sp, $sp, 24 +; +; SOFT-FLOAT-64-LABEL: fmuladd_contract_f32: +; SOFT-FLOAT-64: # %bb.0: +; SOFT-FLOAT-64-NEXT: daddiu $sp, $sp, -16 +; SOFT-FLOAT-64-NEXT: .cfi_def_cfa_offset 16 +; SOFT-FLOAT-64-NEXT: sd $ra, 8($sp) # 8-byte Folded Spill +; SOFT-FLOAT-64-NEXT: sd $16, 0($sp) # 8-byte Folded Spill +; SOFT-FLOAT-64-NEXT: .cfi_offset 31, -8 +; SOFT-FLOAT-64-NEXT: .cfi_offset 16, -16 +; SOFT-FLOAT-64-NEXT: move $16, $6 +; SOFT-FLOAT-64-NEXT: sll $4, $4, 0 +; SOFT-FLOAT-64-NEXT: jal __mulsf3 +; SOFT-FLOAT-64-NEXT: sll $5, $5, 0 +; SOFT-FLOAT-64-NEXT: sll $4, $2, 0 +; SOFT-FLOAT-64-NEXT: jal __addsf3 +; SOFT-FLOAT-64-NEXT: sll $5, $16, 0 +; SOFT-FLOAT-64-NEXT: ld $16, 0($sp) # 8-byte Folded Reload +; SOFT-FLOAT-64-NEXT: ld $ra, 8($sp) # 8-byte Folded Reload +; SOFT-FLOAT-64-NEXT: jr $ra +; SOFT-FLOAT-64-NEXT: daddiu $sp, $sp, 16 +; +; SOFT-FLOAT-64R2-LABEL: fmuladd_contract_f32: +; SOFT-FLOAT-64R2: # %bb.0: +; SOFT-FLOAT-64R2-NEXT: daddiu $sp, $sp, -16 +; SOFT-FLOAT-64R2-NEXT: .cfi_def_cfa_offset 16 +; SOFT-FLOAT-64R2-NEXT: sd $ra, 8($sp) # 8-byte Folded Spill +; SOFT-FLOAT-64R2-NEXT: sd $16, 0($sp) # 8-byte Folded Spill +; SOFT-FLOAT-64R2-NEXT: .cfi_offset 31, -8 +; SOFT-FLOAT-64R2-NEXT: .cfi_offset 16, -16 +; SOFT-FLOAT-64R2-NEXT: move $16, $6 +; SOFT-FLOAT-64R2-NEXT: sll $4, $4, 0 +; SOFT-FLOAT-64R2-NEXT: jal __mulsf3 +; SOFT-FLOAT-64R2-NEXT: sll $5, $5, 0 +; SOFT-FLOAT-64R2-NEXT: sll $4, $2, 0 +; SOFT-FLOAT-64R2-NEXT: jal __addsf3 +; SOFT-FLOAT-64R2-NEXT: sll $5, $16, 0 +; SOFT-FLOAT-64R2-NEXT: ld $16, 0($sp) # 8-byte Folded Reload +; SOFT-FLOAT-64R2-NEXT: ld $ra, 8($sp) # 8-byte Folded Reload +; SOFT-FLOAT-64R2-NEXT: jr $ra +; SOFT-FLOAT-64R2-NEXT: daddiu $sp, $sp, 16 + %product = fmul contract float %a, %b + %result = fadd contract float %product, %c + ret float %result +} + +define double @fmuladd_contract_f64(double %a, double %b, double %c) #0 { +; SOFT-FLOAT-32-LABEL: fmuladd_contract_f64: +; SOFT-FLOAT-32: # %bb.0: +; SOFT-FLOAT-32-NEXT: addiu $sp, $sp, -24 +; SOFT-FLOAT-32-NEXT: .cfi_def_cfa_offset 24 +; SOFT-FLOAT-32-NEXT: sw $ra, 20($sp) # 4-byte Folded Spill +; SOFT-FLOAT-32-NEXT: .cfi_offset 31, -4 +; SOFT-FLOAT-32-NEXT: jal __muldf3 +; SOFT-FLOAT-32-NEXT: nop +; SOFT-FLOAT-32-NEXT: move $4, $2 +; SOFT-FLOAT-32-NEXT: lw $6, 40($sp) +; SOFT-FLOAT-32-NEXT: lw $7, 44($sp) +; SOFT-FLOAT-32-NEXT: jal __adddf3 +; SOFT-FLOAT-32-NEXT: move $5, $3 +; SOFT-FLOAT-32-NEXT: lw $ra, 20($sp) # 4-byte Folded Reload +; SOFT-FLOAT-32-NEXT: jr $ra +; SOFT-FLOAT-32-NEXT: addiu $sp, $sp, 24 +; +; SOFT-FLOAT-32R2-LABEL: fmuladd_contract_f64: +; SOFT-FLOAT-32R2: # %bb.0: +; SOFT-FLOAT-32R2-NEXT: addiu $sp, $sp, -24 +; SOFT-FLOAT-32R2-NEXT: .cfi_def_cfa_offset 24 +; SOFT-FLOAT-32R2-NEXT: sw $ra, 20($sp) # 4-byte Folded Spill +; SOFT-FLOAT-32R2-NEXT: .cfi_offset 31, -4 +; SOFT-FLOAT-32R2-NEXT: jal __muldf3 +; SOFT-FLOAT-32R2-NEXT: nop +; SOFT-FLOAT-32R2-NEXT: move $4, $2 +; SOFT-FLOAT-32R2-NEXT: lw $6, 40($sp) +; 
SOFT-FLOAT-32R2-NEXT: lw $7, 44($sp) +; SOFT-FLOAT-32R2-NEXT: jal __adddf3 +; SOFT-FLOAT-32R2-NEXT: move $5, $3 +; SOFT-FLOAT-32R2-NEXT: lw $ra, 20($sp) # 4-byte Folded Reload +; SOFT-FLOAT-32R2-NEXT: jr $ra +; SOFT-FLOAT-32R2-NEXT: addiu $sp, $sp, 24 +; +; SOFT-FLOAT-64-LABEL: fmuladd_contract_f64: +; SOFT-FLOAT-64: # %bb.0: +; SOFT-FLOAT-64-NEXT: daddiu $sp, $sp, -16 +; SOFT-FLOAT-64-NEXT: .cfi_def_cfa_offset 16 +; SOFT-FLOAT-64-NEXT: sd $ra, 8($sp) # 8-byte Folded Spill +; SOFT-FLOAT-64-NEXT: sd $16, 0($sp) # 8-byte Folded Spill +; SOFT-FLOAT-64-NEXT: .cfi_offset 31, -8 +; SOFT-FLOAT-64-NEXT: .cfi_offset 16, -16 +; SOFT-FLOAT-64-NEXT: jal __muldf3 +; SOFT-FLOAT-64-NEXT: move $16, $6 +; SOFT-FLOAT-64-NEXT: move $4, $2 +; SOFT-FLOAT-64-NEXT: jal __adddf3 +; SOFT-FLOAT-64-NEXT: move $5, $16 +; SOFT-FLOAT-64-NEXT: ld $16, 0($sp) # 8-byte Folded Reload +; SOFT-FLOAT-64-NEXT: ld $ra, 8($sp) # 8-byte Folded Reload +; SOFT-FLOAT-64-NEXT: jr $ra +; SOFT-FLOAT-64-NEXT: daddiu $sp, $sp, 16 +; +; SOFT-FLOAT-64R2-LABEL: fmuladd_contract_f64: +; SOFT-FLOAT-64R2: # %bb.0: +; SOFT-FLOAT-64R2-NEXT: daddiu $sp, $sp, -16 +; SOFT-FLOAT-64R2-NEXT: .cfi_def_cfa_offset 16 +; SOFT-FLOAT-64R2-NEXT: sd $ra, 8($sp) # 8-byte Folded Spill +; SOFT-FLOAT-64R2-NEXT: sd $16, 0($sp) # 8-byte Folded Spill +; SOFT-FLOAT-64R2-NEXT: .cfi_offset 31, -8 +; SOFT-FLOAT-64R2-NEXT: .cfi_offset 16, -16 +; SOFT-FLOAT-64R2-NEXT: jal __muldf3 +; SOFT-FLOAT-64R2-NEXT: move $16, $6 +; SOFT-FLOAT-64R2-NEXT: move $4, $2 +; SOFT-FLOAT-64R2-NEXT: jal __adddf3 +; SOFT-FLOAT-64R2-NEXT: move $5, $16 +; SOFT-FLOAT-64R2-NEXT: ld $16, 0($sp) # 8-byte Folded Reload +; SOFT-FLOAT-64R2-NEXT: ld $ra, 8($sp) # 8-byte Folded Reload +; SOFT-FLOAT-64R2-NEXT: jr $ra +; SOFT-FLOAT-64R2-NEXT: daddiu $sp, $sp, 16 + %product = fmul contract double %a, %b + %result = fadd contract double %product, %c + ret double %result +} + +define <4 x float> @fmuladd_contract_v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c) #0 { +; SOFT-FLOAT-32-LABEL: fmuladd_contract_v4f32: +; SOFT-FLOAT-32: # %bb.0: +; SOFT-FLOAT-32-NEXT: addiu $sp, $sp, -48 +; SOFT-FLOAT-32-NEXT: .cfi_def_cfa_offset 48 +; SOFT-FLOAT-32-NEXT: sw $ra, 44($sp) # 4-byte Folded Spill +; SOFT-FLOAT-32-NEXT: sw $21, 40($sp) # 4-byte Folded Spill +; SOFT-FLOAT-32-NEXT: sw $20, 36($sp) # 4-byte Folded Spill +; SOFT-FLOAT-32-NEXT: sw $19, 32($sp) # 4-byte Folded Spill +; SOFT-FLOAT-32-NEXT: sw $18, 28($sp) # 4-byte Folded Spill +; SOFT-FLOAT-32-NEXT: sw $17, 24($sp) # 4-byte Folded Spill +; SOFT-FLOAT-32-NEXT: sw $16, 20($sp) # 4-byte Folded Spill +; SOFT-FLOAT-32-NEXT: .cfi_offset 31, -4 +; SOFT-FLOAT-32-NEXT: .cfi_offset 21, -8 +; SOFT-FLOAT-32-NEXT: .cfi_offset 20, -12 +; SOFT-FLOAT-32-NEXT: .cfi_offset 19, -16 +; SOFT-FLOAT-32-NEXT: .cfi_offset 18, -20 +; SOFT-FLOAT-32-NEXT: .cfi_offset 17, -24 +; SOFT-FLOAT-32-NEXT: .cfi_offset 16, -28 +; SOFT-FLOAT-32-NEXT: move $17, $7 +; SOFT-FLOAT-32-NEXT: move $16, $4 +; SOFT-FLOAT-32-NEXT: lw $4, 64($sp) +; SOFT-FLOAT-32-NEXT: lw $5, 80($sp) +; SOFT-FLOAT-32-NEXT: jal __mulsf3 +; SOFT-FLOAT-32-NEXT: move $18, $6 +; SOFT-FLOAT-32-NEXT: lw $5, 96($sp) +; SOFT-FLOAT-32-NEXT: jal __addsf3 +; SOFT-FLOAT-32-NEXT: move $4, $2 +; SOFT-FLOAT-32-NEXT: lw $4, 68($sp) +; SOFT-FLOAT-32-NEXT: lw $5, 84($sp) +; SOFT-FLOAT-32-NEXT: jal __mulsf3 +; SOFT-FLOAT-32-NEXT: move $19, $2 +; SOFT-FLOAT-32-NEXT: lw $5, 100($sp) +; SOFT-FLOAT-32-NEXT: jal __addsf3 +; SOFT-FLOAT-32-NEXT: move $4, $2 +; SOFT-FLOAT-32-NEXT: move $20, $2 +; SOFT-FLOAT-32-NEXT: lw $5, 76($sp) +; 
SOFT-FLOAT-32-NEXT: jal __mulsf3 +; SOFT-FLOAT-32-NEXT: move $4, $17 +; SOFT-FLOAT-32-NEXT: move $4, $2 +; SOFT-FLOAT-32-NEXT: lw $17, 88($sp) +; SOFT-FLOAT-32-NEXT: lw $21, 72($sp) +; SOFT-FLOAT-32-NEXT: lw $5, 92($sp) +; SOFT-FLOAT-32-NEXT: sw $20, 12($16) +; SOFT-FLOAT-32-NEXT: jal __addsf3 +; SOFT-FLOAT-32-NEXT: sw $19, 8($16) +; SOFT-FLOAT-32-NEXT: sw $2, 4($16) +; SOFT-FLOAT-32-NEXT: move $4, $18 +; SOFT-FLOAT-32-NEXT: jal __mulsf3 +; SOFT-FLOAT-32-NEXT: move $5, $21 +; SOFT-FLOAT-32-NEXT: move $4, $2 +; SOFT-FLOAT-32-NEXT: jal __addsf3 +; SOFT-FLOAT-32-NEXT: move $5, $17 +; SOFT-FLOAT-32-NEXT: sw $2, 0($16) +; SOFT-FLOAT-32-NEXT: lw $16, 20($sp) # 4-byte Folded Reload +; SOFT-FLOAT-32-NEXT: lw $17, 24($sp) # 4-byte Folded Reload +; SOFT-FLOAT-32-NEXT: lw $18, 28($sp) # 4-byte Folded Reload +; SOFT-FLOAT-32-NEXT: lw $19, 32($sp) # 4-byte Folded Reload +; SOFT-FLOAT-32-NEXT: lw $20, 36($sp) # 4-byte Folded Reload +; SOFT-FLOAT-32-NEXT: lw $21, 40($sp) # 4-byte Folded Reload +; SOFT-FLOAT-32-NEXT: lw $ra, 44($sp) # 4-byte Folded Reload +; SOFT-FLOAT-32-NEXT: jr $ra +; SOFT-FLOAT-32-NEXT: addiu $sp, $sp, 48 +; +; SOFT-FLOAT-32R2-LABEL: fmuladd_contract_v4f32: +; SOFT-FLOAT-32R2: # %bb.0: +; SOFT-FLOAT-32R2-NEXT: addiu $sp, $sp, -48 +; SOFT-FLOAT-32R2-NEXT: .cfi_def_cfa_offset 48 +; SOFT-FLOAT-32R2-NEXT: sw $ra, 44($sp) # 4-byte Folded Spill +; SOFT-FLOAT-32R2-NEXT: sw $21, 40($sp) # 4-byte Folded Spill +; SOFT-FLOAT-32R2-NEXT: sw $20, 36($sp) # 4-byte Folded Spill +; SOFT-FLOAT-32R2-NEXT: sw $19, 32($sp) # 4-byte Folded Spill +; SOFT-FLOAT-32R2-NEXT: sw $18, 28($sp) # 4-byte Folded Spill +; SOFT-FLOAT-32R2-NEXT: sw $17, 24($sp) # 4-byte Folded Spill +; SOFT-FLOAT-32R2-NEXT: sw $16, 20($sp) # 4-byte Folded Spill +; SOFT-FLOAT-32R2-NEXT: .cfi_offset 31, -4 +; SOFT-FLOAT-32R2-NEXT: .cfi_offset 21, -8 +; SOFT-FLOAT-32R2-NEXT: .cfi_offset 20, -12 +; SOFT-FLOAT-32R2-NEXT: .cfi_offset 19, -16 +; SOFT-FLOAT-32R2-NEXT: .cfi_offset 18, -20 +; SOFT-FLOAT-32R2-NEXT: .cfi_offset 17, -24 +; SOFT-FLOAT-32R2-NEXT: .cfi_offset 16, -28 +; SOFT-FLOAT-32R2-NEXT: move $17, $7 +; SOFT-FLOAT-32R2-NEXT: move $16, $4 +; SOFT-FLOAT-32R2-NEXT: lw $4, 64($sp) +; SOFT-FLOAT-32R2-NEXT: lw $5, 80($sp) +; SOFT-FLOAT-32R2-NEXT: jal __mulsf3 +; SOFT-FLOAT-32R2-NEXT: move $18, $6 +; SOFT-FLOAT-32R2-NEXT: lw $5, 96($sp) +; SOFT-FLOAT-32R2-NEXT: jal __addsf3 +; SOFT-FLOAT-32R2-NEXT: move $4, $2 +; SOFT-FLOAT-32R2-NEXT: lw $4, 68($sp) +; SOFT-FLOAT-32R2-NEXT: lw $5, 84($sp) +; SOFT-FLOAT-32R2-NEXT: jal __mulsf3 +; SOFT-FLOAT-32R2-NEXT: move $19, $2 +; SOFT-FLOAT-32R2-NEXT: lw $5, 100($sp) +; SOFT-FLOAT-32R2-NEXT: jal __addsf3 +; SOFT-FLOAT-32R2-NEXT: move $4, $2 +; SOFT-FLOAT-32R2-NEXT: move $20, $2 +; SOFT-FLOAT-32R2-NEXT: lw $5, 76($sp) +; SOFT-FLOAT-32R2-NEXT: jal __mulsf3 +; SOFT-FLOAT-32R2-NEXT: move $4, $17 +; SOFT-FLOAT-32R2-NEXT: move $4, $2 +; SOFT-FLOAT-32R2-NEXT: lw $17, 88($sp) +; SOFT-FLOAT-32R2-NEXT: lw $21, 72($sp) +; SOFT-FLOAT-32R2-NEXT: lw $5, 92($sp) +; SOFT-FLOAT-32R2-NEXT: sw $20, 12($16) +; SOFT-FLOAT-32R2-NEXT: jal __addsf3 +; SOFT-FLOAT-32R2-NEXT: sw $19, 8($16) +; SOFT-FLOAT-32R2-NEXT: sw $2, 4($16) +; SOFT-FLOAT-32R2-NEXT: move $4, $18 +; SOFT-FLOAT-32R2-NEXT: jal __mulsf3 +; SOFT-FLOAT-32R2-NEXT: move $5, $21 +; SOFT-FLOAT-32R2-NEXT: move $4, $2 +; SOFT-FLOAT-32R2-NEXT: jal __addsf3 +; SOFT-FLOAT-32R2-NEXT: move $5, $17 +; SOFT-FLOAT-32R2-NEXT: sw $2, 0($16) +; SOFT-FLOAT-32R2-NEXT: lw $16, 20($sp) # 4-byte Folded Reload +; SOFT-FLOAT-32R2-NEXT: lw $17, 24($sp) # 4-byte Folded Reload +; 
SOFT-FLOAT-32R2-NEXT: lw $18, 28($sp) # 4-byte Folded Reload +; SOFT-FLOAT-32R2-NEXT: lw $19, 32($sp) # 4-byte Folded Reload +; SOFT-FLOAT-32R2-NEXT: lw $20, 36($sp) # 4-byte Folded Reload +; SOFT-FLOAT-32R2-NEXT: lw $21, 40($sp) # 4-byte Folded Reload +; SOFT-FLOAT-32R2-NEXT: lw $ra, 44($sp) # 4-byte Folded Reload +; SOFT-FLOAT-32R2-NEXT: jr $ra +; SOFT-FLOAT-32R2-NEXT: addiu $sp, $sp, 48 +; +; SOFT-FLOAT-64-LABEL: fmuladd_contract_v4f32: +; SOFT-FLOAT-64: # %bb.0: +; SOFT-FLOAT-64-NEXT: daddiu $sp, $sp, -64 +; SOFT-FLOAT-64-NEXT: .cfi_def_cfa_offset 64 +; SOFT-FLOAT-64-NEXT: sd $ra, 56($sp) # 8-byte Folded Spill +; SOFT-FLOAT-64-NEXT: sd $22, 48($sp) # 8-byte Folded Spill +; SOFT-FLOAT-64-NEXT: sd $21, 40($sp) # 8-byte Folded Spill +; SOFT-FLOAT-64-NEXT: sd $20, 32($sp) # 8-byte Folded Spill +; SOFT-FLOAT-64-NEXT: sd $19, 24($sp) # 8-byte Folded Spill +; SOFT-FLOAT-64-NEXT: sd $18, 16($sp) # 8-byte Folded Spill +; SOFT-FLOAT-64-NEXT: sd $17, 8($sp) # 8-byte Folded Spill +; SOFT-FLOAT-64-NEXT: sd $16, 0($sp) # 8-byte Folded Spill +; SOFT-FLOAT-64-NEXT: .cfi_offset 31, -8 +; SOFT-FLOAT-64-NEXT: .cfi_offset 22, -16 +; SOFT-FLOAT-64-NEXT: .cfi_offset 21, -24 +; SOFT-FLOAT-64-NEXT: .cfi_offset 20, -32 +; SOFT-FLOAT-64-NEXT: .cfi_offset 19, -40 +; SOFT-FLOAT-64-NEXT: .cfi_offset 18, -48 +; SOFT-FLOAT-64-NEXT: .cfi_offset 17, -56 +; SOFT-FLOAT-64-NEXT: .cfi_offset 16, -64 +; SOFT-FLOAT-64-NEXT: move $16, $9 +; SOFT-FLOAT-64-NEXT: move $17, $8 +; SOFT-FLOAT-64-NEXT: move $18, $7 +; SOFT-FLOAT-64-NEXT: move $19, $6 +; SOFT-FLOAT-64-NEXT: move $20, $5 +; SOFT-FLOAT-64-NEXT: move $21, $4 +; SOFT-FLOAT-64-NEXT: sll $4, $4, 0 +; SOFT-FLOAT-64-NEXT: jal __mulsf3 +; SOFT-FLOAT-64-NEXT: sll $5, $6, 0 +; SOFT-FLOAT-64-NEXT: move $22, $2 +; SOFT-FLOAT-64-NEXT: dsra $4, $21, 32 +; SOFT-FLOAT-64-NEXT: jal __mulsf3 +; SOFT-FLOAT-64-NEXT: dsra $5, $19, 32 +; SOFT-FLOAT-64-NEXT: sll $4, $2, 0 +; SOFT-FLOAT-64-NEXT: jal __addsf3 +; SOFT-FLOAT-64-NEXT: dsra $5, $17, 32 +; SOFT-FLOAT-64-NEXT: # kill: def $v0 killed $v0 def $v0_64 +; SOFT-FLOAT-64-NEXT: sll $4, $22, 0 +; SOFT-FLOAT-64-NEXT: sll $5, $17, 0 +; SOFT-FLOAT-64-NEXT: jal __addsf3 +; SOFT-FLOAT-64-NEXT: dsll $17, $2, 32 +; SOFT-FLOAT-64-NEXT: dsll $1, $2, 32 +; SOFT-FLOAT-64-NEXT: dsrl $1, $1, 32 +; SOFT-FLOAT-64-NEXT: sll $4, $20, 0 +; SOFT-FLOAT-64-NEXT: sll $5, $18, 0 +; SOFT-FLOAT-64-NEXT: jal __mulsf3 +; SOFT-FLOAT-64-NEXT: or $17, $1, $17 +; SOFT-FLOAT-64-NEXT: move $19, $2 +; SOFT-FLOAT-64-NEXT: dsra $4, $20, 32 +; SOFT-FLOAT-64-NEXT: jal __mulsf3 +; SOFT-FLOAT-64-NEXT: dsra $5, $18, 32 +; SOFT-FLOAT-64-NEXT: sll $4, $2, 0 +; SOFT-FLOAT-64-NEXT: jal __addsf3 +; SOFT-FLOAT-64-NEXT: dsra $5, $16, 32 +; SOFT-FLOAT-64-NEXT: # kill: def $v0 killed $v0 def $v0_64 +; SOFT-FLOAT-64-NEXT: dsll $18, $2, 32 +; SOFT-FLOAT-64-NEXT: sll $4, $19, 0 +; SOFT-FLOAT-64-NEXT: jal __addsf3 +; SOFT-FLOAT-64-NEXT: sll $5, $16, 0 +; SOFT-FLOAT-64-NEXT: dsll $1, $2, 32 +; SOFT-FLOAT-64-NEXT: dsrl $1, $1, 32 +; SOFT-FLOAT-64-NEXT: or $3, $1, $18 +; SOFT-FLOAT-64-NEXT: move $2, $17 +; SOFT-FLOAT-64-NEXT: ld $16, 0($sp) # 8-byte Folded Reload +; SOFT-FLOAT-64-NEXT: ld $17, 8($sp) # 8-byte Folded Reload +; SOFT-FLOAT-64-NEXT: ld $18, 16($sp) # 8-byte Folded Reload +; SOFT-FLOAT-64-NEXT: ld $19, 24($sp) # 8-byte Folded Reload +; SOFT-FLOAT-64-NEXT: ld $20, 32($sp) # 8-byte Folded Reload +; SOFT-FLOAT-64-NEXT: ld $21, 40($sp) # 8-byte Folded Reload +; SOFT-FLOAT-64-NEXT: ld $22, 48($sp) # 8-byte Folded Reload +; SOFT-FLOAT-64-NEXT: ld $ra, 56($sp) # 8-byte Folded Reload +; 
SOFT-FLOAT-64-NEXT: jr $ra +; SOFT-FLOAT-64-NEXT: daddiu $sp, $sp, 64 +; +; SOFT-FLOAT-64R2-LABEL: fmuladd_contract_v4f32: +; SOFT-FLOAT-64R2: # %bb.0: +; SOFT-FLOAT-64R2-NEXT: daddiu $sp, $sp, -64 +; SOFT-FLOAT-64R2-NEXT: .cfi_def_cfa_offset 64 +; SOFT-FLOAT-64R2-NEXT: sd $ra, 56($sp) # 8-byte Folded Spill +; SOFT-FLOAT-64R2-NEXT: sd $22, 48($sp) # 8-byte Folded Spill +; SOFT-FLOAT-64R2-NEXT: sd $21, 40($sp) # 8-byte Folded Spill +; SOFT-FLOAT-64R2-NEXT: sd $20, 32($sp) # 8-byte Folded Spill +; SOFT-FLOAT-64R2-NEXT: sd $19, 24($sp) # 8-byte Folded Spill +; SOFT-FLOAT-64R2-NEXT: sd $18, 16($sp) # 8-byte Folded Spill +; SOFT-FLOAT-64R2-NEXT: sd $17, 8($sp) # 8-byte Folded Spill +; SOFT-FLOAT-64R2-NEXT: sd $16, 0($sp) # 8-byte Folded Spill +; SOFT-FLOAT-64R2-NEXT: .cfi_offset 31, -8 +; SOFT-FLOAT-64R2-NEXT: .cfi_offset 22, -16 +; SOFT-FLOAT-64R2-NEXT: .cfi_offset 21, -24 +; SOFT-FLOAT-64R2-NEXT: .cfi_offset 20, -32 +; SOFT-FLOAT-64R2-NEXT: .cfi_offset 19, -40 +; SOFT-FLOAT-64R2-NEXT: .cfi_offset 18, -48 +; SOFT-FLOAT-64R2-NEXT: .cfi_offset 17, -56 +; SOFT-FLOAT-64R2-NEXT: .cfi_offset 16, -64 +; SOFT-FLOAT-64R2-NEXT: move $16, $9 +; SOFT-FLOAT-64R2-NEXT: move $17, $8 +; SOFT-FLOAT-64R2-NEXT: move $18, $7 +; SOFT-FLOAT-64R2-NEXT: move $19, $6 +; SOFT-FLOAT-64R2-NEXT: move $20, $5 +; SOFT-FLOAT-64R2-NEXT: move $21, $4 +; SOFT-FLOAT-64R2-NEXT: dsra $4, $4, 32 +; SOFT-FLOAT-64R2-NEXT: jal __mulsf3 +; SOFT-FLOAT-64R2-NEXT: dsra $5, $6, 32 +; SOFT-FLOAT-64R2-NEXT: move $22, $2 +; SOFT-FLOAT-64R2-NEXT: sll $4, $21, 0 +; SOFT-FLOAT-64R2-NEXT: jal __mulsf3 +; SOFT-FLOAT-64R2-NEXT: sll $5, $19, 0 +; SOFT-FLOAT-64R2-NEXT: sll $4, $2, 0 +; SOFT-FLOAT-64R2-NEXT: jal __addsf3 +; SOFT-FLOAT-64R2-NEXT: sll $5, $17, 0 +; SOFT-FLOAT-64R2-NEXT: sll $4, $22, 0 +; SOFT-FLOAT-64R2-NEXT: dsra $5, $17, 32 +; SOFT-FLOAT-64R2-NEXT: jal __addsf3 +; SOFT-FLOAT-64R2-NEXT: dext $17, $2, 0, 32 +; SOFT-FLOAT-64R2-NEXT: # kill: def $v0 killed $v0 def $v0_64 +; SOFT-FLOAT-64R2-NEXT: dsll $1, $2, 32 +; SOFT-FLOAT-64R2-NEXT: dsra $4, $20, 32 +; SOFT-FLOAT-64R2-NEXT: dsra $5, $18, 32 +; SOFT-FLOAT-64R2-NEXT: jal __mulsf3 +; SOFT-FLOAT-64R2-NEXT: or $17, $17, $1 +; SOFT-FLOAT-64R2-NEXT: move $19, $2 +; SOFT-FLOAT-64R2-NEXT: sll $4, $20, 0 +; SOFT-FLOAT-64R2-NEXT: jal __mulsf3 +; SOFT-FLOAT-64R2-NEXT: sll $5, $18, 0 +; SOFT-FLOAT-64R2-NEXT: sll $4, $2, 0 +; SOFT-FLOAT-64R2-NEXT: jal __addsf3 +; SOFT-FLOAT-64R2-NEXT: sll $5, $16, 0 +; SOFT-FLOAT-64R2-NEXT: dext $18, $2, 0, 32 +; SOFT-FLOAT-64R2-NEXT: sll $4, $19, 0 +; SOFT-FLOAT-64R2-NEXT: jal __addsf3 +; SOFT-FLOAT-64R2-NEXT: dsra $5, $16, 32 +; SOFT-FLOAT-64R2-NEXT: # kill: def $v0 killed $v0 def $v0_64 +; SOFT-FLOAT-64R2-NEXT: dsll $1, $2, 32 +; SOFT-FLOAT-64R2-NEXT: or $3, $18, $1 +; SOFT-FLOAT-64R2-NEXT: move $2, $17 +; SOFT-FLOAT-64R2-NEXT: ld $16, 0($sp) # 8-byte Folded Reload +; SOFT-FLOAT-64R2-NEXT: ld $17, 8($sp) # 8-byte Folded Reload +; SOFT-FLOAT-64R2-NEXT: ld $18, 16($sp) # 8-byte Folded Reload +; SOFT-FLOAT-64R2-NEXT: ld $19, 24($sp) # 8-byte Folded Reload +; SOFT-FLOAT-64R2-NEXT: ld $20, 32($sp) # 8-byte Folded Reload +; SOFT-FLOAT-64R2-NEXT: ld $21, 40($sp) # 8-byte Folded Reload +; SOFT-FLOAT-64R2-NEXT: ld $22, 48($sp) # 8-byte Folded Reload +; SOFT-FLOAT-64R2-NEXT: ld $ra, 56($sp) # 8-byte Folded Reload +; SOFT-FLOAT-64R2-NEXT: jr $ra +; SOFT-FLOAT-64R2-NEXT: daddiu $sp, $sp, 64 + %product = fmul contract <4 x float> %a, %b + %result = fadd contract <4 x float> %product, %c + ret <4 x float> %result +} + +define <4 x double> @fmuladd_contract_v4f64(<4 x double> 
%a, <4 x double> %b, <4 x double> %c) #0 { +; SOFT-FLOAT-32-LABEL: fmuladd_contract_v4f64: +; SOFT-FLOAT-32: # %bb.0: +; SOFT-FLOAT-32-NEXT: addiu $sp, $sp, -64 +; SOFT-FLOAT-32-NEXT: .cfi_def_cfa_offset 64 +; SOFT-FLOAT-32-NEXT: sw $ra, 60($sp) # 4-byte Folded Spill +; SOFT-FLOAT-32-NEXT: sw $fp, 56($sp) # 4-byte Folded Spill +; SOFT-FLOAT-32-NEXT: sw $23, 52($sp) # 4-byte Folded Spill +; SOFT-FLOAT-32-NEXT: sw $22, 48($sp) # 4-byte Folded Spill +; SOFT-FLOAT-32-NEXT: sw $21, 44($sp) # 4-byte Folded Spill +; SOFT-FLOAT-32-NEXT: sw $20, 40($sp) # 4-byte Folded Spill +; SOFT-FLOAT-32-NEXT: sw $19, 36($sp) # 4-byte Folded Spill +; SOFT-FLOAT-32-NEXT: sw $18, 32($sp) # 4-byte Folded Spill +; SOFT-FLOAT-32-NEXT: sw $17, 28($sp) # 4-byte Folded Spill +; SOFT-FLOAT-32-NEXT: sw $16, 24($sp) # 4-byte Folded Spill +; SOFT-FLOAT-32-NEXT: .cfi_offset 31, -4 +; SOFT-FLOAT-32-NEXT: .cfi_offset 30, -8 +; SOFT-FLOAT-32-NEXT: .cfi_offset 23, -12 +; SOFT-FLOAT-32-NEXT: .cfi_offset 22, -16 +; SOFT-FLOAT-32-NEXT: .cfi_offset 21, -20 +; SOFT-FLOAT-32-NEXT: .cfi_offset 20, -24 +; SOFT-FLOAT-32-NEXT: .cfi_offset 19, -28 +; SOFT-FLOAT-32-NEXT: .cfi_offset 18, -32 +; SOFT-FLOAT-32-NEXT: .cfi_offset 17, -36 +; SOFT-FLOAT-32-NEXT: .cfi_offset 16, -40 +; SOFT-FLOAT-32-NEXT: sw $7, 20($sp) # 4-byte Folded Spill +; SOFT-FLOAT-32-NEXT: sw $6, 16($sp) # 4-byte Folded Spill +; SOFT-FLOAT-32-NEXT: move $16, $4 +; SOFT-FLOAT-32-NEXT: lw $4, 88($sp) +; SOFT-FLOAT-32-NEXT: lw $5, 92($sp) +; SOFT-FLOAT-32-NEXT: lw $6, 120($sp) +; SOFT-FLOAT-32-NEXT: lw $7, 124($sp) +; SOFT-FLOAT-32-NEXT: jal __muldf3 +; SOFT-FLOAT-32-NEXT: nop +; SOFT-FLOAT-32-NEXT: move $4, $2 +; SOFT-FLOAT-32-NEXT: lw $6, 152($sp) +; SOFT-FLOAT-32-NEXT: lw $7, 156($sp) +; SOFT-FLOAT-32-NEXT: jal __adddf3 +; SOFT-FLOAT-32-NEXT: move $5, $3 +; SOFT-FLOAT-32-NEXT: move $19, $2 +; SOFT-FLOAT-32-NEXT: lw $4, 96($sp) +; SOFT-FLOAT-32-NEXT: lw $5, 100($sp) +; SOFT-FLOAT-32-NEXT: lw $6, 128($sp) +; SOFT-FLOAT-32-NEXT: lw $7, 132($sp) +; SOFT-FLOAT-32-NEXT: jal __muldf3 +; SOFT-FLOAT-32-NEXT: move $20, $3 +; SOFT-FLOAT-32-NEXT: move $4, $2 +; SOFT-FLOAT-32-NEXT: lw $6, 160($sp) +; SOFT-FLOAT-32-NEXT: lw $7, 164($sp) +; SOFT-FLOAT-32-NEXT: jal __adddf3 +; SOFT-FLOAT-32-NEXT: move $5, $3 +; SOFT-FLOAT-32-NEXT: move $21, $2 +; SOFT-FLOAT-32-NEXT: lw $4, 80($sp) +; SOFT-FLOAT-32-NEXT: lw $5, 84($sp) +; SOFT-FLOAT-32-NEXT: lw $6, 112($sp) +; SOFT-FLOAT-32-NEXT: lw $7, 116($sp) +; SOFT-FLOAT-32-NEXT: jal __muldf3 +; SOFT-FLOAT-32-NEXT: move $22, $3 +; SOFT-FLOAT-32-NEXT: move $4, $2 +; SOFT-FLOAT-32-NEXT: move $5, $3 +; SOFT-FLOAT-32-NEXT: lw $23, 140($sp) +; SOFT-FLOAT-32-NEXT: lw $fp, 136($sp) +; SOFT-FLOAT-32-NEXT: lw $17, 108($sp) +; SOFT-FLOAT-32-NEXT: lw $18, 104($sp) +; SOFT-FLOAT-32-NEXT: lw $7, 148($sp) +; SOFT-FLOAT-32-NEXT: lw $6, 144($sp) +; SOFT-FLOAT-32-NEXT: sw $22, 28($16) +; SOFT-FLOAT-32-NEXT: sw $21, 24($16) +; SOFT-FLOAT-32-NEXT: sw $20, 20($16) +; SOFT-FLOAT-32-NEXT: jal __adddf3 +; SOFT-FLOAT-32-NEXT: sw $19, 16($16) +; SOFT-FLOAT-32-NEXT: sw $3, 12($16) +; SOFT-FLOAT-32-NEXT: sw $2, 8($16) +; SOFT-FLOAT-32-NEXT: lw $4, 16($sp) # 4-byte Folded Reload +; SOFT-FLOAT-32-NEXT: lw $5, 20($sp) # 4-byte Folded Reload +; SOFT-FLOAT-32-NEXT: move $6, $18 +; SOFT-FLOAT-32-NEXT: jal __muldf3 +; SOFT-FLOAT-32-NEXT: move $7, $17 +; SOFT-FLOAT-32-NEXT: move $4, $2 +; SOFT-FLOAT-32-NEXT: move $5, $3 +; SOFT-FLOAT-32-NEXT: move $6, $fp +; SOFT-FLOAT-32-NEXT: jal __adddf3 +; SOFT-FLOAT-32-NEXT: move $7, $23 +; SOFT-FLOAT-32-NEXT: sw $3, 4($16) +; SOFT-FLOAT-32-NEXT: 
sw $2, 0($16) +; SOFT-FLOAT-32-NEXT: lw $16, 24($sp) # 4-byte Folded Reload +; SOFT-FLOAT-32-NEXT: lw $17, 28($sp) # 4-byte Folded Reload +; SOFT-FLOAT-32-NEXT: lw $18, 32($sp) # 4-byte Folded Reload +; SOFT-FLOAT-32-NEXT: lw $19, 36($sp) # 4-byte Folded Reload +; SOFT-FLOAT-32-NEXT: lw $20, 40($sp) # 4-byte Folded Reload +; SOFT-FLOAT-32-NEXT: lw $21, 44($sp) # 4-byte Folded Reload +; SOFT-FLOAT-32-NEXT: lw $22, 48($sp) # 4-byte Folded Reload +; SOFT-FLOAT-32-NEXT: lw $23, 52($sp) # 4-byte Folded Reload +; SOFT-FLOAT-32-NEXT: lw $fp, 56($sp) # 4-byte Folded Reload +; SOFT-FLOAT-32-NEXT: lw $ra, 60($sp) # 4-byte Folded Reload +; SOFT-FLOAT-32-NEXT: jr $ra +; SOFT-FLOAT-32-NEXT: addiu $sp, $sp, 64 +; +; SOFT-FLOAT-32R2-LABEL: fmuladd_contract_v4f64: +; SOFT-FLOAT-32R2: # %bb.0: +; SOFT-FLOAT-32R2-NEXT: addiu $sp, $sp, -64 +; SOFT-FLOAT-32R2-NEXT: .cfi_def_cfa_offset 64 +; SOFT-FLOAT-32R2-NEXT: sw $ra, 60($sp) # 4-byte Folded Spill +; SOFT-FLOAT-32R2-NEXT: sw $fp, 56($sp) # 4-byte Folded Spill +; SOFT-FLOAT-32R2-NEXT: sw $23, 52($sp) # 4-byte Folded Spill +; SOFT-FLOAT-32R2-NEXT: sw $22, 48($sp) # 4-byte Folded Spill +; SOFT-FLOAT-32R2-NEXT: sw $21, 44($sp) # 4-byte Folded Spill +; SOFT-FLOAT-32R2-NEXT: sw $20, 40($sp) # 4-byte Folded Spill +; SOFT-FLOAT-32R2-NEXT: sw $19, 36($sp) # 4-byte Folded Spill +; SOFT-FLOAT-32R2-NEXT: sw $18, 32($sp) # 4-byte Folded Spill +; SOFT-FLOAT-32R2-NEXT: sw $17, 28($sp) # 4-byte Folded Spill +; SOFT-FLOAT-32R2-NEXT: sw $16, 24($sp) # 4-byte Folded Spill +; SOFT-FLOAT-32R2-NEXT: .cfi_offset 31, -4 +; SOFT-FLOAT-32R2-NEXT: .cfi_offset 30, -8 +; SOFT-FLOAT-32R2-NEXT: .cfi_offset 23, -12 +; SOFT-FLOAT-32R2-NEXT: .cfi_offset 22, -16 +; SOFT-FLOAT-32R2-NEXT: .cfi_offset 21, -20 +; SOFT-FLOAT-32R2-NEXT: .cfi_offset 20, -24 +; SOFT-FLOAT-32R2-NEXT: .cfi_offset 19, -28 +; SOFT-FLOAT-32R2-NEXT: .cfi_offset 18, -32 +; SOFT-FLOAT-32R2-NEXT: .cfi_offset 17, -36 +; SOFT-FLOAT-32R2-NEXT: .cfi_offset 16, -40 +; SOFT-FLOAT-32R2-NEXT: sw $7, 20($sp) # 4-byte Folded Spill +; SOFT-FLOAT-32R2-NEXT: sw $6, 16($sp) # 4-byte Folded Spill +; SOFT-FLOAT-32R2-NEXT: move $16, $4 +; SOFT-FLOAT-32R2-NEXT: lw $4, 88($sp) +; SOFT-FLOAT-32R2-NEXT: lw $5, 92($sp) +; SOFT-FLOAT-32R2-NEXT: lw $6, 120($sp) +; SOFT-FLOAT-32R2-NEXT: lw $7, 124($sp) +; SOFT-FLOAT-32R2-NEXT: jal __muldf3 +; SOFT-FLOAT-32R2-NEXT: nop +; SOFT-FLOAT-32R2-NEXT: move $4, $2 +; SOFT-FLOAT-32R2-NEXT: lw $6, 152($sp) +; SOFT-FLOAT-32R2-NEXT: lw $7, 156($sp) +; SOFT-FLOAT-32R2-NEXT: jal __adddf3 +; SOFT-FLOAT-32R2-NEXT: move $5, $3 +; SOFT-FLOAT-32R2-NEXT: move $19, $2 +; SOFT-FLOAT-32R2-NEXT: lw $4, 96($sp) +; SOFT-FLOAT-32R2-NEXT: lw $5, 100($sp) +; SOFT-FLOAT-32R2-NEXT: lw $6, 128($sp) +; SOFT-FLOAT-32R2-NEXT: lw $7, 132($sp) +; SOFT-FLOAT-32R2-NEXT: jal __muldf3 +; SOFT-FLOAT-32R2-NEXT: move $20, $3 +; SOFT-FLOAT-32R2-NEXT: move $4, $2 +; SOFT-FLOAT-32R2-NEXT: lw $6, 160($sp) +; SOFT-FLOAT-32R2-NEXT: lw $7, 164($sp) +; SOFT-FLOAT-32R2-NEXT: jal __adddf3 +; SOFT-FLOAT-32R2-NEXT: move $5, $3 +; SOFT-FLOAT-32R2-NEXT: move $21, $2 +; SOFT-FLOAT-32R2-NEXT: lw $4, 80($sp) +; SOFT-FLOAT-32R2-NEXT: lw $5, 84($sp) +; SOFT-FLOAT-32R2-NEXT: lw $6, 112($sp) +; SOFT-FLOAT-32R2-NEXT: lw $7, 116($sp) +; SOFT-FLOAT-32R2-NEXT: jal __muldf3 +; SOFT-FLOAT-32R2-NEXT: move $22, $3 +; SOFT-FLOAT-32R2-NEXT: move $4, $2 +; SOFT-FLOAT-32R2-NEXT: move $5, $3 +; SOFT-FLOAT-32R2-NEXT: lw $23, 140($sp) +; SOFT-FLOAT-32R2-NEXT: lw $fp, 136($sp) +; SOFT-FLOAT-32R2-NEXT: lw $17, 108($sp) +; SOFT-FLOAT-32R2-NEXT: lw $18, 104($sp) +; 
SOFT-FLOAT-32R2-NEXT: lw $7, 148($sp) +; SOFT-FLOAT-32R2-NEXT: lw $6, 144($sp) +; SOFT-FLOAT-32R2-NEXT: sw $22, 28($16) +; SOFT-FLOAT-32R2-NEXT: sw $21, 24($16) +; SOFT-FLOAT-32R2-NEXT: sw $20, 20($16) +; SOFT-FLOAT-32R2-NEXT: jal __adddf3 +; SOFT-FLOAT-32R2-NEXT: sw $19, 16($16) +; SOFT-FLOAT-32R2-NEXT: sw $3, 12($16) +; SOFT-FLOAT-32R2-NEXT: sw $2, 8($16) +; SOFT-FLOAT-32R2-NEXT: lw $4, 16($sp) # 4-byte Folded Reload +; SOFT-FLOAT-32R2-NEXT: lw $5, 20($sp) # 4-byte Folded Reload +; SOFT-FLOAT-32R2-NEXT: move $6, $18 +; SOFT-FLOAT-32R2-NEXT: jal __muldf3 +; SOFT-FLOAT-32R2-NEXT: move $7, $17 +; SOFT-FLOAT-32R2-NEXT: move $4, $2 +; SOFT-FLOAT-32R2-NEXT: move $5, $3 +; SOFT-FLOAT-32R2-NEXT: move $6, $fp +; SOFT-FLOAT-32R2-NEXT: jal __adddf3 +; SOFT-FLOAT-32R2-NEXT: move $7, $23 +; SOFT-FLOAT-32R2-NEXT: sw $3, 4($16) +; SOFT-FLOAT-32R2-NEXT: sw $2, 0($16) +; SOFT-FLOAT-32R2-NEXT: lw $16, 24($sp) # 4-byte Folded Reload +; SOFT-FLOAT-32R2-NEXT: lw $17, 28($sp) # 4-byte Folded Reload +; SOFT-FLOAT-32R2-NEXT: lw $18, 32($sp) # 4-byte Folded Reload +; SOFT-FLOAT-32R2-NEXT: lw $19, 36($sp) # 4-byte Folded Reload +; SOFT-FLOAT-32R2-NEXT: lw $20, 40($sp) # 4-byte Folded Reload +; SOFT-FLOAT-32R2-NEXT: lw $21, 44($sp) # 4-byte Folded Reload +; SOFT-FLOAT-32R2-NEXT: lw $22, 48($sp) # 4-byte Folded Reload +; SOFT-FLOAT-32R2-NEXT: lw $23, 52($sp) # 4-byte Folded Reload +; SOFT-FLOAT-32R2-NEXT: lw $fp, 56($sp) # 4-byte Folded Reload +; SOFT-FLOAT-32R2-NEXT: lw $ra, 60($sp) # 4-byte Folded Reload +; SOFT-FLOAT-32R2-NEXT: jr $ra +; SOFT-FLOAT-32R2-NEXT: addiu $sp, $sp, 64 +; +; SOFT-FLOAT-64-LABEL: fmuladd_contract_v4f64: +; SOFT-FLOAT-64: # %bb.0: +; SOFT-FLOAT-64-NEXT: daddiu $sp, $sp, -64 +; SOFT-FLOAT-64-NEXT: .cfi_def_cfa_offset 64 +; SOFT-FLOAT-64-NEXT: sd $ra, 56($sp) # 8-byte Folded Spill +; SOFT-FLOAT-64-NEXT: sd $22, 48($sp) # 8-byte Folded Spill +; SOFT-FLOAT-64-NEXT: sd $21, 40($sp) # 8-byte Folded Spill +; SOFT-FLOAT-64-NEXT: sd $20, 32($sp) # 8-byte Folded Spill +; SOFT-FLOAT-64-NEXT: sd $19, 24($sp) # 8-byte Folded Spill +; SOFT-FLOAT-64-NEXT: sd $18, 16($sp) # 8-byte Folded Spill +; SOFT-FLOAT-64-NEXT: sd $17, 8($sp) # 8-byte Folded Spill +; SOFT-FLOAT-64-NEXT: sd $16, 0($sp) # 8-byte Folded Spill +; SOFT-FLOAT-64-NEXT: .cfi_offset 31, -8 +; SOFT-FLOAT-64-NEXT: .cfi_offset 22, -16 +; SOFT-FLOAT-64-NEXT: .cfi_offset 21, -24 +; SOFT-FLOAT-64-NEXT: .cfi_offset 20, -32 +; SOFT-FLOAT-64-NEXT: .cfi_offset 19, -40 +; SOFT-FLOAT-64-NEXT: .cfi_offset 18, -48 +; SOFT-FLOAT-64-NEXT: .cfi_offset 17, -56 +; SOFT-FLOAT-64-NEXT: .cfi_offset 16, -64 +; SOFT-FLOAT-64-NEXT: move $17, $10 +; SOFT-FLOAT-64-NEXT: move $18, $9 +; SOFT-FLOAT-64-NEXT: move $19, $8 +; SOFT-FLOAT-64-NEXT: move $20, $6 +; SOFT-FLOAT-64-NEXT: move $21, $5 +; SOFT-FLOAT-64-NEXT: move $16, $4 +; SOFT-FLOAT-64-NEXT: move $4, $7 +; SOFT-FLOAT-64-NEXT: jal __muldf3 +; SOFT-FLOAT-64-NEXT: move $5, $11 +; SOFT-FLOAT-64-NEXT: ld $5, 88($sp) +; SOFT-FLOAT-64-NEXT: jal __adddf3 +; SOFT-FLOAT-64-NEXT: move $4, $2 +; SOFT-FLOAT-64-NEXT: move $22, $2 +; SOFT-FLOAT-64-NEXT: ld $5, 64($sp) +; SOFT-FLOAT-64-NEXT: jal __muldf3 +; SOFT-FLOAT-64-NEXT: move $4, $19 +; SOFT-FLOAT-64-NEXT: ld $5, 96($sp) +; SOFT-FLOAT-64-NEXT: jal __adddf3 +; SOFT-FLOAT-64-NEXT: move $4, $2 +; SOFT-FLOAT-64-NEXT: move $19, $2 +; SOFT-FLOAT-64-NEXT: move $4, $20 +; SOFT-FLOAT-64-NEXT: jal __muldf3 +; SOFT-FLOAT-64-NEXT: move $5, $17 +; SOFT-FLOAT-64-NEXT: move $4, $2 +; SOFT-FLOAT-64-NEXT: ld $17, 72($sp) +; SOFT-FLOAT-64-NEXT: ld $5, 80($sp) +; SOFT-FLOAT-64-NEXT: sd $19, 
24($16) +; SOFT-FLOAT-64-NEXT: jal __adddf3 +; SOFT-FLOAT-64-NEXT: sd $22, 16($16) +; SOFT-FLOAT-64-NEXT: sd $2, 8($16) +; SOFT-FLOAT-64-NEXT: move $4, $21 +; SOFT-FLOAT-64-NEXT: jal __muldf3 +; SOFT-FLOAT-64-NEXT: move $5, $18 +; SOFT-FLOAT-64-NEXT: move $4, $2 +; SOFT-FLOAT-64-NEXT: jal __adddf3 +; SOFT-FLOAT-64-NEXT: move $5, $17 +; SOFT-FLOAT-64-NEXT: sd $2, 0($16) +; SOFT-FLOAT-64-NEXT: ld $16, 0($sp) # 8-byte Folded Reload +; SOFT-FLOAT-64-NEXT: ld $17, 8($sp) # 8-byte Folded Reload +; SOFT-FLOAT-64-NEXT: ld $18, 16($sp) # 8-byte Folded Reload +; SOFT-FLOAT-64-NEXT: ld $19, 24($sp) # 8-byte Folded Reload +; SOFT-FLOAT-64-NEXT: ld $20, 32($sp) # 8-byte Folded Reload +; SOFT-FLOAT-64-NEXT: ld $21, 40($sp) # 8-byte Folded Reload +; SOFT-FLOAT-64-NEXT: ld $22, 48($sp) # 8-byte Folded Reload +; SOFT-FLOAT-64-NEXT: ld $ra, 56($sp) # 8-byte Folded Reload +; SOFT-FLOAT-64-NEXT: jr $ra +; SOFT-FLOAT-64-NEXT: daddiu $sp, $sp, 64 +; +; SOFT-FLOAT-64R2-LABEL: fmuladd_contract_v4f64: +; SOFT-FLOAT-64R2: # %bb.0: +; SOFT-FLOAT-64R2-NEXT: daddiu $sp, $sp, -64 +; SOFT-FLOAT-64R2-NEXT: .cfi_def_cfa_offset 64 +; SOFT-FLOAT-64R2-NEXT: sd $ra, 56($sp) # 8-byte Folded Spill +; SOFT-FLOAT-64R2-NEXT: sd $22, 48($sp) # 8-byte Folded Spill +; SOFT-FLOAT-64R2-NEXT: sd $21, 40($sp) # 8-byte Folded Spill +; SOFT-FLOAT-64R2-NEXT: sd $20, 32($sp) # 8-byte Folded Spill +; SOFT-FLOAT-64R2-NEXT: sd $19, 24($sp) # 8-byte Folded Spill +; SOFT-FLOAT-64R2-NEXT: sd $18, 16($sp) # 8-byte Folded Spill +; SOFT-FLOAT-64R2-NEXT: sd $17, 8($sp) # 8-byte Folded Spill +; SOFT-FLOAT-64R2-NEXT: sd $16, 0($sp) # 8-byte Folded Spill +; SOFT-FLOAT-64R2-NEXT: .cfi_offset 31, -8 +; SOFT-FLOAT-64R2-NEXT: .cfi_offset 22, -16 +; SOFT-FLOAT-64R2-NEXT: .cfi_offset 21, -24 +; SOFT-FLOAT-64R2-NEXT: .cfi_offset 20, -32 +; SOFT-FLOAT-64R2-NEXT: .cfi_offset 19, -40 +; SOFT-FLOAT-64R2-NEXT: .cfi_offset 18, -48 +; SOFT-FLOAT-64R2-NEXT: .cfi_offset 17, -56 +; SOFT-FLOAT-64R2-NEXT: .cfi_offset 16, -64 +; SOFT-FLOAT-64R2-NEXT: move $17, $10 +; SOFT-FLOAT-64R2-NEXT: move $18, $9 +; SOFT-FLOAT-64R2-NEXT: move $19, $8 +; SOFT-FLOAT-64R2-NEXT: move $20, $6 +; SOFT-FLOAT-64R2-NEXT: move $21, $5 +; SOFT-FLOAT-64R2-NEXT: move $16, $4 +; SOFT-FLOAT-64R2-NEXT: move $4, $7 +; SOFT-FLOAT-64R2-NEXT: jal __muldf3 +; SOFT-FLOAT-64R2-NEXT: move $5, $11 +; SOFT-FLOAT-64R2-NEXT: ld $5, 88($sp) +; SOFT-FLOAT-64R2-NEXT: jal __adddf3 +; SOFT-FLOAT-64R2-NEXT: move $4, $2 +; SOFT-FLOAT-64R2-NEXT: move $22, $2 +; SOFT-FLOAT-64R2-NEXT: ld $5, 64($sp) +; SOFT-FLOAT-64R2-NEXT: jal __muldf3 +; SOFT-FLOAT-64R2-NEXT: move $4, $19 +; SOFT-FLOAT-64R2-NEXT: ld $5, 96($sp) +; SOFT-FLOAT-64R2-NEXT: jal __adddf3 +; SOFT-FLOAT-64R2-NEXT: move $4, $2 +; SOFT-FLOAT-64R2-NEXT: move $19, $2 +; SOFT-FLOAT-64R2-NEXT: move $4, $20 +; SOFT-FLOAT-64R2-NEXT: jal __muldf3 +; SOFT-FLOAT-64R2-NEXT: move $5, $17 +; SOFT-FLOAT-64R2-NEXT: move $4, $2 +; SOFT-FLOAT-64R2-NEXT: ld $17, 72($sp) +; SOFT-FLOAT-64R2-NEXT: ld $5, 80($sp) +; SOFT-FLOAT-64R2-NEXT: sd $19, 24($16) +; SOFT-FLOAT-64R2-NEXT: jal __adddf3 +; SOFT-FLOAT-64R2-NEXT: sd $22, 16($16) +; SOFT-FLOAT-64R2-NEXT: sd $2, 8($16) +; SOFT-FLOAT-64R2-NEXT: move $4, $21 +; SOFT-FLOAT-64R2-NEXT: jal __muldf3 +; SOFT-FLOAT-64R2-NEXT: move $5, $18 +; SOFT-FLOAT-64R2-NEXT: move $4, $2 +; SOFT-FLOAT-64R2-NEXT: jal __adddf3 +; SOFT-FLOAT-64R2-NEXT: move $5, $17 +; SOFT-FLOAT-64R2-NEXT: sd $2, 0($16) +; SOFT-FLOAT-64R2-NEXT: ld $16, 0($sp) # 8-byte Folded Reload +; SOFT-FLOAT-64R2-NEXT: ld $17, 8($sp) # 8-byte Folded Reload +; SOFT-FLOAT-64R2-NEXT: ld 
$18, 16($sp) # 8-byte Folded Reload +; SOFT-FLOAT-64R2-NEXT: ld $19, 24($sp) # 8-byte Folded Reload +; SOFT-FLOAT-64R2-NEXT: ld $20, 32($sp) # 8-byte Folded Reload +; SOFT-FLOAT-64R2-NEXT: ld $21, 40($sp) # 8-byte Folded Reload +; SOFT-FLOAT-64R2-NEXT: ld $22, 48($sp) # 8-byte Folded Reload +; SOFT-FLOAT-64R2-NEXT: ld $ra, 56($sp) # 8-byte Folded Reload +; SOFT-FLOAT-64R2-NEXT: jr $ra +; SOFT-FLOAT-64R2-NEXT: daddiu $sp, $sp, 64 + %product = fmul contract <4 x double> %a, %b + %result = fadd contract <4 x double> %product, %c + ret <4 x double> %result +} + +attributes #0 = { "use-soft-float"="true" } + +declare float @llvm.fmuladd.f32(float %a, float %b, float %c) +declare double @llvm.fmuladd.f64(double %a, double %b, double %c) diff --git a/llvm/test/CodeGen/NVPTX/LoadStoreVectorizer.ll b/llvm/test/CodeGen/NVPTX/LoadStoreVectorizer.ll index bc58a700cb9828..028fab7ae54d6a 100644 --- a/llvm/test/CodeGen/NVPTX/LoadStoreVectorizer.ll +++ b/llvm/test/CodeGen/NVPTX/LoadStoreVectorizer.ll @@ -19,7 +19,7 @@ define i32 @f(ptr %p) { ; ENABLED-NEXT: ld.param.u64 %rd1, [f_param_0]; ; ENABLED-NEXT: ld.v2.u32 {%r1, %r2}, [%rd1]; ; ENABLED-NEXT: add.s32 %r3, %r1, %r2; -; ENABLED-NEXT: st.param.b32 [func_retval0+0], %r3; +; ENABLED-NEXT: st.param.b32 [func_retval0], %r3; ; ENABLED-NEXT: ret; ; ; DISABLED-LABEL: f( @@ -32,7 +32,7 @@ define i32 @f(ptr %p) { ; DISABLED-NEXT: ld.u32 %r1, [%rd1]; ; DISABLED-NEXT: ld.u32 %r2, [%rd1+4]; ; DISABLED-NEXT: add.s32 %r3, %r1, %r2; -; DISABLED-NEXT: st.param.b32 [func_retval0+0], %r3; +; DISABLED-NEXT: st.param.b32 [func_retval0], %r3; ; DISABLED-NEXT: ret; %p.1 = getelementptr i32, ptr %p, i32 1 %v0 = load i32, ptr %p, align 8 @@ -68,7 +68,7 @@ define half @fh(ptr %p) { ; ENABLED-NEXT: cvt.f32.f16 %f11, %rs5; ; ENABLED-NEXT: add.rn.f32 %f12, %f10, %f11; ; ENABLED-NEXT: cvt.rn.f16.f32 %rs9, %f12; -; ENABLED-NEXT: st.param.b16 [func_retval0+0], %rs9; +; ENABLED-NEXT: st.param.b16 [func_retval0], %rs9; ; ENABLED-NEXT: ret; ; ; DISABLED-LABEL: fh( @@ -100,7 +100,7 @@ define half @fh(ptr %p) { ; DISABLED-NEXT: cvt.f32.f16 %f11, %rs5; ; DISABLED-NEXT: add.rn.f32 %f12, %f10, %f11; ; DISABLED-NEXT: cvt.rn.f16.f32 %rs9, %f12; -; DISABLED-NEXT: st.param.b16 [func_retval0+0], %rs9; +; DISABLED-NEXT: st.param.b16 [func_retval0], %rs9; ; DISABLED-NEXT: ret; %p.1 = getelementptr half, ptr %p, i32 1 %p.2 = getelementptr half, ptr %p, i32 2 @@ -132,7 +132,7 @@ define float @ff(ptr %p) { ; ENABLED-NEXT: add.rn.f32 %f7, %f3, %f4; ; ENABLED-NEXT: add.rn.f32 %f8, %f6, %f7; ; ENABLED-NEXT: add.rn.f32 %f9, %f8, %f5; -; ENABLED-NEXT: st.param.f32 [func_retval0+0], %f9; +; ENABLED-NEXT: st.param.f32 [func_retval0], %f9; ; ENABLED-NEXT: ret; ; ; DISABLED-LABEL: ff( @@ -151,7 +151,7 @@ define float @ff(ptr %p) { ; DISABLED-NEXT: add.rn.f32 %f7, %f3, %f4; ; DISABLED-NEXT: add.rn.f32 %f8, %f6, %f7; ; DISABLED-NEXT: add.rn.f32 %f9, %f8, %f5; -; DISABLED-NEXT: st.param.f32 [func_retval0+0], %f9; +; DISABLED-NEXT: st.param.f32 [func_retval0], %f9; ; DISABLED-NEXT: ret; %p.1 = getelementptr float, ptr %p, i32 1 %p.2 = getelementptr float, ptr %p, i32 2 diff --git a/llvm/test/CodeGen/NVPTX/activemask.ll b/llvm/test/CodeGen/NVPTX/activemask.ll index 1496b2ebdd4427..e1d169d17c60e9 100644 --- a/llvm/test/CodeGen/NVPTX/activemask.ll +++ b/llvm/test/CodeGen/NVPTX/activemask.ll @@ -6,7 +6,7 @@ declare i32 @llvm.nvvm.activemask() ; CHECK-LABEL: activemask( ; ; CHECK: activemask.b32 %[[REG:.+]]; -; CHECK-NEXT: st.param.b32 [func_retval0+0], %[[REG]]; +; CHECK-NEXT: st.param.b32 [func_retval0], %[[REG]]; 
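;
; All of the NVPTX check updates in this patch follow one pattern (a reading
; of the diff, not a full statement of the PTX printer's rules): a
; parameter-space address whose offset is zero is now printed without the
; redundant "+0". Both spellings name the same location:
;   st.param.b32 [func_retval0+0], %r3;   (old form)
;   st.param.b32 [func_retval0], %r3;     (new form)
; Non-zero offsets such as [retval0+4] or [func_retval0+16] are unchanged.
;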
; CHECK-NEXT: ret; define dso_local i32 @activemask() { entry: @@ -18,7 +18,7 @@ entry: ; ; CHECK: activemask.b32 %[[REG:.+]]; ; CHECK: activemask.b32 %[[REG]]; -; CHECK: .param.b32 [func_retval0+0], %[[REG]]; +; CHECK: .param.b32 [func_retval0], %[[REG]]; ; CHECK-NEXT: ret; define dso_local i32 @convergent(i1 %cond) { entry: diff --git a/llvm/test/CodeGen/NVPTX/addr-mode.ll b/llvm/test/CodeGen/NVPTX/addr-mode.ll index a6a085c0e2e33e..ca2a74f7e54a3e 100644 --- a/llvm/test/CodeGen/NVPTX/addr-mode.ll +++ b/llvm/test/CodeGen/NVPTX/addr-mode.ll @@ -12,7 +12,7 @@ define i32 @test_addr_mode_i64(ptr %x) { ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.u64 %rd1, [test_addr_mode_i64_param_0]; ; CHECK-NEXT: ld.u32 %r1, [%rd1+-4]; -; CHECK-NEXT: st.param.b32 [func_retval0+0], %r1; +; CHECK-NEXT: st.param.b32 [func_retval0], %r1; ; CHECK-NEXT: ret; %addr = getelementptr i32, ptr %x, i64 -1 %res = load i32, ptr %addr @@ -28,7 +28,7 @@ define i32 @test_addr_mode_i32(ptr %x) { ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.u64 %rd1, [test_addr_mode_i32_param_0]; ; CHECK-NEXT: ld.u32 %r1, [%rd1+-4]; -; CHECK-NEXT: st.param.b32 [func_retval0+0], %r1; +; CHECK-NEXT: st.param.b32 [func_retval0], %r1; ; CHECK-NEXT: ret; %addr = getelementptr i32, ptr %x, i32 -1 %res = load i32, ptr %addr @@ -44,7 +44,7 @@ define i32 @test_addr_mode_i16(ptr %x) { ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.u64 %rd1, [test_addr_mode_i16_param_0]; ; CHECK-NEXT: ld.u32 %r1, [%rd1+-4]; -; CHECK-NEXT: st.param.b32 [func_retval0+0], %r1; +; CHECK-NEXT: st.param.b32 [func_retval0], %r1; ; CHECK-NEXT: ret; %addr = getelementptr i32, ptr %x, i16 -1 %res = load i32, ptr %addr @@ -60,7 +60,7 @@ define i32 @test_addr_mode_i8(ptr %x) { ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.u64 %rd1, [test_addr_mode_i8_param_0]; ; CHECK-NEXT: ld.u32 %r1, [%rd1+-4]; -; CHECK-NEXT: st.param.b32 [func_retval0+0], %r1; +; CHECK-NEXT: st.param.b32 [func_retval0], %r1; ; CHECK-NEXT: ret; %addr = getelementptr i32, ptr %x, i8 -1 %res = load i32, ptr %addr @@ -77,7 +77,7 @@ define i32 @test_addr_mode_i64_large(ptr %x) { ; CHECK-NEXT: ld.param.u64 %rd1, [test_addr_mode_i64_large_param_0]; ; CHECK-NEXT: add.s64 %rd2, %rd1, 17179869172; ; CHECK-NEXT: ld.u32 %r1, [%rd2]; -; CHECK-NEXT: st.param.b32 [func_retval0+0], %r1; +; CHECK-NEXT: st.param.b32 [func_retval0], %r1; ; CHECK-NEXT: ret; %addr = getelementptr i32, ptr %x, i64 4294967293 %res = load i32, ptr %addr diff --git a/llvm/test/CodeGen/NVPTX/aggregate-return.ll b/llvm/test/CodeGen/NVPTX/aggregate-return.ll index 5983d71e065dd4..4bda8049b267b9 100644 --- a/llvm/test/CodeGen/NVPTX/aggregate-return.ll +++ b/llvm/test/CodeGen/NVPTX/aggregate-return.ll @@ -10,7 +10,7 @@ define void @test_v2f32(<2 x float> %input, ptr %output) { ; CHECK-LABEL: @test_v2f32 %call = tail call <2 x float> @barv(<2 x float> %input) ; CHECK: .param .align 8 .b8 retval0[8]; -; CHECK: ld.param.v2.f32 {[[E0:%f[0-9]+]], [[E1:%f[0-9]+]]}, [retval0+0]; +; CHECK: ld.param.v2.f32 {[[E0:%f[0-9]+]], [[E1:%f[0-9]+]]}, [retval0]; store <2 x float> %call, ptr %output, align 8 ; CHECK: st.v2.f32 [{{%rd[0-9]+}}], {[[E0]], [[E1]]} ret void @@ -21,7 +21,7 @@ define void @test_v3f32(<3 x float> %input, ptr %output) { ; %call = tail call <3 x float> @barv3(<3 x float> %input) ; CHECK: .param .align 16 .b8 retval0[16]; -; CHECK-DAG: ld.param.v2.f32 {[[E0:%f[0-9]+]], [[E1:%f[0-9]+]]}, [retval0+0]; +; CHECK-DAG: ld.param.v2.f32 {[[E0:%f[0-9]+]], [[E1:%f[0-9]+]]}, [retval0]; ; CHECK-DAG: ld.param.f32 [[E2:%f[0-9]+]], [retval0+8]; ; Make sure we don't 
load more values than we need to. ; CHECK-NOT: ld.param.f32 [[E3:%f[0-9]+]], [retval0+12]; @@ -38,7 +38,7 @@ define void @test_a2f32([2 x float] %input, ptr %output) { ; CHECK-LABEL: @test_a2f32 %call = tail call [2 x float] @bara([2 x float] %input) ; CHECK: .param .align 4 .b8 retval0[8]; -; CHECK-DAG: ld.param.f32 [[ELEMA1:%f[0-9]+]], [retval0+0]; +; CHECK-DAG: ld.param.f32 [[ELEMA1:%f[0-9]+]], [retval0]; ; CHECK-DAG: ld.param.f32 [[ELEMA2:%f[0-9]+]], [retval0+4]; store [2 x float] %call, ptr %output, align 4 ; CHECK: } @@ -52,7 +52,7 @@ define void @test_s2f32({float, float} %input, ptr %output) { ; CHECK-LABEL: @test_s2f32 %call = tail call {float, float} @bars({float, float} %input) ; CHECK: .param .align 4 .b8 retval0[8]; -; CHECK-DAG: ld.param.f32 [[ELEMS1:%f[0-9]+]], [retval0+0]; +; CHECK-DAG: ld.param.f32 [[ELEMS1:%f[0-9]+]], [retval0]; ; CHECK-DAG: ld.param.f32 [[ELEMS2:%f[0-9]+]], [retval0+4]; store {float, float} %call, ptr %output, align 4 ; CHECK: } diff --git a/llvm/test/CodeGen/NVPTX/bf16-instructions.ll b/llvm/test/CodeGen/NVPTX/bf16-instructions.ll index 95bca39c73ad73..80815b3ca37c05 100644 --- a/llvm/test/CodeGen/NVPTX/bf16-instructions.ll +++ b/llvm/test/CodeGen/NVPTX/bf16-instructions.ll @@ -37,7 +37,7 @@ define bfloat @test_fadd(bfloat %0, bfloat %1) { ; SM70-NEXT: or.b32 %r9, %r5, 4194304; ; SM70-NEXT: selp.b32 %r10, %r9, %r8, %p1; ; SM70-NEXT: { .reg .b16 tmp; mov.b32 {tmp, %rs1}, %r10; } -; SM70-NEXT: st.param.b16 [func_retval0+0], %rs1; +; SM70-NEXT: st.param.b16 [func_retval0], %rs1; ; SM70-NEXT: ret; ; ; SM80-LABEL: test_fadd( @@ -52,7 +52,7 @@ define bfloat @test_fadd(bfloat %0, bfloat %1) { ; SM80-NEXT: cvt.f32.bf16 %f2, %rs1; ; SM80-NEXT: add.rn.f32 %f3, %f2, %f1; ; SM80-NEXT: cvt.rn.bf16.f32 %rs3, %f3; -; SM80-NEXT: st.param.b16 [func_retval0+0], %rs3; +; SM80-NEXT: st.param.b16 [func_retval0], %rs3; ; SM80-NEXT: ret; ; ; SM80-FTZ-LABEL: test_fadd( @@ -67,7 +67,7 @@ define bfloat @test_fadd(bfloat %0, bfloat %1) { ; SM80-FTZ-NEXT: cvt.ftz.f32.bf16 %f2, %rs1; ; SM80-FTZ-NEXT: add.rn.ftz.f32 %f3, %f2, %f1; ; SM80-FTZ-NEXT: cvt.rn.bf16.f32 %rs3, %f3; -; SM80-FTZ-NEXT: st.param.b16 [func_retval0+0], %rs3; +; SM80-FTZ-NEXT: st.param.b16 [func_retval0], %rs3; ; SM80-FTZ-NEXT: ret; ; ; SM90-LABEL: test_fadd( @@ -78,7 +78,7 @@ define bfloat @test_fadd(bfloat %0, bfloat %1) { ; SM90-NEXT: ld.param.b16 %rs1, [test_fadd_param_0]; ; SM90-NEXT: ld.param.b16 %rs2, [test_fadd_param_1]; ; SM90-NEXT: add.rn.bf16 %rs3, %rs1, %rs2; -; SM90-NEXT: st.param.b16 [func_retval0+0], %rs3; +; SM90-NEXT: st.param.b16 [func_retval0], %rs3; ; SM90-NEXT: ret; %3 = fadd bfloat %0, %1 ret bfloat %3 @@ -108,7 +108,7 @@ define bfloat @test_fsub(bfloat %0, bfloat %1) { ; SM70-NEXT: or.b32 %r9, %r5, 4194304; ; SM70-NEXT: selp.b32 %r10, %r9, %r8, %p1; ; SM70-NEXT: { .reg .b16 tmp; mov.b32 {tmp, %rs1}, %r10; } -; SM70-NEXT: st.param.b16 [func_retval0+0], %rs1; +; SM70-NEXT: st.param.b16 [func_retval0], %rs1; ; SM70-NEXT: ret; ; ; SM80-LABEL: test_fsub( @@ -123,7 +123,7 @@ define bfloat @test_fsub(bfloat %0, bfloat %1) { ; SM80-NEXT: cvt.f32.bf16 %f2, %rs1; ; SM80-NEXT: sub.rn.f32 %f3, %f2, %f1; ; SM80-NEXT: cvt.rn.bf16.f32 %rs3, %f3; -; 
SM80-FTZ-NEXT: st.param.b16 [func_retval0+0], %rs3; +; SM80-FTZ-NEXT: st.param.b16 [func_retval0], %rs3; ; SM80-FTZ-NEXT: ret; ; ; SM90-LABEL: test_fsub( @@ -149,7 +149,7 @@ define bfloat @test_fsub(bfloat %0, bfloat %1) { ; SM90-NEXT: ld.param.b16 %rs1, [test_fsub_param_0]; ; SM90-NEXT: ld.param.b16 %rs2, [test_fsub_param_1]; ; SM90-NEXT: sub.rn.bf16 %rs3, %rs1, %rs2; -; SM90-NEXT: st.param.b16 [func_retval0+0], %rs3; +; SM90-NEXT: st.param.b16 [func_retval0], %rs3; ; SM90-NEXT: ret; %3 = fsub bfloat %0, %1 ret bfloat %3 @@ -199,7 +199,7 @@ define <2 x bfloat> @test_faddx2(<2 x bfloat> %a, <2 x bfloat> %b) #0 { ; SM70-NEXT: selp.b32 %r22, %r21, %r20, %p2; ; SM70-NEXT: { .reg .b16 tmp; mov.b32 {tmp, %rs11}, %r22; } ; SM70-NEXT: mov.b32 %r23, {%rs11, %rs7}; -; SM70-NEXT: st.param.b32 [func_retval0+0], %r23; +; SM70-NEXT: st.param.b32 [func_retval0], %r23; ; SM70-NEXT: ret; ; ; SM80-LABEL: test_faddx2( @@ -222,7 +222,7 @@ define <2 x bfloat> @test_faddx2(<2 x bfloat> %a, <2 x bfloat> %b) #0 { ; SM80-NEXT: add.rn.f32 %f6, %f5, %f4; ; SM80-NEXT: cvt.rn.bf16.f32 %rs6, %f6; ; SM80-NEXT: mov.b32 %r3, {%rs6, %rs5}; -; SM80-NEXT: st.param.b32 [func_retval0+0], %r3; +; SM80-NEXT: st.param.b32 [func_retval0], %r3; ; SM80-NEXT: ret; ; ; SM80-FTZ-LABEL: test_faddx2( @@ -245,7 +245,7 @@ define <2 x bfloat> @test_faddx2(<2 x bfloat> %a, <2 x bfloat> %b) #0 { ; SM80-FTZ-NEXT: add.rn.ftz.f32 %f6, %f5, %f4; ; SM80-FTZ-NEXT: cvt.rn.bf16.f32 %rs6, %f6; ; SM80-FTZ-NEXT: mov.b32 %r3, {%rs6, %rs5}; -; SM80-FTZ-NEXT: st.param.b32 [func_retval0+0], %r3; +; SM80-FTZ-NEXT: st.param.b32 [func_retval0], %r3; ; SM80-FTZ-NEXT: ret; ; ; SM90-LABEL: test_faddx2( @@ -256,7 +256,7 @@ define <2 x bfloat> @test_faddx2(<2 x bfloat> %a, <2 x bfloat> %b) #0 { ; SM90-NEXT: ld.param.b32 %r1, [test_faddx2_param_1]; ; SM90-NEXT: ld.param.b32 %r2, [test_faddx2_param_0]; ; SM90-NEXT: add.rn.bf16x2 %r3, %r2, %r1; -; SM90-NEXT: st.param.b32 [func_retval0+0], %r3; +; SM90-NEXT: st.param.b32 [func_retval0], %r3; ; SM90-NEXT: ret; %r = fadd <2 x bfloat> %a, %b ret <2 x bfloat> %r @@ -306,7 +306,7 @@ define <2 x bfloat> @test_fsubx2(<2 x bfloat> %a, <2 x bfloat> %b) #0 { ; SM70-NEXT: selp.b32 %r22, %r21, %r20, %p2; ; SM70-NEXT: { .reg .b16 tmp; mov.b32 {tmp, %rs11}, %r22; } ; SM70-NEXT: mov.b32 %r23, {%rs11, %rs7}; -; SM70-NEXT: st.param.b32 [func_retval0+0], %r23; +; SM70-NEXT: st.param.b32 [func_retval0], %r23; ; SM70-NEXT: ret; ; ; SM80-LABEL: test_fsubx2( @@ -329,7 +329,7 @@ define <2 x bfloat> @test_fsubx2(<2 x bfloat> %a, <2 x bfloat> %b) #0 { ; SM80-NEXT: sub.rn.f32 %f6, %f5, %f4; ; SM80-NEXT: cvt.rn.bf16.f32 %rs6, %f6; ; SM80-NEXT: mov.b32 %r3, {%rs6, %rs5}; -; SM80-NEXT: st.param.b32 [func_retval0+0], %r3; +; SM80-NEXT: st.param.b32 [func_retval0], %r3; ; SM80-NEXT: ret; ; ; SM80-FTZ-LABEL: test_fsubx2( @@ -352,7 +352,7 @@ define <2 x bfloat> @test_fsubx2(<2 x bfloat> %a, <2 x bfloat> %b) #0 { ; SM80-FTZ-NEXT: sub.rn.ftz.f32 %f6, %f5, %f4; ; SM80-FTZ-NEXT: cvt.rn.bf16.f32 %rs6, %f6; ; SM80-FTZ-NEXT: mov.b32 %r3, {%rs6, %rs5}; -; SM80-FTZ-NEXT: st.param.b32 [func_retval0+0], %r3; +; SM80-FTZ-NEXT: st.param.b32 [func_retval0], %r3; ; SM80-FTZ-NEXT: ret; ; ; SM90-LABEL: test_fsubx2( @@ -363,7 +363,7 @@ define <2 x bfloat> @test_fsubx2(<2 x bfloat> %a, <2 x bfloat> %b) #0 { ; SM90-NEXT: ld.param.b32 %r1, [test_fsubx2_param_1]; ; SM90-NEXT: ld.param.b32 %r2, [test_fsubx2_param_0]; ; SM90-NEXT: sub.rn.bf16x2 %r3, %r2, %r1; -; SM90-NEXT: st.param.b32 [func_retval0+0], %r3; +; SM90-NEXT: st.param.b32 [func_retval0], %r3; ; SM90-NEXT: ret; 
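;
; The prefixes above run the same IR at different compute capabilities.
; Roughly, as these checks suggest (a sketch, not a complete description of
; the lowering rules):
;   SM90:       native packed bf16 math, e.g. fsub <2 x bfloat> selects to
;               sub.rn.bf16x2 %r3, %r2, %r1;
;   SM80(-FTZ): unpack to scalars, widen with cvt.f32.bf16 (cvt.ftz.* in the
;               FTZ configuration), operate in f32, and round back with
;               cvt.rn.bf16.f32.
;   SM70:       no bf16 conversion instructions, so the truncation is
;               emulated on the f32 bit pattern with integer or/selp
;               sequences (4194304 is the f32 quiet-NaN bit).
;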
%r = fsub <2 x bfloat> %a, %b ret <2 x bfloat> %r @@ -413,7 +413,7 @@ define <2 x bfloat> @test_fmulx2(<2 x bfloat> %a, <2 x bfloat> %b) #0 { ; SM70-NEXT: selp.b32 %r22, %r21, %r20, %p2; ; SM70-NEXT: { .reg .b16 tmp; mov.b32 {tmp, %rs11}, %r22; } ; SM70-NEXT: mov.b32 %r23, {%rs11, %rs7}; -; SM70-NEXT: st.param.b32 [func_retval0+0], %r23; +; SM70-NEXT: st.param.b32 [func_retval0], %r23; ; SM70-NEXT: ret; ; ; SM80-LABEL: test_fmulx2( @@ -436,7 +436,7 @@ define <2 x bfloat> @test_fmulx2(<2 x bfloat> %a, <2 x bfloat> %b) #0 { ; SM80-NEXT: mul.rn.f32 %f6, %f5, %f4; ; SM80-NEXT: cvt.rn.bf16.f32 %rs6, %f6; ; SM80-NEXT: mov.b32 %r3, {%rs6, %rs5}; -; SM80-NEXT: st.param.b32 [func_retval0+0], %r3; +; SM80-NEXT: st.param.b32 [func_retval0], %r3; ; SM80-NEXT: ret; ; ; SM80-FTZ-LABEL: test_fmulx2( @@ -459,7 +459,7 @@ define <2 x bfloat> @test_fmulx2(<2 x bfloat> %a, <2 x bfloat> %b) #0 { ; SM80-FTZ-NEXT: mul.rn.ftz.f32 %f6, %f5, %f4; ; SM80-FTZ-NEXT: cvt.rn.bf16.f32 %rs6, %f6; ; SM80-FTZ-NEXT: mov.b32 %r3, {%rs6, %rs5}; -; SM80-FTZ-NEXT: st.param.b32 [func_retval0+0], %r3; +; SM80-FTZ-NEXT: st.param.b32 [func_retval0], %r3; ; SM80-FTZ-NEXT: ret; ; ; SM90-LABEL: test_fmulx2( @@ -470,7 +470,7 @@ define <2 x bfloat> @test_fmulx2(<2 x bfloat> %a, <2 x bfloat> %b) #0 { ; SM90-NEXT: ld.param.b32 %r1, [test_fmulx2_param_1]; ; SM90-NEXT: ld.param.b32 %r2, [test_fmulx2_param_0]; ; SM90-NEXT: mul.rn.bf16x2 %r3, %r2, %r1; -; SM90-NEXT: st.param.b32 [func_retval0+0], %r3; +; SM90-NEXT: st.param.b32 [func_retval0], %r3; ; SM90-NEXT: ret; %r = fmul <2 x bfloat> %a, %b ret <2 x bfloat> %r @@ -520,7 +520,7 @@ define <2 x bfloat> @test_fdiv(<2 x bfloat> %a, <2 x bfloat> %b) #0 { ; SM70-NEXT: selp.b32 %r22, %r21, %r20, %p2; ; SM70-NEXT: { .reg .b16 tmp; mov.b32 {tmp, %rs11}, %r22; } ; SM70-NEXT: mov.b32 %r23, {%rs11, %rs7}; -; SM70-NEXT: st.param.b32 [func_retval0+0], %r23; +; SM70-NEXT: st.param.b32 [func_retval0], %r23; ; SM70-NEXT: ret; ; ; SM80-LABEL: test_fdiv( @@ -543,7 +543,7 @@ define <2 x bfloat> @test_fdiv(<2 x bfloat> %a, <2 x bfloat> %b) #0 { ; SM80-NEXT: div.rn.f32 %f6, %f5, %f4; ; SM80-NEXT: cvt.rn.bf16.f32 %rs6, %f6; ; SM80-NEXT: mov.b32 %r3, {%rs6, %rs5}; -; SM80-NEXT: st.param.b32 [func_retval0+0], %r3; +; SM80-NEXT: st.param.b32 [func_retval0], %r3; ; SM80-NEXT: ret; ; ; SM80-FTZ-LABEL: test_fdiv( @@ -566,7 +566,7 @@ define <2 x bfloat> @test_fdiv(<2 x bfloat> %a, <2 x bfloat> %b) #0 { ; SM80-FTZ-NEXT: div.rn.ftz.f32 %f6, %f5, %f4; ; SM80-FTZ-NEXT: cvt.rn.bf16.f32 %rs6, %f6; ; SM80-FTZ-NEXT: mov.b32 %r3, {%rs6, %rs5}; -; SM80-FTZ-NEXT: st.param.b32 [func_retval0+0], %r3; +; SM80-FTZ-NEXT: st.param.b32 [func_retval0], %r3; ; SM80-FTZ-NEXT: ret; ; ; SM90-LABEL: test_fdiv( @@ -589,7 +589,7 @@ define <2 x bfloat> @test_fdiv(<2 x bfloat> %a, <2 x bfloat> %b) #0 { ; SM90-NEXT: div.rn.f32 %f6, %f5, %f4; ; SM90-NEXT: cvt.rn.bf16.f32 %rs6, %f6; ; SM90-NEXT: mov.b32 %r3, {%rs6, %rs5}; -; SM90-NEXT: st.param.b32 [func_retval0+0], %r3; +; SM90-NEXT: st.param.b32 [func_retval0], %r3; ; SM90-NEXT: ret; %r = fdiv <2 x bfloat> %a, %b ret <2 x bfloat> %r @@ -602,7 +602,7 @@ define bfloat @test_extract_0(<2 x bfloat> %a) #0 { ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b16 %rs1, [test_extract_0_param_0]; -; CHECK-NEXT: st.param.b16 [func_retval0+0], %rs1; +; CHECK-NEXT: st.param.b16 [func_retval0], %rs1; ; CHECK-NEXT: ret; %e = extractelement <2 x bfloat> %a, i32 0 ret bfloat %e @@ -615,7 +615,7 @@ define bfloat @test_extract_1(<2 x bfloat> %a) #0 { ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: 
ld.param.b16 %rs1, [test_extract_1_param_0+2]; -; CHECK-NEXT: st.param.b16 [func_retval0+0], %rs1; +; CHECK-NEXT: st.param.b16 [func_retval0], %rs1; ; CHECK-NEXT: ret; %e = extractelement <2 x bfloat> %a, i32 1 ret bfloat %e @@ -631,7 +631,7 @@ define float @test_fpext_float(bfloat %a) #0 { ; SM70-NEXT: ld.param.u16 %r1, [test_fpext_float_param_0]; ; SM70-NEXT: shl.b32 %r2, %r1, 16; ; SM70-NEXT: mov.b32 %f1, %r2; -; SM70-NEXT: st.param.f32 [func_retval0+0], %f1; +; SM70-NEXT: st.param.f32 [func_retval0], %f1; ; SM70-NEXT: ret; ; ; SM80-LABEL: test_fpext_float( @@ -642,7 +642,7 @@ define float @test_fpext_float(bfloat %a) #0 { ; SM80-NEXT: // %bb.0: ; SM80-NEXT: ld.param.b16 %rs1, [test_fpext_float_param_0]; ; SM80-NEXT: cvt.f32.bf16 %f1, %rs1; -; SM80-NEXT: st.param.f32 [func_retval0+0], %f1; +; SM80-NEXT: st.param.f32 [func_retval0], %f1; ; SM80-NEXT: ret; ; ; SM80-FTZ-LABEL: test_fpext_float( @@ -653,7 +653,7 @@ define float @test_fpext_float(bfloat %a) #0 { ; SM80-FTZ-NEXT: // %bb.0: ; SM80-FTZ-NEXT: ld.param.b16 %rs1, [test_fpext_float_param_0]; ; SM80-FTZ-NEXT: cvt.ftz.f32.bf16 %f1, %rs1; -; SM80-FTZ-NEXT: st.param.f32 [func_retval0+0], %f1; +; SM80-FTZ-NEXT: st.param.f32 [func_retval0], %f1; ; SM80-FTZ-NEXT: ret; ; ; SM90-LABEL: test_fpext_float( @@ -664,7 +664,7 @@ define float @test_fpext_float(bfloat %a) #0 { ; SM90-NEXT: // %bb.0: ; SM90-NEXT: ld.param.b16 %rs1, [test_fpext_float_param_0]; ; SM90-NEXT: cvt.f32.bf16 %f1, %rs1; -; SM90-NEXT: st.param.f32 [func_retval0+0], %f1; +; SM90-NEXT: st.param.f32 [func_retval0], %f1; ; SM90-NEXT: ret; %r = fpext bfloat %a to float ret float %r @@ -688,7 +688,7 @@ define bfloat @test_fptrunc_float(float %a) #0 { ; SM70-NEXT: or.b32 %r5, %r1, 4194304; ; SM70-NEXT: selp.b32 %r6, %r5, %r4, %p1; ; SM70-NEXT: { .reg .b16 tmp; mov.b32 {tmp, %rs1}, %r6; } -; SM70-NEXT: st.param.b16 [func_retval0+0], %rs1; +; SM70-NEXT: st.param.b16 [func_retval0], %rs1; ; SM70-NEXT: ret; ; ; SM80-LABEL: test_fptrunc_float( @@ -699,7 +699,7 @@ define bfloat @test_fptrunc_float(float %a) #0 { ; SM80-NEXT: // %bb.0: ; SM80-NEXT: ld.param.f32 %f1, [test_fptrunc_float_param_0]; ; SM80-NEXT: cvt.rn.bf16.f32 %rs1, %f1; -; SM80-NEXT: st.param.b16 [func_retval0+0], %rs1; +; SM80-NEXT: st.param.b16 [func_retval0], %rs1; ; SM80-NEXT: ret; ; ; SM80-FTZ-LABEL: test_fptrunc_float( @@ -710,7 +710,7 @@ define bfloat @test_fptrunc_float(float %a) #0 { ; SM80-FTZ-NEXT: // %bb.0: ; SM80-FTZ-NEXT: ld.param.f32 %f1, [test_fptrunc_float_param_0]; ; SM80-FTZ-NEXT: cvt.rn.bf16.f32 %rs1, %f1; -; SM80-FTZ-NEXT: st.param.b16 [func_retval0+0], %rs1; +; SM80-FTZ-NEXT: st.param.b16 [func_retval0], %rs1; ; SM80-FTZ-NEXT: ret; ; ; SM90-LABEL: test_fptrunc_float( @@ -721,7 +721,7 @@ define bfloat @test_fptrunc_float(float %a) #0 { ; SM90-NEXT: // %bb.0: ; SM90-NEXT: ld.param.f32 %f1, [test_fptrunc_float_param_0]; ; SM90-NEXT: cvt.rn.bf16.f32 %rs1, %f1; -; SM90-NEXT: st.param.b16 [func_retval0+0], %rs1; +; SM90-NEXT: st.param.b16 [func_retval0], %rs1; ; SM90-NEXT: ret; %r = fptrunc float %a to bfloat ret bfloat %r @@ -748,7 +748,7 @@ define bfloat @test_fadd_imm_1(bfloat %a) #0 { ; SM70-NEXT: or.b32 %r7, %r3, 4194304; ; SM70-NEXT: selp.b32 %r8, %r7, %r6, %p1; ; SM70-NEXT: { .reg .b16 tmp; mov.b32 {tmp, %rs1}, %r8; } -; SM70-NEXT: st.param.b16 [func_retval0+0], %rs1; +; SM70-NEXT: st.param.b16 [func_retval0], %rs1; ; SM70-NEXT: ret; ; ; SM80-LABEL: test_fadd_imm_1( @@ -761,7 +761,7 @@ define bfloat @test_fadd_imm_1(bfloat %a) #0 { ; SM80-NEXT: cvt.f32.bf16 %f1, %rs1; ; SM80-NEXT: add.rn.f32 %f2, %f1, 
0f3F800000; ; SM80-NEXT: cvt.rn.bf16.f32 %rs2, %f2; -; SM80-NEXT: st.param.b16 [func_retval0+0], %rs2; +; SM80-NEXT: st.param.b16 [func_retval0], %rs2; ; SM80-NEXT: ret; ; ; SM80-FTZ-LABEL: test_fadd_imm_1( @@ -774,7 +774,7 @@ define bfloat @test_fadd_imm_1(bfloat %a) #0 { ; SM80-FTZ-NEXT: cvt.ftz.f32.bf16 %f1, %rs1; ; SM80-FTZ-NEXT: add.rn.ftz.f32 %f2, %f1, 0f3F800000; ; SM80-FTZ-NEXT: cvt.rn.bf16.f32 %rs2, %f2; -; SM80-FTZ-NEXT: st.param.b16 [func_retval0+0], %rs2; +; SM80-FTZ-NEXT: st.param.b16 [func_retval0], %rs2; ; SM80-FTZ-NEXT: ret; ; ; SM90-LABEL: test_fadd_imm_1( @@ -785,7 +785,7 @@ define bfloat @test_fadd_imm_1(bfloat %a) #0 { ; SM90-NEXT: ld.param.b16 %rs1, [test_fadd_imm_1_param_0]; ; SM90-NEXT: mov.b16 %rs2, 0x3F80; ; SM90-NEXT: add.rn.bf16 %rs3, %rs1, %rs2; -; SM90-NEXT: st.param.b16 [func_retval0+0], %rs3; +; SM90-NEXT: st.param.b16 [func_retval0], %rs3; ; SM90-NEXT: ret; %r = fadd bfloat %a, 1.0 ret bfloat %r @@ -805,7 +805,7 @@ define bfloat @test_select_cc_bf16_f64(double %a, double %b, bfloat %c, bfloat % ; CHECK-NEXT: ld.param.b16 %rs1, [test_select_cc_bf16_f64_param_2]; ; CHECK-NEXT: ld.param.b16 %rs2, [test_select_cc_bf16_f64_param_3]; ; CHECK-NEXT: selp.b16 %rs3, %rs1, %rs2, %p1; -; CHECK-NEXT: st.param.b16 [func_retval0+0], %rs3; +; CHECK-NEXT: st.param.b16 [func_retval0], %rs3; ; CHECK-NEXT: ret; %cc = fcmp olt double %a, %b %r = select i1 %cc, bfloat %c, bfloat %d @@ -851,7 +851,7 @@ define <8 x float> @test_extload_bf16x8(ptr addrspace(3) noundef %arg) #0 { ; SM70-NEXT: cvt.u32.u16 %r19, %rs1; ; SM70-NEXT: shl.b32 %r20, %r19, 16; ; SM70-NEXT: mov.b32 %f8, %r20; -; SM70-NEXT: st.param.v4.f32 [func_retval0+0], {%f8, %f7, %f6, %f5}; +; SM70-NEXT: st.param.v4.f32 [func_retval0], {%f8, %f7, %f6, %f5}; ; SM70-NEXT: st.param.v4.f32 [func_retval0+16], {%f4, %f3, %f2, %f1}; ; SM70-NEXT: ret; ; @@ -877,7 +877,7 @@ define <8 x float> @test_extload_bf16x8(ptr addrspace(3) noundef %arg) #0 { ; SM80-NEXT: cvt.f32.bf16 %f6, %rs3; ; SM80-NEXT: cvt.f32.bf16 %f7, %rs2; ; SM80-NEXT: cvt.f32.bf16 %f8, %rs1; -; SM80-NEXT: st.param.v4.f32 [func_retval0+0], {%f8, %f7, %f6, %f5}; +; SM80-NEXT: st.param.v4.f32 [func_retval0], {%f8, %f7, %f6, %f5}; ; SM80-NEXT: st.param.v4.f32 [func_retval0+16], {%f4, %f3, %f2, %f1}; ; SM80-NEXT: ret; ; @@ -903,7 +903,7 @@ define <8 x float> @test_extload_bf16x8(ptr addrspace(3) noundef %arg) #0 { ; SM80-FTZ-NEXT: cvt.ftz.f32.bf16 %f6, %rs3; ; SM80-FTZ-NEXT: cvt.ftz.f32.bf16 %f7, %rs2; ; SM80-FTZ-NEXT: cvt.ftz.f32.bf16 %f8, %rs1; -; SM80-FTZ-NEXT: st.param.v4.f32 [func_retval0+0], {%f8, %f7, %f6, %f5}; +; SM80-FTZ-NEXT: st.param.v4.f32 [func_retval0], {%f8, %f7, %f6, %f5}; ; SM80-FTZ-NEXT: st.param.v4.f32 [func_retval0+16], {%f4, %f3, %f2, %f1}; ; SM80-FTZ-NEXT: ret; ; @@ -929,7 +929,7 @@ define <8 x float> @test_extload_bf16x8(ptr addrspace(3) noundef %arg) #0 { ; SM90-NEXT: cvt.f32.bf16 %f6, %rs3; ; SM90-NEXT: cvt.f32.bf16 %f7, %rs2; ; SM90-NEXT: cvt.f32.bf16 %f8, %rs1; -; SM90-NEXT: st.param.v4.f32 [func_retval0+0], {%f8, %f7, %f6, %f5}; +; SM90-NEXT: st.param.v4.f32 [func_retval0], {%f8, %f7, %f6, %f5}; ; SM90-NEXT: st.param.v4.f32 [func_retval0+16], {%f4, %f3, %f2, %f1}; ; SM90-NEXT: ret; %load = load <8 x bfloat>, ptr addrspace(3) %arg, align 16 @@ -950,7 +950,7 @@ define i16 @test_fptosi_i16(bfloat %a) { ; SM70-NEXT: mov.b32 %f1, %r2; ; SM70-NEXT: cvt.rzi.s16.f32 %rs1, %f1; ; SM70-NEXT: cvt.u32.u16 %r3, %rs1; -; SM70-NEXT: st.param.b32 [func_retval0+0], %r3; +; SM70-NEXT: st.param.b32 [func_retval0], %r3; ; SM70-NEXT: ret; ; ; SM80-LABEL: 
test_fptosi_i16( @@ -964,7 +964,7 @@ define i16 @test_fptosi_i16(bfloat %a) { ; SM80-NEXT: cvt.f32.bf16 %f1, %rs1; ; SM80-NEXT: cvt.rzi.s16.f32 %rs2, %f1; ; SM80-NEXT: cvt.u32.u16 %r1, %rs2; -; SM80-NEXT: st.param.b32 [func_retval0+0], %r1; +; SM80-NEXT: st.param.b32 [func_retval0], %r1; ; SM80-NEXT: ret; ; ; SM80-FTZ-LABEL: test_fptosi_i16( @@ -978,7 +978,7 @@ define i16 @test_fptosi_i16(bfloat %a) { ; SM80-FTZ-NEXT: cvt.ftz.f32.bf16 %f1, %rs1; ; SM80-FTZ-NEXT: cvt.rzi.ftz.s16.f32 %rs2, %f1; ; SM80-FTZ-NEXT: cvt.u32.u16 %r1, %rs2; -; SM80-FTZ-NEXT: st.param.b32 [func_retval0+0], %r1; +; SM80-FTZ-NEXT: st.param.b32 [func_retval0], %r1; ; SM80-FTZ-NEXT: ret; ; ; SM90-LABEL: test_fptosi_i16( @@ -990,7 +990,7 @@ define i16 @test_fptosi_i16(bfloat %a) { ; SM90-NEXT: ld.param.b16 %rs1, [test_fptosi_i16_param_0]; ; SM90-NEXT: cvt.rzi.s16.bf16 %rs2, %rs1; ; SM90-NEXT: cvt.u32.u16 %r1, %rs2; -; SM90-NEXT: st.param.b32 [func_retval0+0], %r1; +; SM90-NEXT: st.param.b32 [func_retval0], %r1; ; SM90-NEXT: ret; %r = fptosi bfloat %a to i16 ret i16 %r @@ -1009,7 +1009,7 @@ define i16 @test_fptoui_i16(bfloat %a) { ; SM70-NEXT: mov.b32 %f1, %r2; ; SM70-NEXT: cvt.rzi.u16.f32 %rs1, %f1; ; SM70-NEXT: cvt.u32.u16 %r3, %rs1; -; SM70-NEXT: st.param.b32 [func_retval0+0], %r3; +; SM70-NEXT: st.param.b32 [func_retval0], %r3; ; SM70-NEXT: ret; ; ; SM80-LABEL: test_fptoui_i16( @@ -1023,7 +1023,7 @@ define i16 @test_fptoui_i16(bfloat %a) { ; SM80-NEXT: cvt.f32.bf16 %f1, %rs1; ; SM80-NEXT: cvt.rzi.u16.f32 %rs2, %f1; ; SM80-NEXT: cvt.u32.u16 %r1, %rs2; -; SM80-NEXT: st.param.b32 [func_retval0+0], %r1; +; SM80-NEXT: st.param.b32 [func_retval0], %r1; ; SM80-NEXT: ret; ; ; SM80-FTZ-LABEL: test_fptoui_i16( @@ -1037,7 +1037,7 @@ define i16 @test_fptoui_i16(bfloat %a) { ; SM80-FTZ-NEXT: cvt.ftz.f32.bf16 %f1, %rs1; ; SM80-FTZ-NEXT: cvt.rzi.ftz.u16.f32 %rs2, %f1; ; SM80-FTZ-NEXT: cvt.u32.u16 %r1, %rs2; -; SM80-FTZ-NEXT: st.param.b32 [func_retval0+0], %r1; +; SM80-FTZ-NEXT: st.param.b32 [func_retval0], %r1; ; SM80-FTZ-NEXT: ret; ; ; SM90-LABEL: test_fptoui_i16( @@ -1049,7 +1049,7 @@ define i16 @test_fptoui_i16(bfloat %a) { ; SM90-NEXT: ld.param.b16 %rs1, [test_fptoui_i16_param_0]; ; SM90-NEXT: cvt.rzi.u16.bf16 %rs2, %rs1; ; SM90-NEXT: cvt.u32.u16 %r1, %rs2; -; SM90-NEXT: st.param.b32 [func_retval0+0], %r1; +; SM90-NEXT: st.param.b32 [func_retval0], %r1; ; SM90-NEXT: ret; %r = fptoui bfloat %a to i16 ret i16 %r @@ -1074,7 +1074,7 @@ define bfloat @test_sitofp_i16(i16 %a) { ; SM70-NEXT: or.b32 %r5, %r1, 4194304; ; SM70-NEXT: selp.b32 %r6, %r5, %r4, %p1; ; SM70-NEXT: { .reg .b16 tmp; mov.b32 {tmp, %rs2}, %r6; } -; SM70-NEXT: st.param.b16 [func_retval0+0], %rs2; +; SM70-NEXT: st.param.b16 [func_retval0], %rs2; ; SM70-NEXT: ret; ; ; SM80-LABEL: test_sitofp_i16( @@ -1086,7 +1086,7 @@ define bfloat @test_sitofp_i16(i16 %a) { ; SM80-NEXT: ld.param.u16 %rs1, [test_sitofp_i16_param_0]; ; SM80-NEXT: cvt.rn.f32.s16 %f1, %rs1; ; SM80-NEXT: cvt.rn.bf16.f32 %rs2, %f1; -; SM80-NEXT: st.param.b16 [func_retval0+0], %rs2; +; SM80-NEXT: st.param.b16 [func_retval0], %rs2; ; SM80-NEXT: ret; ; ; SM80-FTZ-LABEL: test_sitofp_i16( @@ -1098,7 +1098,7 @@ define bfloat @test_sitofp_i16(i16 %a) { ; SM80-FTZ-NEXT: ld.param.u16 %rs1, [test_sitofp_i16_param_0]; ; SM80-FTZ-NEXT: cvt.rn.f32.s16 %f1, %rs1; ; SM80-FTZ-NEXT: cvt.rn.bf16.f32 %rs2, %f1; -; SM80-FTZ-NEXT: st.param.b16 [func_retval0+0], %rs2; +; SM80-FTZ-NEXT: st.param.b16 [func_retval0], %rs2; ; SM80-FTZ-NEXT: ret; ; ; SM90-LABEL: test_sitofp_i16( @@ -1108,7 +1108,7 @@ define bfloat 
@test_sitofp_i16(i16 %a) { ; SM90-NEXT: // %bb.0: ; SM90-NEXT: ld.param.u16 %rs1, [test_sitofp_i16_param_0]; ; SM90-NEXT: cvt.rn.bf16.s16 %rs2, %rs1; -; SM90-NEXT: st.param.b16 [func_retval0+0], %rs2; +; SM90-NEXT: st.param.b16 [func_retval0], %rs2; ; SM90-NEXT: ret; %r = sitofp i16 %a to bfloat ret bfloat %r @@ -1133,7 +1133,7 @@ define bfloat @test_uitofp_i8(i8 %a) { ; SM70-NEXT: or.b32 %r5, %r1, 4194304; ; SM70-NEXT: selp.b32 %r6, %r5, %r4, %p1; ; SM70-NEXT: { .reg .b16 tmp; mov.b32 {tmp, %rs2}, %r6; } -; SM70-NEXT: st.param.b16 [func_retval0+0], %rs2; +; SM70-NEXT: st.param.b16 [func_retval0], %rs2; ; SM70-NEXT: ret; ; ; SM80-LABEL: test_uitofp_i8( @@ -1145,7 +1145,7 @@ define bfloat @test_uitofp_i8(i8 %a) { ; SM80-NEXT: ld.param.u8 %rs1, [test_uitofp_i8_param_0]; ; SM80-NEXT: cvt.rn.f32.u16 %f1, %rs1; ; SM80-NEXT: cvt.rn.bf16.f32 %rs2, %f1; -; SM80-NEXT: st.param.b16 [func_retval0+0], %rs2; +; SM80-NEXT: st.param.b16 [func_retval0], %rs2; ; SM80-NEXT: ret; ; ; SM80-FTZ-LABEL: test_uitofp_i8( @@ -1157,7 +1157,7 @@ define bfloat @test_uitofp_i8(i8 %a) { ; SM80-FTZ-NEXT: ld.param.u8 %rs1, [test_uitofp_i8_param_0]; ; SM80-FTZ-NEXT: cvt.rn.f32.u16 %f1, %rs1; ; SM80-FTZ-NEXT: cvt.rn.bf16.f32 %rs2, %f1; -; SM80-FTZ-NEXT: st.param.b16 [func_retval0+0], %rs2; +; SM80-FTZ-NEXT: st.param.b16 [func_retval0], %rs2; ; SM80-FTZ-NEXT: ret; ; ; SM90-LABEL: test_uitofp_i8( @@ -1167,7 +1167,7 @@ define bfloat @test_uitofp_i8(i8 %a) { ; SM90-NEXT: // %bb.0: ; SM90-NEXT: ld.param.u8 %rs1, [test_uitofp_i8_param_0]; ; SM90-NEXT: cvt.rn.bf16.u16 %rs2, %rs1; -; SM90-NEXT: st.param.b16 [func_retval0+0], %rs2; +; SM90-NEXT: st.param.b16 [func_retval0], %rs2; ; SM90-NEXT: ret; %r = uitofp i8 %a to bfloat ret bfloat %r @@ -1195,7 +1195,7 @@ define bfloat @test_uitofp_i1(i1 %a) { ; SM70-NEXT: or.b32 %r6, %r2, 4194304; ; SM70-NEXT: selp.b32 %r7, %r6, %r5, %p2; ; SM70-NEXT: { .reg .b16 tmp; mov.b32 {tmp, %rs3}, %r7; } -; SM70-NEXT: st.param.b16 [func_retval0+0], %rs3; +; SM70-NEXT: st.param.b16 [func_retval0], %rs3; ; SM70-NEXT: ret; ; ; SM80-LABEL: test_uitofp_i1( @@ -1212,7 +1212,7 @@ define bfloat @test_uitofp_i1(i1 %a) { ; SM80-NEXT: selp.u32 %r1, 1, 0, %p1; ; SM80-NEXT: cvt.rn.f32.u32 %f1, %r1; ; SM80-NEXT: cvt.rn.bf16.f32 %rs3, %f1; -; SM80-NEXT: st.param.b16 [func_retval0+0], %rs3; +; SM80-NEXT: st.param.b16 [func_retval0], %rs3; ; SM80-NEXT: ret; ; ; SM80-FTZ-LABEL: test_uitofp_i1( @@ -1229,7 +1229,7 @@ define bfloat @test_uitofp_i1(i1 %a) { ; SM80-FTZ-NEXT: selp.u32 %r1, 1, 0, %p1; ; SM80-FTZ-NEXT: cvt.rn.f32.u32 %f1, %r1; ; SM80-FTZ-NEXT: cvt.rn.bf16.f32 %rs3, %f1; -; SM80-FTZ-NEXT: st.param.b16 [func_retval0+0], %rs3; +; SM80-FTZ-NEXT: st.param.b16 [func_retval0], %rs3; ; SM80-FTZ-NEXT: ret; ; ; SM90-LABEL: test_uitofp_i1( @@ -1244,7 +1244,7 @@ define bfloat @test_uitofp_i1(i1 %a) { ; SM90-NEXT: setp.eq.b16 %p1, %rs2, 1; ; SM90-NEXT: selp.u32 %r1, 1, 0, %p1; ; SM90-NEXT: cvt.rn.bf16.u32 %rs3, %r1; -; SM90-NEXT: st.param.b16 [func_retval0+0], %rs3; +; SM90-NEXT: st.param.b16 [func_retval0], %rs3; ; SM90-NEXT: ret; %r = uitofp i1 %a to bfloat ret bfloat %r @@ -1269,7 +1269,7 @@ define bfloat @test_uitofp_i16(i16 %a) { ; SM70-NEXT: or.b32 %r5, %r1, 4194304; ; SM70-NEXT: selp.b32 %r6, %r5, %r4, %p1; ; SM70-NEXT: { .reg .b16 tmp; mov.b32 {tmp, %rs2}, %r6; } -; SM70-NEXT: st.param.b16 [func_retval0+0], %rs2; +; SM70-NEXT: st.param.b16 [func_retval0], %rs2; ; SM70-NEXT: ret; ; ; SM80-LABEL: test_uitofp_i16( @@ -1281,7 +1281,7 @@ define bfloat @test_uitofp_i16(i16 %a) { ; SM80-NEXT: ld.param.u16 %rs1, 
[test_uitofp_i16_param_0]; ; SM80-NEXT: cvt.rn.f32.u16 %f1, %rs1; ; SM80-NEXT: cvt.rn.bf16.f32 %rs2, %f1; -; SM80-NEXT: st.param.b16 [func_retval0+0], %rs2; +; SM80-NEXT: st.param.b16 [func_retval0], %rs2; ; SM80-NEXT: ret; ; ; SM80-FTZ-LABEL: test_uitofp_i16( @@ -1293,7 +1293,7 @@ define bfloat @test_uitofp_i16(i16 %a) { ; SM80-FTZ-NEXT: ld.param.u16 %rs1, [test_uitofp_i16_param_0]; ; SM80-FTZ-NEXT: cvt.rn.f32.u16 %f1, %rs1; ; SM80-FTZ-NEXT: cvt.rn.bf16.f32 %rs2, %f1; -; SM80-FTZ-NEXT: st.param.b16 [func_retval0+0], %rs2; +; SM80-FTZ-NEXT: st.param.b16 [func_retval0], %rs2; ; SM80-FTZ-NEXT: ret; ; ; SM90-LABEL: test_uitofp_i16( @@ -1303,7 +1303,7 @@ define bfloat @test_uitofp_i16(i16 %a) { ; SM90-NEXT: // %bb.0: ; SM90-NEXT: ld.param.u16 %rs1, [test_uitofp_i16_param_0]; ; SM90-NEXT: cvt.rn.bf16.u16 %rs2, %rs1; -; SM90-NEXT: st.param.b16 [func_retval0+0], %rs2; +; SM90-NEXT: st.param.b16 [func_retval0], %rs2; ; SM90-NEXT: ret; %r = uitofp i16 %a to bfloat ret bfloat %r @@ -1328,7 +1328,7 @@ define bfloat @test_uitofp_i32(i32 %a) { ; SM70-NEXT: or.b32 %r6, %r2, 4194304; ; SM70-NEXT: selp.b32 %r7, %r6, %r5, %p1; ; SM70-NEXT: { .reg .b16 tmp; mov.b32 {tmp, %rs1}, %r7; } -; SM70-NEXT: st.param.b16 [func_retval0+0], %rs1; +; SM70-NEXT: st.param.b16 [func_retval0], %rs1; ; SM70-NEXT: ret; ; ; SM80-LABEL: test_uitofp_i32( @@ -1341,7 +1341,7 @@ define bfloat @test_uitofp_i32(i32 %a) { ; SM80-NEXT: ld.param.u32 %r1, [test_uitofp_i32_param_0]; ; SM80-NEXT: cvt.rn.f32.u32 %f1, %r1; ; SM80-NEXT: cvt.rn.bf16.f32 %rs1, %f1; -; SM80-NEXT: st.param.b16 [func_retval0+0], %rs1; +; SM80-NEXT: st.param.b16 [func_retval0], %rs1; ; SM80-NEXT: ret; ; ; SM80-FTZ-LABEL: test_uitofp_i32( @@ -1354,7 +1354,7 @@ define bfloat @test_uitofp_i32(i32 %a) { ; SM80-FTZ-NEXT: ld.param.u32 %r1, [test_uitofp_i32_param_0]; ; SM80-FTZ-NEXT: cvt.rn.f32.u32 %f1, %r1; ; SM80-FTZ-NEXT: cvt.rn.bf16.f32 %rs1, %f1; -; SM80-FTZ-NEXT: st.param.b16 [func_retval0+0], %rs1; +; SM80-FTZ-NEXT: st.param.b16 [func_retval0], %rs1; ; SM80-FTZ-NEXT: ret; ; ; SM90-LABEL: test_uitofp_i32( @@ -1365,7 +1365,7 @@ define bfloat @test_uitofp_i32(i32 %a) { ; SM90-NEXT: // %bb.0: ; SM90-NEXT: ld.param.u32 %r1, [test_uitofp_i32_param_0]; ; SM90-NEXT: cvt.rn.bf16.u32 %rs1, %r1; -; SM90-NEXT: st.param.b16 [func_retval0+0], %rs1; +; SM90-NEXT: st.param.b16 [func_retval0], %rs1; ; SM90-NEXT: ret; %r = uitofp i32 %a to bfloat ret bfloat %r @@ -1391,7 +1391,7 @@ define bfloat @test_uitofp_i64(i64 %a) { ; SM70-NEXT: or.b32 %r5, %r1, 4194304; ; SM70-NEXT: selp.b32 %r6, %r5, %r4, %p1; ; SM70-NEXT: { .reg .b16 tmp; mov.b32 {tmp, %rs1}, %r6; } -; SM70-NEXT: st.param.b16 [func_retval0+0], %rs1; +; SM70-NEXT: st.param.b16 [func_retval0], %rs1; ; SM70-NEXT: ret; ; ; SM80-LABEL: test_uitofp_i64( @@ -1404,7 +1404,7 @@ define bfloat @test_uitofp_i64(i64 %a) { ; SM80-NEXT: ld.param.u64 %rd1, [test_uitofp_i64_param_0]; ; SM80-NEXT: cvt.rn.f32.u64 %f1, %rd1; ; SM80-NEXT: cvt.rn.bf16.f32 %rs1, %f1; -; SM80-NEXT: st.param.b16 [func_retval0+0], %rs1; +; SM80-NEXT: st.param.b16 [func_retval0], %rs1; ; SM80-NEXT: ret; ; ; SM80-FTZ-LABEL: test_uitofp_i64( @@ -1417,7 +1417,7 @@ define bfloat @test_uitofp_i64(i64 %a) { ; SM80-FTZ-NEXT: ld.param.u64 %rd1, [test_uitofp_i64_param_0]; ; SM80-FTZ-NEXT: cvt.rn.f32.u64 %f1, %rd1; ; SM80-FTZ-NEXT: cvt.rn.bf16.f32 %rs1, %f1; -; SM80-FTZ-NEXT: st.param.b16 [func_retval0+0], %rs1; +; SM80-FTZ-NEXT: st.param.b16 [func_retval0], %rs1; ; SM80-FTZ-NEXT: ret; ; ; SM90-LABEL: test_uitofp_i64( @@ -1428,7 +1428,7 @@ define bfloat @test_uitofp_i64(i64 
%a) { ; SM90-NEXT: // %bb.0: ; SM90-NEXT: ld.param.u64 %rd1, [test_uitofp_i64_param_0]; ; SM90-NEXT: cvt.rn.bf16.u64 %rs1, %rd1; -; SM90-NEXT: st.param.b16 [func_retval0+0], %rs1; +; SM90-NEXT: st.param.b16 [func_retval0], %rs1; ; SM90-NEXT: ret; %r = uitofp i64 %a to bfloat ret bfloat %r @@ -1455,7 +1455,7 @@ define bfloat @test_roundeven(bfloat %a) { ; SM70-NEXT: or.b32 %r7, %r3, 4194304; ; SM70-NEXT: selp.b32 %r8, %r7, %r6, %p1; ; SM70-NEXT: { .reg .b16 tmp; mov.b32 {tmp, %rs1}, %r8; } -; SM70-NEXT: st.param.b16 [func_retval0+0], %rs1; +; SM70-NEXT: st.param.b16 [func_retval0], %rs1; ; SM70-NEXT: ret; ; ; SM80-LABEL: test_roundeven( @@ -1468,7 +1468,7 @@ define bfloat @test_roundeven(bfloat %a) { ; SM80-NEXT: cvt.f32.bf16 %f1, %rs1; ; SM80-NEXT: cvt.rni.f32.f32 %f2, %f1; ; SM80-NEXT: cvt.rn.bf16.f32 %rs2, %f2; -; SM80-NEXT: st.param.b16 [func_retval0+0], %rs2; +; SM80-NEXT: st.param.b16 [func_retval0], %rs2; ; SM80-NEXT: ret; ; ; SM80-FTZ-LABEL: test_roundeven( @@ -1481,7 +1481,7 @@ define bfloat @test_roundeven(bfloat %a) { ; SM80-FTZ-NEXT: cvt.ftz.f32.bf16 %f1, %rs1; ; SM80-FTZ-NEXT: cvt.rni.ftz.f32.f32 %f2, %f1; ; SM80-FTZ-NEXT: cvt.rn.bf16.f32 %rs2, %f2; -; SM80-FTZ-NEXT: st.param.b16 [func_retval0+0], %rs2; +; SM80-FTZ-NEXT: st.param.b16 [func_retval0], %rs2; ; SM80-FTZ-NEXT: ret; ; ; SM90-LABEL: test_roundeven( @@ -1491,7 +1491,7 @@ define bfloat @test_roundeven(bfloat %a) { ; SM90-NEXT: // %bb.0: ; SM90-NEXT: ld.param.b16 %rs1, [test_roundeven_param_0]; ; SM90-NEXT: cvt.rni.bf16.bf16 %rs2, %rs1; -; SM90-NEXT: st.param.b16 [func_retval0+0], %rs2; +; SM90-NEXT: st.param.b16 [func_retval0], %rs2; ; SM90-NEXT: ret; %r = call bfloat @llvm.roundeven.bf16(bfloat %a) ret bfloat %r @@ -1527,7 +1527,7 @@ define bfloat @test_maximum(bfloat %a, bfloat %b) { ; SM70-NEXT: mov.b32 %f3, %r6; ; SM70-NEXT: setp.eq.f32 %p5, %f3, 0f00000000; ; SM70-NEXT: selp.b16 %rs10, %rs8, %rs6, %p5; -; SM70-NEXT: st.param.b16 [func_retval0+0], %rs10; +; SM70-NEXT: st.param.b16 [func_retval0], %rs10; ; SM70-NEXT: ret; ; ; SM80-LABEL: test_maximum( @@ -1538,7 +1538,7 @@ define bfloat @test_maximum(bfloat %a, bfloat %b) { ; SM80-NEXT: ld.param.b16 %rs1, [test_maximum_param_0]; ; SM80-NEXT: ld.param.b16 %rs2, [test_maximum_param_1]; ; SM80-NEXT: max.NaN.bf16 %rs3, %rs1, %rs2; -; SM80-NEXT: st.param.b16 [func_retval0+0], %rs3; +; SM80-NEXT: st.param.b16 [func_retval0], %rs3; ; SM80-NEXT: ret; ; ; SM80-FTZ-LABEL: test_maximum( @@ -1549,7 +1549,7 @@ define bfloat @test_maximum(bfloat %a, bfloat %b) { ; SM80-FTZ-NEXT: ld.param.b16 %rs1, [test_maximum_param_0]; ; SM80-FTZ-NEXT: ld.param.b16 %rs2, [test_maximum_param_1]; ; SM80-FTZ-NEXT: max.NaN.bf16 %rs3, %rs1, %rs2; -; SM80-FTZ-NEXT: st.param.b16 [func_retval0+0], %rs3; +; SM80-FTZ-NEXT: st.param.b16 [func_retval0], %rs3; ; SM80-FTZ-NEXT: ret; ; ; SM90-LABEL: test_maximum( @@ -1560,7 +1560,7 @@ define bfloat @test_maximum(bfloat %a, bfloat %b) { ; SM90-NEXT: ld.param.b16 %rs1, [test_maximum_param_0]; ; SM90-NEXT: ld.param.b16 %rs2, [test_maximum_param_1]; ; SM90-NEXT: max.NaN.bf16 %rs3, %rs1, %rs2; -; SM90-NEXT: st.param.b16 [func_retval0+0], %rs3; +; SM90-NEXT: st.param.b16 [func_retval0], %rs3; ; SM90-NEXT: ret; %r = call bfloat @llvm.maximum.bf16(bfloat %a, bfloat %b) ret bfloat %r @@ -1590,7 +1590,7 @@ define bfloat @test_maxnum(bfloat %a, bfloat %b) { ; SM70-NEXT: or.b32 %r9, %r5, 4194304; ; SM70-NEXT: selp.b32 %r10, %r9, %r8, %p1; ; SM70-NEXT: { .reg .b16 tmp; mov.b32 {tmp, %rs1}, %r10; } -; SM70-NEXT: st.param.b16 [func_retval0+0], %rs1; +; SM70-NEXT: 
st.param.b16 [func_retval0], %rs1; ; SM70-NEXT: ret; ; ; SM80-LABEL: test_maxnum( @@ -1601,7 +1601,7 @@ define bfloat @test_maxnum(bfloat %a, bfloat %b) { ; SM80-NEXT: ld.param.b16 %rs1, [test_maxnum_param_0]; ; SM80-NEXT: ld.param.b16 %rs2, [test_maxnum_param_1]; ; SM80-NEXT: max.bf16 %rs3, %rs1, %rs2; -; SM80-NEXT: st.param.b16 [func_retval0+0], %rs3; +; SM80-NEXT: st.param.b16 [func_retval0], %rs3; ; SM80-NEXT: ret; ; ; SM80-FTZ-LABEL: test_maxnum( @@ -1612,7 +1612,7 @@ define bfloat @test_maxnum(bfloat %a, bfloat %b) { ; SM80-FTZ-NEXT: ld.param.b16 %rs1, [test_maxnum_param_0]; ; SM80-FTZ-NEXT: ld.param.b16 %rs2, [test_maxnum_param_1]; ; SM80-FTZ-NEXT: max.bf16 %rs3, %rs1, %rs2; -; SM80-FTZ-NEXT: st.param.b16 [func_retval0+0], %rs3; +; SM80-FTZ-NEXT: st.param.b16 [func_retval0], %rs3; ; SM80-FTZ-NEXT: ret; ; ; SM90-LABEL: test_maxnum( @@ -1623,7 +1623,7 @@ define bfloat @test_maxnum(bfloat %a, bfloat %b) { ; SM90-NEXT: ld.param.b16 %rs1, [test_maxnum_param_0]; ; SM90-NEXT: ld.param.b16 %rs2, [test_maxnum_param_1]; ; SM90-NEXT: max.bf16 %rs3, %rs1, %rs2; -; SM90-NEXT: st.param.b16 [func_retval0+0], %rs3; +; SM90-NEXT: st.param.b16 [func_retval0], %rs3; ; SM90-NEXT: ret; %r = call bfloat @llvm.maxnum.bf16(bfloat %a, bfloat %b) ret bfloat %r @@ -1681,7 +1681,7 @@ define <2 x bfloat> @test_maximum_v2(<2 x bfloat> %a, <2 x bfloat> %b) { ; SM70-NEXT: setp.eq.f32 %p10, %f6, 0f00000000; ; SM70-NEXT: selp.b16 %rs20, %rs18, %rs16, %p10; ; SM70-NEXT: mov.b32 %r15, {%rs20, %rs12}; -; SM70-NEXT: st.param.b32 [func_retval0+0], %r15; +; SM70-NEXT: st.param.b32 [func_retval0], %r15; ; SM70-NEXT: ret; ; ; SM80-LABEL: test_maximum_v2( @@ -1692,7 +1692,7 @@ define <2 x bfloat> @test_maximum_v2(<2 x bfloat> %a, <2 x bfloat> %b) { ; SM80-NEXT: ld.param.b32 %r1, [test_maximum_v2_param_1]; ; SM80-NEXT: ld.param.b32 %r2, [test_maximum_v2_param_0]; ; SM80-NEXT: max.NaN.bf16x2 %r3, %r2, %r1; -; SM80-NEXT: st.param.b32 [func_retval0+0], %r3; +; SM80-NEXT: st.param.b32 [func_retval0], %r3; ; SM80-NEXT: ret; ; ; SM80-FTZ-LABEL: test_maximum_v2( @@ -1703,7 +1703,7 @@ define <2 x bfloat> @test_maximum_v2(<2 x bfloat> %a, <2 x bfloat> %b) { ; SM80-FTZ-NEXT: ld.param.b32 %r1, [test_maximum_v2_param_1]; ; SM80-FTZ-NEXT: ld.param.b32 %r2, [test_maximum_v2_param_0]; ; SM80-FTZ-NEXT: max.NaN.bf16x2 %r3, %r2, %r1; -; SM80-FTZ-NEXT: st.param.b32 [func_retval0+0], %r3; +; SM80-FTZ-NEXT: st.param.b32 [func_retval0], %r3; ; SM80-FTZ-NEXT: ret; ; ; SM90-LABEL: test_maximum_v2( @@ -1714,7 +1714,7 @@ define <2 x bfloat> @test_maximum_v2(<2 x bfloat> %a, <2 x bfloat> %b) { ; SM90-NEXT: ld.param.b32 %r1, [test_maximum_v2_param_1]; ; SM90-NEXT: ld.param.b32 %r2, [test_maximum_v2_param_0]; ; SM90-NEXT: max.NaN.bf16x2 %r3, %r2, %r1; -; SM90-NEXT: st.param.b32 [func_retval0+0], %r3; +; SM90-NEXT: st.param.b32 [func_retval0], %r3; ; SM90-NEXT: ret; %r = call <2 x bfloat> @llvm.maximum.bf16(<2 x bfloat> %a, <2 x bfloat> %b) ret <2 x bfloat> %r @@ -1764,7 +1764,7 @@ define <2 x bfloat> @test_maxnum_v2(<2 x bfloat> %a, <2 x bfloat> %b) { ; SM70-NEXT: selp.b32 %r22, %r21, %r20, %p2; ; SM70-NEXT: { .reg .b16 tmp; mov.b32 {tmp, %rs11}, %r22; } ; SM70-NEXT: mov.b32 %r23, {%rs11, %rs7}; -; SM70-NEXT: st.param.b32 [func_retval0+0], %r23; +; SM70-NEXT: st.param.b32 [func_retval0], %r23; ; SM70-NEXT: ret; ; ; SM80-LABEL: test_maxnum_v2( @@ -1775,7 +1775,7 @@ define <2 x bfloat> @test_maxnum_v2(<2 x bfloat> %a, <2 x bfloat> %b) { ; SM80-NEXT: ld.param.b32 %r1, [test_maxnum_v2_param_1]; ; SM80-NEXT: ld.param.b32 %r2, [test_maxnum_v2_param_0]; ; 
SM80-NEXT: max.bf16x2 %r3, %r2, %r1; -; SM80-NEXT: st.param.b32 [func_retval0+0], %r3; +; SM80-NEXT: st.param.b32 [func_retval0], %r3; ; SM80-NEXT: ret; ; ; SM80-FTZ-LABEL: test_maxnum_v2( @@ -1786,7 +1786,7 @@ define <2 x bfloat> @test_maxnum_v2(<2 x bfloat> %a, <2 x bfloat> %b) { ; SM80-FTZ-NEXT: ld.param.b32 %r1, [test_maxnum_v2_param_1]; ; SM80-FTZ-NEXT: ld.param.b32 %r2, [test_maxnum_v2_param_0]; ; SM80-FTZ-NEXT: max.bf16x2 %r3, %r2, %r1; -; SM80-FTZ-NEXT: st.param.b32 [func_retval0+0], %r3; +; SM80-FTZ-NEXT: st.param.b32 [func_retval0], %r3; ; SM80-FTZ-NEXT: ret; ; ; SM90-LABEL: test_maxnum_v2( @@ -1797,7 +1797,7 @@ define <2 x bfloat> @test_maxnum_v2(<2 x bfloat> %a, <2 x bfloat> %b) { ; SM90-NEXT: ld.param.b32 %r1, [test_maxnum_v2_param_1]; ; SM90-NEXT: ld.param.b32 %r2, [test_maxnum_v2_param_0]; ; SM90-NEXT: max.bf16x2 %r3, %r2, %r1; -; SM90-NEXT: st.param.b32 [func_retval0+0], %r3; +; SM90-NEXT: st.param.b32 [func_retval0], %r3; ; SM90-NEXT: ret; %r = call <2 x bfloat> @llvm.maxnum.v2bf16(<2 x bfloat> %a, <2 x bfloat> %b) ret <2 x bfloat> %r diff --git a/llvm/test/CodeGen/NVPTX/bf16x2-instructions-approx.ll b/llvm/test/CodeGen/NVPTX/bf16x2-instructions-approx.ll index f61205eb88fc24..a53c90ac6db8b6 100644 --- a/llvm/test/CodeGen/NVPTX/bf16x2-instructions-approx.ll +++ b/llvm/test/CodeGen/NVPTX/bf16x2-instructions-approx.ll @@ -16,7 +16,7 @@ declare <2 x bfloat> @llvm.cos.f16(<2 x bfloat> %a) #0 ; CHECK-DAG: cvt.rn.bf16.f32 [[R0:%rs[0-9]+]], [[RF0]]; ; CHECK-DAG: cvt.rn.bf16.f32 [[R1:%rs[0-9]+]], [[RF1]]; ; CHECK: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]} -; CHECK: st.param.b32 [func_retval0+0], [[R]]; +; CHECK: st.param.b32 [func_retval0], [[R]]; ; CHECK: ret; define <2 x bfloat> @test_sin(<2 x bfloat> %a) #0 #1 { %r = call <2 x bfloat> @llvm.sin.f16(<2 x bfloat> %a) @@ -33,7 +33,7 @@ define <2 x bfloat> @test_sin(<2 x bfloat> %a) #0 #1 { ; CHECK-DAG: cvt.rn.bf16.f32 [[R0:%rs[0-9]+]], [[RF0]]; ; CHECK-DAG: cvt.rn.bf16.f32 [[R1:%rs[0-9]+]], [[RF1]]; ; CHECK: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]} -; CHECK: st.param.b32 [func_retval0+0], [[R]]; +; CHECK: st.param.b32 [func_retval0], [[R]]; ; CHECK: ret; define <2 x bfloat> @test_cos(<2 x bfloat> %a) #0 #1 { %r = call <2 x bfloat> @llvm.cos.f16(<2 x bfloat> %a) diff --git a/llvm/test/CodeGen/NVPTX/bf16x2-instructions.ll b/llvm/test/CodeGen/NVPTX/bf16x2-instructions.ll index 8d40a9ef54dca9..925ae4245a4c20 100644 --- a/llvm/test/CodeGen/NVPTX/bf16x2-instructions.ll +++ b/llvm/test/CodeGen/NVPTX/bf16x2-instructions.ll @@ -7,7 +7,7 @@ target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" ; CHECK-LABEL: test_ret_const( ; CHECK: mov.b32 [[T:%r[0-9+]]], 1073758080; -; CHECK: st.param.b32 [func_retval0+0], [[T]]; +; CHECK: st.param.b32 [func_retval0], [[T]]; ; CHECK-NEXT: ret; define <2 x bfloat> @test_ret_const() #0 { @@ -30,7 +30,7 @@ define <2 x bfloat> @test_ret_const() #0 { ; SM80-DAG: cvt.rn.bf16.f32 [[R1:%rs[0-9]+]], [[FR1]] ; SM80-DAG: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]} ; -; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]]; +; CHECK-NEXT: st.param.b32 [func_retval0], [[R]]; ; CHECK-NEXT: ret; define <2 x bfloat> @test_fadd_imm_0(<2 x bfloat> %a) #0 { @@ -47,7 +47,7 @@ define <2 x bfloat> @test_fadd_imm_0(<2 x bfloat> %a) #0 { ; SM80: add.rn.f32 [[FR:%f[0-9]+]], [[FA]], 0f3F800000; ; SM80: cvt.rn.bf16.f32 [[R:%rs[0-9]+]], [[FR]]; -; CHECK: st.param.b16 [func_retval0+0], [[R]]; +; CHECK: st.param.b16 [func_retval0], [[R]]; ; CHECK-NEXT: ret; define bfloat @test_fadd_imm_1(bfloat %a) #0 { @@ -72,7 +72,7 @@ define bfloat 
@test_fadd_imm_1(bfloat %a) #0 { ; SM80-DAG: cvt.rn.bf16.f32 [[R1:%rs[0-9]+]], [[FR1]]; ; SM80: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]}; -; CHECK: st.param.b32 [func_retval0+0], [[R]]; +; CHECK: st.param.b32 [func_retval0], [[R]]; ; CHECK: ret; define <2 x bfloat> @test_fsubx2(<2 x bfloat> %a, <2 x bfloat> %b) #0 { @@ -97,7 +97,7 @@ define <2 x bfloat> @test_fsubx2(<2 x bfloat> %a, <2 x bfloat> %b) #0 { ; SM80-DAG: cvt.rn.bf16.f32 [[R1:%rs[0-9]+]], [[FR1]]; ; SM80: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]}; -; CHECK: st.param.b32 [func_retval0+0], [[R]]; +; CHECK: st.param.b32 [func_retval0], [[R]]; ; CHECK: ret; define <2 x bfloat> @test_fmulx2(<2 x bfloat> %a, <2 x bfloat> %b) #0 { @@ -119,7 +119,7 @@ define <2 x bfloat> @test_fmulx2(<2 x bfloat> %a, <2 x bfloat> %b) #0 { ; CHECK-DAG: cvt.rn.bf16.f32 [[R0:%rs[0-9]+]], [[FR0]]; ; CHECK-DAG: cvt.rn.bf16.f32 [[R1:%rs[0-9]+]], [[FR1]]; ; CHECK-NEXT: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]} -; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]]; +; CHECK-NEXT: st.param.b32 [func_retval0], [[R]]; ; CHECK-NEXT: ret; define <2 x bfloat> @test_fdiv(<2 x bfloat> %a, <2 x bfloat> %b) #0 { @@ -131,7 +131,7 @@ define <2 x bfloat> @test_fdiv(<2 x bfloat> %a, <2 x bfloat> %b) #0 { ; CHECK-DAG: ld.param.u32 [[A:%r[0-9]+]], [test_fneg_param_0]; ; CHECK-DAG: xor.b32 [[IHH0:%r[0-9]+]], [[A]], -2147450880; -; CHECK-NEXT: st.param.b32 [func_retval0+0], [[IHH0]]; +; CHECK-NEXT: st.param.b32 [func_retval0], [[IHH0]]; ; CHECK-NEXT: ret; define <2 x bfloat> @test_fneg(<2 x bfloat> %a) #0 { %r = fneg <2 x bfloat> %a @@ -175,15 +175,15 @@ declare <2 x bfloat> @test_callee(<2 x bfloat> %a, <2 x bfloat> %b) #0 ; CHECK: { ; CHECK-DAG: .param .align 4 .b8 param0[4]; ; CHECK-DAG: .param .align 4 .b8 param1[4]; -; CHECK-DAG: st.param.b32 [param0+0], [[A]]; -; CHECK-DAG: st.param.b32 [param1+0], [[B]]; +; CHECK-DAG: st.param.b32 [param0], [[A]]; +; CHECK-DAG: st.param.b32 [param1], [[B]]; ; CHECK-DAG: .param .align 4 .b8 retval0[4]; ; CHECK: call.uni (retval0), ; CHECK-NEXT: test_callee, ; CHECK: ); -; CHECK-NEXT: ld.param.b32 [[R:%r[0-9]+]], [retval0+0]; +; CHECK-NEXT: ld.param.b32 [[R:%r[0-9]+]], [retval0]; ; CHECK-NEXT: } -; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]]; +; CHECK-NEXT: st.param.b32 [func_retval0], [[R]]; ; CHECK-NEXT: ret; define <2 x bfloat> @test_call(<2 x bfloat> %a, <2 x bfloat> %b) #0 { @@ -197,7 +197,7 @@ define <2 x bfloat> @test_call(<2 x bfloat> %a, <2 x bfloat> %b) #0 { ; CHECK-DAG: ld.param.u8 [[C:%rs[0-9]+]], [test_select_param_2] ; CHECK-DAG: setp.eq.b16 [[PRED:%p[0-9]+]], %rs{{.*}}, 1; ; CHECK-NEXT: selp.b32 [[R:%r[0-9]+]], [[A]], [[B]], [[PRED]]; -; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]]; +; CHECK-NEXT: st.param.b32 [func_retval0], [[R]]; ; CHECK-NEXT: ret; define <2 x bfloat> @test_select(<2 x bfloat> %a, <2 x bfloat> %b, i1 zeroext %c) #0 { @@ -227,7 +227,7 @@ define <2 x bfloat> @test_select(<2 x bfloat> %a, <2 x bfloat> %b, i1 zeroext %c ; CHECK-DAG: selp.b16 [[R0:%rs[0-9]+]], [[A0]], [[B0]], [[P0]]; ; CHECK-DAG: selp.b16 [[R1:%rs[0-9]+]], [[A1]], [[B1]], [[P1]]; ; CHECK: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]} -; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]]; +; CHECK-NEXT: st.param.b32 [func_retval0], [[R]]; ; CHECK-NEXT: ret; define <2 x bfloat> @test_select_cc(<2 x bfloat> %a, <2 x bfloat> %b, <2 x bfloat> %c, <2 x bfloat> %d) #0 { @@ -255,7 +255,7 @@ define <2 x bfloat> @test_select_cc(<2 x bfloat> %a, <2 x bfloat> %b, <2 x bfloa ; ; CHECK-DAG: selp.f32 [[R0:%f[0-9]+]], [[A0]], [[B0]], [[P0]]; ; CHECK-DAG: 
selp.f32 [[R1:%f[0-9]+]], [[A1]], [[B1]], [[P1]]; -; CHECK-NEXT: st.param.v2.f32 [func_retval0+0], {[[R0]], [[R1]]}; +; CHECK-NEXT: st.param.v2.f32 [func_retval0], {[[R0]], [[R1]]}; ; CHECK-NEXT: ret; define <2 x float> @test_select_cc_f32_bf16(<2 x float> %a, <2 x float> %b, <2 x bfloat> %c, <2 x bfloat> %d) #0 { @@ -276,7 +276,7 @@ define <2 x float> @test_select_cc_f32_bf16(<2 x float> %a, <2 x float> %b, ; CHECK-DAG: selp.b16 [[R0:%rs[0-9]+]], [[A0]], [[B0]], [[P0]]; ; CHECK-DAG: selp.b16 [[R1:%rs[0-9]+]], [[A1]], [[B1]], [[P1]]; ; CHECK: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]} -; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]]; +; CHECK-NEXT: st.param.b32 [func_retval0], [[R]]; ; CHECK-NEXT: ret; define <2 x bfloat> @test_select_cc_bf16_f32(<2 x bfloat> %a, <2 x bfloat> %b, <2 x float> %c, <2 x float> %d) #0 { @@ -290,7 +290,7 @@ define <2 x bfloat> @test_select_cc_bf16_f32(<2 x bfloat> %a, <2 x bfloat> %b, ; CHECK-DAG: cvt.rn.bf16.f32 [[R0:%rs[0-9]+]], [[A0]]; ; CHECK-DAG: cvt.rn.bf16.f32 [[R1:%rs[0-9]+]], [[A1]]; ; CHECK: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]} -; CHECK: st.param.b32 [func_retval0+0], [[R]]; +; CHECK: st.param.b32 [func_retval0], [[R]]; ; CHECK: ret; define <2 x bfloat> @test_fptrunc_2xfloat(<2 x float> %a) #0 { %r = fptrunc <2 x float> %a to <2 x bfloat> @@ -302,7 +302,7 @@ define <2 x bfloat> @test_fptrunc_2xfloat(<2 x float> %a) #0 { ; CHECK: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]] ; CHECK-DAG: cvt.f32.bf16 [[R0:%f[0-9]+]], [[A0]]; ; CHECK-DAG: cvt.f32.bf16 [[R1:%f[0-9]+]], [[A1]]; -; CHECK-NEXT: st.param.v2.f32 [func_retval0+0], {[[R0]], [[R1]]}; +; CHECK-NEXT: st.param.v2.f32 [func_retval0], {[[R0]], [[R1]]}; ; CHECK: ret; define <2 x float> @test_fpext_2xfloat(<2 x bfloat> %a) #0 { %r = fpext <2 x bfloat> %a to <2 x float> @@ -311,7 +311,7 @@ define <2 x float> @test_fpext_2xfloat(<2 x bfloat> %a) #0 { ; CHECK-LABEL: test_bitcast_2xbf16_to_2xi16( ; CHECK: ld.param.u32 [[A:%r[0-9]+]], [test_bitcast_2xbf16_to_2xi16_param_0]; -; CHECK: st.param.b32 [func_retval0+0], [[A]] +; CHECK: st.param.b32 [func_retval0], [[A]] ; CHECK: ret; define <2 x i16> @test_bitcast_2xbf16_to_2xi16(<2 x bfloat> %a) #0 { %r = bitcast <2 x bfloat> %a to <2 x i16> @@ -321,7 +321,7 @@ define <2 x i16> @test_bitcast_2xbf16_to_2xi16(<2 x bfloat> %a) #0 { ; CHECK-LABEL: test_bitcast_2xi16_to_2xbf16( ; CHECK: ld.param.b32 [[R]], [test_bitcast_2xi16_to_2xbf16_param_0]; -; CHECK: st.param.b32 [func_retval0+0], [[R]]; +; CHECK: st.param.b32 [func_retval0], [[R]]; ; CHECK: ret; define <2 x bfloat> @test_bitcast_2xi16_to_2xbf16(<2 x i16> %a) #0 { %r = bitcast <2 x i16> %a to <2 x bfloat> @@ -362,7 +362,7 @@ declare <2 x bfloat> @llvm.fmuladd.f16(<2 x bfloat> %a, <2 x bfloat> %b, <2 x bf ; CHECK-DAG: cvt.rn.bf16.f32 [[R0:%rs[0-9]+]], [[RF0]]; ; CHECK-DAG: cvt.rn.bf16.f32 [[R1:%rs[0-9]+]], [[RF1]]; ; CHECK: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]} -; CHECK: st.param.b32 [func_retval0+0], [[R]]; +; CHECK: st.param.b32 [func_retval0], [[R]]; ; CHECK: ret; define <2 x bfloat> @test_sqrt(<2 x bfloat> %a) #0 { %r = call <2 x bfloat> @llvm.sqrt.f16(<2 x bfloat> %a) @@ -375,7 +375,7 @@ define <2 x bfloat> @test_sqrt(<2 x bfloat> %a) #0 { ; CHECK-DAG: ld.param.b32 [[C:%r[0-9]+]], [test_fmuladd_param_2]; ; ; CHECK: fma.rn.bf16x2 [[RA:%r[0-9]+]], [[A]], [[B]], [[C]]; -; CHECK-NEXT: st.param.b32 [func_retval0+0], [[RA]]; +; CHECK-NEXT: st.param.b32 [func_retval0], [[RA]]; ; CHECK: ret; define <2 x bfloat> @test_fmuladd(<2 x bfloat> %a, <2 x bfloat> %b, <2 x bfloat> %c) #0 { %r = call <2 x bfloat> 
@llvm.fmuladd.f16(<2 x bfloat> %a, <2 x bfloat> %b, <2 x bfloat> %c) @@ -385,7 +385,7 @@ define <2 x bfloat> @test_fmuladd(<2 x bfloat> %a, <2 x bfloat> %b, <2 x bfloat> ; CHECK-LABEL: test_fabs( ; CHECK: ld.param.u32 [[A:%r[0-9]+]], [test_fabs_param_0]; ; CHECK: and.b32 [[R:%r[0-9]+]], [[A]], 2147450879; -; CHECK: st.param.b32 [func_retval0+0], [[R]]; +; CHECK: st.param.b32 [func_retval0], [[R]]; ; CHECK: ret; define <2 x bfloat> @test_fabs(<2 x bfloat> %a) #0 { %r = call <2 x bfloat> @llvm.fabs.f16(<2 x bfloat> %a) @@ -407,7 +407,7 @@ define <2 x bfloat> @test_fabs_add(<2 x bfloat> %a, <2 x bfloat> %b) #0 { ; CHECK-DAG: ld.param.b32 [[AF0:%r[0-9]+]], [test_minnum_param_0]; ; CHECK-DAG: ld.param.b32 [[BF0:%r[0-9]+]], [test_minnum_param_1]; ; CHECK-DAG: min.bf16x2 [[RF0:%r[0-9]+]], [[AF0]], [[BF0]]; -; CHECK: st.param.b32 [func_retval0+0], [[RF0]]; +; CHECK: st.param.b32 [func_retval0], [[RF0]]; ; CHECK: ret; define <2 x bfloat> @test_minnum(<2 x bfloat> %a, <2 x bfloat> %b) #0 { %r = call <2 x bfloat> @llvm.minnum.f16(<2 x bfloat> %a, <2 x bfloat> %b) @@ -418,7 +418,7 @@ define <2 x bfloat> @test_minnum(<2 x bfloat> %a, <2 x bfloat> %b) #0 { ; CHECK-DAG: ld.param.b32 [[AF0:%r[0-9]+]], [test_maxnum_param_0]; ; CHECK-DAG: ld.param.b32 [[BF0:%r[0-9]+]], [test_maxnum_param_1]; ; CHECK-DAG: max.bf16x2 [[RF0:%r[0-9]+]], [[AF0]], [[BF0]]; -; CHECK: st.param.b32 [func_retval0+0], [[RF0]]; +; CHECK: st.param.b32 [func_retval0], [[RF0]]; ; CHECK: ret; define <2 x bfloat> @test_maxnum(<2 x bfloat> %a, <2 x bfloat> %b) #0 { %r = call <2 x bfloat> @llvm.maxnum.f16(<2 x bfloat> %a, <2 x bfloat> %b) @@ -439,7 +439,7 @@ define <2 x bfloat> @test_maxnum(<2 x bfloat> %a, <2 x bfloat> %b) #0 { ; SM80-DAG: cvt.rn.bf16.f32 [[R0:%rs[0-9]+]], [[RF0]]; ; SM80-DAG: cvt.rn.bf16.f32 [[R1:%rs[0-9]+]], [[RF1]]; ; CHECK: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]} -; CHECK: st.param.b32 [func_retval0+0], [[R]]; +; CHECK: st.param.b32 [func_retval0], [[R]]; ; CHECK: ret; define <2 x bfloat> @test_floor(<2 x bfloat> %a) #0 { %r = call <2 x bfloat> @llvm.floor.f16(<2 x bfloat> %a) @@ -458,7 +458,7 @@ define <2 x bfloat> @test_floor(<2 x bfloat> %a) #0 { ; SM80-DAG: cvt.rn.bf16.f32 [[R0:%rs[0-9]+]], [[RF0]]; ; SM80-DAG: cvt.rn.bf16.f32 [[R1:%rs[0-9]+]], [[RF1]]; ; CHECK: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]} -; CHECK: st.param.b32 [func_retval0+0], [[R]]; +; CHECK: st.param.b32 [func_retval0], [[R]]; ; CHECK: ret; define <2 x bfloat> @test_ceil(<2 x bfloat> %a) #0 { %r = call <2 x bfloat> @llvm.ceil.f16(<2 x bfloat> %a) @@ -471,7 +471,7 @@ define <2 x bfloat> @test_ceil(<2 x bfloat> %a) #0 { ; SM90: cvt.rzi.bf16.bf16 [[R1:%rs[0-9]+]], [[A1]]; ; SM90: cvt.rzi.bf16.bf16 [[R0:%rs[0-9]+]], [[A0]]; ; CHECK: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]} -; CHECK: st.param.b32 [func_retval0+0], [[R]]; +; CHECK: st.param.b32 [func_retval0], [[R]]; ; CHECK: ret; define <2 x bfloat> @test_trunc(<2 x bfloat> %a) #0 { %r = call <2 x bfloat> @llvm.trunc.f16(<2 x bfloat> %a) @@ -484,7 +484,7 @@ define <2 x bfloat> @test_trunc(<2 x bfloat> %a) #0 { ; SM90: cvt.rni.bf16.bf16 [[R1:%rs[0-9]+]], [[A1]]; ; SM90: cvt.rni.bf16.bf16 [[R0:%rs[0-9]+]], [[A0]]; ; CHECK: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]} -; CHECK: st.param.b32 [func_retval0+0], [[R]]; +; CHECK: st.param.b32 [func_retval0], [[R]]; ; CHECK: ret; define <2 x bfloat> @test_rint(<2 x bfloat> %a) #0 { %r = call <2 x bfloat> @llvm.rint.f16(<2 x bfloat> %a) @@ -498,7 +498,7 @@ define <2 x bfloat> @test_rint(<2 x bfloat> %a) #0 { ; CHECK: or.b32 {{.*}}, [[R1]], 1056964608; ; CHECK: 
and.b32 [[R2:%r[0-9]+]], {{.*}}, -2147483648; ; CHECK: or.b32 {{.*}}, [[R2]], 1056964608; -; CHECK: st.param.b32 [func_retval0+0], {{.*}}; +; CHECK: st.param.b32 [func_retval0], {{.*}}; ; CHECK: ret; define <2 x bfloat> @test_round(<2 x bfloat> %a) #0 { %r = call <2 x bfloat> @llvm.round.f16(<2 x bfloat> %a) @@ -526,7 +526,7 @@ define <2 x bfloat> @test_round(<2 x bfloat> %a) #0 { ; SM90-DAG: and.b32 [[R1:%r[0-9]+]], [[B]], -2147450880; ; SM90-DAG: and.b32 [[R2:%r[0-9]+]], [[A]], 2147450879; ; SM90-DAG: or.b32 [[R:%r[0-9]+]], [[R2]], [[R1]]; -; CHECK: st.param.b32 [func_retval0+0], [[R]]; +; CHECK: st.param.b32 [func_retval0], [[R]]; ; CHECK: ret; define <2 x bfloat> @test_copysign(<2 x bfloat> %a, <2 x bfloat> %b) #0 { %r = call <2 x bfloat> @llvm.copysign.f16(<2 x bfloat> %a, <2 x bfloat> %b) diff --git a/llvm/test/CodeGen/NVPTX/bswap.ll b/llvm/test/CodeGen/NVPTX/bswap.ll index 3f929ec6a75d0a..461cecf57270eb 100644 --- a/llvm/test/CodeGen/NVPTX/bswap.ll +++ b/llvm/test/CodeGen/NVPTX/bswap.ll @@ -16,7 +16,7 @@ define i16 @bswap16(i16 %a) { ; CHECK-NEXT: shl.b16 %rs3, %rs1, 8; ; CHECK-NEXT: or.b16 %rs4, %rs3, %rs2; ; CHECK-NEXT: cvt.u32.u16 %r1, %rs4; -; CHECK-NEXT: st.param.b32 [func_retval0+0], %r1; +; CHECK-NEXT: st.param.b32 [func_retval0], %r1; ; CHECK-NEXT: ret; %b = tail call i16 @llvm.bswap.i16(i16 %a) ret i16 %b @@ -31,7 +31,7 @@ define i32 @bswap32(i32 %a) { ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.u32 %r1, [bswap32_param_0]; ; CHECK-NEXT: prmt.b32 %r2, %r1, 0, 291; -; CHECK-NEXT: st.param.b32 [func_retval0+0], %r2; +; CHECK-NEXT: st.param.b32 [func_retval0], %r2; ; CHECK-NEXT: ret; %b = tail call i32 @llvm.bswap.i32(i32 %a) ret i32 %b @@ -46,7 +46,7 @@ define <2 x i16> @bswapv2i16(<2 x i16> %a) #0 { ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.u32 %r1, [bswapv2i16_param_0]; ; CHECK-NEXT: prmt.b32 %r2, %r1, 0, 8961; -; CHECK-NEXT: st.param.b32 [func_retval0+0], %r2; +; CHECK-NEXT: st.param.b32 [func_retval0], %r2; ; CHECK-NEXT: ret; %b = tail call <2 x i16> @llvm.bswap.v2i16(<2 x i16> %a) ret <2 x i16> %b @@ -65,7 +65,7 @@ define i64 @bswap64(i64 %a) { ; CHECK-NEXT: { .reg .b32 tmp; mov.b64 {tmp, %r3}, %rd1; } ; CHECK-NEXT: prmt.b32 %r4, %r3, 0, 291; ; CHECK-NEXT: mov.b64 %rd2, {%r4, %r2}; -; CHECK-NEXT: st.param.b64 [func_retval0+0], %rd2; +; CHECK-NEXT: st.param.b64 [func_retval0], %rd2; ; CHECK-NEXT: ret; %b = tail call i64 @llvm.bswap.i64(i64 %a) ret i64 %b diff --git a/llvm/test/CodeGen/NVPTX/call-with-alloca-buffer.ll b/llvm/test/CodeGen/NVPTX/call-with-alloca-buffer.ll index 3fbed871850bc3..0ce9a58b2e6ecb 100644 --- a/llvm/test/CodeGen/NVPTX/call-with-alloca-buffer.ll +++ b/llvm/test/CodeGen/NVPTX/call-with-alloca-buffer.ll @@ -45,9 +45,9 @@ entry: store float %3, ptr %arrayidx7, align 4 ; CHECK: .param .b64 param0; -; CHECK-NEXT: st.param.b64 [param0+0], %rd[[A_REG]] +; CHECK-NEXT: st.param.b64 [param0], %rd[[A_REG]] ; CHECK-NEXT: .param .b64 param1; -; CHECK-NEXT: st.param.b64 [param1+0], %rd[[SP_REG]] +; CHECK-NEXT: st.param.b64 [param1], %rd[[SP_REG]] ; CHECK-NEXT: call.uni ; CHECK-NEXT: callee, diff --git a/llvm/test/CodeGen/NVPTX/call_bitcast_byval.ll b/llvm/test/CodeGen/NVPTX/call_bitcast_byval.ll index bd723a296e620f..5cf70a6aea5c22 100644 --- a/llvm/test/CodeGen/NVPTX/call_bitcast_byval.ll +++ b/llvm/test/CodeGen/NVPTX/call_bitcast_byval.ll @@ -14,7 +14,7 @@ target triple = "nvptx64-nvidia-cuda" %complex_half = type { half, half } ; CHECK: .param .align 2 .b8 param2[4]; -; CHECK: st.param.b16 [param2+0], %rs1; +; CHECK: st.param.b16 [param2], %rs1; ; 
CHECK: st.param.b16 [param2+2], %rs2; ; CHECK: .param .align 2 .b8 retval0[4]; ; CHECK-NEXT: prototype_0 : .callprototype (.param .align 2 .b8 _[4]) _ (.param .b32 _, .param .b32 _, .param .align 2 .b8 _[4]); @@ -37,7 +37,7 @@ define internal void @callee(ptr byval(%"class.complex") %byval_arg) { define void @boom() { %fp = call ptr @usefp(ptr @callee) ; CHECK: .param .align 2 .b8 param0[4]; - ; CHECK: st.param.b16 [param0+0], %rs1; + ; CHECK: st.param.b16 [param0], %rs1; ; CHECK: st.param.b16 [param0+2], %rs2; ; CHECK: .callprototype ()_ (.param .align 2 .b8 _[4]); call void %fp(ptr byval(%"class.complex") null) diff --git a/llvm/test/CodeGen/NVPTX/chain-different-as.ll b/llvm/test/CodeGen/NVPTX/chain-different-as.ll index 18d06647cfe05f..293281e17dd36a 100644 --- a/llvm/test/CodeGen/NVPTX/chain-different-as.ll +++ b/llvm/test/CodeGen/NVPTX/chain-different-as.ll @@ -11,7 +11,7 @@ define i64 @test() nounwind readnone { ; CHECK-NEXT: mov.u64 %rd2, 42; ; CHECK-NEXT: st.u64 [%rd1], %rd2; ; CHECK-NEXT: ld.global.u64 %rd3, [%rd1]; -; CHECK-NEXT: st.param.b64 [func_retval0+0], %rd3; +; CHECK-NEXT: st.param.b64 [func_retval0], %rd3; ; CHECK-NEXT: ret; %addr0 = inttoptr i64 1 to ptr %addr1 = inttoptr i64 1 to ptr addrspace(1) diff --git a/llvm/test/CodeGen/NVPTX/cmpxchg.ll b/llvm/test/CodeGen/NVPTX/cmpxchg.ll index 85ae5f0c8f6013..f7cc32b962b9c8 100644 --- a/llvm/test/CodeGen/NVPTX/cmpxchg.ll +++ b/llvm/test/CodeGen/NVPTX/cmpxchg.ll @@ -47,7 +47,7 @@ define i8 @relaxed_sys_i8(ptr %addr, i8 %cmp, i8 %new) { ; SM30-NEXT: mov.u32 %r20, %r8; ; SM30-NEXT: @%p2 bra $L__BB0_1; ; SM30-NEXT: $L__BB0_3: // %partword.cmpxchg.end -; SM30-NEXT: st.param.b32 [func_retval0+0], %r13; +; SM30-NEXT: st.param.b32 [func_retval0], %r13; ; SM30-NEXT: ret; ; ; SM70-LABEL: relaxed_sys_i8( @@ -87,7 +87,7 @@ define i8 @relaxed_sys_i8(ptr %addr, i8 %cmp, i8 %new) { ; SM70-NEXT: @%p2 bra $L__BB0_1; ; SM70-NEXT: $L__BB0_3: // %partword.cmpxchg.end ; SM70-NEXT: cvt.u32.u16 %r2, %rs9; -; SM70-NEXT: st.param.b32 [func_retval0+0], %r2; +; SM70-NEXT: st.param.b32 [func_retval0], %r2; ; SM70-NEXT: ret; %pairold = cmpxchg ptr %addr, i8 %cmp, i8 %new seq_cst seq_cst ret i8 %new @@ -132,7 +132,7 @@ define i16 @relaxed_sys_i16(ptr %addr, i16 %cmp, i16 %new) { ; SM30-NEXT: mov.u32 %r19, %r8; ; SM30-NEXT: @%p2 bra $L__BB1_1; ; SM30-NEXT: $L__BB1_3: // %partword.cmpxchg.end -; SM30-NEXT: st.param.b32 [func_retval0+0], %r14; +; SM30-NEXT: st.param.b32 [func_retval0], %r14; ; SM30-NEXT: ret; ; ; SM70-LABEL: relaxed_sys_i16( @@ -147,7 +147,7 @@ define i16 @relaxed_sys_i16(ptr %addr, i16 %cmp, i16 %new) { ; SM70-NEXT: ld.param.u16 %rs2, [relaxed_sys_i16_param_2]; ; SM70-NEXT: atom.cas.b16 %rs3, [%rd1], %rs1, %rs2; ; SM70-NEXT: cvt.u32.u16 %r1, %rs2; -; SM70-NEXT: st.param.b32 [func_retval0+0], %r1; +; SM70-NEXT: st.param.b32 [func_retval0], %r1; ; SM70-NEXT: ret; %pairold = cmpxchg ptr %addr, i16 %cmp, i16 %new seq_cst seq_cst ret i16 %new @@ -165,7 +165,7 @@ define i32 @relaxed_sys_i32(ptr %addr, i32 %cmp, i32 %new) { ; SM30-NEXT: ld.param.u32 %r1, [relaxed_sys_i32_param_1]; ; SM30-NEXT: ld.param.u32 %r2, [relaxed_sys_i32_param_2]; ; SM30-NEXT: atom.cas.b32 %r3, [%rd1], %r1, %r2; -; SM30-NEXT: st.param.b32 [func_retval0+0], %r2; +; SM30-NEXT: st.param.b32 [func_retval0], %r2; ; SM30-NEXT: ret; ; ; SM70-LABEL: relaxed_sys_i32( @@ -178,7 +178,7 @@ define i32 @relaxed_sys_i32(ptr %addr, i32 %cmp, i32 %new) { ; SM70-NEXT: ld.param.u32 %r1, [relaxed_sys_i32_param_1]; ; SM70-NEXT: ld.param.u32 %r2, [relaxed_sys_i32_param_2]; ; SM70-NEXT: 
atom.cas.b32 %r3, [%rd1], %r1, %r2; -; SM70-NEXT: st.param.b32 [func_retval0+0], %r2; +; SM70-NEXT: st.param.b32 [func_retval0], %r2; ; SM70-NEXT: ret; %pairold = cmpxchg ptr %addr, i32 %cmp, i32 %new seq_cst seq_cst ret i32 %new @@ -195,7 +195,7 @@ define i64 @relaxed_sys_i64(ptr %addr, i64 %cmp, i64 %new) { ; SM30-NEXT: ld.param.u64 %rd2, [relaxed_sys_i64_param_1]; ; SM30-NEXT: ld.param.u64 %rd3, [relaxed_sys_i64_param_2]; ; SM30-NEXT: atom.cas.b64 %rd4, [%rd1], %rd2, %rd3; -; SM30-NEXT: st.param.b64 [func_retval0+0], %rd3; +; SM30-NEXT: st.param.b64 [func_retval0], %rd3; ; SM30-NEXT: ret; ; ; SM70-LABEL: relaxed_sys_i64( @@ -207,7 +207,7 @@ define i64 @relaxed_sys_i64(ptr %addr, i64 %cmp, i64 %new) { ; SM70-NEXT: ld.param.u64 %rd2, [relaxed_sys_i64_param_1]; ; SM70-NEXT: ld.param.u64 %rd3, [relaxed_sys_i64_param_2]; ; SM70-NEXT: atom.cas.b64 %rd4, [%rd1], %rd2, %rd3; -; SM70-NEXT: st.param.b64 [func_retval0+0], %rd3; +; SM70-NEXT: st.param.b64 [func_retval0], %rd3; ; SM70-NEXT: ret; %pairold = cmpxchg ptr %addr, i64 %cmp, i64 %new seq_cst seq_cst ret i64 %new diff --git a/llvm/test/CodeGen/NVPTX/combine-mad.ll b/llvm/test/CodeGen/NVPTX/combine-mad.ll index 56bfaa14c5877c..1b22cfde39725f 100644 --- a/llvm/test/CodeGen/NVPTX/combine-mad.ll +++ b/llvm/test/CodeGen/NVPTX/combine-mad.ll @@ -14,7 +14,7 @@ define i32 @test1(i32 %n, i32 %m) { ; CHECK-NEXT: ld.param.u32 %r1, [test1_param_0]; ; CHECK-NEXT: ld.param.u32 %r2, [test1_param_1]; ; CHECK-NEXT: mad.lo.s32 %r3, %r2, %r1, %r2; -; CHECK-NEXT: st.param.b32 [func_retval0+0], %r3; +; CHECK-NEXT: st.param.b32 [func_retval0], %r3; ; CHECK-NEXT: ret; %add = add i32 %n, 1 %mul = mul i32 %add, %m @@ -31,7 +31,7 @@ define i32 @test1_rev(i32 %n, i32 %m) { ; CHECK-NEXT: ld.param.u32 %r1, [test1_rev_param_0]; ; CHECK-NEXT: ld.param.u32 %r2, [test1_rev_param_1]; ; CHECK-NEXT: mad.lo.s32 %r3, %r2, %r1, %r2; -; CHECK-NEXT: st.param.b32 [func_retval0+0], %r3; +; CHECK-NEXT: st.param.b32 [func_retval0], %r3; ; CHECK-NEXT: ret; %add = add i32 %n, 1 %mul = mul i32 %m, %add @@ -53,7 +53,7 @@ define i32 @test2(i32 %n, i32 %m, i32 %s) { ; CHECK-NEXT: setp.lt.s32 %p1, %r3, 1; ; CHECK-NEXT: mad.lo.s32 %r4, %r2, %r1, %r2; ; CHECK-NEXT: selp.b32 %r5, %r2, %r4, %p1; -; CHECK-NEXT: st.param.b32 [func_retval0+0], %r5; +; CHECK-NEXT: st.param.b32 [func_retval0], %r5; ; CHECK-NEXT: ret; %add = add i32 %n, 1 %cond = icmp slt i32 %s, 1 @@ -77,7 +77,7 @@ define i32 @test2_rev1(i32 %n, i32 %m, i32 %s) { ; CHECK-NEXT: setp.lt.s32 %p1, %r3, 1; ; CHECK-NEXT: mad.lo.s32 %r4, %r2, %r1, %r2; ; CHECK-NEXT: selp.b32 %r5, %r4, %r2, %p1; -; CHECK-NEXT: st.param.b32 [func_retval0+0], %r5; +; CHECK-NEXT: st.param.b32 [func_retval0], %r5; ; CHECK-NEXT: ret; %add = add i32 %n, 1 %cond = icmp slt i32 %s, 1 @@ -101,7 +101,7 @@ define i32 @test2_rev2(i32 %n, i32 %m, i32 %s) { ; CHECK-NEXT: setp.lt.s32 %p1, %r3, 1; ; CHECK-NEXT: mad.lo.s32 %r4, %r2, %r1, %r2; ; CHECK-NEXT: selp.b32 %r5, %r4, %r2, %p1; -; CHECK-NEXT: st.param.b32 [func_retval0+0], %r5; +; CHECK-NEXT: st.param.b32 [func_retval0], %r5; ; CHECK-NEXT: ret; %add = add i32 %n, 1 %cond = icmp slt i32 %s, 1 @@ -126,7 +126,7 @@ define i32 @test3(i32 %n, i32 %m, i32 %s) { ; CHECK-NEXT: setp.lt.s32 %p1, %r4, 1; ; CHECK-NEXT: selp.b32 %r5, 1, %r2, %p1; ; CHECK-NEXT: mul.lo.s32 %r6, %r5, %r3; -; CHECK-NEXT: st.param.b32 [func_retval0+0], %r6; +; CHECK-NEXT: st.param.b32 [func_retval0], %r6; ; CHECK-NEXT: ret; %add = add i32 %n, 3 %cond = icmp slt i32 %s, 1 @@ -152,7 +152,7 @@ define i32 @test4(i32 %a, i32 %b, i32 %c, i1 %p) { ; 
CHECK-NEXT: ld.param.u32 %r3, [test4_param_2]; ; CHECK-NEXT: mad.lo.s32 %r4, %r1, %r2, %r3; ; CHECK-NEXT: selp.b32 %r5, %r4, %r3, %p1; -; CHECK-NEXT: st.param.b32 [func_retval0+0], %r5; +; CHECK-NEXT: st.param.b32 [func_retval0], %r5; ; CHECK-NEXT: ret; %mul = mul i32 %a, %b %sel = select i1 %p, i32 %mul, i32 0 @@ -176,7 +176,7 @@ define i32 @test4_rev(i32 %a, i32 %b, i32 %c, i1 %p) { ; CHECK-NEXT: ld.param.u32 %r3, [test4_rev_param_2]; ; CHECK-NEXT: mad.lo.s32 %r4, %r1, %r2, %r3; ; CHECK-NEXT: selp.b32 %r5, %r3, %r4, %p1; -; CHECK-NEXT: st.param.b32 [func_retval0+0], %r5; +; CHECK-NEXT: st.param.b32 [func_retval0], %r5; ; CHECK-NEXT: ret; %mul = mul i32 %a, %b %sel = select i1 %p, i32 0, i32 %mul diff --git a/llvm/test/CodeGen/NVPTX/compute-ptx-value-vts.ll b/llvm/test/CodeGen/NVPTX/compute-ptx-value-vts.ll index a88c5637f089b1..5deafb3ceed784 100644 --- a/llvm/test/CodeGen/NVPTX/compute-ptx-value-vts.ll +++ b/llvm/test/CodeGen/NVPTX/compute-ptx-value-vts.ll @@ -10,7 +10,7 @@ define <6 x half> @half6() { ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: mov.b16 %rs1, 0x0000; -; CHECK-NEXT: st.param.v4.b16 [func_retval0+0], {%rs1, %rs1, %rs1, %rs1}; +; CHECK-NEXT: st.param.v4.b16 [func_retval0], {%rs1, %rs1, %rs1, %rs1}; ; CHECK-NEXT: st.param.v2.b16 [func_retval0+8], {%rs1, %rs1}; ; CHECK-NEXT: ret; ret <6 x half> zeroinitializer @@ -23,7 +23,7 @@ define <10 x half> @half10() { ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: mov.b16 %rs1, 0x0000; -; CHECK-NEXT: st.param.v4.b16 [func_retval0+0], {%rs1, %rs1, %rs1, %rs1}; +; CHECK-NEXT: st.param.v4.b16 [func_retval0], {%rs1, %rs1, %rs1, %rs1}; ; CHECK-NEXT: st.param.v4.b16 [func_retval0+8], {%rs1, %rs1, %rs1, %rs1}; ; CHECK-NEXT: st.param.v2.b16 [func_retval0+16], {%rs1, %rs1}; ; CHECK-NEXT: ret; @@ -37,7 +37,7 @@ define <12 x i8> @byte12() { ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: mov.u16 %rs1, 0; -; CHECK-NEXT: st.param.v4.b8 [func_retval0+0], {%rs1, %rs1, %rs1, %rs1}; +; CHECK-NEXT: st.param.v4.b8 [func_retval0], {%rs1, %rs1, %rs1, %rs1}; ; CHECK-NEXT: st.param.v4.b8 [func_retval0+4], {%rs1, %rs1, %rs1, %rs1}; ; CHECK-NEXT: st.param.v4.b8 [func_retval0+8], {%rs1, %rs1, %rs1, %rs1}; ; CHECK-NEXT: ret; @@ -51,7 +51,7 @@ define <20 x i8> @byte20() { ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: mov.u16 %rs1, 0; -; CHECK-NEXT: st.param.v4.b8 [func_retval0+0], {%rs1, %rs1, %rs1, %rs1}; +; CHECK-NEXT: st.param.v4.b8 [func_retval0], {%rs1, %rs1, %rs1, %rs1}; ; CHECK-NEXT: st.param.v4.b8 [func_retval0+4], {%rs1, %rs1, %rs1, %rs1}; ; CHECK-NEXT: st.param.v4.b8 [func_retval0+8], {%rs1, %rs1, %rs1, %rs1}; ; CHECK-NEXT: st.param.v4.b8 [func_retval0+12], {%rs1, %rs1, %rs1, %rs1}; diff --git a/llvm/test/CodeGen/NVPTX/convert-int-sm20.ll b/llvm/test/CodeGen/NVPTX/convert-int-sm20.ll index d7e2cede8a9915..b1850185f0c763 100644 --- a/llvm/test/CodeGen/NVPTX/convert-int-sm20.ll +++ b/llvm/test/CodeGen/NVPTX/convert-int-sm20.ll @@ -11,7 +11,7 @@ define i16 @cvt_i16_i32(i32 %x) { ; CHECK: ld.param.u16 %r[[R0:[0-9]+]], [cvt_i16_i32_param_{{[0-9]+}}] -; CHECK: st.param.b32 [func_retval{{[0-9]+}}+0], %r[[R0]] +; CHECK: st.param.b32 [func_retval{{[0-9]+}}], %r[[R0]] ; CHECK: ret %a = trunc i32 %x to i16 ret i16 %a @@ -19,7 +19,7 @@ define i16 @cvt_i16_i32(i32 %x) { define i16 @cvt_i16_i64(i64 %x) { ; CHECK: ld.param.u16 %r[[R0:[0-9]+]], [cvt_i16_i64_param_{{[0-9]+}}] -; CHECK: st.param.b32 [func_retval{{[0-9]+}}+0], %r[[R0]] +; CHECK: st.param.b32 [func_retval{{[0-9]+}}], %r[[R0]] ; CHECK: ret %a = trunc i64 %x to i16 ret i16 %a @@ 
-31,7 +31,7 @@ define i16 @cvt_i16_i64(i64 %x) { define i32 @cvt_i32_i16(i16 %x) { ; CHECK: ld.param.u16 %r[[R0:[0-9]+]], [cvt_i32_i16_param_{{[0-9]+}}] -; CHECK: st.param.b32 [func_retval{{[0-9]+}}+0], %r[[R0]] +; CHECK: st.param.b32 [func_retval{{[0-9]+}}], %r[[R0]] ; CHECK: ret %a = zext i16 %x to i32 ret i32 %a @@ -39,7 +39,7 @@ define i32 @cvt_i32_i16(i16 %x) { define i32 @cvt_i32_i64(i64 %x) { ; CHECK: ld.param.u32 %r[[R0:[0-9]+]], [cvt_i32_i64_param_{{[0-9]+}}] -; CHECK: st.param.b32 [func_retval{{[0-9]+}}+0], %r[[R0]] +; CHECK: st.param.b32 [func_retval{{[0-9]+}}], %r[[R0]] ; CHECK: ret %a = trunc i64 %x to i32 ret i32 %a @@ -51,7 +51,7 @@ define i32 @cvt_i32_i64(i64 %x) { define i64 @cvt_i64_i16(i16 %x) { ; CHECK: ld.param.u16 %rd[[R0:[0-9]+]], [cvt_i64_i16_param_{{[0-9]+}}] -; CHECK: st.param.b64 [func_retval{{[0-9]+}}+0], %rd[[R0]] +; CHECK: st.param.b64 [func_retval{{[0-9]+}}], %rd[[R0]] ; CHECK: ret %a = zext i16 %x to i64 ret i64 %a @@ -59,7 +59,7 @@ define i64 @cvt_i64_i16(i16 %x) { define i64 @cvt_i64_i32(i32 %x) { ; CHECK: ld.param.u32 %rd[[R0:[0-9]+]], [cvt_i64_i32_param_{{[0-9]+}}] -; CHECK: st.param.b64 [func_retval{{[0-9]+}}+0], %rd[[R0]] +; CHECK: st.param.b64 [func_retval{{[0-9]+}}], %rd[[R0]] ; CHECK: ret %a = zext i32 %x to i64 ret i64 %a diff --git a/llvm/test/CodeGen/NVPTX/copysign.ll b/llvm/test/CodeGen/NVPTX/copysign.ll index a6aad1c2f012b3..ba7db68b3977d0 100644 --- a/llvm/test/CodeGen/NVPTX/copysign.ll +++ b/llvm/test/CodeGen/NVPTX/copysign.ll @@ -14,7 +14,7 @@ define float @fcopysign_f_f(float %a, float %b) { ; CHECK-NEXT: ld.param.f32 %f1, [fcopysign_f_f_param_0]; ; CHECK-NEXT: ld.param.f32 %f2, [fcopysign_f_f_param_1]; ; CHECK-NEXT: copysign.f32 %f3, %f2, %f1; -; CHECK-NEXT: st.param.f32 [func_retval0+0], %f3; +; CHECK-NEXT: st.param.f32 [func_retval0], %f3; ; CHECK-NEXT: ret; %val = call float @llvm.copysign.f32(float %a, float %b) ret float %val @@ -29,7 +29,7 @@ define double @fcopysign_d_d(double %a, double %b) { ; CHECK-NEXT: ld.param.f64 %fd1, [fcopysign_d_d_param_0]; ; CHECK-NEXT: ld.param.f64 %fd2, [fcopysign_d_d_param_1]; ; CHECK-NEXT: copysign.f64 %fd3, %fd2, %fd1; -; CHECK-NEXT: st.param.f64 [func_retval0+0], %fd3; +; CHECK-NEXT: st.param.f64 [func_retval0], %fd3; ; CHECK-NEXT: ret; %val = call double @llvm.copysign.f64(double %a, double %b) ret double %val @@ -51,7 +51,7 @@ define float @fcopysign_f_d(float %a, double %b) { ; CHECK-NEXT: and.b64 %rd3, %rd2, 1; ; CHECK-NEXT: setp.eq.b64 %p1, %rd3, 1; ; CHECK-NEXT: selp.f32 %f4, %f3, %f2, %p1; -; CHECK-NEXT: st.param.f32 [func_retval0+0], %f4; +; CHECK-NEXT: st.param.f32 [func_retval0], %f4; ; CHECK-NEXT: ret; %c = fptrunc double %b to float %val = call float @llvm.copysign.f32(float %a, float %c) @@ -74,7 +74,7 @@ define float @fcopysign_f_h(float %a, half %b) { ; CHECK-NEXT: and.b16 %rs3, %rs2, 1; ; CHECK-NEXT: setp.eq.b16 %p1, %rs3, 1; ; CHECK-NEXT: selp.f32 %f4, %f3, %f2, %p1; -; CHECK-NEXT: st.param.f32 [func_retval0+0], %f4; +; CHECK-NEXT: st.param.f32 [func_retval0], %f4; ; CHECK-NEXT: ret; %c = fpext half %b to float %val = call float @llvm.copysign.f32(float %a, float %c) @@ -97,7 +97,7 @@ define double @fcopysign_d_f(double %a, float %b) { ; CHECK-NEXT: and.b32 %r3, %r2, 1; ; CHECK-NEXT: setp.eq.b32 %p1, %r3, 1; ; CHECK-NEXT: selp.f64 %fd4, %fd3, %fd2, %p1; -; CHECK-NEXT: st.param.f64 [func_retval0+0], %fd4; +; CHECK-NEXT: st.param.f64 [func_retval0], %fd4; ; CHECK-NEXT: ret; %c = fpext float %b to double %val = call double @llvm.copysign.f64(double %a, double %c) @@ -120,7 +120,7 @@ 
define double @fcopysign_d_h(double %a, half %b) { ; CHECK-NEXT: and.b16 %rs3, %rs2, 1; ; CHECK-NEXT: setp.eq.b16 %p1, %rs3, 1; ; CHECK-NEXT: selp.f64 %fd4, %fd3, %fd2, %p1; -; CHECK-NEXT: st.param.f64 [func_retval0+0], %fd4; +; CHECK-NEXT: st.param.f64 [func_retval0], %fd4; ; CHECK-NEXT: ret; %c = fpext half %b to double %val = call double @llvm.copysign.f64(double %a, double %c) diff --git a/llvm/test/CodeGen/NVPTX/dot-product.ll b/llvm/test/CodeGen/NVPTX/dot-product.ll index 36529bbef90332..8d3d7238d36fd5 100644 --- a/llvm/test/CodeGen/NVPTX/dot-product.ll +++ b/llvm/test/CodeGen/NVPTX/dot-product.ll @@ -19,7 +19,7 @@ define i32 @test_dp4a_u32_u32(i32 %a, i32 %b, i32 %c) { ; CHECK-NEXT: ld.param.u32 %r2, [test_dp4a_u32_u32_param_1]; ; CHECK-NEXT: ld.param.u32 %r3, [test_dp4a_u32_u32_param_2]; ; CHECK-NEXT: dp4a.u32.u32 %r4, %r1, %r2, %r3; -; CHECK-NEXT: st.param.b32 [func_retval0+0], %r4; +; CHECK-NEXT: st.param.b32 [func_retval0], %r4; ; CHECK-NEXT: ret; %call = call i32 @llvm.nvvm.idp4a.u.u(i32 %a, i32 %b, i32 %c) ret i32 %call @@ -34,7 +34,7 @@ define i32 @test_dp4a_u32imm_u32imm(i32 %c) { ; CHECK-NEXT: ld.param.u32 %r1, [test_dp4a_u32imm_u32imm_param_0]; ; CHECK-NEXT: mov.b32 %r2, 0; ; CHECK-NEXT: dp4a.u32.u32 %r3, %r2, %r2, %r1; -; CHECK-NEXT: st.param.b32 [func_retval0+0], %r3; +; CHECK-NEXT: st.param.b32 [func_retval0], %r3; ; CHECK-NEXT: ret; %call = call i32 @llvm.nvvm.idp4a.u.u(i32 0, i32 0, i32 %c) ret i32 %call @@ -50,7 +50,7 @@ define i32 @test_dp4a_u32_s32(i32 %a, i32 %b, i32 %c) { ; CHECK-NEXT: ld.param.u32 %r2, [test_dp4a_u32_s32_param_1]; ; CHECK-NEXT: ld.param.u32 %r3, [test_dp4a_u32_s32_param_2]; ; CHECK-NEXT: dp4a.u32.s32 %r4, %r1, %r2, %r3; -; CHECK-NEXT: st.param.b32 [func_retval0+0], %r4; +; CHECK-NEXT: st.param.b32 [func_retval0], %r4; ; CHECK-NEXT: ret; %call = call i32 @llvm.nvvm.idp4a.u.s(i32 %a, i32 %b, i32 %c) ret i32 %call @@ -66,7 +66,7 @@ define i32 @test_dp4a_s32_u32(i32 %a, i32 %b, i32 %c) { ; CHECK-NEXT: ld.param.u32 %r2, [test_dp4a_s32_u32_param_1]; ; CHECK-NEXT: ld.param.u32 %r3, [test_dp4a_s32_u32_param_2]; ; CHECK-NEXT: dp4a.s32.u32 %r4, %r1, %r2, %r3; -; CHECK-NEXT: st.param.b32 [func_retval0+0], %r4; +; CHECK-NEXT: st.param.b32 [func_retval0], %r4; ; CHECK-NEXT: ret; %call = call i32 @llvm.nvvm.idp4a.s.u(i32 %a, i32 %b, i32 %c) ret i32 %call @@ -82,7 +82,7 @@ define i32 @test_dp4a_s32_s32(i32 %a, i32 %b, i32 %c) { ; CHECK-NEXT: ld.param.u32 %r2, [test_dp4a_s32_s32_param_1]; ; CHECK-NEXT: ld.param.u32 %r3, [test_dp4a_s32_s32_param_2]; ; CHECK-NEXT: dp4a.s32.s32 %r4, %r1, %r2, %r3; -; CHECK-NEXT: st.param.b32 [func_retval0+0], %r4; +; CHECK-NEXT: st.param.b32 [func_retval0], %r4; ; CHECK-NEXT: ret; %call = call i32 @llvm.nvvm.idp4a.s.s(i32 %a, i32 %b, i32 %c) ret i32 %call @@ -103,7 +103,7 @@ define i32 @test_dp2a_lo_u32_u32(i32 %a, i32 %b, i32 %c) { ; CHECK-NEXT: ld.param.u32 %r2, [test_dp2a_lo_u32_u32_param_1]; ; CHECK-NEXT: ld.param.u32 %r3, [test_dp2a_lo_u32_u32_param_2]; ; CHECK-NEXT: dp2a.lo.u32.u32 %r4, %r1, %r2, %r3; -; CHECK-NEXT: st.param.b32 [func_retval0+0], %r4; +; CHECK-NEXT: st.param.b32 [func_retval0], %r4; ; CHECK-NEXT: ret; %call = call i32 @llvm.nvvm.idp2a.u.u(i32 %a, i32 %b, i1 0, i32 %c) ret i32 %call @@ -119,7 +119,7 @@ define i32 @test_dp2a_lo_u32_s32(i32 %a, i32 %b, i32 %c) { ; CHECK-NEXT: ld.param.u32 %r2, [test_dp2a_lo_u32_s32_param_1]; ; CHECK-NEXT: ld.param.u32 %r3, [test_dp2a_lo_u32_s32_param_2]; ; CHECK-NEXT: dp2a.lo.u32.s32 %r4, %r1, %r2, %r3; -; CHECK-NEXT: st.param.b32 [func_retval0+0], %r4; +; CHECK-NEXT: 
st.param.b32 [func_retval0], %r4; ; CHECK-NEXT: ret; %call = call i32 @llvm.nvvm.idp2a.u.s(i32 %a, i32 %b, i1 0, i32 %c) ret i32 %call @@ -135,7 +135,7 @@ define i32 @test_dp2a_lo_s32_u32(i32 %a, i32 %b, i32 %c) { ; CHECK-NEXT: ld.param.u32 %r2, [test_dp2a_lo_s32_u32_param_1]; ; CHECK-NEXT: ld.param.u32 %r3, [test_dp2a_lo_s32_u32_param_2]; ; CHECK-NEXT: dp2a.lo.s32.u32 %r4, %r1, %r2, %r3; -; CHECK-NEXT: st.param.b32 [func_retval0+0], %r4; +; CHECK-NEXT: st.param.b32 [func_retval0], %r4; ; CHECK-NEXT: ret; %call = call i32 @llvm.nvvm.idp2a.s.u(i32 %a, i32 %b, i1 0, i32 %c) ret i32 %call @@ -151,7 +151,7 @@ define i32 @test_dp2a_lo_s32_s32(i32 %a, i32 %b, i32 %c) { ; CHECK-NEXT: ld.param.u32 %r2, [test_dp2a_lo_s32_s32_param_1]; ; CHECK-NEXT: ld.param.u32 %r3, [test_dp2a_lo_s32_s32_param_2]; ; CHECK-NEXT: dp2a.lo.s32.s32 %r4, %r1, %r2, %r3; -; CHECK-NEXT: st.param.b32 [func_retval0+0], %r4; +; CHECK-NEXT: st.param.b32 [func_retval0], %r4; ; CHECK-NEXT: ret; %call = call i32 @llvm.nvvm.idp2a.s.s(i32 %a, i32 %b, i1 0, i32 %c) ret i32 %call @@ -167,7 +167,7 @@ define i32 @test_dp2a_hi_u32_u32(i32 %a, i32 %b, i32 %c) { ; CHECK-NEXT: ld.param.u32 %r2, [test_dp2a_hi_u32_u32_param_1]; ; CHECK-NEXT: ld.param.u32 %r3, [test_dp2a_hi_u32_u32_param_2]; ; CHECK-NEXT: dp2a.hi.u32.u32 %r4, %r1, %r2, %r3; -; CHECK-NEXT: st.param.b32 [func_retval0+0], %r4; +; CHECK-NEXT: st.param.b32 [func_retval0], %r4; ; CHECK-NEXT: ret; %call = call i32 @llvm.nvvm.idp2a.u.u(i32 %a, i32 %b, i1 1, i32 %c) ret i32 %call @@ -183,7 +183,7 @@ define i32 @test_dp2a_hi_u32_s32(i32 %a, i32 %b, i32 %c) { ; CHECK-NEXT: ld.param.u32 %r2, [test_dp2a_hi_u32_s32_param_1]; ; CHECK-NEXT: ld.param.u32 %r3, [test_dp2a_hi_u32_s32_param_2]; ; CHECK-NEXT: dp2a.hi.u32.s32 %r4, %r1, %r2, %r3; -; CHECK-NEXT: st.param.b32 [func_retval0+0], %r4; +; CHECK-NEXT: st.param.b32 [func_retval0], %r4; ; CHECK-NEXT: ret; %call = call i32 @llvm.nvvm.idp2a.u.s(i32 %a, i32 %b, i1 1, i32 %c) ret i32 %call @@ -199,7 +199,7 @@ define i32 @test_dp2a_hi_s32_u32(i32 %a, i32 %b, i32 %c) { ; CHECK-NEXT: ld.param.u32 %r2, [test_dp2a_hi_s32_u32_param_1]; ; CHECK-NEXT: ld.param.u32 %r3, [test_dp2a_hi_s32_u32_param_2]; ; CHECK-NEXT: dp2a.hi.s32.u32 %r4, %r1, %r2, %r3; -; CHECK-NEXT: st.param.b32 [func_retval0+0], %r4; +; CHECK-NEXT: st.param.b32 [func_retval0], %r4; ; CHECK-NEXT: ret; %call = call i32 @llvm.nvvm.idp2a.s.u(i32 %a, i32 %b, i1 1, i32 %c) ret i32 %call @@ -215,7 +215,7 @@ define i32 @test_dp2a_hi_s32_s32(i32 %a, i32 %b, i32 %c) { ; CHECK-NEXT: ld.param.u32 %r2, [test_dp2a_hi_s32_s32_param_1]; ; CHECK-NEXT: ld.param.u32 %r3, [test_dp2a_hi_s32_s32_param_2]; ; CHECK-NEXT: dp2a.hi.s32.s32 %r4, %r1, %r2, %r3; -; CHECK-NEXT: st.param.b32 [func_retval0+0], %r4; +; CHECK-NEXT: st.param.b32 [func_retval0], %r4; ; CHECK-NEXT: ret; %call = call i32 @llvm.nvvm.idp2a.s.s(i32 %a, i32 %b, i1 1, i32 %c) ret i32 %call diff --git a/llvm/test/CodeGen/NVPTX/dynamic_stackalloc.ll b/llvm/test/CodeGen/NVPTX/dynamic_stackalloc.ll index ce81957f2a3934..44f39df0249008 100644 --- a/llvm/test/CodeGen/NVPTX/dynamic_stackalloc.ll +++ b/llvm/test/CodeGen/NVPTX/dynamic_stackalloc.ll @@ -18,7 +18,7 @@ ; CHECK-32-NEXT: cvta.local.u32 %r[[ALLOCA]], %r[[ALLOCA]]; ; CHECK-32-NEXT: { // callseq 0, 0 ; CHECK-32-NEXT: .param .b32 param0; -; CHECK-32-NEXT: st.param.b32 [param0+0], %r[[ALLOCA]]; +; CHECK-32-NEXT: st.param.b32 [param0], %r[[ALLOCA]]; ; CHECK-64: ld.param.u64 %rd[[SIZE:[0-9]]], [test_dynamic_stackalloc_param_0]; ; CHECK-64-NEXT: add.s64 %rd[[SIZE2:[0-9]]], %rd[[SIZE]], 7; @@ -27,7 +27,7 
@@ ; CHECK-64-NEXT: cvta.local.u64 %rd[[ALLOCA]], %rd[[ALLOCA]]; ; CHECK-64-NEXT: { // callseq 0, 0 ; CHECK-64-NEXT: .param .b64 param0; -; CHECK-64-NEXT: st.param.b64 [param0+0], %rd[[ALLOCA]]; +; CHECK-64-NEXT: st.param.b64 [param0], %rd[[ALLOCA]]; ; CHECK-NEXT: .param .b32 retval0; ; CHECK-NEXT: call.uni (retval0), diff --git a/llvm/test/CodeGen/NVPTX/elect.ll b/llvm/test/CodeGen/NVPTX/elect.ll index 358dfef9185238..71e1111562f26f 100644 --- a/llvm/test/CodeGen/NVPTX/elect.ll +++ b/llvm/test/CodeGen/NVPTX/elect.ll @@ -16,7 +16,7 @@ define {i32, i1} @elect_sync(i32 %mask) { ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.u32 %r1, [elect_sync_param_0]; ; CHECK-NEXT: elect.sync %r2|%p1, %r1; -; CHECK-NEXT: st.param.b32 [func_retval0+0], %r2; +; CHECK-NEXT: st.param.b32 [func_retval0], %r2; ; CHECK-NEXT: selp.u16 %rs1, -1, 0, %p1; ; CHECK-NEXT: st.param.b8 [func_retval0+4], %rs1; ; CHECK-NEXT: ret; @@ -33,7 +33,7 @@ define {i32, i1} @elect_sync_imm() { ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: elect.sync %r1|%p1, -1; -; CHECK-NEXT: st.param.b32 [func_retval0+0], %r1; +; CHECK-NEXT: st.param.b32 [func_retval0], %r1; ; CHECK-NEXT: selp.u16 %rs1, -1, 0, %p1; ; CHECK-NEXT: st.param.b8 [func_retval0+4], %rs1; ; CHECK-NEXT: ret; @@ -54,7 +54,7 @@ define {i32, i1} @elect_sync_twice(i32 %mask) { ; CHECK-NEXT: ld.param.u32 %r1, [elect_sync_twice_param_0]; ; CHECK-NEXT: elect.sync %r2|%p1, %r1; ; CHECK-NEXT: elect.sync %r3|%p2, %r1; -; CHECK-NEXT: st.param.b32 [func_retval0+0], %r2; +; CHECK-NEXT: st.param.b32 [func_retval0], %r2; ; CHECK-NEXT: selp.u16 %rs1, -1, 0, %p1; ; CHECK-NEXT: st.param.b8 [func_retval0+4], %rs1; ; CHECK-NEXT: ret; diff --git a/llvm/test/CodeGen/NVPTX/extractelement.ll b/llvm/test/CodeGen/NVPTX/extractelement.ll index 367c20749a9f36..9b2d514f2a1cb1 100644 --- a/llvm/test/CodeGen/NVPTX/extractelement.ll +++ b/llvm/test/CodeGen/NVPTX/extractelement.ll @@ -16,7 +16,7 @@ define i16 @test_v2i8(i16 %a) { ; CHECK-NEXT: shr.s16 %rs3, %rs1, 8; ; CHECK-NEXT: add.s16 %rs4, %rs2, %rs3; ; CHECK-NEXT: cvt.u32.u16 %r1, %rs4; -; CHECK-NEXT: st.param.b32 [func_retval0+0], %r1; +; CHECK-NEXT: st.param.b32 [func_retval0], %r1; ; CHECK-NEXT: ret; %v = bitcast i16 %a to <2 x i8> %r0 = extractelement <2 x i8> %v, i64 0 @@ -42,7 +42,7 @@ define i1 @test_v2i8_load(ptr %a) { ; CHECK-NEXT: and.b16 %rs6, %rs5, 255; ; CHECK-NEXT: setp.eq.s16 %p1, %rs6, 0; ; CHECK-NEXT: selp.u32 %r1, 1, 0, %p1; -; CHECK-NEXT: st.param.b32 [func_retval0+0], %r1; +; CHECK-NEXT: st.param.b32 [func_retval0], %r1; ; CHECK-NEXT: ret; %v = load <2 x i8>, ptr %a, align 4 %r0 = extractelement <2 x i8> %v, i64 0 @@ -72,7 +72,7 @@ define i16 @test_v4i8(i32 %a) { ; CHECK-NEXT: add.s16 %rs6, %rs3, %rs4; ; CHECK-NEXT: add.s16 %rs7, %rs5, %rs6; ; CHECK-NEXT: cvt.u32.u16 %r6, %rs7; -; CHECK-NEXT: st.param.b32 [func_retval0+0], %r6; +; CHECK-NEXT: st.param.b32 [func_retval0], %r6; ; CHECK-NEXT: ret; %v = bitcast i32 %a to <4 x i8> %r0 = extractelement <4 x i8> %v, i64 0 @@ -103,7 +103,7 @@ define i32 @test_v4i8_s32(i32 %a) { ; CHECK-NEXT: add.s32 %r6, %r2, %r3; ; CHECK-NEXT: add.s32 %r7, %r4, %r5; ; CHECK-NEXT: add.s32 %r8, %r6, %r7; -; CHECK-NEXT: st.param.b32 [func_retval0+0], %r8; +; CHECK-NEXT: st.param.b32 [func_retval0], %r8; ; CHECK-NEXT: ret; %v = bitcast i32 %a to <4 x i8> %r0 = extractelement <4 x i8> %v, i64 0 @@ -134,7 +134,7 @@ define i32 @test_v4i8_u32(i32 %a) { ; CHECK-NEXT: add.s32 %r6, %r2, %r3; ; CHECK-NEXT: add.s32 %r7, %r4, %r5; ; CHECK-NEXT: add.s32 %r8, %r6, %r7; -; CHECK-NEXT: st.param.b32 
[func_retval0+0], %r8; +; CHECK-NEXT: st.param.b32 [func_retval0], %r8; ; CHECK-NEXT: ret; %v = bitcast i32 %a to <4 x i8> %r0 = extractelement <4 x i8> %v, i64 0 @@ -188,7 +188,7 @@ define i16 @test_v8i8(i64 %a) { ; CHECK-NEXT: add.s16 %rs14, %rs11, %rs12; ; CHECK-NEXT: add.s16 %rs15, %rs13, %rs14; ; CHECK-NEXT: cvt.u32.u16 %r13, %rs15; -; CHECK-NEXT: st.param.b32 [func_retval0+0], %r13; +; CHECK-NEXT: st.param.b32 [func_retval0], %r13; ; CHECK-NEXT: ret; %v = bitcast i64 %a to <8 x i8> %r0 = extractelement <8 x i8> %v, i64 0 diff --git a/llvm/test/CodeGen/NVPTX/f16-instructions.ll b/llvm/test/CodeGen/NVPTX/f16-instructions.ll index 14e02a49f6e5e4..f78cfc31726217 100644 --- a/llvm/test/CodeGen/NVPTX/f16-instructions.ll +++ b/llvm/test/CodeGen/NVPTX/f16-instructions.ll @@ -44,7 +44,7 @@ target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" ; CHECK-LABEL: test_ret_const( ; CHECK: mov.b16 [[R:%rs[0-9]+]], 0x3C00; -; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]]; +; CHECK-NEXT: st.param.b16 [func_retval0], [[R]]; ; CHECK-NEXT: ret; define half @test_ret_const() #0 { ret half 1.0 @@ -59,7 +59,7 @@ define half @test_ret_const() #0 { ; CHECK-NOF16-DAG: cvt.f32.f16 [[B32:%f[0-9]+]], [[B]] ; CHECK-NOF16-NEXT: add.rn.f32 [[R32:%f[0-9]+]], [[A32]], [[B32]]; ; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%rs[0-9]+]], [[R32]] -; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]]; +; CHECK-NEXT: st.param.b16 [func_retval0], [[R]]; ; CHECK-NEXT: ret; define half @test_fadd(half %a, half %b) #0 { %r = fadd half %a, %b @@ -75,7 +75,7 @@ define half @test_fadd(half %a, half %b) #0 { ; CHECK-NOF16-DAG: cvt.f32.f16 [[B32:%f[0-9]+]], [[B]] ; CHECK-NOF16-NEXT: add.rn.f32 [[R32:%f[0-9]+]], [[A32]], [[B32]]; ; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%rs[0-9]+]], [[R32]] -; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]]; +; CHECK-NEXT: st.param.b16 [func_retval0], [[R]]; ; CHECK-NEXT: ret; define <1 x half> @test_fadd_v1f16(<1 x half> %a, <1 x half> %b) #0 { %r = fadd <1 x half> %a, %b @@ -92,7 +92,7 @@ define <1 x half> @test_fadd_v1f16(<1 x half> %a, <1 x half> %b) #0 { ; CHECK-NOF16-DAG: cvt.f32.f16 [[B32:%f[0-9]+]], [[B]] ; CHECK-NOF16-NEXT: add.rn.f32 [[R32:%f[0-9]+]], [[B32]], 0f3F800000; ; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%rs[0-9]+]], [[R32]] -; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]]; +; CHECK-NEXT: st.param.b16 [func_retval0], [[R]]; ; CHECK-NEXT: ret; define half @test_fadd_imm_0(half %b) #0 { %r = fadd half 1.0, %b @@ -108,7 +108,7 @@ define half @test_fadd_imm_0(half %b) #0 { ; CHECK-NOF16-DAG: cvt.f32.f16 [[B32:%f[0-9]+]], [[B]] ; CHECK-NOF16-NEXT: add.rn.f32 [[R32:%f[0-9]+]], [[B32]], 0f3F800000; ; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%rs[0-9]+]], [[R32]] -; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]]; +; CHECK-NEXT: st.param.b16 [func_retval0], [[R]]; ; CHECK-NEXT: ret; define half @test_fadd_imm_1(half %a) #0 { %r = fadd half %a, 1.0 @@ -124,7 +124,7 @@ define half @test_fadd_imm_1(half %a) #0 { ; CHECK-NOF16-DAG: cvt.f32.f16 [[B32:%f[0-9]+]], [[B]] ; CHECK-NOF16-NEXT: sub.rn.f32 [[R32:%f[0-9]+]], [[A32]], [[B32]]; ; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%rs[0-9]+]], [[R32]] -; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]]; +; CHECK-NEXT: st.param.b16 [func_retval0], [[R]]; ; CHECK-NEXT: ret; define half @test_fsub(half %a, half %b) #0 { %r = fsub half %a, %b @@ -141,7 +141,7 @@ define half @test_fsub(half %a, half %b) #0 { ; CHECK-NOF16-DAG: mov.f32 [[Z:%f[0-9]+]], 0f00000000; ; CHECK-NOF16-NEXT: sub.rn.f32 [[R32:%f[0-9]+]], [[Z]], [[A32]]; ; CHECK-NOF16-NEXT: cvt.rn.f16.f32 
[[R:%rs[0-9]+]], [[R32]] -; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]]; +; CHECK-NEXT: st.param.b16 [func_retval0], [[R]]; ; CHECK-NEXT: ret; define half @test_old_fneg(half %a) #0 { %r = fsub half 0.0, %a @@ -153,7 +153,7 @@ define half @test_old_fneg(half %a) #0 { ; CHECK-F16-NOFTZ-NEXT: neg.f16 [[R:%rs[0-9]+]], [[A]]; ; CHECK-F16-FTZ-NEXT: neg.ftz.f16 [[R:%rs[0-9]+]], [[A]]; ; CHECK-NOF16-NEXT: xor.b16 [[R:%rs[0-9]+]], [[A]], -32768; -; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]]; +; CHECK-NEXT: st.param.b16 [func_retval0], [[R]]; ; CHECK-NEXT: ret; define half @test_fneg(half %a) #0 { %r = fneg half %a @@ -169,7 +169,7 @@ define half @test_fneg(half %a) #0 { ; CHECK-NOF16-DAG: cvt.f32.f16 [[B32:%f[0-9]+]], [[B]] ; CHECK-NOF16-NEXT: mul.rn.f32 [[R32:%f[0-9]+]], [[A32]], [[B32]]; ; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%rs[0-9]+]], [[R32]] -; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]]; +; CHECK-NEXT: st.param.b16 [func_retval0], [[R]]; ; CHECK-NEXT: ret; define half @test_fmul(half %a, half %b) #0 { %r = fmul half %a, %b @@ -186,7 +186,7 @@ define half @test_fmul(half %a, half %b) #0 { ; CHECK-F16-FTZ-DAG: cvt.ftz.f32.f16 [[F1:%f[0-9]+]], [[B]]; ; CHECK-F16-FTZ-NEXT: div.rn.ftz.f32 [[FR:%f[0-9]+]], [[F0]], [[F1]]; ; CHECK-NEXT: cvt.rn.f16.f32 [[R:%rs[0-9]+]], [[FR]]; -; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]]; +; CHECK-NEXT: st.param.b16 [func_retval0], [[R]]; ; CHECK-NEXT: ret; define half @test_fdiv(half %a, half %b) #0 { %r = fdiv half %a, %b @@ -211,7 +211,7 @@ define half @test_fdiv(half %a, half %b) #0 { ; CHECK-NEXT: testp.infinite.f32 [[ISBINF:%p[0-9]+]], [[FB]]; ; CHECK-NEXT: selp.f32 [[RESULT:%f[0-9]+]], [[FA]], [[RF]], [[ISBINF]]; ; CHECK-NEXT: cvt.rn.f16.f32 [[R:%rs[0-9]+]], [[RESULT]]; -; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]]; +; CHECK-NEXT: st.param.b16 [func_retval0], [[R]]; ; CHECK-NEXT: ret; define half @test_frem(half %a, half %b) #0 { %r = frem half %a, %b @@ -231,7 +231,7 @@ define void @test_store(half %a, ptr %b) #0 { ; CHECK-LABEL: test_load( ; CHECK: ld.param.u64 %[[PTR:rd[0-9]+]], [test_load_param_0]; ; CHECK-NEXT: ld.b16 [[R:%rs[0-9]+]], [%[[PTR]]]; -; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]]; +; CHECK-NEXT: st.param.b16 [func_retval0], [[R]]; ; CHECK-NEXT: ret; define half @test_load(ptr %a) #0 { %r = load half, ptr %a @@ -260,8 +260,8 @@ declare half @test_callee(half %a, half %b) #0 ; CHECK: { ; CHECK-DAG: .param .align 2 .b8 param0[2]; ; CHECK-DAG: .param .align 2 .b8 param1[2]; -; CHECK-DAG: st.param.b16 [param0+0], [[A]]; -; CHECK-DAG: st.param.b16 [param1+0], [[B]]; +; CHECK-DAG: st.param.b16 [param0], [[A]]; +; CHECK-DAG: st.param.b16 [param1], [[B]]; ; CHECK-DAG: .param .align 2 .b8 retval0[2]; ; CHECK: call.uni (retval0), ; CHECK-NEXT: test_callee, @@ -269,9 +269,9 @@ declare half @test_callee(half %a, half %b) #0 ; CHECK-NEXT: param0, ; CHECK-NEXT: param1 ; CHECK-NEXT: ); -; CHECK-NEXT: ld.param.b16 [[R:%rs[0-9]+]], [retval0+0]; +; CHECK-NEXT: ld.param.b16 [[R:%rs[0-9]+]], [retval0]; ; CHECK-NEXT: } -; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]]; +; CHECK-NEXT: st.param.b16 [func_retval0], [[R]]; ; CHECK-NEXT: ret; define half @test_call(half %a, half %b) #0 { %r = call half @test_callee(half %a, half %b) @@ -284,8 +284,8 @@ define half @test_call(half %a, half %b) #0 { ; CHECK: { ; CHECK-DAG: .param .align 2 .b8 param0[2]; ; CHECK-DAG: .param .align 2 .b8 param1[2]; -; CHECK-DAG: st.param.b16 [param0+0], [[B]]; -; CHECK-DAG: st.param.b16 [param1+0], [[A]]; +; CHECK-DAG: st.param.b16 [param0], [[B]]; +; 
CHECK-DAG: st.param.b16 [param1], [[A]]; ; CHECK-DAG: .param .align 2 .b8 retval0[2]; ; CHECK: call.uni (retval0), ; CHECK-NEXT: test_callee, @@ -293,9 +293,9 @@ define half @test_call(half %a, half %b) #0 { ; CHECK-NEXT: param0, ; CHECK-NEXT: param1 ; CHECK-NEXT: ); -; CHECK-NEXT: ld.param.b16 [[R:%rs[0-9]+]], [retval0+0]; +; CHECK-NEXT: ld.param.b16 [[R:%rs[0-9]+]], [retval0]; ; CHECK-NEXT: } -; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]]; +; CHECK-NEXT: st.param.b16 [func_retval0], [[R]]; ; CHECK-NEXT: ret; define half @test_call_flipped(half %a, half %b) #0 { %r = call half @test_callee(half %b, half %a) @@ -308,8 +308,8 @@ define half @test_call_flipped(half %a, half %b) #0 { ; CHECK: { ; CHECK-DAG: .param .align 2 .b8 param0[2]; ; CHECK-DAG: .param .align 2 .b8 param1[2]; -; CHECK-DAG: st.param.b16 [param0+0], [[B]]; -; CHECK-DAG: st.param.b16 [param1+0], [[A]]; +; CHECK-DAG: st.param.b16 [param0], [[B]]; +; CHECK-DAG: st.param.b16 [param1], [[A]]; ; CHECK-DAG: .param .align 2 .b8 retval0[2]; ; CHECK: call.uni (retval0), ; CHECK-NEXT: test_callee, @@ -317,9 +317,9 @@ define half @test_call_flipped(half %a, half %b) #0 { ; CHECK-NEXT: param0, ; CHECK-NEXT: param1 ; CHECK-NEXT: ); -; CHECK-NEXT: ld.param.b16 [[R:%rs[0-9]+]], [retval0+0]; +; CHECK-NEXT: ld.param.b16 [[R:%rs[0-9]+]], [retval0]; ; CHECK-NEXT: } -; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]]; +; CHECK-NEXT: st.param.b16 [func_retval0], [[R]]; ; CHECK-NEXT: ret; define half @test_tailcall_flipped(half %a, half %b) #0 { %r = tail call half @test_callee(half %b, half %a) @@ -331,7 +331,7 @@ define half @test_tailcall_flipped(half %a, half %b) #0 { ; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_select_param_1]; ; CHECK-DAG: setp.eq.b16 [[PRED:%p[0-9]+]], %rs{{.*}}, 1; ; CHECK-NEXT: selp.b16 [[R:%rs[0-9]+]], [[A]], [[B]], [[PRED]]; -; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]]; +; CHECK-NEXT: st.param.b16 [func_retval0], [[R]]; ; CHECK-NEXT: ret; define half @test_select(half %a, half %b, i1 zeroext %c) #0 { %r = select i1 %c, half %a, half %b @@ -348,7 +348,7 @@ define half @test_select(half %a, half %b, i1 zeroext %c) #0 { ; CHECK-NOF16-DAG: cvt.f32.f16 [[CF:%f[0-9]+]], [[C]]; ; CHECK-NOF16: setp.neu.f32 [[PRED:%p[0-9]+]], [[CF]], [[DF]] ; CHECK: selp.b16 [[R:%rs[0-9]+]], [[A]], [[B]], [[PRED]]; -; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]]; +; CHECK-NEXT: st.param.b16 [func_retval0], [[R]]; ; CHECK-NEXT: ret; define half @test_select_cc(half %a, half %b, half %c, half %d) #0 { %cc = fcmp une half %c, %d @@ -367,7 +367,7 @@ define half @test_select_cc(half %a, half %b, half %c, half %d) #0 { ; CHECK-NOF16-DAG: cvt.f32.f16 [[CF:%f[0-9]+]], [[C]]; ; CHECK-NOF16: setp.neu.f32 [[PRED:%p[0-9]+]], [[CF]], [[DF]] ; CHECK-NEXT: selp.f32 [[R:%f[0-9]+]], [[A]], [[B]], [[PRED]]; -; CHECK-NEXT: st.param.f32 [func_retval0+0], [[R]]; +; CHECK-NEXT: st.param.f32 [func_retval0], [[R]]; ; CHECK-NEXT: ret; define float @test_select_cc_f32_f16(float %a, float %b, half %c, half %d) #0 { %cc = fcmp une half %c, %d @@ -383,7 +383,7 @@ define float @test_select_cc_f32_f16(float %a, float %b, half %c, half %d) #0 { ; CHECK-F16-FTZ-DAG: setp.neu.ftz.f32 [[PRED:%p[0-9]+]], [[C]], [[D]] ; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_select_cc_f16_f32_param_1]; ; CHECK-NEXT: selp.b16 [[R:%rs[0-9]+]], [[A]], [[B]], [[PRED]]; -; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]]; +; CHECK-NEXT: st.param.b16 [func_retval0], [[R]]; ; CHECK-NEXT: ret; define half @test_select_cc_f16_f32(half %a, half %b, float %c, float %d) #0 { %cc = 
fcmp une float %c, %d @@ -400,7 +400,7 @@ define half @test_select_cc_f16_f32(half %a, half %b, float %c, float %d) #0 { ; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]]; ; CHECK-NOF16: setp.neu.f32 [[PRED:%p[0-9]+]], [[AF]], [[BF]] ; CHECK-NEXT: selp.u32 [[R:%r[0-9]+]], 1, 0, [[PRED]]; -; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]]; +; CHECK-NEXT: st.param.b32 [func_retval0], [[R]]; ; CHECK-NEXT: ret; define i1 @test_fcmp_une(half %a, half %b) #0 { %r = fcmp une half %a, %b @@ -416,7 +416,7 @@ define i1 @test_fcmp_une(half %a, half %b) #0 { ; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]]; ; CHECK-NOF16: setp.equ.f32 [[PRED:%p[0-9]+]], [[AF]], [[BF]] ; CHECK-NEXT: selp.u32 [[R:%r[0-9]+]], 1, 0, [[PRED]]; -; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]]; +; CHECK-NEXT: st.param.b32 [func_retval0], [[R]]; ; CHECK-NEXT: ret; define i1 @test_fcmp_ueq(half %a, half %b) #0 { %r = fcmp ueq half %a, %b @@ -432,7 +432,7 @@ define i1 @test_fcmp_ueq(half %a, half %b) #0 { ; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]]; ; CHECK-NOF16: setp.gtu.f32 [[PRED:%p[0-9]+]], [[AF]], [[BF]] ; CHECK-NEXT: selp.u32 [[R:%r[0-9]+]], 1, 0, [[PRED]]; -; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]]; +; CHECK-NEXT: st.param.b32 [func_retval0], [[R]]; ; CHECK-NEXT: ret; define i1 @test_fcmp_ugt(half %a, half %b) #0 { %r = fcmp ugt half %a, %b @@ -448,7 +448,7 @@ define i1 @test_fcmp_ugt(half %a, half %b) #0 { ; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]]; ; CHECK-NOF16: setp.geu.f32 [[PRED:%p[0-9]+]], [[AF]], [[BF]] ; CHECK-NEXT: selp.u32 [[R:%r[0-9]+]], 1, 0, [[PRED]]; -; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]]; +; CHECK-NEXT: st.param.b32 [func_retval0], [[R]]; ; CHECK-NEXT: ret; define i1 @test_fcmp_uge(half %a, half %b) #0 { %r = fcmp uge half %a, %b @@ -464,7 +464,7 @@ define i1 @test_fcmp_uge(half %a, half %b) #0 { ; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]]; ; CHECK-NOF16: setp.ltu.f32 [[PRED:%p[0-9]+]], [[AF]], [[BF]] ; CHECK-NEXT: selp.u32 [[R:%r[0-9]+]], 1, 0, [[PRED]]; -; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]]; +; CHECK-NEXT: st.param.b32 [func_retval0], [[R]]; ; CHECK-NEXT: ret; define i1 @test_fcmp_ult(half %a, half %b) #0 { %r = fcmp ult half %a, %b @@ -480,7 +480,7 @@ define i1 @test_fcmp_ult(half %a, half %b) #0 { ; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]]; ; CHECK-NOF16: setp.leu.f32 [[PRED:%p[0-9]+]], [[AF]], [[BF]] ; CHECK-NEXT: selp.u32 [[R:%r[0-9]+]], 1, 0, [[PRED]]; -; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]]; +; CHECK-NEXT: st.param.b32 [func_retval0], [[R]]; ; CHECK-NEXT: ret; define i1 @test_fcmp_ule(half %a, half %b) #0 { %r = fcmp ule half %a, %b @@ -497,7 +497,7 @@ define i1 @test_fcmp_ule(half %a, half %b) #0 { ; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]]; ; CHECK-NOF16: setp.nan.f32 [[PRED:%p[0-9]+]], [[AF]], [[BF]] ; CHECK-NEXT: selp.u32 [[R:%r[0-9]+]], 1, 0, [[PRED]]; -; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]]; +; CHECK-NEXT: st.param.b32 [func_retval0], [[R]]; ; CHECK-NEXT: ret; define i1 @test_fcmp_uno(half %a, half %b) #0 { %r = fcmp uno half %a, %b @@ -513,7 +513,7 @@ define i1 @test_fcmp_uno(half %a, half %b) #0 { ; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]]; ; CHECK-NOF16: setp.ne.f32 [[PRED:%p[0-9]+]], [[AF]], [[BF]] ; CHECK-NEXT: selp.u32 [[R:%r[0-9]+]], 1, 0, [[PRED]]; -; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]]; +; CHECK-NEXT: st.param.b32 [func_retval0], [[R]]; ; CHECK-NEXT: ret; define i1 @test_fcmp_one(half %a, half %b) #0 { %r = fcmp one half %a, %b @@ 
-529,7 +529,7 @@ define i1 @test_fcmp_one(half %a, half %b) #0 { ; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]]; ; CHECK-NOF16: setp.eq.f32 [[PRED:%p[0-9]+]], [[AF]], [[BF]] ; CHECK-NEXT: selp.u32 [[R:%r[0-9]+]], 1, 0, [[PRED]]; -; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]]; +; CHECK-NEXT: st.param.b32 [func_retval0], [[R]]; ; CHECK-NEXT: ret; define i1 @test_fcmp_oeq(half %a, half %b) #0 { %r = fcmp oeq half %a, %b @@ -545,7 +545,7 @@ define i1 @test_fcmp_oeq(half %a, half %b) #0 { ; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]]; ; CHECK-NOF16: setp.gt.f32 [[PRED:%p[0-9]+]], [[AF]], [[BF]] ; CHECK-NEXT: selp.u32 [[R:%r[0-9]+]], 1, 0, [[PRED]]; -; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]]; +; CHECK-NEXT: st.param.b32 [func_retval0], [[R]]; ; CHECK-NEXT: ret; define i1 @test_fcmp_ogt(half %a, half %b) #0 { %r = fcmp ogt half %a, %b @@ -561,7 +561,7 @@ define i1 @test_fcmp_ogt(half %a, half %b) #0 { ; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]]; ; CHECK-NOF16: setp.ge.f32 [[PRED:%p[0-9]+]], [[AF]], [[BF]] ; CHECK-NEXT: selp.u32 [[R:%r[0-9]+]], 1, 0, [[PRED]]; -; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]]; +; CHECK-NEXT: st.param.b32 [func_retval0], [[R]]; ; CHECK-NEXT: ret; define i1 @test_fcmp_oge(half %a, half %b) #0 { %r = fcmp oge half %a, %b @@ -577,7 +577,7 @@ define i1 @test_fcmp_oge(half %a, half %b) #0 { ; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]]; ; CHECK-NOF16: setp.lt.f32 [[PRED:%p[0-9]+]], [[AF]], [[BF]] ; CHECK-NEXT: selp.u32 [[R:%r[0-9]+]], 1, 0, [[PRED]]; -; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]]; +; CHECK-NEXT: st.param.b32 [func_retval0], [[R]]; ; CHECK-NEXT: ret; define i1 @test_fcmp_olt(half %a, half %b) #0 { %r = fcmp olt half %a, %b @@ -593,7 +593,7 @@ define i1 @test_fcmp_olt(half %a, half %b) #0 { ; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]]; ; CHECK-NOF16: setp.le.f32 [[PRED:%p[0-9]+]], [[AF]], [[BF]] ; CHECK-NEXT: selp.u32 [[R:%r[0-9]+]], 1, 0, [[PRED]]; -; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]]; +; CHECK-NEXT: st.param.b32 [func_retval0], [[R]]; ; CHECK-NEXT: ret; define i1 @test_fcmp_ole(half %a, half %b) #0 { %r = fcmp ole half %a, %b @@ -609,7 +609,7 @@ define i1 @test_fcmp_ole(half %a, half %b) #0 { ; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]]; ; CHECK-NOF16: setp.num.f32 [[PRED:%p[0-9]+]], [[AF]], [[BF]] ; CHECK-NEXT: selp.u32 [[R:%r[0-9]+]], 1, 0, [[PRED]]; -; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]]; +; CHECK-NEXT: st.param.b32 [func_retval0], [[R]]; ; CHECK-NEXT: ret; define i1 @test_fcmp_ord(half %a, half %b) #0 { %r = fcmp ord half %a, %b @@ -649,13 +649,13 @@ else: ; CHECK: mov.u16 [[R:%rs[0-9]+]], [[AB:%rs[0-9]+]]; ; CHECK: ld.b16 [[AB:%rs[0-9]+]], [%[[P1]]]; ; CHECK: { -; CHECK: st.param.b64 [param0+0], %[[P1]]; +; CHECK: st.param.b64 [param0], %[[P1]]; ; CHECK: call.uni (retval0), ; CHECK-NEXT: test_dummy ; CHECK: } ; CHECK: setp.eq.b32 [[PRED:%p[0-9]+]], %r{{[0-9]+}}, 1; ; CHECK: @[[PRED]] bra [[LOOP]]; -; CHECK: st.param.b16 [func_retval0+0], [[R]]; +; CHECK: st.param.b16 [func_retval0], [[R]]; ; CHECK: ret; define half @test_phi(ptr %p1) #0 { entry: @@ -674,7 +674,7 @@ declare i1 @test_dummy(ptr %p1) #0 ; CHECK-LABEL: test_fptosi_i32( ; CHECK: ld.param.b16 [[A:%rs[0-9]+]], [test_fptosi_i32_param_0]; ; CHECK: cvt.rzi.s32.f16 [[R:%r[0-9]+]], [[A]]; -; CHECK: st.param.b32 [func_retval0+0], [[R]]; +; CHECK: st.param.b32 [func_retval0], [[R]]; ; CHECK: ret; define i32 @test_fptosi_i32(half %a) #0 { %r = fptosi half %a to i32 @@ -684,7 +684,7 @@ define i32 
@test_fptosi_i32(half %a) #0 { ; CHECK-LABEL: test_fptosi_i64( ; CHECK: ld.param.b16 [[A:%rs[0-9]+]], [test_fptosi_i64_param_0]; ; CHECK: cvt.rzi.s64.f16 [[R:%rd[0-9]+]], [[A]]; -; CHECK: st.param.b64 [func_retval0+0], [[R]]; +; CHECK: st.param.b64 [func_retval0], [[R]]; ; CHECK: ret; define i64 @test_fptosi_i64(half %a) #0 { %r = fptosi half %a to i64 @@ -694,7 +694,7 @@ define i64 @test_fptosi_i64(half %a) #0 { ; CHECK-LABEL: test_fptoui_i32( ; CHECK: ld.param.b16 [[A:%rs[0-9]+]], [test_fptoui_i32_param_0]; ; CHECK: cvt.rzi.u32.f16 [[R:%r[0-9]+]], [[A]]; -; CHECK: st.param.b32 [func_retval0+0], [[R]]; +; CHECK: st.param.b32 [func_retval0], [[R]]; ; CHECK: ret; define i32 @test_fptoui_i32(half %a) #0 { %r = fptoui half %a to i32 @@ -704,7 +704,7 @@ define i32 @test_fptoui_i32(half %a) #0 { ; CHECK-LABEL: test_fptoui_i64( ; CHECK: ld.param.b16 [[A:%rs[0-9]+]], [test_fptoui_i64_param_0]; ; CHECK: cvt.rzi.u64.f16 [[R:%rd[0-9]+]], [[A]]; -; CHECK: st.param.b64 [func_retval0+0], [[R]]; +; CHECK: st.param.b64 [func_retval0], [[R]]; ; CHECK: ret; define i64 @test_fptoui_i64(half %a) #0 { %r = fptoui half %a to i64 @@ -714,7 +714,7 @@ define i64 @test_fptoui_i64(half %a) #0 { ; CHECK-LABEL: test_uitofp_i32( ; CHECK: ld.param.u32 [[A:%r[0-9]+]], [test_uitofp_i32_param_0]; ; CHECK: cvt.rn.f16.u32 [[R:%rs[0-9]+]], [[A]]; -; CHECK: st.param.b16 [func_retval0+0], [[R]]; +; CHECK: st.param.b16 [func_retval0], [[R]]; ; CHECK: ret; define half @test_uitofp_i32(i32 %a) #0 { %r = uitofp i32 %a to half @@ -724,7 +724,7 @@ define half @test_uitofp_i32(i32 %a) #0 { ; CHECK-LABEL: test_uitofp_i64( ; CHECK: ld.param.u64 [[A:%rd[0-9]+]], [test_uitofp_i64_param_0]; ; CHECK: cvt.rn.f16.u64 [[R:%rs[0-9]+]], [[A]]; -; CHECK: st.param.b16 [func_retval0+0], [[R]]; +; CHECK: st.param.b16 [func_retval0], [[R]]; ; CHECK: ret; define half @test_uitofp_i64(i64 %a) #0 { %r = uitofp i64 %a to half @@ -734,7 +734,7 @@ define half @test_uitofp_i64(i64 %a) #0 { ; CHECK-LABEL: test_sitofp_i32( ; CHECK: ld.param.u32 [[A:%r[0-9]+]], [test_sitofp_i32_param_0]; ; CHECK: cvt.rn.f16.s32 [[R:%rs[0-9]+]], [[A]]; -; CHECK: st.param.b16 [func_retval0+0], [[R]]; +; CHECK: st.param.b16 [func_retval0], [[R]]; ; CHECK: ret; define half @test_sitofp_i32(i32 %a) #0 { %r = sitofp i32 %a to half @@ -744,7 +744,7 @@ define half @test_sitofp_i32(i32 %a) #0 { ; CHECK-LABEL: test_sitofp_i64( ; CHECK: ld.param.u64 [[A:%rd[0-9]+]], [test_sitofp_i64_param_0]; ; CHECK: cvt.rn.f16.s64 [[R:%rs[0-9]+]], [[A]]; -; CHECK: st.param.b16 [func_retval0+0], [[R]]; +; CHECK: st.param.b16 [func_retval0], [[R]]; ; CHECK: ret; define half @test_sitofp_i64(i64 %a) #0 { %r = sitofp i64 %a to half @@ -761,7 +761,7 @@ define half @test_sitofp_i64(i64 %a) #0 { ; CHECK-NOF16-DAG: cvt.f32.f16 [[C32:%f[0-9]+]], [[C]] ; CHECK-NOF16-NEXT: add.rn.f32 [[R32:%f[0-9]+]], [[B32]], [[C32]]; ; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%rs[0-9]+]], [[R32]] -; CHECK: st.param.b16 [func_retval0+0], [[R]]; +; CHECK: st.param.b16 [func_retval0], [[R]]; ; CHECK: ret; define half @test_uitofp_i32_fadd(i32 %a, half %b) #0 { %c = uitofp i32 %a to half @@ -779,7 +779,7 @@ define half @test_uitofp_i32_fadd(i32 %a, half %b) #0 { ; XCHECK-NOF16-DAG: cvt.f32.f16 [[C32:%f[0-9]+]], [[C]] ; XCHECK-NOF16-NEXT: add.rn.f32 [[R32:%f[0-9]+]], [[B32]], [[C32]]; ; XCHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%rs[0-9]+]], [[R32]] -; CHECK: st.param.b16 [func_retval0+0], [[R]]; +; CHECK: st.param.b16 [func_retval0], [[R]]; ; CHECK: ret; define half @test_sitofp_i32_fadd(i32 %a, half %b) #0 { %c = sitofp i32 %a to half @@ 
-790,7 +790,7 @@ define half @test_sitofp_i32_fadd(i32 %a, half %b) #0 { ; CHECK-LABEL: test_fptrunc_float( ; CHECK: ld.param.f32 [[A:%f[0-9]+]], [test_fptrunc_float_param_0]; ; CHECK: cvt.rn.f16.f32 [[R:%rs[0-9]+]], [[A]]; -; CHECK: st.param.b16 [func_retval0+0], [[R]]; +; CHECK: st.param.b16 [func_retval0], [[R]]; ; CHECK: ret; define half @test_fptrunc_float(float %a) #0 { %r = fptrunc float %a to half @@ -800,7 +800,7 @@ define half @test_fptrunc_float(float %a) #0 { ; CHECK-LABEL: test_fptrunc_double( ; CHECK: ld.param.f64 [[A:%fd[0-9]+]], [test_fptrunc_double_param_0]; ; CHECK: cvt.rn.f16.f64 [[R:%rs[0-9]+]], [[A]]; -; CHECK: st.param.b16 [func_retval0+0], [[R]]; +; CHECK: st.param.b16 [func_retval0], [[R]]; ; CHECK: ret; define half @test_fptrunc_double(double %a) #0 { %r = fptrunc double %a to half @@ -811,7 +811,7 @@ define half @test_fptrunc_double(double %a) #0 { ; CHECK: ld.param.b16 [[A:%rs[0-9]+]], [test_fpext_float_param_0]; ; CHECK-NOFTZ: cvt.f32.f16 [[R:%f[0-9]+]], [[A]]; ; CHECK-F16-FTZ: cvt.ftz.f32.f16 [[R:%f[0-9]+]], [[A]]; -; CHECK: st.param.f32 [func_retval0+0], [[R]]; +; CHECK: st.param.f32 [func_retval0], [[R]]; ; CHECK: ret; define float @test_fpext_float(half %a) #0 { %r = fpext half %a to float @@ -821,7 +821,7 @@ define float @test_fpext_float(half %a) #0 { ; CHECK-LABEL: test_fpext_double( ; CHECK: ld.param.b16 [[A:%rs[0-9]+]], [test_fpext_double_param_0]; ; CHECK: cvt.f64.f16 [[R:%fd[0-9]+]], [[A]]; -; CHECK: st.param.f64 [func_retval0+0], [[R]]; +; CHECK: st.param.f64 [func_retval0], [[R]]; ; CHECK: ret; define double @test_fpext_double(half %a) #0 { %r = fpext half %a to double @@ -832,7 +832,7 @@ define double @test_fpext_double(half %a) #0 { ; CHECK-LABEL: test_bitcast_halftoi16( ; CHECK: ld.param.b16 [[AH:%rs[0-9]+]], [test_bitcast_halftoi16_param_0]; ; CHECK: cvt.u32.u16 [[R:%r[0-9]+]], [[AH]] -; CHECK: st.param.b32 [func_retval0+0], [[R]]; +; CHECK: st.param.b32 [func_retval0], [[R]]; ; CHECK: ret; define i16 @test_bitcast_halftoi16(half %a) #0 { %r = bitcast half %a to i16 @@ -841,7 +841,7 @@ define i16 @test_bitcast_halftoi16(half %a) #0 { ; CHECK-LABEL: test_bitcast_i16tohalf( ; CHECK: ld.param.u16 [[AS:%rs[0-9]+]], [test_bitcast_i16tohalf_param_0]; -; CHECK: st.param.b16 [func_retval0+0], [[AS]]; +; CHECK: st.param.b16 [func_retval0], [[AS]]; ; CHECK: ret; define half @test_bitcast_i16tohalf(i16 %a) #0 { %r = bitcast i16 %a to half @@ -880,7 +880,7 @@ declare half @llvm.fmuladd.f16(half %a, half %b, half %c) #0 ; CHECK-F16-FTZ: cvt.ftz.f32.f16 [[AF:%f[0-9]+]], [[A]]; ; CHECK-F16-FTZ: sqrt.rn.ftz.f32 [[RF:%f[0-9]+]], [[AF]]; ; CHECK: cvt.rn.f16.f32 [[R:%rs[0-9]+]], [[RF]]; -; CHECK: st.param.b16 [func_retval0+0], [[R]]; +; CHECK: st.param.b16 [func_retval0], [[R]]; ; CHECK: ret; define half @test_sqrt(half %a) #0 { %r = call half @llvm.sqrt.f16(half %a) @@ -900,7 +900,7 @@ define half @test_sqrt(half %a) #0 { ; CHECK-F16-FTZ: cvt.ftz.f32.f16 [[AF:%f[0-9]+]], [[A]]; ; CHECK: sin.approx.f32 [[RF:%f[0-9]+]], [[AF]]; ; CHECK: cvt.rn.f16.f32 [[R:%rs[0-9]+]], [[RF]]; -; CHECK: st.param.b16 [func_retval0+0], [[R]]; +; CHECK: st.param.b16 [func_retval0], [[R]]; ; CHECK: ret; define half @test_sin(half %a) #0 #1 { %r = call half @llvm.sin.f16(half %a) @@ -913,7 +913,7 @@ define half @test_sin(half %a) #0 #1 { ; CHECK-F16-FTZ: cvt.ftz.f32.f16 [[AF:%f[0-9]+]], [[A]]; ; CHECK: cos.approx.f32 [[RF:%f[0-9]+]], [[AF]]; ; CHECK: cvt.rn.f16.f32 [[R:%rs[0-9]+]], [[RF]]; -; CHECK: st.param.b16 [func_retval0+0], [[R]]; +; CHECK: st.param.b16 [func_retval0], [[R]]; ; 
CHECK: ret; define half @test_cos(half %a) #0 #1 { %r = call half @llvm.cos.f16(half %a) @@ -973,7 +973,7 @@ define half @test_cos(half %a) #0 #1 { ; CHECK-NOF16-DAG: cvt.f32.f16 [[C32:%f[0-9]+]], [[C]] ; CHECK-NOF16-NEXT: fma.rn.f32 [[R32:%f[0-9]+]], [[A32]], [[B32]], [[C32]]; ; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%rs[0-9]+]], [[R32]] -; CHECK: st.param.b16 [func_retval0+0], [[R]]; +; CHECK: st.param.b16 [func_retval0], [[R]]; ; CHECK: ret define half @test_fma(half %a, half %b, half %c) #0 { %r = call half @llvm.fma.f16(half %a, half %b, half %c) @@ -987,7 +987,7 @@ define half @test_fma(half %a, half %b, half %c) #0 { ; CHECK-F16-FTZ: cvt.ftz.f32.f16 [[AF:%f[0-9]+]], [[A]]; ; CHECK-F16-FTZ: abs.ftz.f32 [[RF:%f[0-9]+]], [[AF]]; ; CHECK: cvt.rn.f16.f32 [[R:%rs[0-9]+]], [[RF]]; -; CHECK: st.param.b16 [func_retval0+0], [[R]]; +; CHECK: st.param.b16 [func_retval0], [[R]]; ; CHECK: ret; define half @test_fabs(half %a) #0 { %r = call half @llvm.fabs.f16(half %a) @@ -1004,7 +1004,7 @@ define half @test_fabs(half %a) #0 { ; CHECK-F16-FTZ-DAG: cvt.ftz.f32.f16 [[BF:%f[0-9]+]], [[B]]; ; CHECK-F16-FTZ: min.ftz.f32 [[RF:%f[0-9]+]], [[AF]], [[BF]]; ; CHECK: cvt.rn.f16.f32 [[R:%rs[0-9]+]], [[RF]]; -; CHECK: st.param.b16 [func_retval0+0], [[R]]; +; CHECK: st.param.b16 [func_retval0], [[R]]; ; CHECK: ret; define half @test_minnum(half %a, half %b) #0 { %r = call half @llvm.minnum.f16(half %a, half %b) @@ -1021,7 +1021,7 @@ define half @test_minnum(half %a, half %b) #0 { ; CHECK-F16-FTZ-DAG: cvt.ftz.f32.f16 [[BF:%f[0-9]+]], [[B]]; ; CHECK-F16-FTZ: max.ftz.f32 [[RF:%f[0-9]+]], [[AF]], [[BF]]; ; CHECK: cvt.rn.f16.f32 [[R:%rs[0-9]+]], [[RF]]; -; CHECK: st.param.b16 [func_retval0+0], [[R]]; +; CHECK: st.param.b16 [func_retval0], [[R]]; ; CHECK: ret; define half @test_maxnum(half %a, half %b) #0 { %r = call half @llvm.maxnum.f16(half %a, half %b) @@ -1034,7 +1034,7 @@ define half @test_maxnum(half %a, half %b) #0 { ; CHECK-DAG: and.b16 [[AX:%rs[0-9]+]], [[AH]], 32767; ; CHECK-DAG: and.b16 [[BX:%rs[0-9]+]], [[BH]], -32768; ; CHECK: or.b16 [[RX:%rs[0-9]+]], [[AX]], [[BX]]; -; CHECK: st.param.b16 [func_retval0+0], [[RX]]; +; CHECK: st.param.b16 [func_retval0], [[RX]]; ; CHECK: ret; define half @test_copysign(half %a, half %b) #0 { %r = call half @llvm.copysign.f16(half %a, half %b) @@ -1049,7 +1049,7 @@ define half @test_copysign(half %a, half %b) #0 { ; CHECK-DAG: and.b32 [[BX0:%r[0-9]+]], [[B]], -2147483648; ; CHECK-DAG: mov.b32 {tmp, [[BX2:%rs[0-9]+]]}, [[BX0]]; ; CHECK: or.b16 [[RX:%rs[0-9]+]], [[AX]], [[BX2]]; -; CHECK: st.param.b16 [func_retval0+0], [[RX]]; +; CHECK: st.param.b16 [func_retval0], [[RX]]; ; CHECK: ret; define half @test_copysign_f32(half %a, float %b) #0 { %tb = fptrunc float %b to half @@ -1066,7 +1066,7 @@ define half @test_copysign_f32(half %a, float %b) #0 { ; CHECK-DAG: shr.u64 [[BX1:%rd[0-9]+]], [[BX0]], 48; ; CHECK-DAG: cvt.u16.u64 [[BX2:%rs[0-9]+]], [[BX1]]; ; CHECK: or.b16 [[RX:%rs[0-9]+]], [[AX]], [[BX2]]; -; CHECK: st.param.b16 [func_retval0+0], [[RX]]; +; CHECK: st.param.b16 [func_retval0], [[RX]]; ; CHECK: ret; define half @test_copysign_f64(half %a, double %b) #0 { %tb = fptrunc double %b to half @@ -1082,7 +1082,7 @@ define half @test_copysign_f64(half %a, double %b) #0 { ; CHECK: or.b16 [[RX:%rs[0-9]+]], [[AX]], [[BX]]; ; CHECK-NOFTZ: cvt.f32.f16 [[XR:%f[0-9]+]], [[RX]]; ; CHECK-F16-FTZ: cvt.ftz.f32.f16 [[XR:%f[0-9]+]], [[RX]]; -; CHECK: st.param.f32 [func_retval0+0], [[XR]]; +; CHECK: st.param.f32 [func_retval0], [[XR]]; ; CHECK: ret; define float 
@test_copysign_extended(half %a, half %b) #0 { %r = call half @llvm.copysign.f16(half %a, half %b) @@ -1093,7 +1093,7 @@ define float @test_copysign_extended(half %a, half %b) #0 { ; CHECK-LABEL: test_floor( ; CHECK: ld.param.b16 [[A:%rs[0-9]+]], [test_floor_param_0]; ; CHECK: cvt.rmi.f16.f16 [[R:%rs[0-9]+]], [[A]]; -; CHECK: st.param.b16 [func_retval0+0], [[R]]; +; CHECK: st.param.b16 [func_retval0], [[R]]; ; CHECK: ret; define half @test_floor(half %a) #0 { %r = call half @llvm.floor.f16(half %a) @@ -1103,7 +1103,7 @@ define half @test_floor(half %a) #0 { ; CHECK-LABEL: test_ceil( ; CHECK: ld.param.b16 [[A:%rs[0-9]+]], [test_ceil_param_0]; ; CHECK: cvt.rpi.f16.f16 [[R:%rs[0-9]+]], [[A]]; -; CHECK: st.param.b16 [func_retval0+0], [[R]]; +; CHECK: st.param.b16 [func_retval0], [[R]]; ; CHECK: ret; define half @test_ceil(half %a) #0 { %r = call half @llvm.ceil.f16(half %a) @@ -1113,7 +1113,7 @@ define half @test_ceil(half %a) #0 { ; CHECK-LABEL: test_trunc( ; CHECK: ld.param.b16 [[A:%rs[0-9]+]], [test_trunc_param_0]; ; CHECK: cvt.rzi.f16.f16 [[R:%rs[0-9]+]], [[A]]; -; CHECK: st.param.b16 [func_retval0+0], [[R]]; +; CHECK: st.param.b16 [func_retval0], [[R]]; ; CHECK: ret; define half @test_trunc(half %a) #0 { %r = call half @llvm.trunc.f16(half %a) @@ -1123,7 +1123,7 @@ define half @test_trunc(half %a) #0 { ; CHECK-LABEL: test_rint( ; CHECK: ld.param.b16 [[A:%rs[0-9]+]], [test_rint_param_0]; ; CHECK: cvt.rni.f16.f16 [[R:%rs[0-9]+]], [[A]]; -; CHECK: st.param.b16 [func_retval0+0], [[R]]; +; CHECK: st.param.b16 [func_retval0], [[R]]; ; CHECK: ret; define half @test_rint(half %a) #0 { %r = call half @llvm.rint.f16(half %a) @@ -1133,7 +1133,7 @@ define half @test_rint(half %a) #0 { ; CHECK-LABEL: test_nearbyint( ; CHECK: ld.param.b16 [[A:%rs[0-9]+]], [test_nearbyint_param_0]; ; CHECK: cvt.rni.f16.f16 [[R:%rs[0-9]+]], [[A]]; -; CHECK: st.param.b16 [func_retval0+0], [[R]]; +; CHECK: st.param.b16 [func_retval0], [[R]]; ; CHECK: ret; define half @test_nearbyint(half %a) #0 { %r = call half @llvm.nearbyint.f16(half %a) @@ -1143,7 +1143,7 @@ define half @test_nearbyint(half %a) #0 { ; CHECK-LABEL: test_roundeven( ; CHECK: ld.param.b16 [[A:%rs[0-9]+]], [test_roundeven_param_0]; ; CHECK: cvt.rni.f16.f16 [[R:%rs[0-9]+]], [[A]]; -; CHECK: st.param.b16 [func_retval0+0], [[R]]; +; CHECK: st.param.b16 [func_retval0], [[R]]; ; CHECK: ret; define half @test_roundeven(half %a) #0 { %r = call half @llvm.roundeven.f16(half %a) @@ -1155,7 +1155,7 @@ define half @test_roundeven(half %a) #0 { ; check the use of sign mask and 0.5 to implement round ; CHECK: and.b32 [[R:%r[0-9]+]], {{.*}}, -2147483648; ; CHECK: or.b32 {{.*}}, [[R]], 1056964608; -; CHECK: st.param.b16 [func_retval0+0], {{.*}}; +; CHECK: st.param.b16 [func_retval0], {{.*}}; ; CHECK: ret; define half @test_round(half %a) #0 { %r = call half @llvm.round.f16(half %a) @@ -1173,7 +1173,7 @@ define half @test_round(half %a) #0 { ; CHECK-NOF16-DAG: cvt.f32.f16 [[C32:%f[0-9]+]], [[C]] ; CHECK-NOF16-NEXT: fma.rn.f32 [[R32:%f[0-9]+]], [[A32]], [[B32]], [[C32]]; ; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%rs[0-9]+]], [[R32]] -; CHECK: st.param.b16 [func_retval0+0], [[R]]; +; CHECK: st.param.b16 [func_retval0], [[R]]; ; CHECK: ret; define half @test_fmuladd(half %a, half %b, half %c) #0 { %r = call half @llvm.fmuladd.f16(half %a, half %b, half %c) diff --git a/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll b/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll index b41f63b783d390..b11c69e064c4a6 100644 --- a/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll +++ 
b/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll @@ -32,7 +32,7 @@ target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" ; CHECK-LABEL: test_ret_const( ; CHECK: mov.b32 [[R:%r[0-9+]]], 1073757184; -; CHECK: st.param.b32 [func_retval0+0], [[R]]; +; CHECK: st.param.b32 [func_retval0], [[R]]; ; CHECK-NEXT: ret; define <2 x half> @test_ret_const() #0 { ret <2 x half> @@ -41,7 +41,7 @@ define <2 x half> @test_ret_const() #0 { ; CHECK-LABEL: test_extract_0( ; CHECK: ld.param.b32 [[A:%r[0-9]+]], [test_extract_0_param_0]; ; CHECK: mov.b32 {[[R:%rs[0-9]+]], tmp}, [[A]]; -; CHECK: st.param.b16 [func_retval0+0], [[R]]; +; CHECK: st.param.b16 [func_retval0], [[R]]; ; CHECK: ret; define half @test_extract_0(<2 x half> %a) #0 { %e = extractelement <2 x half> %a, i32 0 @@ -51,7 +51,7 @@ define half @test_extract_0(<2 x half> %a) #0 { ; CHECK-LABEL: test_extract_1( ; CHECK: ld.param.b32 [[A:%r[0-9]+]], [test_extract_1_param_0]; ; CHECK: mov.b32 {tmp, [[R:%rs[0-9]+]]}, [[A]]; -; CHECK: st.param.b16 [func_retval0+0], [[R]]; +; CHECK: st.param.b16 [func_retval0], [[R]]; ; CHECK: ret; define half @test_extract_1(<2 x half> %a) #0 { %e = extractelement <2 x half> %a, i32 1 @@ -64,7 +64,7 @@ define half @test_extract_1(<2 x half> %a) #0 { ; CHECK-DAG: setp.eq.s64 [[PRED:%p[0-9]+]], [[IDX]], 0; ; CHECK-DAG: mov.b32 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]]}, [[A]]; ; CHECK: selp.b16 [[R:%rs[0-9]+]], [[E0]], [[E1]], [[PRED]]; -; CHECK: st.param.b16 [func_retval0+0], [[R]]; +; CHECK: st.param.b16 [func_retval0], [[R]]; ; CHECK: ret; define half @test_extract_i(<2 x half> %a, i64 %idx) #0 { %e = extractelement <2 x half> %a, i64 %idx @@ -89,7 +89,7 @@ define half @test_extract_i(<2 x half> %a, i64 %idx) #0 { ; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%rs[0-9]+]], [[FR1]] ; CHECK-NOF16: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]} ; -; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]]; +; CHECK-NEXT: st.param.b32 [func_retval0], [[R]]; ; CHECK-NEXT: ret; define <2 x half> @test_fadd(<2 x half> %a, <2 x half> %b) #0 { %r = fadd <2 x half> %a, %b @@ -112,7 +112,7 @@ define <2 x half> @test_fadd(<2 x half> %a, <2 x half> %b) #0 { ; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%rs[0-9]+]], [[FR1]] ; CHECK-NOF16: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]} ; -; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]]; +; CHECK-NEXT: st.param.b32 [func_retval0], [[R]]; ; CHECK-NEXT: ret; define <2 x half> @test_fadd_imm_0(<2 x half> %a) #0 { %r = fadd <2 x half> , %a @@ -134,7 +134,7 @@ define <2 x half> @test_fadd_imm_0(<2 x half> %a) #0 { ; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%rs[0-9]+]], [[FR1]] ; CHECK-NOF16: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]} ; -; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]]; +; CHECK-NEXT: st.param.b32 [func_retval0], [[R]]; ; CHECK-NEXT: ret; define <2 x half> @test_fadd_imm_1(<2 x half> %a) #0 { %r = fadd <2 x half> %a, @@ -159,7 +159,7 @@ define <2 x half> @test_fadd_imm_1(<2 x half> %a) #0 { ; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%rs[0-9]+]], [[FR1]] ; CHECK-NOF16: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]} ; -; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]]; +; CHECK-NEXT: st.param.b32 [func_retval0], [[R]]; ; CHECK-NEXT: ret; define <2 x half> @test_fsub(<2 x half> %a, <2 x half> %b) #0 { %r = fsub <2 x half> %a, %b @@ -182,7 +182,7 @@ define <2 x half> @test_fsub(<2 x half> %a, <2 x half> %b) #0 { ; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%rs[0-9]+]], [[FR1]] ; CHECK-NOF16: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]} ; -; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]]; +; CHECK-NEXT: st.param.b32 [func_retval0], 
[[R]]; ; CHECK-NEXT: ret; define <2 x half> @test_fneg(<2 x half> %a) #0 { %r = fsub <2 x half> , %a @@ -206,7 +206,7 @@ define <2 x half> @test_fneg(<2 x half> %a) #0 { ; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%rs[0-9]+]], [[FR1]] ; CHECK-NOF16: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]} ; -; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]]; +; CHECK-NEXT: st.param.b32 [func_retval0], [[R]]; ; CHECK-NEXT: ret; define <2 x half> @test_fmul(<2 x half> %a, <2 x half> %b) #0 { %r = fmul <2 x half> %a, %b @@ -227,7 +227,7 @@ define <2 x half> @test_fmul(<2 x half> %a, <2 x half> %b) #0 { ; CHECK-DAG: cvt.rn.f16.f32 [[R0:%rs[0-9]+]], [[FR0]]; ; CHECK-DAG: cvt.rn.f16.f32 [[R1:%rs[0-9]+]], [[FR1]]; ; CHECK-NEXT: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]} -; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]]; +; CHECK-NEXT: st.param.b32 [func_retval0], [[R]]; ; CHECK-NEXT: ret; define <2 x half> @test_fdiv(<2 x half> %a, <2 x half> %b) #0 { %r = fdiv <2 x half> %a, %b @@ -265,7 +265,7 @@ define <2 x half> @test_fdiv(<2 x half> %a, <2 x half> %b) #0 { ; CHECK-DAG: cvt.rn.f16.f32 [[R1:%rs[0-9]+]], [[RF1]]; ; -- merge into f16x2 and return it. ; CHECK: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]} -; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]]; +; CHECK-NEXT: st.param.b32 [func_retval0], [[R]]; ; CHECK-NEXT: ret; define <2 x half> @test_frem(<2 x half> %a, <2 x half> %b) #0 { %r = frem <2 x half> %a, %b @@ -333,15 +333,15 @@ declare <2 x half> @test_callee(<2 x half> %a, <2 x half> %b) #0 ; CHECK: { ; CHECK-DAG: .param .align 4 .b8 param0[4]; ; CHECK-DAG: .param .align 4 .b8 param1[4]; -; CHECK-DAG: st.param.b32 [param0+0], [[A]]; -; CHECK-DAG: st.param.b32 [param1+0], [[B]]; +; CHECK-DAG: st.param.b32 [param0], [[A]]; +; CHECK-DAG: st.param.b32 [param1], [[B]]; ; CHECK-DAG: .param .align 4 .b8 retval0[4]; ; CHECK: call.uni (retval0), ; CHECK-NEXT: test_callee, ; CHECK: ); -; CHECK-NEXT: ld.param.b32 [[R:%r[0-9]+]], [retval0+0]; +; CHECK-NEXT: ld.param.b32 [[R:%r[0-9]+]], [retval0]; ; CHECK-NEXT: } -; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]]; +; CHECK-NEXT: st.param.b32 [func_retval0], [[R]]; ; CHECK-NEXT: ret; define <2 x half> @test_call(<2 x half> %a, <2 x half> %b) #0 { %r = call <2 x half> @test_callee(<2 x half> %a, <2 x half> %b) @@ -354,15 +354,15 @@ define <2 x half> @test_call(<2 x half> %a, <2 x half> %b) #0 { ; CHECK: { ; CHECK-DAG: .param .align 4 .b8 param0[4]; ; CHECK-DAG: .param .align 4 .b8 param1[4]; -; CHECK-DAG: st.param.b32 [param0+0], [[B]]; -; CHECK-DAG: st.param.b32 [param1+0], [[A]]; +; CHECK-DAG: st.param.b32 [param0], [[B]]; +; CHECK-DAG: st.param.b32 [param1], [[A]]; ; CHECK-DAG: .param .align 4 .b8 retval0[4]; ; CHECK: call.uni (retval0), ; CHECK-NEXT: test_callee, ; CHECK: ); -; CHECK-NEXT: ld.param.b32 [[R:%r[0-9]+]], [retval0+0]; +; CHECK-NEXT: ld.param.b32 [[R:%r[0-9]+]], [retval0]; ; CHECK-NEXT: } -; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]]; +; CHECK-NEXT: st.param.b32 [func_retval0], [[R]]; ; CHECK-NEXT: ret; define <2 x half> @test_call_flipped(<2 x half> %a, <2 x half> %b) #0 { %r = call <2 x half> @test_callee(<2 x half> %b, <2 x half> %a) @@ -375,15 +375,15 @@ define <2 x half> @test_call_flipped(<2 x half> %a, <2 x half> %b) #0 { ; CHECK: { ; CHECK-DAG: .param .align 4 .b8 param0[4]; ; CHECK-DAG: .param .align 4 .b8 param1[4]; -; CHECK-DAG: st.param.b32 [param0+0], [[B]]; -; CHECK-DAG: st.param.b32 [param1+0], [[A]]; +; CHECK-DAG: st.param.b32 [param0], [[B]]; +; CHECK-DAG: st.param.b32 [param1], [[A]]; ; CHECK-DAG: .param .align 4 .b8 retval0[4]; ; 
CHECK: call.uni (retval0), ; CHECK-NEXT: test_callee, ; CHECK: ); -; CHECK-NEXT: ld.param.b32 [[R:%r[0-9]+]], [retval0+0]; +; CHECK-NEXT: ld.param.b32 [[R:%r[0-9]+]], [retval0]; ; CHECK-NEXT: } -; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]]; +; CHECK-NEXT: st.param.b32 [func_retval0], [[R]]; ; CHECK-NEXT: ret; define <2 x half> @test_tailcall_flipped(<2 x half> %a, <2 x half> %b) #0 { %r = tail call <2 x half> @test_callee(<2 x half> %b, <2 x half> %a) @@ -396,7 +396,7 @@ define <2 x half> @test_tailcall_flipped(<2 x half> %a, <2 x half> %b) #0 { ; CHECK-DAG: ld.param.u8 [[C:%rs[0-9]+]], [test_select_param_2] ; CHECK-DAG: setp.eq.b16 [[PRED:%p[0-9]+]], %rs{{.*}}, 1; ; CHECK-NEXT: selp.b32 [[R:%r[0-9]+]], [[A]], [[B]], [[PRED]]; -; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]]; +; CHECK-NEXT: st.param.b32 [func_retval0], [[R]]; ; CHECK-NEXT: ret; define <2 x half> @test_select(<2 x half> %a, <2 x half> %b, i1 zeroext %c) #0 { %r = select i1 %c, <2 x half> %a, <2 x half> %b @@ -425,7 +425,7 @@ define <2 x half> @test_select(<2 x half> %a, <2 x half> %b, i1 zeroext %c) #0 { ; CHECK-DAG: selp.b16 [[R0:%rs[0-9]+]], [[A0]], [[B0]], [[P0]]; ; CHECK-DAG: selp.b16 [[R1:%rs[0-9]+]], [[A1]], [[B1]], [[P1]]; ; CHECK: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]} -; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]]; +; CHECK-NEXT: st.param.b32 [func_retval0], [[R]]; ; CHECK-NEXT: ret; define <2 x half> @test_select_cc(<2 x half> %a, <2 x half> %b, <2 x half> %c, <2 x half> %d) #0 { %cc = fcmp une <2 x half> %c, %d @@ -451,7 +451,7 @@ define <2 x half> @test_select_cc(<2 x half> %a, <2 x half> %b, <2 x half> %c, < ; ; CHECK-DAG: selp.f32 [[R0:%f[0-9]+]], [[A0]], [[B0]], [[P0]]; ; CHECK-DAG: selp.f32 [[R1:%f[0-9]+]], [[A1]], [[B1]], [[P1]]; -; CHECK-NEXT: st.param.v2.f32 [func_retval0+0], {[[R0]], [[R1]]}; +; CHECK-NEXT: st.param.v2.f32 [func_retval0], {[[R0]], [[R1]]}; ; CHECK-NEXT: ret; define <2 x float> @test_select_cc_f32_f16(<2 x float> %a, <2 x float> %b, <2 x half> %c, <2 x half> %d) #0 { @@ -472,7 +472,7 @@ define <2 x float> @test_select_cc_f32_f16(<2 x float> %a, <2 x float> %b, ; CHECK-DAG: selp.b16 [[R0:%rs[0-9]+]], [[A0]], [[B0]], [[P0]]; ; CHECK-DAG: selp.b16 [[R1:%rs[0-9]+]], [[A1]], [[B1]], [[P1]]; ; CHECK: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]} -; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]]; +; CHECK-NEXT: st.param.b32 [func_retval0], [[R]]; ; CHECK-NEXT: ret; define <2 x half> @test_select_cc_f16_f32(<2 x half> %a, <2 x half> %b, <2 x float> %c, <2 x float> %d) #0 { @@ -494,7 +494,7 @@ define <2 x half> @test_select_cc_f16_f32(<2 x half> %a, <2 x half> %b, ; CHECK-NOF16-DAG: setp.neu.f32 [[P0:%p[0-9]+]], [[FA0]], [[FB0]] ; CHECK-NOF16-DAG: setp.neu.f32 [[P1:%p[0-9]+]], [[FA1]], [[FB1]] ; CHECK-DAG: selp.u16 [[R0:%rs[0-9]+]], -1, 0, [[P0]]; -; CHECK-NEXT: st.param.b8 [func_retval0+0], [[R0]]; +; CHECK-NEXT: st.param.b8 [func_retval0], [[R0]]; ; CHECK-DAG: selp.u16 [[R1:%rs[0-9]+]], -1, 0, [[P1]]; ; CHECK-NEXT: st.param.b8 [func_retval0+1], [[R1]]; ; CHECK-NEXT: ret; @@ -516,7 +516,7 @@ define <2 x i1> @test_fcmp_une(<2 x half> %a, <2 x half> %b) #0 { ; CHECK-NOF16-DAG: setp.equ.f32 [[P0:%p[0-9]+]], [[FA0]], [[FB0]] ; CHECK-NOF16-DAG: setp.equ.f32 [[P1:%p[0-9]+]], [[FA1]], [[FB1]] ; CHECK-DAG: selp.u16 [[R0:%rs[0-9]+]], -1, 0, [[P0]]; -; CHECK-NEXT: st.param.b8 [func_retval0+0], [[R0]]; +; CHECK-NEXT: st.param.b8 [func_retval0], [[R0]]; ; CHECK-DAG: selp.u16 [[R1:%rs[0-9]+]], -1, 0, [[P1]]; ; CHECK-NEXT: st.param.b8 [func_retval0+1], [[R1]]; ; CHECK-NEXT: ret; @@ -538,7 +538,7 
@@ define <2 x i1> @test_fcmp_ueq(<2 x half> %a, <2 x half> %b) #0 { ; CHECK-NOF16-DAG: setp.gtu.f32 [[P0:%p[0-9]+]], [[FA0]], [[FB0]] ; CHECK-NOF16-DAG: setp.gtu.f32 [[P1:%p[0-9]+]], [[FA1]], [[FB1]] ; CHECK-DAG: selp.u16 [[R0:%rs[0-9]+]], -1, 0, [[P0]]; -; CHECK-NEXT: st.param.b8 [func_retval0+0], [[R0]]; +; CHECK-NEXT: st.param.b8 [func_retval0], [[R0]]; ; CHECK-DAG: selp.u16 [[R1:%rs[0-9]+]], -1, 0, [[P1]]; ; CHECK-NEXT: st.param.b8 [func_retval0+1], [[R1]]; ; CHECK-NEXT: ret; @@ -560,7 +560,7 @@ define <2 x i1> @test_fcmp_ugt(<2 x half> %a, <2 x half> %b) #0 { ; CHECK-NOF16-DAG: setp.geu.f32 [[P0:%p[0-9]+]], [[FA0]], [[FB0]] ; CHECK-NOF16-DAG: setp.geu.f32 [[P1:%p[0-9]+]], [[FA1]], [[FB1]] ; CHECK-DAG: selp.u16 [[R0:%rs[0-9]+]], -1, 0, [[P0]]; -; CHECK-NEXT: st.param.b8 [func_retval0+0], [[R0]]; +; CHECK-NEXT: st.param.b8 [func_retval0], [[R0]]; ; CHECK-DAG: selp.u16 [[R1:%rs[0-9]+]], -1, 0, [[P1]]; ; CHECK-NEXT: st.param.b8 [func_retval0+1], [[R1]]; ; CHECK-NEXT: ret; @@ -582,7 +582,7 @@ define <2 x i1> @test_fcmp_uge(<2 x half> %a, <2 x half> %b) #0 { ; CHECK-NOF16-DAG: setp.ltu.f32 [[P0:%p[0-9]+]], [[FA0]], [[FB0]] ; CHECK-NOF16-DAG: setp.ltu.f32 [[P1:%p[0-9]+]], [[FA1]], [[FB1]] ; CHECK-DAG: selp.u16 [[R0:%rs[0-9]+]], -1, 0, [[P0]]; -; CHECK-NEXT: st.param.b8 [func_retval0+0], [[R0]]; +; CHECK-NEXT: st.param.b8 [func_retval0], [[R0]]; ; CHECK-DAG: selp.u16 [[R1:%rs[0-9]+]], -1, 0, [[P1]]; ; CHECK-NEXT: st.param.b8 [func_retval0+1], [[R1]]; ; CHECK-NEXT: ret; @@ -604,7 +604,7 @@ define <2 x i1> @test_fcmp_ult(<2 x half> %a, <2 x half> %b) #0 { ; CHECK-NOF16-DAG: setp.leu.f32 [[P0:%p[0-9]+]], [[FA0]], [[FB0]] ; CHECK-NOF16-DAG: setp.leu.f32 [[P1:%p[0-9]+]], [[FA1]], [[FB1]] ; CHECK-DAG: selp.u16 [[R0:%rs[0-9]+]], -1, 0, [[P0]]; -; CHECK-NEXT: st.param.b8 [func_retval0+0], [[R0]]; +; CHECK-NEXT: st.param.b8 [func_retval0], [[R0]]; ; CHECK-DAG: selp.u16 [[R1:%rs[0-9]+]], -1, 0, [[P1]]; ; CHECK-NEXT: st.param.b8 [func_retval0+1], [[R1]]; ; CHECK-NEXT: ret; @@ -627,7 +627,7 @@ define <2 x i1> @test_fcmp_ule(<2 x half> %a, <2 x half> %b) #0 { ; CHECK-NOF16-DAG: setp.nan.f32 [[P0:%p[0-9]+]], [[FA0]], [[FB0]] ; CHECK-NOF16-DAG: setp.nan.f32 [[P1:%p[0-9]+]], [[FA1]], [[FB1]] ; CHECK-DAG: selp.u16 [[R0:%rs[0-9]+]], -1, 0, [[P0]]; -; CHECK-NEXT: st.param.b8 [func_retval0+0], [[R0]]; +; CHECK-NEXT: st.param.b8 [func_retval0], [[R0]]; ; CHECK-DAG: selp.u16 [[R1:%rs[0-9]+]], -1, 0, [[P1]]; ; CHECK-NEXT: st.param.b8 [func_retval0+1], [[R1]]; ; CHECK-NEXT: ret; @@ -649,7 +649,7 @@ define <2 x i1> @test_fcmp_uno(<2 x half> %a, <2 x half> %b) #0 { ; CHECK-NOF16-DAG: setp.ne.f32 [[P0:%p[0-9]+]], [[FA0]], [[FB0]] ; CHECK-NOF16-DAG: setp.ne.f32 [[P1:%p[0-9]+]], [[FA1]], [[FB1]] ; CHECK-DAG: selp.u16 [[R0:%rs[0-9]+]], -1, 0, [[P0]]; -; CHECK-NEXT: st.param.b8 [func_retval0+0], [[R0]]; +; CHECK-NEXT: st.param.b8 [func_retval0], [[R0]]; ; CHECK-DAG: selp.u16 [[R1:%rs[0-9]+]], -1, 0, [[P1]]; ; CHECK-NEXT: st.param.b8 [func_retval0+1], [[R1]]; ; CHECK-NEXT: ret; @@ -671,7 +671,7 @@ define <2 x i1> @test_fcmp_one(<2 x half> %a, <2 x half> %b) #0 { ; CHECK-NOF16-DAG: setp.eq.f32 [[P0:%p[0-9]+]], [[FA0]], [[FB0]] ; CHECK-NOF16-DAG: setp.eq.f32 [[P1:%p[0-9]+]], [[FA1]], [[FB1]] ; CHECK-DAG: selp.u16 [[R0:%rs[0-9]+]], -1, 0, [[P0]]; -; CHECK-NEXT: st.param.b8 [func_retval0+0], [[R0]]; +; CHECK-NEXT: st.param.b8 [func_retval0], [[R0]]; ; CHECK-DAG: selp.u16 [[R1:%rs[0-9]+]], -1, 0, [[P1]]; ; CHECK-NEXT: st.param.b8 [func_retval0+1], [[R1]]; ; CHECK-NEXT: ret; @@ -693,7 +693,7 @@ define <2 x i1> @test_fcmp_oeq(<2 
x half> %a, <2 x half> %b) #0 { ; CHECK-NOF16-DAG: setp.gt.f32 [[P0:%p[0-9]+]], [[FA0]], [[FB0]] ; CHECK-NOF16-DAG: setp.gt.f32 [[P1:%p[0-9]+]], [[FA1]], [[FB1]] ; CHECK-DAG: selp.u16 [[R0:%rs[0-9]+]], -1, 0, [[P0]]; -; CHECK-NEXT: st.param.b8 [func_retval0+0], [[R0]]; +; CHECK-NEXT: st.param.b8 [func_retval0], [[R0]]; ; CHECK-DAG: selp.u16 [[R1:%rs[0-9]+]], -1, 0, [[P1]]; ; CHECK-NEXT: st.param.b8 [func_retval0+1], [[R1]]; ; CHECK-NEXT: ret; @@ -715,7 +715,7 @@ define <2 x i1> @test_fcmp_ogt(<2 x half> %a, <2 x half> %b) #0 { ; CHECK-NOF16-DAG: setp.ge.f32 [[P0:%p[0-9]+]], [[FA0]], [[FB0]] ; CHECK-NOF16-DAG: setp.ge.f32 [[P1:%p[0-9]+]], [[FA1]], [[FB1]] ; CHECK-DAG: selp.u16 [[R0:%rs[0-9]+]], -1, 0, [[P0]]; -; CHECK-NEXT: st.param.b8 [func_retval0+0], [[R0]]; +; CHECK-NEXT: st.param.b8 [func_retval0], [[R0]]; ; CHECK-DAG: selp.u16 [[R1:%rs[0-9]+]], -1, 0, [[P1]]; ; CHECK-NEXT: st.param.b8 [func_retval0+1], [[R1]]; ; CHECK-NEXT: ret; @@ -737,7 +737,7 @@ define <2 x i1> @test_fcmp_oge(<2 x half> %a, <2 x half> %b) #0 { ; CHECK-NOF16-DAG: setp.lt.f32 [[P0:%p[0-9]+]], [[FA0]], [[FB0]] ; CHECK-NOF16-DAG: setp.lt.f32 [[P1:%p[0-9]+]], [[FA1]], [[FB1]] ; CHECK-DAG: selp.u16 [[R0:%rs[0-9]+]], -1, 0, [[P0]]; -; CHECK-NEXT: st.param.b8 [func_retval0+0], [[R0]]; +; CHECK-NEXT: st.param.b8 [func_retval0], [[R0]]; ; CHECK-DAG: selp.u16 [[R1:%rs[0-9]+]], -1, 0, [[P1]]; ; CHECK-NEXT: st.param.b8 [func_retval0+1], [[R1]]; ; CHECK-NEXT: ret; @@ -759,7 +759,7 @@ define <2 x i1> @test_fcmp_olt(<2 x half> %a, <2 x half> %b) #0 { ; CHECK-NOF16-DAG: setp.le.f32 [[P0:%p[0-9]+]], [[FA0]], [[FB0]] ; CHECK-NOF16-DAG: setp.le.f32 [[P1:%p[0-9]+]], [[FA1]], [[FB1]] ; CHECK-DAG: selp.u16 [[R0:%rs[0-9]+]], -1, 0, [[P0]]; -; CHECK-NEXT: st.param.b8 [func_retval0+0], [[R0]]; +; CHECK-NEXT: st.param.b8 [func_retval0], [[R0]]; ; CHECK-DAG: selp.u16 [[R1:%rs[0-9]+]], -1, 0, [[P1]]; ; CHECK-NEXT: st.param.b8 [func_retval0+1], [[R1]]; ; CHECK-NEXT: ret; @@ -781,7 +781,7 @@ define <2 x i1> @test_fcmp_ole(<2 x half> %a, <2 x half> %b) #0 { ; CHECK-NOF16-DAG: setp.num.f32 [[P0:%p[0-9]+]], [[FA0]], [[FB0]] ; CHECK-NOF16-DAG: setp.num.f32 [[P1:%p[0-9]+]], [[FA1]], [[FB1]] ; CHECK-DAG: selp.u16 [[R0:%rs[0-9]+]], -1, 0, [[P0]]; -; CHECK-NEXT: st.param.b8 [func_retval0+0], [[R0]]; +; CHECK-NEXT: st.param.b8 [func_retval0], [[R0]]; ; CHECK-DAG: selp.u16 [[R1:%rs[0-9]+]], -1, 0, [[P1]]; ; CHECK-NEXT: st.param.b8 [func_retval0+1], [[R1]]; ; CHECK-NEXT: ret; @@ -795,7 +795,7 @@ define <2 x i1> @test_fcmp_ord(<2 x half> %a, <2 x half> %b) #0 { ; CHECK: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]] ; CHECK-DAG: cvt.rzi.s32.f16 [[R0:%r[0-9]+]], [[A0]]; ; CHECK-DAG: cvt.rzi.s32.f16 [[R1:%r[0-9]+]], [[A1]]; -; CHECK: st.param.v2.b32 [func_retval0+0], {[[R0]], [[R1]]} +; CHECK: st.param.v2.b32 [func_retval0], {[[R0]], [[R1]]} ; CHECK: ret; define <2 x i32> @test_fptosi_i32(<2 x half> %a) #0 { %r = fptosi <2 x half> %a to <2 x i32> @@ -807,7 +807,7 @@ define <2 x i32> @test_fptosi_i32(<2 x half> %a) #0 { ; CHECK: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]] ; CHECK-DAG: cvt.rzi.s64.f16 [[R0:%rd[0-9]+]], [[A0]]; ; CHECK-DAG: cvt.rzi.s64.f16 [[R1:%rd[0-9]+]], [[A1]]; -; CHECK: st.param.v2.b64 [func_retval0+0], {[[R0]], [[R1]]} +; CHECK: st.param.v2.b64 [func_retval0], {[[R0]], [[R1]]} ; CHECK: ret; define <2 x i64> @test_fptosi_i64(<2 x half> %a) #0 { %r = fptosi <2 x half> %a to <2 x i64> @@ -819,7 +819,7 @@ define <2 x i64> @test_fptosi_i64(<2 x half> %a) #0 { ; CHECK: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]] ; CHECK-DAG: 
cvt.rzi.u32.f16 [[R0:%r[0-9]+]], [[A0]]; ; CHECK-DAG: cvt.rzi.u32.f16 [[R1:%r[0-9]+]], [[A1]]; -; CHECK: st.param.v2.b32 [func_retval0+0], {[[R0]], [[R1]]} +; CHECK: st.param.v2.b32 [func_retval0], {[[R0]], [[R1]]} ; CHECK: ret; define <2 x i32> @test_fptoui_2xi32(<2 x half> %a) #0 { %r = fptoui <2 x half> %a to <2 x i32> @@ -831,7 +831,7 @@ define <2 x i32> @test_fptoui_2xi32(<2 x half> %a) #0 { ; CHECK: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]] ; CHECK-DAG: cvt.rzi.u64.f16 [[R0:%rd[0-9]+]], [[A0]]; ; CHECK-DAG: cvt.rzi.u64.f16 [[R1:%rd[0-9]+]], [[A1]]; -; CHECK: st.param.v2.b64 [func_retval0+0], {[[R0]], [[R1]]} +; CHECK: st.param.v2.b64 [func_retval0], {[[R0]], [[R1]]} ; CHECK: ret; define <2 x i64> @test_fptoui_2xi64(<2 x half> %a) #0 { %r = fptoui <2 x half> %a to <2 x i64> @@ -843,7 +843,7 @@ define <2 x i64> @test_fptoui_2xi64(<2 x half> %a) #0 { ; CHECK-DAG: cvt.rn.f16.u32 [[R0:%rs[0-9]+]], [[A0]]; ; CHECK-DAG: cvt.rn.f16.u32 [[R1:%rs[0-9]+]], [[A1]]; ; CHECK: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]} -; CHECK: st.param.b32 [func_retval0+0], [[R]]; +; CHECK: st.param.b32 [func_retval0], [[R]]; ; CHECK: ret; define <2 x half> @test_uitofp_2xi32(<2 x i32> %a) #0 { %r = uitofp <2 x i32> %a to <2 x half> @@ -855,7 +855,7 @@ define <2 x half> @test_uitofp_2xi32(<2 x i32> %a) #0 { ; CHECK-DAG: cvt.rn.f16.u64 [[R0:%rs[0-9]+]], [[A0]]; ; CHECK-DAG: cvt.rn.f16.u64 [[R1:%rs[0-9]+]], [[A1]]; ; CHECK: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]} -; CHECK: st.param.b32 [func_retval0+0], [[R]]; +; CHECK: st.param.b32 [func_retval0], [[R]]; ; CHECK: ret; define <2 x half> @test_uitofp_2xi64(<2 x i64> %a) #0 { %r = uitofp <2 x i64> %a to <2 x half> @@ -867,7 +867,7 @@ define <2 x half> @test_uitofp_2xi64(<2 x i64> %a) #0 { ; CHECK-DAG: cvt.rn.f16.s32 [[R0:%rs[0-9]+]], [[A0]]; ; CHECK-DAG: cvt.rn.f16.s32 [[R1:%rs[0-9]+]], [[A1]]; ; CHECK: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]} -; CHECK: st.param.b32 [func_retval0+0], [[R]]; +; CHECK: st.param.b32 [func_retval0], [[R]]; ; CHECK: ret; define <2 x half> @test_sitofp_2xi32(<2 x i32> %a) #0 { %r = sitofp <2 x i32> %a to <2 x half> @@ -879,7 +879,7 @@ define <2 x half> @test_sitofp_2xi32(<2 x i32> %a) #0 { ; CHECK-DAG: cvt.rn.f16.s64 [[R0:%rs[0-9]+]], [[A0]]; ; CHECK-DAG: cvt.rn.f16.s64 [[R1:%rs[0-9]+]], [[A1]]; ; CHECK: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]} -; CHECK: st.param.b32 [func_retval0+0], [[R]]; +; CHECK: st.param.b32 [func_retval0], [[R]]; ; CHECK: ret; define <2 x half> @test_sitofp_2xi64(<2 x i64> %a) #0 { %r = sitofp <2 x i64> %a to <2 x half> @@ -906,7 +906,7 @@ define <2 x half> @test_sitofp_2xi64(<2 x i64> %a) #0 { ; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%rs[0-9]+]], [[FR1]] ; CHECK-NOF16: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]} ; -; CHECK: st.param.b32 [func_retval0+0], [[R]]; +; CHECK: st.param.b32 [func_retval0], [[R]]; ; CHECK: ret; define <2 x half> @test_uitofp_2xi32_fadd(<2 x i32> %a, <2 x half> %b) #0 { %c = uitofp <2 x i32> %a to <2 x half> @@ -934,7 +934,7 @@ define <2 x half> @test_uitofp_2xi32_fadd(<2 x i32> %a, <2 x half> %b) #0 { ; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%rs[0-9]+]], [[FR1]] ; CHECK-NOF16: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]} ; -; CHECK: st.param.b32 [func_retval0+0], [[R]]; +; CHECK: st.param.b32 [func_retval0], [[R]]; ; CHECK: ret; define <2 x half> @test_sitofp_2xi32_fadd(<2 x i32> %a, <2 x half> %b) #0 { %c = sitofp <2 x i32> %a to <2 x half> @@ -947,7 +947,7 @@ define <2 x half> @test_sitofp_2xi32_fadd(<2 x i32> %a, <2 x half> %b) #0 { ; CHECK-DAG: cvt.rn.f16.f32 [[R0:%rs[0-9]+]], [[A0]]; ; 
CHECK-DAG: cvt.rn.f16.f32 [[R1:%rs[0-9]+]], [[A1]]; ; CHECK: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]} -; CHECK: st.param.b32 [func_retval0+0], [[R]]; +; CHECK: st.param.b32 [func_retval0], [[R]]; ; CHECK: ret; define <2 x half> @test_fptrunc_2xfloat(<2 x float> %a) #0 { %r = fptrunc <2 x float> %a to <2 x half> @@ -959,7 +959,7 @@ define <2 x half> @test_fptrunc_2xfloat(<2 x float> %a) #0 { ; CHECK-DAG: cvt.rn.f16.f64 [[R0:%rs[0-9]+]], [[A0]]; ; CHECK-DAG: cvt.rn.f16.f64 [[R1:%rs[0-9]+]], [[A1]]; ; CHECK: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]} -; CHECK: st.param.b32 [func_retval0+0], [[R]]; +; CHECK: st.param.b32 [func_retval0], [[R]]; ; CHECK: ret; define <2 x half> @test_fptrunc_2xdouble(<2 x double> %a) #0 { %r = fptrunc <2 x double> %a to <2 x half> @@ -971,7 +971,7 @@ define <2 x half> @test_fptrunc_2xdouble(<2 x double> %a) #0 { ; CHECK: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]] ; CHECK-DAG: cvt.f32.f16 [[R0:%f[0-9]+]], [[A0]]; ; CHECK-DAG: cvt.f32.f16 [[R1:%f[0-9]+]], [[A1]]; -; CHECK-NEXT: st.param.v2.f32 [func_retval0+0], {[[R0]], [[R1]]}; +; CHECK-NEXT: st.param.v2.f32 [func_retval0], {[[R0]], [[R1]]}; ; CHECK: ret; define <2 x float> @test_fpext_2xfloat(<2 x half> %a) #0 { %r = fpext <2 x half> %a to <2 x float> @@ -983,7 +983,7 @@ define <2 x float> @test_fpext_2xfloat(<2 x half> %a) #0 { ; CHECK: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]] ; CHECK-DAG: cvt.f64.f16 [[R0:%fd[0-9]+]], [[A0]]; ; CHECK-DAG: cvt.f64.f16 [[R1:%fd[0-9]+]], [[A1]]; -; CHECK-NEXT: st.param.v2.f64 [func_retval0+0], {[[R0]], [[R1]]}; +; CHECK-NEXT: st.param.v2.f64 [func_retval0], {[[R0]], [[R1]]}; ; CHECK: ret; define <2 x double> @test_fpext_2xdouble(<2 x half> %a) #0 { %r = fpext <2 x half> %a to <2 x double> @@ -993,7 +993,7 @@ define <2 x double> @test_fpext_2xdouble(<2 x half> %a) #0 { ; CHECK-LABEL: test_bitcast_2xhalf_to_2xi16( ; CHECK: ld.param.u32 [[A:%r[0-9]+]], [test_bitcast_2xhalf_to_2xi16_param_0]; -; CHECK: st.param.b32 [func_retval0+0], [[A]] +; CHECK: st.param.b32 [func_retval0], [[A]] ; CHECK: ret; define <2 x i16> @test_bitcast_2xhalf_to_2xi16(<2 x half> %a) #0 { %r = bitcast <2 x half> %a to <2 x i16> @@ -1002,7 +1002,7 @@ define <2 x i16> @test_bitcast_2xhalf_to_2xi16(<2 x half> %a) #0 { ; CHECK-LABEL: test_bitcast_2xi16_to_2xhalf( ; CHECK: ld.param.u32 [[R:%r[0-9]+]], [test_bitcast_2xi16_to_2xhalf_param_0]; -; CHECK: st.param.b32 [func_retval0+0], [[R]]; +; CHECK: st.param.b32 [func_retval0], [[R]]; ; CHECK: ret; define <2 x half> @test_bitcast_2xi16_to_2xhalf(<2 x i16> %a) #0 { %r = bitcast <2 x i16> %a to <2 x half> @@ -1012,7 +1012,7 @@ define <2 x half> @test_bitcast_2xi16_to_2xhalf(<2 x i16> %a) #0 { ; CHECK-LABEL: test_bitcast_float_to_2xhalf( ; CHECK: ld.param.f32 [[AF1:%f[0-9]+]], [test_bitcast_float_to_2xhalf_param_0]; ; CHECK: mov.b32 [[R:%r[0-9]+]], [[AF1]]; -; CHECK: st.param.b32 [func_retval0+0], [[R]]; +; CHECK: st.param.b32 [func_retval0], [[R]]; ; CHECK: ret; define <2 x half> @test_bitcast_float_to_2xhalf(float %a) #0 { %r = bitcast float %a to <2 x half> @@ -1022,7 +1022,7 @@ define <2 x half> @test_bitcast_float_to_2xhalf(float %a) #0 { ; CHECK-LABEL: test_bitcast_2xhalf_to_float( ; CHECK: ld.param.u32 [[R:%r[0-9]+]], [test_bitcast_2xhalf_to_float_param_0]; ; CHECK: mov.b32 [[AF1:%f[0-9]+]], [[R]]; -; CHECK: st.param.f32 [func_retval0+0], [[AF1]]; +; CHECK: st.param.f32 [func_retval0], [[AF1]]; ; CHECK: ret; define float @test_bitcast_2xhalf_to_float(<2 x half> %a) #0 { %r = bitcast <2 x half> %a to float @@ -1063,7 +1063,7 @@ declare <2 x half> 
@llvm.fmuladd.f16(<2 x half> %a, <2 x half> %b, <2 x half> %c ; CHECK-DAG: cvt.rn.f16.f32 [[R0:%rs[0-9]+]], [[RF0]]; ; CHECK-DAG: cvt.rn.f16.f32 [[R1:%rs[0-9]+]], [[RF1]]; ; CHECK: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]} -; CHECK: st.param.b32 [func_retval0+0], [[R]]; +; CHECK: st.param.b32 [func_retval0], [[R]]; ; CHECK: ret; define <2 x half> @test_sqrt(<2 x half> %a) #0 { %r = call <2 x half> @llvm.sqrt.f16(<2 x half> %a) @@ -1087,7 +1087,7 @@ define <2 x half> @test_sqrt(<2 x half> %a) #0 { ; CHECK-DAG: cvt.rn.f16.f32 [[R0:%rs[0-9]+]], [[RF0]]; ; CHECK-DAG: cvt.rn.f16.f32 [[R1:%rs[0-9]+]], [[RF1]]; ; CHECK: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]} -; CHECK: st.param.b32 [func_retval0+0], [[R]]; +; CHECK: st.param.b32 [func_retval0], [[R]]; ; CHECK: ret; define <2 x half> @test_sin(<2 x half> %a) #0 #1 { %r = call <2 x half> @llvm.sin.f16(<2 x half> %a) @@ -1104,7 +1104,7 @@ define <2 x half> @test_sin(<2 x half> %a) #0 #1 { ; CHECK-DAG: cvt.rn.f16.f32 [[R0:%rs[0-9]+]], [[RF0]]; ; CHECK-DAG: cvt.rn.f16.f32 [[R1:%rs[0-9]+]], [[RF1]]; ; CHECK: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]} -; CHECK: st.param.b32 [func_retval0+0], [[R]]; +; CHECK: st.param.b32 [func_retval0], [[R]]; ; CHECK: ret; define <2 x half> @test_cos(<2 x half> %a) #0 #1 { %r = call <2 x half> @llvm.cos.f16(<2 x half> %a) @@ -1175,7 +1175,7 @@ define <2 x half> @test_cos(<2 x half> %a) #0 #1 { ; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%rs[0-9]+]], [[FR1]] ; CHECK-NOF16: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]} -; CHECK: st.param.b32 [func_retval0+0], [[R]]; +; CHECK: st.param.b32 [func_retval0], [[R]]; ; CHECK: ret define <2 x half> @test_fma(<2 x half> %a, <2 x half> %b, <2 x half> %c) #0 { %r = call <2 x half> @llvm.fma.f16(<2 x half> %a, <2 x half> %b, <2 x half> %c) @@ -1193,7 +1193,7 @@ define <2 x half> @test_fma(<2 x half> %a, <2 x half> %b, <2 x half> %c) #0 { ; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%rs[0-9]+]], [[RF1]]; ; CHECK-NOF16: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]} ; CHECK-F16: and.b32 [[R:%r[0-9]+]], [[A]], 2147450879; -; CHECK: st.param.b32 [func_retval0+0], [[R]]; +; CHECK: st.param.b32 [func_retval0], [[R]]; ; CHECK: ret; define <2 x half> @test_fabs(<2 x half> %a) #0 { %r = call <2 x half> @llvm.fabs.f16(<2 x half> %a) @@ -1214,7 +1214,7 @@ define <2 x half> @test_fabs(<2 x half> %a) #0 { ; CHECK-DAG: cvt.rn.f16.f32 [[R0:%rs[0-9]+]], [[RF0]]; ; CHECK-DAG: cvt.rn.f16.f32 [[R1:%rs[0-9]+]], [[RF1]]; ; CHECK: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]} -; CHECK: st.param.b32 [func_retval0+0], [[R]]; +; CHECK: st.param.b32 [func_retval0], [[R]]; ; CHECK: ret; define <2 x half> @test_minnum(<2 x half> %a, <2 x half> %b) #0 { %r = call <2 x half> @llvm.minnum.f16(<2 x half> %a, <2 x half> %b) @@ -1235,7 +1235,7 @@ define <2 x half> @test_minnum(<2 x half> %a, <2 x half> %b) #0 { ; CHECK-DAG: cvt.rn.f16.f32 [[R0:%rs[0-9]+]], [[RF0]]; ; CHECK-DAG: cvt.rn.f16.f32 [[R1:%rs[0-9]+]], [[RF1]]; ; CHECK: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]} -; CHECK: st.param.b32 [func_retval0+0], [[R]]; +; CHECK: st.param.b32 [func_retval0], [[R]]; ; CHECK: ret; define <2 x half> @test_maxnum(<2 x half> %a, <2 x half> %b) #0 { %r = call <2 x half> @llvm.maxnum.f16(<2 x half> %a, <2 x half> %b) @@ -1257,7 +1257,7 @@ define <2 x half> @test_maxnum(<2 x half> %a, <2 x half> %b) #0 { ; CHECK-F16-DAG: and.b32 [[R0:%r[0-9]+]], [[B]], -2147450880; ; CHECK-F16-DAG: and.b32 [[R1:%r[0-9]+]], [[A]], 2147450879; ; CHECK-F16-DAG: or.b32 [[R:%r[0-9]+]], [[R1]], [[R0]] -; CHECK: st.param.b32 [func_retval0+0], [[R]]; +; CHECK: st.param.b32 
[func_retval0], [[R]]; ; CHECK: ret; define <2 x half> @test_copysign(<2 x half> %a, <2 x half> %b) #0 { %r = call <2 x half> @llvm.copysign.f16(<2 x half> %a, <2 x half> %b) @@ -1285,7 +1285,7 @@ define <2 x half> @test_copysign(<2 x half> %a, <2 x half> %b) #0 { ; CHECK-F16-DAG: and.b32 [[R3:%r[0-9]+]], [[R2]], -2147450880; ; CHECK-F16-DAG: and.b32 [[R4:%r[0-9]+]], [[A]], 2147450879; ; CHECK-F16-DAG: or.b32 [[R:%r[0-9]+]], [[R4]], [[R3]] -; CHECK: st.param.b32 [func_retval0+0], [[R]]; +; CHECK: st.param.b32 [func_retval0], [[R]]; ; CHECK: ret; define <2 x half> @test_copysign_f32(<2 x half> %a, <2 x float> %b) #0 { %tb = fptrunc <2 x float> %b to <2 x half> @@ -1316,7 +1316,7 @@ define <2 x half> @test_copysign_f32(<2 x half> %a, <2 x float> %b) #0 { ; CHECK-F16-DAG: and.b32 [[R3:%r[0-9]+]], [[R2]], -2147450880; ; CHECK-F16-DAG: and.b32 [[R4:%r[0-9]+]], [[A]], 2147450879; ; CHECK-F16-DAG: or.b32 [[R:%r[0-9]+]], [[R4]], [[R3]]; -; CHECK: st.param.b32 [func_retval0+0], [[R]]; +; CHECK: st.param.b32 [func_retval0], [[R]]; ; CHECK: ret; define <2 x half> @test_copysign_f64(<2 x half> %a, <2 x double> %b) #0 { %tb = fptrunc <2 x double> %b to <2 x half> @@ -1343,7 +1343,7 @@ define <2 x half> @test_copysign_f64(<2 x half> %a, <2 x double> %b) #0 { ; CHECK-F16-DAG: mov.b32 {[[R3:%rs[0-9]+]], [[R4:%rs[0-9]+]]}, [[R2]] ; CHECK-F16-DAG: cvt.f32.f16 [[XR0:%f[0-9]+]], [[R3]] ; CHECK-F16-DAG: cvt.f32.f16 [[XR1:%f[0-9]+]], [[R4]] -; CHECK: st.param.v2.f32 [func_retval0+0], {[[XR0]], [[XR1]]}; +; CHECK: st.param.v2.f32 [func_retval0], {[[XR0]], [[XR1]]}; ; CHECK: ret; define <2 x float> @test_copysign_extended(<2 x half> %a, <2 x half> %b) #0 { %r = call <2 x half> @llvm.copysign.f16(<2 x half> %a, <2 x half> %b) @@ -1357,7 +1357,7 @@ define <2 x float> @test_copysign_extended(<2 x half> %a, <2 x half> %b) #0 { ; CHECK-DAG: cvt.rmi.f16.f16 [[R1:%rs[0-9]+]], [[A1]]; ; CHECK-DAG: cvt.rmi.f16.f16 [[R0:%rs[0-9]+]], [[A0]]; ; CHECK: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]} -; CHECK: st.param.b32 [func_retval0+0], [[R]]; +; CHECK: st.param.b32 [func_retval0], [[R]]; ; CHECK: ret; define <2 x half> @test_floor(<2 x half> %a) #0 { %r = call <2 x half> @llvm.floor.f16(<2 x half> %a) @@ -1370,7 +1370,7 @@ define <2 x half> @test_floor(<2 x half> %a) #0 { ; CHECK-DAG: cvt.rpi.f16.f16 [[R1:%rs[0-9]+]], [[A1]]; ; CHECK-DAG: cvt.rpi.f16.f16 [[R0:%rs[0-9]+]], [[A0]]; ; CHECK: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]} -; CHECK: st.param.b32 [func_retval0+0], [[R]]; +; CHECK: st.param.b32 [func_retval0], [[R]]; ; CHECK: ret; define <2 x half> @test_ceil(<2 x half> %a) #0 { %r = call <2 x half> @llvm.ceil.f16(<2 x half> %a) @@ -1383,7 +1383,7 @@ define <2 x half> @test_ceil(<2 x half> %a) #0 { ; CHECK-DAG: cvt.rzi.f16.f16 [[R1:%rs[0-9]+]], [[A1]]; ; CHECK-DAG: cvt.rzi.f16.f16 [[R0:%rs[0-9]+]], [[A0]]; ; CHECK: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]} -; CHECK: st.param.b32 [func_retval0+0], [[R]]; +; CHECK: st.param.b32 [func_retval0], [[R]]; ; CHECK: ret; define <2 x half> @test_trunc(<2 x half> %a) #0 { %r = call <2 x half> @llvm.trunc.f16(<2 x half> %a) @@ -1396,7 +1396,7 @@ define <2 x half> @test_trunc(<2 x half> %a) #0 { ; CHECK-DAG: cvt.rni.f16.f16 [[R1:%rs[0-9]+]], [[A1]]; ; CHECK-DAG: cvt.rni.f16.f16 [[R0:%rs[0-9]+]], [[A0]]; ; CHECK: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]} -; CHECK: st.param.b32 [func_retval0+0], [[R]]; +; CHECK: st.param.b32 [func_retval0], [[R]]; ; CHECK: ret; define <2 x half> @test_rint(<2 x half> %a) #0 { %r = call <2 x half> @llvm.rint.f16(<2 x half> %a) @@ -1409,7 +1409,7 @@ define <2 
x half> @test_rint(<2 x half> %a) #0 { ; CHECK-DAG: cvt.rni.f16.f16 [[R1:%rs[0-9]+]], [[A1]]; ; CHECK-DAG: cvt.rni.f16.f16 [[R0:%rs[0-9]+]], [[A0]]; ; CHECK: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]} -; CHECK: st.param.b32 [func_retval0+0], [[R]]; +; CHECK: st.param.b32 [func_retval0], [[R]]; ; CHECK: ret; define <2 x half> @test_nearbyint(<2 x half> %a) #0 { %r = call <2 x half> @llvm.nearbyint.f16(<2 x half> %a) @@ -1422,7 +1422,7 @@ define <2 x half> @test_nearbyint(<2 x half> %a) #0 { ; CHECK-DAG: cvt.rni.f16.f16 [[R1:%rs[0-9]+]], [[A1]]; ; CHECK-DAG: cvt.rni.f16.f16 [[R0:%rs[0-9]+]], [[A0]]; ; CHECK: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]} -; CHECK: st.param.b32 [func_retval0+0], [[R]]; +; CHECK: st.param.b32 [func_retval0], [[R]]; ; CHECK: ret; define <2 x half> @test_roundeven(<2 x half> %a) #0 { %r = call <2 x half> @llvm.roundeven.f16(<2 x half> %a) @@ -1436,7 +1436,7 @@ define <2 x half> @test_roundeven(<2 x half> %a) #0 { ; CHECK: or.b32 {{.*}}, [[R1]], 1056964608; ; CHECK: and.b32 [[R2:%r[0-9]+]], {{.*}}, -2147483648; ; CHECK: or.b32 {{.*}}, [[R2]], 1056964608; -; CHECK: st.param.b32 [func_retval0+0], {{.*}}; +; CHECK: st.param.b32 [func_retval0], {{.*}}; ; CHECK: ret; define <2 x half> @test_round(<2 x half> %a) #0 { %r = call <2 x half> @llvm.round.f16(<2 x half> %a) @@ -1465,7 +1465,7 @@ define <2 x half> @test_round(<2 x half> %a) #0 { ; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%rs[0-9]+]], [[FR1]] ; CHECK-NOF16: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]} ; -; CHECK: st.param.b32 [func_retval0+0], [[R]]; +; CHECK: st.param.b32 [func_retval0], [[R]]; ; CHECK: ret; define <2 x half> @test_fmuladd(<2 x half> %a, <2 x half> %b, <2 x half> %c) #0 { %r = call <2 x half> @llvm.fmuladd.f16(<2 x half> %a, <2 x half> %b, <2 x half> %c) diff --git a/llvm/test/CodeGen/NVPTX/i128-param.ll b/llvm/test/CodeGen/NVPTX/i128-param.ll index c2f23124049cae..8ad5ab6a287523 100644 --- a/llvm/test/CodeGen/NVPTX/i128-param.ll +++ b/llvm/test/CodeGen/NVPTX/i128-param.ll @@ -30,9 +30,9 @@ start: ; CHECK: { // callseq [[CALLSEQ_ID:[0-9]]], 0 ; CHECK: .param .align 16 .b8 param0[16]; - ; CHECK-NEXT: st.param.v2.b64 [param0+0], {%[[REG0]], %[[REG1]]} + ; CHECK-NEXT: st.param.v2.b64 [param0], {%[[REG0]], %[[REG1]]} ; CHECK: .param .align 16 .b8 param1[16]; - ; CHECK-NEXT: st.param.v2.b64 [param1+0], {%[[REG2]], %[[REG3]]} + ; CHECK-NEXT: st.param.v2.b64 [param1], {%[[REG2]], %[[REG3]]} ; CHECK: } // callseq [[CALLSEQ_ID]] call void @callee(i128 %0, i128 %1, ptr %2) @@ -49,9 +49,9 @@ start: ; CHECK: { // callseq [[CALLSEQ_ID:[0-9]]], 0 ; CHECK: .param .align 16 .b8 param0[16]; - ; CHECK: st.param.v2.b64 [param0+0], {%[[REG0]], %[[REG1]]} + ; CHECK: st.param.v2.b64 [param0], {%[[REG0]], %[[REG1]]} ; CHECK: .param .align 16 .b8 param1[16]; - ; CHECK: st.param.v2.b64 [param1+0], {%[[REG2]], %[[REG3]]} + ; CHECK: st.param.v2.b64 [param1], {%[[REG2]], %[[REG3]]} ; CHECK: } // callseq [[CALLSEQ_ID]] call void @callee(i128 %0, i128 %1, ptr %2) diff --git a/llvm/test/CodeGen/NVPTX/i128-retval.ll b/llvm/test/CodeGen/NVPTX/i128-retval.ll index df173536c297f8..554c43b52bf021 100644 --- a/llvm/test/CodeGen/NVPTX/i128-retval.ll +++ b/llvm/test/CodeGen/NVPTX/i128-retval.ll @@ -4,7 +4,7 @@ ; CHECK-LABEL: .visible .func (.param .align 16 .b8 func_retval0[16]) callee( define i128 @callee(i128) { ; CHECK: ld.param.v2.u64 {%[[REG0:rd[0-9]+]], %[[REG1:rd[0-9]+]]}, [callee_param_0]; - ; CHECK: st.param.v2.b64 [func_retval0+0], {%[[REG0]], %[[REG1]]} + ; CHECK: st.param.v2.b64 [func_retval0], {%[[REG0]], %[[REG1]]} ret i128 %0 } @@ 
-17,7 +17,7 @@ start: ; CHECK: { // callseq 0, 0 ; CHECK: .param .align 16 .b8 retval0[16]; ; CHECK: call.uni (retval0), - ; CHECK: ld.param.v2.b64 {%[[REG2:rd[0-9]+]], %[[REG3:rd[0-9]+]]}, [retval0+0]; + ; CHECK: ld.param.v2.b64 {%[[REG2:rd[0-9]+]], %[[REG3:rd[0-9]+]]}, [retval0]; ; CHECK: } // callseq 0 %a = call i128 @callee(i128 %0) diff --git a/llvm/test/CodeGen/NVPTX/i128-struct.ll b/llvm/test/CodeGen/NVPTX/i128-struct.ll index cecfd4f6ce42ae..d7a00a66bf4486 100644 --- a/llvm/test/CodeGen/NVPTX/i128-struct.ll +++ b/llvm/test/CodeGen/NVPTX/i128-struct.ll @@ -8,7 +8,7 @@ define { i128, i128 } @foo(i64 %a, i32 %b) { %3 = insertvalue { i128, i128 } undef, i128 %1, 0 %4 = insertvalue { i128, i128 } %3, i128 %2, 1 - ; CHECK: st.param.v2.b64 [func_retval0+0], {%[[REG1:rd[0-9]+]], %[[REG2:rd[0-9]+]]}; + ; CHECK: st.param.v2.b64 [func_retval0], {%[[REG1:rd[0-9]+]], %[[REG2:rd[0-9]+]]}; ; CHECK: st.param.v2.b64 [func_retval0+16], {%[[REG3:rd[0-9]+]], %[[REG4:rd[0-9]+]]}; ret { i128, i128 } %4 } diff --git a/llvm/test/CodeGen/NVPTX/i128.ll b/llvm/test/CodeGen/NVPTX/i128.ll index 396c29512933c1..895787d68adfee 100644 --- a/llvm/test/CodeGen/NVPTX/i128.ll +++ b/llvm/test/CodeGen/NVPTX/i128.ll @@ -145,7 +145,7 @@ define i128 @srem_i128(i128 %lhs, i128 %rhs) { ; CHECK-NEXT: xor.b64 %rd112, %rd110, %rd2; ; CHECK-NEXT: sub.cc.s64 %rd113, %rd111, %rd2; ; CHECK-NEXT: subc.cc.s64 %rd114, %rd112, %rd2; -; CHECK-NEXT: st.param.v2.b64 [func_retval0+0], {%rd113, %rd114}; +; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd113, %rd114}; ; CHECK-NEXT: ret; %div = srem i128 %lhs, %rhs ret i128 %div @@ -279,7 +279,7 @@ define i128 @urem_i128(i128 %lhs, i128 %rhs) { ; CHECK-NEXT: mul.lo.s64 %rd98, %rd3, %rd113; ; CHECK-NEXT: sub.cc.s64 %rd99, %rd41, %rd98; ; CHECK-NEXT: subc.cc.s64 %rd100, %rd42, %rd97; -; CHECK-NEXT: st.param.v2.b64 [func_retval0+0], {%rd99, %rd100}; +; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd99, %rd100}; ; CHECK-NEXT: ret; %div = urem i128 %lhs, %rhs ret i128 %div @@ -299,7 +299,7 @@ define i128 @srem_i128_pow2k(i128 %lhs) { ; CHECK-NEXT: and.b64 %rd7, %rd5, -8589934592; ; CHECK-NEXT: sub.cc.s64 %rd8, %rd1, %rd7; ; CHECK-NEXT: subc.cc.s64 %rd9, %rd2, %rd6; -; CHECK-NEXT: st.param.v2.b64 [func_retval0+0], {%rd8, %rd9}; +; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd8, %rd9}; ; CHECK-NEXT: ret; %div = srem i128 %lhs, 8589934592 ret i128 %div @@ -314,7 +314,7 @@ define i128 @urem_i128_pow2k(i128 %lhs) { ; CHECK-NEXT: ld.param.v2.u64 {%rd1, %rd2}, [urem_i128_pow2k_param_0]; ; CHECK-NEXT: and.b64 %rd3, %rd1, 8589934591; ; CHECK-NEXT: mov.u64 %rd4, 0; -; CHECK-NEXT: st.param.v2.b64 [func_retval0+0], {%rd3, %rd4}; +; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd3, %rd4}; ; CHECK-NEXT: ret; %div = urem i128 %lhs, 8589934592 ret i128 %div @@ -456,7 +456,7 @@ define i128 @sdiv_i128(i128 %lhs, i128 %rhs) { ; CHECK-NEXT: xor.b64 %rd105, %rd121, %rd5; ; CHECK-NEXT: sub.cc.s64 %rd106, %rd104, %rd5; ; CHECK-NEXT: subc.cc.s64 %rd107, %rd105, %rd5; -; CHECK-NEXT: st.param.v2.b64 [func_retval0+0], {%rd106, %rd107}; +; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd106, %rd107}; ; CHECK-NEXT: ret; %div = sdiv i128 %lhs, %rhs ret i128 %div @@ -582,7 +582,7 @@ define i128 @udiv_i128(i128 %lhs, i128 %rhs) { ; CHECK-NEXT: or.b64 %rd105, %rd97, %rd92; ; CHECK-NEXT: or.b64 %rd106, %rd94, %rd91; ; CHECK-NEXT: $L__BB5_5: // %udiv-end -; CHECK-NEXT: st.param.v2.b64 [func_retval0+0], {%rd105, %rd106}; +; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd105, %rd106}; ; CHECK-NEXT: ret; %div = udiv i128 %lhs, 
%rhs ret i128 %div @@ -603,7 +603,7 @@ define i128 @sdiv_i128_pow2k(i128 %lhs) { ; CHECK-NEXT: shr.u64 %rd8, %rd5, 33; ; CHECK-NEXT: or.b64 %rd9, %rd8, %rd7; ; CHECK-NEXT: shr.s64 %rd10, %rd6, 33; -; CHECK-NEXT: st.param.v2.b64 [func_retval0+0], {%rd9, %rd10}; +; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd9, %rd10}; ; CHECK-NEXT: ret; %div = sdiv i128 %lhs, 8589934592 ret i128 %div @@ -620,7 +620,7 @@ define i128 @udiv_i128_pow2k(i128 %lhs) { ; CHECK-NEXT: shr.u64 %rd4, %rd1, 33; ; CHECK-NEXT: or.b64 %rd5, %rd4, %rd3; ; CHECK-NEXT: shr.u64 %rd6, %rd2, 33; -; CHECK-NEXT: st.param.v2.b64 [func_retval0+0], {%rd5, %rd6}; +; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd5, %rd6}; ; CHECK-NEXT: ret; %div = udiv i128 %lhs, 8589934592 ret i128 %div @@ -636,7 +636,7 @@ define i128 @add_i128(i128 %lhs, i128 %rhs) { ; CHECK-NEXT: ld.param.v2.u64 {%rd3, %rd4}, [add_i128_param_1]; ; CHECK-NEXT: add.cc.s64 %rd5, %rd1, %rd3; ; CHECK-NEXT: addc.cc.s64 %rd6, %rd2, %rd4; -; CHECK-NEXT: st.param.v2.b64 [func_retval0+0], {%rd5, %rd6}; +; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd5, %rd6}; ; CHECK-NEXT: ret; %result = add i128 %lhs, %rhs ret i128 %result diff --git a/llvm/test/CodeGen/NVPTX/i16x2-instructions.ll b/llvm/test/CodeGen/NVPTX/i16x2-instructions.ll index ce9adfc7aa4f19..988438bebea6d0 100644 --- a/llvm/test/CodeGen/NVPTX/i16x2-instructions.ll +++ b/llvm/test/CodeGen/NVPTX/i16x2-instructions.ll @@ -21,7 +21,7 @@ target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" ; COMMON-LABEL: test_ret_const( ; COMMON: mov.b32 [[R:%r[0-9+]]], 131073; -; COMMON: st.param.b32 [func_retval0+0], [[R]]; +; COMMON: st.param.b32 [func_retval0], [[R]]; ; COMMON-NEXT: ret; define <2 x i16> @test_ret_const() #0 { ret <2 x i16> @@ -31,7 +31,7 @@ define <2 x i16> @test_ret_const() #0 { ; COMMON: ld.param.u32 [[A:%r[0-9]+]], [test_extract_0_param_0]; ; COMMON: mov.b32 {[[RS:%rs[0-9]+]], tmp}, [[A]]; ; COMMON: cvt.u32.u16 [[R:%r[0-9]+]], [[RS]]; -; COMMON: st.param.b32 [func_retval0+0], [[R]]; +; COMMON: st.param.b32 [func_retval0], [[R]]; ; COMMON: ret; define i16 @test_extract_0(<2 x i16> %a) #0 { %e = extractelement <2 x i16> %a, i32 0 @@ -42,7 +42,7 @@ define i16 @test_extract_0(<2 x i16> %a) #0 { ; COMMON: ld.param.u32 [[A:%r[0-9]+]], [test_extract_1_param_0]; ; COMMON: mov.b32 {tmp, [[RS:%rs[0-9]+]]}, [[A]]; ; COMMON: cvt.u32.u16 [[R:%r[0-9]+]], [[RS]]; -; COMMON: st.param.b32 [func_retval0+0], [[R]]; +; COMMON: st.param.b32 [func_retval0], [[R]]; ; COMMON: ret; define i16 @test_extract_1(<2 x i16> %a) #0 { %e = extractelement <2 x i16> %a, i32 1 @@ -56,7 +56,7 @@ define i16 @test_extract_1(<2 x i16> %a) #0 { ; COMMON-DAG: mov.b32 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]]}, [[A]]; ; COMMON: selp.b16 [[RS:%rs[0-9]+]], [[E0]], [[E1]], [[PRED]]; ; COMMON: cvt.u32.u16 [[R:%r[0-9]+]], [[RS]]; -; COMMON: st.param.b32 [func_retval0+0], [[R]]; +; COMMON: st.param.b32 [func_retval0], [[R]]; ; COMMON: ret; define i16 @test_extract_i(<2 x i16> %a, i64 %idx) #0 { %e = extractelement <2 x i16> %a, i64 %idx @@ -75,7 +75,7 @@ define i16 @test_extract_i(<2 x i16> %a, i64 %idx) #0 { ; NO-I16x2-DAG: add.s16 [[RS5:%rs[0-9]+]], [[RS1]], [[RS3]]; ; NO-I16x2-DAG: mov.b32 [[R:%r[0-9]+]], {[[RS4]], [[RS5]]}; ; -; COMMON-NEXT: st.param.b32 [func_retval0+0], [[R]]; +; COMMON-NEXT: st.param.b32 [func_retval0], [[R]]; ; COMMON-NEXT: ret; define <2 x i16> @test_add(<2 x i16> %a, <2 x i16> %b) #0 { %r = add <2 x i16> %a, %b @@ -94,7 +94,7 @@ define <2 x i16> @test_add(<2 x i16> %a, <2 x i16> %b) #0 { ; NO-I16x2-DAG: add.s16 
; NO-I16x2-DAG: mov.b32 [[R:%r[0-9]+]], {[[RS2]], [[RS3]]};
;
-; COMMON-NEXT: st.param.b32 [func_retval0+0], [[R]];
+; COMMON-NEXT: st.param.b32 [func_retval0], [[R]];
; COMMON-NEXT: ret;
define <2 x i16> @test_add_imm_0(<2 x i16> %a) #0 {
%r = add <2 x i16> <i16 1, i16 2>, %a
@@ -112,7 +112,7 @@ define <2 x i16> @test_add_imm_0(<2 x i16> %a) #0 {
; NO-I16x2-DAG: add.s16 [[RS3:%rs[0-9]+]], [[RS1]], 2;
; NO-I16x2-DAG: mov.b32 [[R:%r[0-9]+]], {[[RS2]], [[RS3]]};
;
-; COMMON-NEXT: st.param.b32 [func_retval0+0], [[R]];
+; COMMON-NEXT: st.param.b32 [func_retval0], [[R]];
; COMMON-NEXT: ret;
define <2 x i16> @test_add_imm_1(<2 x i16> %a) #0 {
%r = add <2 x i16> %a, <i16 1, i16 2>
@@ -130,7 +130,7 @@ define <2 x i16> @test_add_imm_1(<2 x i16> %a) #0 {
; COMMON-DAG: sub.s16 [[RS5:%rs[0-9]+]], [[RS1]], [[RS3]];
; COMMON-DAG: mov.b32 [[R:%r[0-9]+]], {[[RS4]], [[RS5]]};
;
-; COMMON-NEXT: st.param.b32 [func_retval0+0], [[R]];
+; COMMON-NEXT: st.param.b32 [func_retval0], [[R]];
; COMMON-NEXT: ret;
define <2 x i16> @test_sub(<2 x i16> %a, <2 x i16> %b) #0 {
%r = sub <2 x i16> %a, %b
@@ -149,7 +149,7 @@ define <2 x i16> @test_sub(<2 x i16> %a, <2 x i16> %b) #0 {
; NO-I16x2-DAG: max.s16 [[RS5:%rs[0-9]+]], [[RS1]], [[RS3]];
; NO-I16x2-DAG: mov.b32 [[R:%r[0-9]+]], {[[RS4]], [[RS5]]};
;
-; COMMON-NEXT: st.param.b32 [func_retval0+0], [[R]];
+; COMMON-NEXT: st.param.b32 [func_retval0], [[R]];
; COMMON-NEXT: ret;
define <2 x i16> @test_smax(<2 x i16> %a, <2 x i16> %b) #0 {
%cmp = icmp sgt <2 x i16> %a, %b
@@ -169,7 +169,7 @@ define <2 x i16> @test_smax(<2 x i16> %a, <2 x i16> %b) #0 {
; NO-I16x2-DAG: max.u16 [[RS5:%rs[0-9]+]], [[RS1]], [[RS3]];
; NO-I16x2-DAG: mov.b32 [[R:%r[0-9]+]], {[[RS4]], [[RS5]]};
;
-; COMMON-NEXT: st.param.b32 [func_retval0+0], [[R]];
+; COMMON-NEXT: st.param.b32 [func_retval0], [[R]];
; COMMON-NEXT: ret;
define <2 x i16> @test_umax(<2 x i16> %a, <2 x i16> %b) #0 {
%cmp = icmp ugt <2 x i16> %a, %b
@@ -189,7 +189,7 @@ define <2 x i16> @test_umax(<2 x i16> %a, <2 x i16> %b) #0 {
; NO-I16x2-DAG: min.s16 [[RS5:%rs[0-9]+]], [[RS1]], [[RS3]];
; NO-I16x2-DAG: mov.b32 [[R:%r[0-9]+]], {[[RS4]], [[RS5]]};
;
-; COMMON-NEXT: st.param.b32 [func_retval0+0], [[R]];
+; COMMON-NEXT: st.param.b32 [func_retval0], [[R]];
; COMMON-NEXT: ret;
define <2 x i16> @test_smin(<2 x i16> %a, <2 x i16> %b) #0 {
%cmp = icmp sle <2 x i16> %a, %b
@@ -209,7 +209,7 @@ define <2 x i16> @test_smin(<2 x i16> %a, <2 x i16> %b) #0 {
; NO-I16x2-DAG: min.u16 [[RS5:%rs[0-9]+]], [[RS1]], [[RS3]];
; NO-I16x2-DAG: mov.b32 [[R:%r[0-9]+]], {[[RS4]], [[RS5]]};
;
-; COMMON-NEXT: st.param.b32 [func_retval0+0], [[R]];
+; COMMON-NEXT: st.param.b32 [func_retval0], [[R]];
; COMMON-NEXT: ret;
define <2 x i16> @test_umin(<2 x i16> %a, <2 x i16> %b) #0 {
%cmp = icmp ule <2 x i16> %a, %b
@@ -227,7 +227,7 @@ define <2 x i16> @test_umin(<2 x i16> %a, <2 x i16> %b) #0 {
; COMMON-DAG: mul.lo.s16 [[RS5:%rs[0-9]+]], [[RS1]], [[RS3]];
; COMMON-DAG: mov.b32 [[R:%r[0-9]+]], {[[RS4]], [[RS5]]};
;
-; COMMON-NEXT: st.param.b32 [func_retval0+0], [[R]];
+; COMMON-NEXT: st.param.b32 [func_retval0], [[R]];
; COMMON-NEXT: ret;
define <2 x i16> @test_mul(<2 x i16> %a, <2 x i16> %b) #0 {
%r = mul <2 x i16> %a, %b
@@ -239,7 +239,7 @@ define <2 x i16> @test_mul(<2 x i16> %a, <2 x i16> %b) #0 {
; COMMON-DAG: ld.param.u32 [[A:%r[0-9]+]], [test_or_param_0];
; COMMON-DAG: ld.param.u32 [[B:%r[0-9]+]], [test_or_param_1];
; COMMON-NEXT: or.b32 [[R:%r[0-9]+]], [[A]], [[B]];
-; COMMON-NEXT: st.param.b32 [func_retval0+0], [[R]];
+; COMMON-NEXT: st.param.b32 [func_retval0], [[R]];
; COMMON-NEXT: ret;
define <2 x i16> @test_or(<2 x i16> %a, <2 x i16> %b) #0 {
%r = or <2 x i16> %a, %b
@@ -255,7 +255,7 @@ define <2 x i16> @test_or(<2 x i16> %a, <2 x i16> %b) #0 {
; COMMON-DAG: mov.u16 [[C5:%rs[0-9]+]], 5;
; COMMON-DAG: mov.b32 [[R2:%r[0-9]+]], {[[A]], [[C5]]};
; COMMON: or.b32 [[R:%r[0-9]+]], [[R2]], [[R1]];
-; COMMON-NEXT: st.param.b32 [func_retval0+0], [[R]];
+; COMMON-NEXT: st.param.b32 [func_retval0], [[R]];
define <2 x i16> @test_or_computed(i16 %a) {
%ins.0 = insertelement <2 x i16> zeroinitializer, i16 %a, i32 0
%ins.1 = insertelement <2 x i16> %ins.0, i16 5, i32 1
@@ -267,7 +267,7 @@ define <2 x i16> @test_or_computed(i16 %a) {
; COMMON-LABEL: test_or_imm_0(
; COMMON-DAG: ld.param.u32 [[A:%r[0-9]+]], [test_or_imm_0_param_0];
; COMMON-NEXT: or.b32 [[R:%r[0-9]+]], [[A]], 131073;
-; COMMON-NEXT: st.param.b32 [func_retval0+0], [[R]];
+; COMMON-NEXT: st.param.b32 [func_retval0], [[R]];
; COMMON-NEXT: ret;
define <2 x i16> @test_or_imm_0(<2 x i16> %a) #0 {
%r = or <2 x i16> <i16 1, i16 2>, %a
@@ -277,7 +277,7 @@ define <2 x i16> @test_or_imm_0(<2 x i16> %a) #0 {
; COMMON-LABEL: test_or_imm_1(
; COMMON-DAG: ld.param.u32 [[B:%r[0-9]+]], [test_or_imm_1_param_0];
; COMMON-NEXT: or.b32 [[R:%r[0-9]+]], [[A]], 131073;
-; COMMON-NEXT: st.param.b32 [func_retval0+0], [[R]];
+; COMMON-NEXT: st.param.b32 [func_retval0], [[R]];
; COMMON-NEXT: ret;
define <2 x i16> @test_or_imm_1(<2 x i16> %a) #0 {
%r = or <2 x i16> %a, <i16 1, i16 2>
@@ -288,7 +288,7 @@ define <2 x i16> @test_or_imm_1(<2 x i16> %a) #0 {
; COMMON-DAG: ld.param.u32 [[A:%r[0-9]+]], [test_xor_param_0];
; COMMON-DAG: ld.param.u32 [[B:%r[0-9]+]], [test_xor_param_1];
; COMMON-NEXT: xor.b32 [[R:%r[0-9]+]], [[A]], [[B]];
-; COMMON-NEXT: st.param.b32 [func_retval0+0], [[R]];
+; COMMON-NEXT: st.param.b32 [func_retval0], [[R]];
; COMMON-NEXT: ret;
define <2 x i16> @test_xor(<2 x i16> %a, <2 x i16> %b) #0 {
%r = xor <2 x i16> %a, %b
@@ -302,7 +302,7 @@ define <2 x i16> @test_xor(<2 x i16> %a, <2 x i16> %b) #0 {
; COMMON-DAG: mov.u16 [[C5:%rs[0-9]+]], 5;
; COMMON-DAG: mov.b32 [[R2:%r[0-9]+]], {[[A]], [[C5]]};
; COMMON: xor.b32 [[R:%r[0-9]+]], [[R2]], [[R1]];
-; COMMON-NEXT: st.param.b32 [func_retval0+0], [[R]];
+; COMMON-NEXT: st.param.b32 [func_retval0], [[R]];
define <2 x i16> @test_xor_computed(i16 %a) {
%ins.0 = insertelement <2 x i16> zeroinitializer, i16 %a, i32 0
%ins.1 = insertelement <2 x i16> %ins.0, i16 5, i32 1
@@ -314,7 +314,7 @@ define <2 x i16> @test_xor_computed(i16 %a) {
; COMMON-LABEL: test_xor_imm_0(
; COMMON-DAG: ld.param.u32 [[A:%r[0-9]+]], [test_xor_imm_0_param_0];
; COMMON-NEXT: xor.b32 [[R:%r[0-9]+]], [[A]], 131073;
-; COMMON-NEXT: st.param.b32 [func_retval0+0], [[R]];
+; COMMON-NEXT: st.param.b32 [func_retval0], [[R]];
; COMMON-NEXT: ret;
define <2 x i16> @test_xor_imm_0(<2 x i16> %a) #0 {
%r = xor <2 x i16> <i16 1, i16 2>, %a
@@ -324,7 +324,7 @@ define <2 x i16> @test_xor_imm_0(<2 x i16> %a) #0 {
; COMMON-LABEL: test_xor_imm_1(
; COMMON-DAG: ld.param.u32 [[B:%r[0-9]+]], [test_xor_imm_1_param_0];
; COMMON-NEXT: xor.b32 [[R:%r[0-9]+]], [[A]], 131073;
-; COMMON-NEXT: st.param.b32 [func_retval0+0], [[R]];
+; COMMON-NEXT: st.param.b32 [func_retval0], [[R]];
; COMMON-NEXT: ret;
define <2 x i16> @test_xor_imm_1(<2 x i16> %a) #0 {
%r = xor <2 x i16> %a, <i16 1, i16 2>
@@ -335,7 +335,7 @@ define <2 x i16> @test_xor_imm_1(<2 x i16> %a) #0 {
; COMMON-DAG: ld.param.u32 [[A:%r[0-9]+]], [test_and_param_0];
; COMMON-DAG: ld.param.u32 [[B:%r[0-9]+]], [test_and_param_1];
; COMMON-NEXT: and.b32 [[R:%r[0-9]+]], [[A]], [[B]];
-; COMMON-NEXT: st.param.b32 [func_retval0+0], [[R]];
+; COMMON-NEXT: st.param.b32 [func_retval0], [[R]];
; COMMON-NEXT: ret;
define <2 x i16> @test_and(<2 x i16> %a, <2 x i16> %b) #0 {
%r = and <2 x i16> %a, %b
@@ -351,7 +351,7 @@ define <2 x i16> @test_and(<2 x i16> %a, <2 x i16> %b) #0 {
; COMMON-DAG: mov.u16 [[C5:%rs[0-9]+]], 5;
; COMMON-DAG: mov.b32 [[R2:%r[0-9]+]], {[[A]], [[C5]]};
; COMMON: and.b32 [[R:%r[0-9]+]], [[R2]], [[R1]];
-; COMMON-NEXT: st.param.b32 [func_retval0+0], [[R]];
+; COMMON-NEXT: st.param.b32 [func_retval0], [[R]];
define <2 x i16> @test_and_computed(i16 %a) {
%ins.0 = insertelement <2 x i16> zeroinitializer, i16 %a, i32 0
%ins.1 = insertelement <2 x i16> %ins.0, i16 5, i32 1
@@ -363,7 +363,7 @@ define <2 x i16> @test_and_computed(i16 %a) {
; COMMON-LABEL: test_and_imm_0(
; COMMON-DAG: ld.param.u32 [[A:%r[0-9]+]], [test_and_imm_0_param_0];
; COMMON-NEXT: and.b32 [[R:%r[0-9]+]], [[A]], 131073;
-; COMMON-NEXT: st.param.b32 [func_retval0+0], [[R]];
+; COMMON-NEXT: st.param.b32 [func_retval0], [[R]];
; COMMON-NEXT: ret;
define <2 x i16> @test_and_imm_0(<2 x i16> %a) #0 {
%r = and <2 x i16> <i16 1, i16 2>, %a
@@ -373,7 +373,7 @@ define <2 x i16> @test_and_imm_0(<2 x i16> %a) #0 {
; COMMON-LABEL: test_and_imm_1(
; COMMON-DAG: ld.param.u32 [[B:%r[0-9]+]], [test_and_imm_1_param_0];
; COMMON-NEXT: and.b32 [[R:%r[0-9]+]], [[A]], 131073;
-; COMMON-NEXT: st.param.b32 [func_retval0+0], [[R]];
+; COMMON-NEXT: st.param.b32 [func_retval0], [[R]];
; COMMON-NEXT: ret;
define <2 x i16> @test_and_imm_1(<2 x i16> %a) #0 {
%r = and <2 x i16> %a, <i16 1, i16 2>
@@ -441,15 +441,15 @@ declare <2 x i16> @test_callee(<2 x i16> %a, <2 x i16> %b) #0
; COMMON: {
; COMMON-DAG: .param .align 4 .b8 param0[4];
; COMMON-DAG: .param .align 4 .b8 param1[4];
-; COMMON-DAG: st.param.b32 [param0+0], [[A]];
-; COMMON-DAG: st.param.b32 [param1+0], [[B]];
+; COMMON-DAG: st.param.b32 [param0], [[A]];
+; COMMON-DAG: st.param.b32 [param1], [[B]];
; COMMON-DAG: .param .align 4 .b8 retval0[4];
; COMMON: call.uni (retval0),
; COMMON-NEXT: test_callee,
; COMMON: );
-; COMMON-NEXT: ld.param.b32 [[R:%r[0-9]+]], [retval0+0];
+; COMMON-NEXT: ld.param.b32 [[R:%r[0-9]+]], [retval0];
; COMMON-NEXT: }
-; COMMON-NEXT: st.param.b32 [func_retval0+0], [[R]];
+; COMMON-NEXT: st.param.b32 [func_retval0], [[R]];
; COMMON-NEXT: ret;
define <2 x i16> @test_call(<2 x i16> %a, <2 x i16> %b) #0 {
%r = call <2 x i16> @test_callee(<2 x i16> %a, <2 x i16> %b)
@@ -462,15 +462,15 @@ define <2 x i16> @test_call(<2 x i16> %a, <2 x i16> %b) #0 {
; COMMON: {
; COMMON-DAG: .param .align 4 .b8 param0[4];
; COMMON-DAG: .param .align 4 .b8 param1[4];
-; COMMON-DAG: st.param.b32 [param0+0], [[B]];
-; COMMON-DAG: st.param.b32 [param1+0], [[A]];
+; COMMON-DAG: st.param.b32 [param0], [[B]];
+; COMMON-DAG: st.param.b32 [param1], [[A]];
; COMMON-DAG: .param .align 4 .b8 retval0[4];
; COMMON: call.uni (retval0),
; COMMON-NEXT: test_callee,
; COMMON: );
-; COMMON-NEXT: ld.param.b32 [[R:%r[0-9]+]], [retval0+0];
+; COMMON-NEXT: ld.param.b32 [[R:%r[0-9]+]], [retval0];
; COMMON-NEXT: }
-; COMMON-NEXT: st.param.b32 [func_retval0+0], [[R]];
+; COMMON-NEXT: st.param.b32 [func_retval0], [[R]];
; COMMON-NEXT: ret;
define <2 x i16> @test_call_flipped(<2 x i16> %a, <2 x i16> %b) #0 {
%r = call <2 x i16> @test_callee(<2 x i16> %b, <2 x i16> %a)
@@ -483,15 +483,15 @@ define <2 x i16> @test_call_flipped(<2 x i16> %a, <2 x i16> %b) #0 {
; COMMON: {
; COMMON-DAG: .param .align 4 .b8 param0[4];
; COMMON-DAG: .param .align 4 .b8 param1[4];
-; COMMON-DAG: st.param.b32 [param0+0], [[B]];
-; COMMON-DAG: st.param.b32 [param1+0], [[A]];
+; COMMON-DAG: st.param.b32 [param0], [[B]];
+; COMMON-DAG: st.param.b32 [param1], [[A]];
; COMMON-DAG: .param .align 4 .b8 retval0[4];
; COMMON: call.uni (retval0),
; COMMON-NEXT: test_callee,
; COMMON: );
-; COMMON-NEXT: ld.param.b32 [[R:%r[0-9]+]], [retval0+0];
+; COMMON-NEXT: ld.param.b32 [[R:%r[0-9]+]], [retval0];
; COMMON-NEXT: }
-; COMMON-NEXT: st.param.b32 [func_retval0+0], [[R]];
+; COMMON-NEXT: st.param.b32 [func_retval0], [[R]];
; COMMON-NEXT: ret;
define <2 x i16> @test_tailcall_flipped(<2 x i16> %a, <2 x i16> %b) #0 {
%r = tail call <2 x i16> @test_callee(<2 x i16> %b, <2 x i16> %a)
@@ -504,7 +504,7 @@ define <2 x i16> @test_tailcall_flipped(<2 x i16> %a, <2 x i16> %b) #0 {
; COMMON-DAG: ld.param.u8 [[C:%rs[0-9]+]], [test_select_param_2]
; COMMON-DAG: setp.eq.b16 [[PRED:%p[0-9]+]], %rs{{.*}}, 1;
; COMMON-NEXT: selp.b32 [[R:%r[0-9]+]], [[A]], [[B]], [[PRED]];
-; COMMON-NEXT: st.param.b32 [func_retval0+0], [[R]];
+; COMMON-NEXT: st.param.b32 [func_retval0], [[R]];
; COMMON-NEXT: ret;
define <2 x i16> @test_select(<2 x i16> %a, <2 x i16> %b, i1 zeroext %c) #0 {
%r = select i1 %c, <2 x i16> %a, <2 x i16> %b
@@ -525,7 +525,7 @@ define <2 x i16> @test_select(<2 x i16> %a, <2 x i16> %b, i1 zeroext %c) #0 {
; COMMON-DAG: selp.b16 [[R0:%rs[0-9]+]], [[A0]], [[B0]], [[P0]];
; COMMON-DAG: selp.b16 [[R1:%rs[0-9]+]], [[A1]], [[B1]], [[P1]];
; COMMON: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]}
-; COMMON-NEXT: st.param.b32 [func_retval0+0], [[R]];
+; COMMON-NEXT: st.param.b32 [func_retval0], [[R]];
; COMMON-NEXT: ret;
define <2 x i16> @test_select_cc(<2 x i16> %a, <2 x i16> %b, <2 x i16> %c, <2 x i16> %d) #0 {
%cc = icmp ne <2 x i16> %c, %d
@@ -544,7 +544,7 @@ define <2 x i16> @test_select_cc(<2 x i16> %a, <2 x i16> %b, <2 x i16> %c, <2 x
; COMMON-DAG: setp.ne.s16 [[P1:%p[0-9]+]], [[C1]], [[D1]]
; COMMON-DAG: selp.b32 [[R0:%r[0-9]+]], [[A0]], [[B0]], [[P0]];
; COMMON-DAG: selp.b32 [[R1:%r[0-9]+]], [[A1]], [[B1]], [[P1]];
-; COMMON-NEXT: st.param.v2.b32 [func_retval0+0], {[[R0]], [[R1]]};
+; COMMON-NEXT: st.param.v2.b32 [func_retval0], {[[R0]], [[R1]]};
; COMMON-NEXT: ret;
define <2 x i32> @test_select_cc_i32_i16(<2 x i32> %a, <2 x i32> %b,
@@ -565,7 +565,7 @@ define <2 x i32> @test_select_cc_i32_i16(<2 x i32> %a, <2 x i32> %b,
; COMMON-DAG: selp.b16 [[R0:%rs[0-9]+]], [[A0]], [[B0]], [[P0]];
; COMMON-DAG: selp.b16 [[R1:%rs[0-9]+]], [[A1]], [[B1]], [[P1]];
; COMMON: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]}
-; COMMON-NEXT: st.param.b32 [func_retval0+0], [[R]];
+; COMMON-NEXT: st.param.b32 [func_retval0], [[R]];
; COMMON-NEXT: ret;
define <2 x i16> @test_select_cc_i16_i32(<2 x i16> %a, <2 x i16> %b,
@@ -580,7 +580,7 @@ define <2 x i16> @test_select_cc_i16_i32(<2 x i16> %a, <2 x i16> %b,
; COMMON-DAG: cvt.u16.u32 [[R0:%rs[0-9]+]], [[A0]];
; COMMON-DAG: cvt.u16.u32 [[R1:%rs[0-9]+]], [[A1]];
; COMMON: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]}
-; COMMON: st.param.b32 [func_retval0+0], [[R]];
+; COMMON: st.param.b32 [func_retval0], [[R]];
; COMMON: ret;
define <2 x i16> @test_trunc_2xi32(<2 x i32> %a) #0 {
%r = trunc <2 x i32> %a to <2 x i16>
@@ -592,7 +592,7 @@ define <2 x i16> @test_trunc_2xi32(<2 x i32> %a) #0 {
; COMMON-DAG: cvt.u16.u64 [[R0:%rs[0-9]+]], [[A0]];
; COMMON-DAG: cvt.u16.u64 [[R1:%rs[0-9]+]], [[A1]];
; COMMON: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]}
-; COMMON: st.param.b32 [func_retval0+0], [[R]];
+; COMMON: st.param.b32 [func_retval0], [[R]];
; COMMON: ret;
define <2 x i16> @test_trunc_2xi64(<2 x i64> %a) #0 {
%r = trunc <2 x i64> %a to <2 x i16>
@@ -604,7 +604,7 @@ define <2 x i16> @test_trunc_2xi64(<2 x i64> %a) #0 {
; COMMON: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]]
; COMMON-DAG: cvt.u32.u16 [[R0:%r[0-9]+]], [[A0]];
; COMMON-DAG: cvt.u32.u16 [[R1:%r[0-9]+]], [[A1]];
-; COMMON-NEXT: st.param.v2.b32 [func_retval0+0], {[[R0]], [[R1]]};
+; COMMON-NEXT: st.param.v2.b32 [func_retval0], {[[R0]], [[R1]]};
; COMMON: ret;
define <2 x i32> @test_zext_2xi32(<2 x i16> %a) #0 {
%r = zext <2 x i16> %a to <2 x i32>
@@ -616,7 +616,7 @@ define <2 x i32> @test_zext_2xi32(<2 x i16> %a) #0 {
; COMMON: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]]
; COMMON-DAG: cvt.u64.u16 [[R0:%rd[0-9]+]], [[A0]];
; COMMON-DAG: cvt.u64.u16 [[R1:%rd[0-9]+]], [[A1]];
-; COMMON-NEXT: st.param.v2.b64 [func_retval0+0], {[[R0]], [[R1]]};
+; COMMON-NEXT: st.param.v2.b64 [func_retval0], {[[R0]], [[R1]]};
; COMMON: ret;
define <2 x i64> @test_zext_2xi64(<2 x i16> %a) #0 {
%r = zext <2 x i16> %a to <2 x i64>
@@ -625,7 +625,7 @@ define <2 x i64> @test_zext_2xi64(<2 x i16> %a) #0 {
; COMMON-LABEL: test_bitcast_i32_to_2xi16(
; COMMON: ld.param.u32 [[R:%r[0-9]+]], [test_bitcast_i32_to_2xi16_param_0];
-; COMMON: st.param.b32 [func_retval0+0], [[R]];
+; COMMON: st.param.b32 [func_retval0], [[R]];
; COMMON: ret;
define <2 x i16> @test_bitcast_i32_to_2xi16(i32 %a) #0 {
%r = bitcast i32 %a to <2 x i16>
@@ -634,7 +634,7 @@ define <2 x i16> @test_bitcast_i32_to_2xi16(i32 %a) #0 {
; COMMON-LABEL: test_bitcast_2xi16_to_i32(
; COMMON: ld.param.u32 [[R:%r[0-9]+]], [test_bitcast_2xi16_to_i32_param_0];
-; COMMON: st.param.b32 [func_retval0+0], [[R]];
+; COMMON: st.param.b32 [func_retval0], [[R]];
; COMMON: ret;
define i32 @test_bitcast_2xi16_to_i32(<2 x i16> %a) #0 {
%r = bitcast <2 x i16> %a to i32
@@ -645,7 +645,7 @@ define i32 @test_bitcast_2xi16_to_i32(<2 x i16> %a) #0 {
; COMMON: ld.param.u16 [[RS1:%rs[0-9]+]], [test_bitcast_2xi16_to_2xhalf_param_0];
; COMMON: mov.u16 [[RS2:%rs[0-9]+]], 5;
; COMMON: mov.b32 [[R:%r[0-9]+]], {[[RS1]], [[RS2]]};
-; COMMON: st.param.b32 [func_retval0+0], [[R]];
+; COMMON: st.param.b32 [func_retval0], [[R]];
; COMMON: ret;
define <2 x half> @test_bitcast_2xi16_to_2xhalf(i16 %a) #0 {
%ins.0 = insertelement <2 x i16> undef, i16 %a, i32 0
@@ -659,7 +659,7 @@ define <2 x half> @test_bitcast_2xi16_to_2xhalf(i16 %a) #0 {
; COMMON: ld.param.u32 [[R:%r[0-9]+]], [test_shufflevector_param_0];
; COMMON: mov.b32 {[[RS0:%rs[0-9]+]], [[RS1:%rs[0-9]+]]}, [[R]];
; COMMON: mov.b32 [[R1:%r[0-9]+]], {[[RS1]], [[RS0]]};
-; COMMON: st.param.b32 [func_retval0+0], [[R1]];
+; COMMON: st.param.b32 [func_retval0], [[R1]];
; COMMON: ret;
define <2 x i16> @test_shufflevector(<2 x i16> %a) #0 {
%s = shufflevector <2 x i16> %a, <2 x i16> undef, <2 x i32> <i32 1, i32 0>
@@ -671,7 +671,7 @@ define <2 x i16> @test_shufflevector(<2 x i16> %a) #0 {
; COMMON: ld.param.u32 [[A:%r[0-9]+]], [test_insertelement_param_0];
; COMMON: { .reg .b16 tmp; mov.b32 {[[R0:%rs[0-9]+]], tmp}, [[A]]; }
; COMMON: mov.b32 [[R1:%r[0-9]+]], {[[R0]], [[B]]};
-; COMMON: st.param.b32 [func_retval0+0], [[R1]];
+; COMMON: st.param.b32 [func_retval0], [[R1]];
; COMMON: ret;
define <2 x i16> @test_insertelement(<2 x i16> %a, i16 %x) #0 {
%i = insertelement <2 x i16> %a, i16 %x, i64 1
diff --git a/llvm/test/CodeGen/NVPTX/i8x4-instructions.ll b/llvm/test/CodeGen/NVPTX/i8x4-instructions.ll
index 96a4359d0ec43e..5b5662a1eea766 100644
--- a/llvm/test/CodeGen/NVPTX/i8x4-instructions.ll
+++ b/llvm/test/CodeGen/NVPTX/i8x4-instructions.ll
@@ -18,7 +18,7 @@ define <4 x i8> @test_ret_const() #0 {
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: mov.b32 %r1, -66911489;
-; CHECK-NEXT: st.param.b32 [func_retval0+0], %r1;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
; CHECK-NEXT: ret;
ret <4 x i8> <i8 -1, i8 2, i8 3, i8 -4>
}
@@ -31,7 +31,7 @@ define i8 @test_extract_0(<4 x i8> %a) #0 {
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u32 %r1, [test_extract_0_param_0];
; CHECK-NEXT: bfe.u32 %r2, %r1, 0, 8;
-; CHECK-NEXT: st.param.b32 [func_retval0+0], %r2;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r2;
; CHECK-NEXT: ret;
%e = extractelement <4 x i8> %a, i32 0
ret i8 %e
@@ -45,7 +45,7 @@ define i8 @test_extract_1(<4 x i8> %a) #0 {
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u32 %r1, [test_extract_1_param_0];
; CHECK-NEXT: bfe.u32 %r2, %r1, 8, 8;
-; CHECK-NEXT: st.param.b32 [func_retval0+0], %r2;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r2;
; CHECK-NEXT: ret;
%e = extractelement <4 x i8> %a, i32 1
ret i8 %e
@@ -59,7 +59,7 @@ define i8 @test_extract_2(<4 x i8> %a) #0 {
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u32 %r1, [test_extract_2_param_0];
; CHECK-NEXT: bfe.u32 %r2, %r1, 16, 8;
-; CHECK-NEXT: st.param.b32 [func_retval0+0], %r2;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r2;
; CHECK-NEXT: ret;
%e = extractelement <4 x i8> %a, i32 2
ret i8 %e
@@ -73,7 +73,7 @@ define i8 @test_extract_3(<4 x i8> %a) #0 {
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u32 %r1, [test_extract_3_param_0];
; CHECK-NEXT: bfe.u32 %r2, %r1, 24, 8;
-; CHECK-NEXT: st.param.b32 [func_retval0+0], %r2;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r2;
; CHECK-NEXT: ret;
%e = extractelement <4 x i8> %a, i32 3
ret i8 %e
@@ -91,7 +91,7 @@ define i8 @test_extract_i(<4 x i8> %a, i64 %idx) #0 {
; CHECK-NEXT: cvt.u32.u64 %r2, %rd1;
; CHECK-NEXT: shl.b32 %r3, %r2, 3;
; CHECK-NEXT: bfe.u32 %r4, %r1, %r3, 8;
-; CHECK-NEXT: st.param.b32 [func_retval0+0], %r4;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r4;
; CHECK-NEXT: ret;
%e = extractelement <4 x i8> %a, i64 %idx
ret i8 %e
@@ -133,7 +133,7 @@ define <4 x i8> @test_add(<4 x i8> %a, <4 x i8> %b) #0 {
; CHECK-NEXT: add.s16 %rs12, %rs11, %rs10;
; CHECK-NEXT: cvt.u32.u16 %r16, %rs12;
; CHECK-NEXT: bfi.b32 %r17, %r16, %r13, 24, 8;
-; CHECK-NEXT: st.param.b32 [func_retval0+0], %r17;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r17;
; CHECK-NEXT: ret;
%r = add <4 x i8> %a, %b
ret <4 x i8> %r
@@ -166,7 +166,7 @@ define <4 x i8> @test_add_imm_0(<4 x i8> %a) #0 {
; CHECK-NEXT: add.s16 %rs8, %rs7, 4;
; CHECK-NEXT: cvt.u32.u16 %r11, %rs8;
; CHECK-NEXT: bfi.b32 %r12, %r11, %r9, 24, 8;
-; CHECK-NEXT: st.param.b32 [func_retval0+0], %r12;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r12;
; CHECK-NEXT: ret;
%r = add <4 x i8> <i8 1, i8 2, i8 3, i8 4>, %a
ret <4 x i8> %r
@@ -199,7 +199,7 @@ define <4 x i8> @test_add_imm_1(<4 x i8> %a) #0 {
; CHECK-NEXT: add.s16 %rs8, %rs7, 4;
; CHECK-NEXT: cvt.u32.u16 %r11, %rs8;
; CHECK-NEXT: bfi.b32 %r12, %r11, %r9, 24, 8;
-; CHECK-NEXT: st.param.b32 [func_retval0+0], %r12;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r12;
; CHECK-NEXT: ret;
%r = add <4 x i8> %a, <i8 1, i8 2, i8 3, i8 4>
ret <4 x i8> %r
@@ -241,7 +241,7 @@ define <4 x i8> @test_sub(<4 x i8> %a, <4 x i8> %b) #0 {
; CHECK-NEXT: sub.s16 %rs12, %rs11, %rs10;
; CHECK-NEXT: cvt.u32.u16 %r16, %rs12;
; CHECK-NEXT: bfi.b32 %r17, %r16, %r13, 24, 8;
-; CHECK-NEXT: st.param.b32 [func_retval0+0], %r17;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r17;
; CHECK-NEXT: ret;
%r = sub <4 x i8> %a, %b
ret <4 x i8> %r
@@ -283,7 +283,7 @@ define <4 x i8> @test_smax(<4 x i8> %a, <4 x i8> %b) #0 {
; CHECK-NEXT: bfe.u32 %r23, %r2, 24, 8;
; CHECK-NEXT: selp.b32 %r24, %r11, %r23, %p1;
; CHECK-NEXT: bfi.b32 %r25, %r24, %r22, 24, 8;
-; CHECK-NEXT: st.param.b32 [func_retval0+0], %r25;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r25;
; CHECK-NEXT: ret;
%cmp = icmp sgt <4 x i8> %a, %b
%r = select <4 x i1> %cmp, <4 x i8> %a, <4 x i8> %b
@@ -318,7 +318,7 @@ define <4 x i8> @test_umax(<4 x i8> %a, <4 x i8> %b) #0 {
; CHECK-NEXT: bfi.b32 %r15, %r14, %r13, 16, 8;
; CHECK-NEXT: selp.b32 %r16, %r4, %r3, %p1;
; CHECK-NEXT: bfi.b32 %r17, %r16, %r15, 24, 8;
-; CHECK-NEXT: st.param.b32 [func_retval0+0], %r17;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r17;
; CHECK-NEXT: ret;
%cmp = icmp ugt <4 x i8> %a, %b
%r = select <4 x i1> %cmp, <4 x i8> %a, <4 x i8> %b
@@ -361,7 +361,7 @@ define <4 x i8> @test_smin(<4 x i8> %a, <4 x i8> %b) #0 {
; CHECK-NEXT: bfe.u32 %r23, %r2, 24, 8;
; CHECK-NEXT: selp.b32 %r24, %r11, %r23, %p1;
; CHECK-NEXT: bfi.b32 %r25, %r24, %r22, 24, 8;
-; CHECK-NEXT: st.param.b32 [func_retval0+0], %r25;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r25;
; CHECK-NEXT: ret;
%cmp = icmp sle <4 x i8> %a, %b
%r = select <4 x i1> %cmp, <4 x i8> %a, <4 x i8> %b
@@ -396,7 +396,7 @@ define <4 x i8> @test_umin(<4 x i8> %a, <4 x i8> %b) #0 {
; CHECK-NEXT: bfi.b32 %r15, %r14, %r13, 16, 8;
; CHECK-NEXT: selp.b32 %r16, %r4, %r3, %p1;
; CHECK-NEXT: bfi.b32 %r17, %r16, %r15, 24, 8;
-; CHECK-NEXT: st.param.b32 [func_retval0+0], %r17;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r17;
; CHECK-NEXT: ret;
%cmp = icmp ule <4 x i8> %a, %b
%r = select <4 x i1> %cmp, <4 x i8> %a, <4 x i8> %b
@@ -436,7 +436,7 @@ define <4 x i8> @test_eq(<4 x i8> %a, <4 x i8> %b, <4 x i8> %c) #0 {
; CHECK-NEXT: bfe.u32 %r20, %r3, 24, 8;
; CHECK-NEXT: selp.b32 %r21, %r5, %r20, %p1;
; CHECK-NEXT: bfi.b32 %r22, %r21, %r19, 24, 8;
-; CHECK-NEXT: st.param.b32 [func_retval0+0], %r22;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r22;
; CHECK-NEXT: ret;
%cmp = icmp eq <4 x i8> %a, %b
%r = select <4 x i1> %cmp, <4 x i8> %a, <4 x i8> %c
@@ -476,7 +476,7 @@ define <4 x i8> @test_ne(<4 x i8> %a, <4 x i8> %b, <4 x i8> %c) #0 {
; CHECK-NEXT: bfe.u32 %r20, %r3, 24, 8;
; CHECK-NEXT: selp.b32 %r21, %r5, %r20, %p1;
; CHECK-NEXT: bfi.b32 %r22, %r21, %r19, 24, 8;
-; CHECK-NEXT: st.param.b32 [func_retval0+0], %r22;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r22;
; CHECK-NEXT: ret;
%cmp = icmp ne <4 x i8> %a, %b
%r = select <4 x i1> %cmp, <4 x i8> %a, <4 x i8> %c
@@ -519,7 +519,7 @@ define <4 x i8> @test_mul(<4 x i8> %a, <4 x i8> %b) #0 {
; CHECK-NEXT: mul.lo.s16 %rs12, %rs11, %rs10;
; CHECK-NEXT: cvt.u32.u16 %r16, %rs12;
; CHECK-NEXT: bfi.b32 %r17, %r16, %r13, 24, 8;
-; CHECK-NEXT: st.param.b32 [func_retval0+0], %r17;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r17;
; CHECK-NEXT: ret;
%r = mul <4 x i8> %a, %b
ret <4 x i8> %r
@@ -534,7 +534,7 @@ define <4 x i8> @test_or(<4 x i8> %a, <4 x i8> %b) #0 {
; CHECK-NEXT: ld.param.u32 %r3, [test_or_param_1];
; CHECK-NEXT: ld.param.u32 %r4, [test_or_param_0];
; CHECK-NEXT: or.b32 %r5, %r4, %r3;
-; CHECK-NEXT: st.param.b32 [func_retval0+0], %r5;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r5;
; CHECK-NEXT: ret;
%r = or <4 x i8> %a, %b
ret <4 x i8> %r
@@ -554,7 +554,7 @@ define <4 x i8> @test_or_computed(i8 %a) {
; CHECK-NEXT: bfi.b32 %r4, 0, %r3, 24, 8;
; CHECK-NEXT: bfi.b32 %r6, 5, %r4, 8, 8;
; CHECK-NEXT: or.b32 %r8, %r6, %r4;
-; CHECK-NEXT: st.param.b32 [func_retval0+0], %r8;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r8;
; CHECK-NEXT: ret;
%ins.0 = insertelement <4 x i8> zeroinitializer, i8 %a, i32 0
%ins.1 = insertelement <4 x i8> %ins.0, i8 5, i32 1
@@ -570,7 +570,7 @@ define <4 x i8> @test_or_imm_0(<4 x i8> %a) #0 {
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u32 %r1, [test_or_imm_0_param_0];
; CHECK-NEXT: or.b32 %r2, %r1, 67305985;
-; CHECK-NEXT: st.param.b32 [func_retval0+0], %r2;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r2;
; CHECK-NEXT: ret;
%r = or <4 x i8> <i8 1, i8 2, i8 3, i8 4>, %a
ret <4 x i8> %r
@@ -584,7 +584,7 @@ define <4 x i8> @test_or_imm_1(<4 x i8> %a) #0 {
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u32 %r1, [test_or_imm_1_param_0];
; CHECK-NEXT: or.b32 %r2, %r1, 67305985;
-; CHECK-NEXT: st.param.b32 [func_retval0+0], %r2;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r2;
; CHECK-NEXT: ret;
%r = or <4 x i8> %a, <i8 1, i8 2, i8 3, i8 4>
ret <4 x i8> %r
@@ -599,7 +599,7 @@ define <4 x i8> @test_xor(<4 x i8> %a, <4 x i8> %b) #0 {
; CHECK-NEXT: ld.param.u32 %r3, [test_xor_param_1];
; CHECK-NEXT: ld.param.u32 %r4, [test_xor_param_0];
; CHECK-NEXT: xor.b32 %r5, %r4, %r3;
-; CHECK-NEXT: st.param.b32 [func_retval0+0], %r5;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r5;
; CHECK-NEXT: ret;
%r = xor <4 x i8> %a, %b
ret <4 x i8> %r
@@ -619,7 +619,7 @@ define <4 x i8> @test_xor_computed(i8 %a) {
; CHECK-NEXT: bfi.b32 %r4, 0, %r3, 24, 8;
; CHECK-NEXT: bfi.b32 %r6, 5, %r4, 8, 8;
; CHECK-NEXT: xor.b32 %r8, %r6, %r4;
-; CHECK-NEXT: st.param.b32 [func_retval0+0], %r8;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r8;
; CHECK-NEXT: ret;
%ins.0 = insertelement <4 x i8> zeroinitializer, i8 %a, i32 0
%ins.1 = insertelement <4 x i8> %ins.0, i8 5, i32 1
@@ -635,7 +635,7 @@ define <4 x i8> @test_xor_imm_0(<4 x i8> %a) #0 {
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u32 %r1, [test_xor_imm_0_param_0];
; CHECK-NEXT: xor.b32 %r2, %r1, 67305985;
-; CHECK-NEXT: st.param.b32 [func_retval0+0], %r2;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r2;
; CHECK-NEXT: ret;
%r = xor <4 x i8> <i8 1, i8 2, i8 3, i8 4>, %a
ret <4 x i8> %r
@@ -649,7 +649,7 @@ define <4 x i8> @test_xor_imm_1(<4 x i8> %a) #0 {
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u32 %r1, [test_xor_imm_1_param_0];
; CHECK-NEXT: xor.b32 %r2, %r1, 67305985;
-; CHECK-NEXT: st.param.b32 [func_retval0+0], %r2;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r2;
; CHECK-NEXT: ret;
%r = xor <4 x i8> %a, <i8 1, i8 2, i8 3, i8 4>
ret <4 x i8> %r
@@ -664,7 +664,7 @@ define <4 x i8> @test_and(<4 x i8> %a, <4 x i8> %b) #0 {
; CHECK-NEXT: ld.param.u32 %r3, [test_and_param_1];
; CHECK-NEXT: ld.param.u32 %r4, [test_and_param_0];
; CHECK-NEXT: and.b32 %r5, %r4, %r3;
-; CHECK-NEXT: st.param.b32 [func_retval0+0], %r5;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r5;
; CHECK-NEXT: ret;
%r = and <4 x i8> %a, %b
ret <4 x i8> %r
@@ -684,7 +684,7 @@ define <4 x i8> @test_and_computed(i8 %a) {
; CHECK-NEXT: bfi.b32 %r4, 0, %r3, 24, 8;
; CHECK-NEXT: bfi.b32 %r6, 5, %r4, 8, 8;
; CHECK-NEXT: and.b32 %r8, %r6, %r4;
-; CHECK-NEXT: st.param.b32 [func_retval0+0], %r8;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r8;
; CHECK-NEXT: ret;
%ins.0 = insertelement <4 x i8> zeroinitializer, i8 %a, i32 0
%ins.1 = insertelement <4 x i8> %ins.0, i8 5, i32 1
@@ -700,7 +700,7 @@ define <4 x i8> @test_and_imm_0(<4 x i8> %a) #0 {
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u32 %r1, [test_and_imm_0_param_0];
; CHECK-NEXT: and.b32 %r2, %r1, 67305985;
-; CHECK-NEXT: st.param.b32 [func_retval0+0], %r2;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r2;
; CHECK-NEXT: ret;
%r = and <4 x i8> <i8 1, i8 2, i8 3, i8 4>, %a
ret <4 x i8> %r
@@ -714,7 +714,7 @@ define <4 x i8> @test_and_imm_1(<4 x i8> %a) #0 {
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u32 %r1, [test_and_imm_1_param_0];
; CHECK-NEXT: and.b32 %r2, %r1, 67305985;
-; CHECK-NEXT: st.param.b32 [func_retval0+0], %r2;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r2;
; CHECK-NEXT: ret;
%r = and <4 x i8> %a, <i8 1, i8 2, i8 3, i8 4>
ret <4 x i8> %r
@@ -828,9 +828,9 @@ define <4 x i8> @test_call(<4 x i8> %a, <4 x i8> %b) #0 {
; CHECK-NEXT: ld.param.u32 %r1, [test_call_param_0];
; CHECK-NEXT: { // callseq 0, 0
; CHECK-NEXT: .param .align 4 .b8 param0[4];
-; CHECK-NEXT: st.param.b32 [param0+0], %r1;
+; CHECK-NEXT: st.param.b32 [param0], %r1;
; CHECK-NEXT: .param .align 4 .b8 param1[4];
-; CHECK-NEXT: st.param.b32 [param1+0], %r2;
+; CHECK-NEXT: st.param.b32 [param1], %r2;
; CHECK-NEXT: .param .align 4 .b8 retval0[4];
; CHECK-NEXT: call.uni (retval0),
; CHECK-NEXT: test_callee,
@@ -838,9 +838,9 @@ define <4 x i8> @test_call(<4 x i8> %a, <4 x i8> %b) #0 {
; CHECK-NEXT: param0,
; CHECK-NEXT: param1
; CHECK-NEXT: );
-; CHECK-NEXT: ld.param.b32 %r3, [retval0+0];
+; CHECK-NEXT: ld.param.b32 %r3, [retval0];
; CHECK-NEXT: } // callseq 0
-; CHECK-NEXT: st.param.b32 [func_retval0+0], %r3;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
; CHECK-NEXT: ret;
%r = call <4 x i8> @test_callee(<4 x i8> %a, <4 x i8> %b)
ret <4 x i8> %r
@@ -856,9 +856,9 @@ define <4 x i8> @test_call_flipped(<4 x i8> %a, <4 x i8> %b) #0 {
; CHECK-NEXT: ld.param.u32 %r1, [test_call_flipped_param_0];
; CHECK-NEXT: { // callseq 1, 0
; CHECK-NEXT: .param .align 4 .b8 param0[4];
-; CHECK-NEXT: st.param.b32 [param0+0], %r2;
+; CHECK-NEXT: st.param.b32 [param0], %r2;
; CHECK-NEXT: .param .align 4 .b8 param1[4];
-; CHECK-NEXT: st.param.b32 [param1+0], %r1;
+; CHECK-NEXT: st.param.b32 [param1], %r1;
; CHECK-NEXT: .param .align 4 .b8 retval0[4];
; CHECK-NEXT: call.uni (retval0),
; CHECK-NEXT: test_callee,
@@ -866,9 +866,9 @@ define <4 x i8> @test_call_flipped(<4 x i8> %a, <4 x i8> %b) #0 {
; CHECK-NEXT: param0,
; CHECK-NEXT: param1
; CHECK-NEXT: );
-; CHECK-NEXT: ld.param.b32 %r3, [retval0+0];
+; CHECK-NEXT: ld.param.b32 %r3, [retval0];
; CHECK-NEXT: } // callseq 1
-; CHECK-NEXT: st.param.b32 [func_retval0+0], %r3;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
; CHECK-NEXT: ret;
%r = call <4 x i8> @test_callee(<4 x i8> %b, <4 x i8> %a)
ret <4 x i8> %r
@@ -884,9 +884,9 @@ define <4 x i8> @test_tailcall_flipped(<4 x i8> %a, <4 x i8> %b) #0 {
; CHECK-NEXT: ld.param.u32 %r1, [test_tailcall_flipped_param_0];
; CHECK-NEXT: { // callseq 2, 0
; CHECK-NEXT: .param .align 4 .b8 param0[4];
-; CHECK-NEXT: st.param.b32 [param0+0], %r2;
+; CHECK-NEXT: st.param.b32 [param0], %r2;
; CHECK-NEXT: .param .align 4 .b8 param1[4];
-; CHECK-NEXT: st.param.b32 [param1+0], %r1;
+; CHECK-NEXT: st.param.b32 [param1], %r1;
; CHECK-NEXT: .param .align 4 .b8 retval0[4];
; CHECK-NEXT: call.uni (retval0),
; CHECK-NEXT: test_callee,
@@ -894,9 +894,9 @@ define <4 x i8> @test_tailcall_flipped(<4 x i8> %a, <4 x i8> %b) #0 {
; CHECK-NEXT: param0,
; CHECK-NEXT: param1
; CHECK-NEXT: );
-; CHECK-NEXT: ld.param.b32 %r3, [retval0+0];
+; CHECK-NEXT: ld.param.b32 %r3, [retval0];
; CHECK-NEXT: } // callseq 2
-; CHECK-NEXT: st.param.b32 [func_retval0+0], %r3;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
; CHECK-NEXT: ret;
%r = tail call <4 x i8> @test_callee(<4 x i8> %b, <4 x i8> %a)
ret <4 x i8> %r
@@ -916,7 +916,7 @@ define <4 x i8> @test_select(<4 x i8> %a, <4 x i8> %b, i1 zeroext %c) #0 {
; CHECK-NEXT: ld.param.u32 %r2, [test_select_param_1];
; CHECK-NEXT: ld.param.u32 %r1, [test_select_param_0];
; CHECK-NEXT: selp.b32 %r3, %r1, %r2, %p1;
-; CHECK-NEXT: st.param.b32 [func_retval0+0], %r3;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
; CHECK-NEXT: ret;
%r = select i1 %c, <4 x i8> %a, <4 x i8> %b
ret <4 x i8> %r
@@ -960,7 +960,7 @@ define <4 x i8> @test_select_cc(<4 x i8> %a, <4 x i8> %b, <4 x i8> %c, <4 x i8>
; CHECK-NEXT: bfe.u32 %r25, %r1, 24, 8;
; CHECK-NEXT: selp.b32 %r26, %r25, %r24, %p1;
; CHECK-NEXT: bfi.b32 %r27, %r26, %r23, 24, 8;
-; CHECK-NEXT: st.param.b32 [func_retval0+0], %r27;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r27;
; CHECK-NEXT: ret;
%cc = icmp ne <4 x i8> %c, %d
%r = select <4 x i1> %cc, <4 x i8> %a, <4 x i8> %b
@@ -994,7 +994,7 @@ define <4 x i32> @test_select_cc_i32_i8(<4 x i32> %a, <4 x i32> %b,
; CHECK-NEXT: selp.b32 %r20, %r3, %r7, %p3;
; CHECK-NEXT: selp.b32 %r21, %r2, %r6, %p2;
; CHECK-NEXT: selp.b32 %r22, %r1, %r5, %p1;
-; CHECK-NEXT: st.param.v4.b32 [func_retval0+0], {%r22, %r21, %r20, %r19};
+; CHECK-NEXT: st.param.v4.b32 [func_retval0], {%r22, %r21, %r20, %r19};
; CHECK-NEXT: ret;
<4 x i8> %c, <4 x i8> %d) #0 {
%cc = icmp ne <4 x i8> %c, %d
@@ -1032,7 +1032,7 @@ define <4 x i8> @test_select_cc_i8_i32(<4 x i8> %a, <4 x i8> %b,
; CHECK-NEXT: bfe.u32 %r23, %r1, 24, 8;
; CHECK-NEXT: selp.b32 %r24, %r23, %r22, %p1;
; CHECK-NEXT: bfi.b32 %r25, %r24, %r21, 24, 8;
-; CHECK-NEXT: st.param.b32 [func_retval0+0], %r25;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r25;
; CHECK-NEXT: ret;
<4 x i32> %c, <4 x i32> %d) #0 {
%cc = icmp ne <4 x i32> %c, %d
@@ -1051,7 +1051,7 @@ define <4 x i8> @test_trunc_2xi32(<4 x i32> %a) #0 {
; CHECK-NEXT: bfi.b32 %r5, %r2, %r1, 8, 8;
; CHECK-NEXT: bfi.b32 %r6, %r3, %r5, 16, 8;
; CHECK-NEXT: bfi.b32 %r7, %r4, %r6, 24, 8;
-; CHECK-NEXT: st.param.b32 [func_retval0+0], %r7;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r7;
; CHECK-NEXT: ret;
%r = trunc <4 x i32> %a to <4 x i8>
ret <4 x i8> %r
@@ -1073,7 +1073,7 @@ define <4 x i8> @test_trunc_2xi64(<4 x i64> %a) #0 {
; CHECK-NEXT: bfi.b32 %r5, %r4, %r3, 16, 8;
; CHECK-NEXT: cvt.u32.u64 %r6, %rd4;
; CHECK-NEXT: bfi.b32 %r7, %r6, %r5, 24, 8;
-; CHECK-NEXT: st.param.b32 [func_retval0+0], %r7;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r7;
; CHECK-NEXT: ret;
%r = trunc <4 x i64> %a to <4 x i8>
ret <4 x i8> %r
@@ -1090,7 +1090,7 @@ define <4 x i32> @test_zext_2xi32(<4 x i8> %a) #0 {
; CHECK-NEXT: bfe.u32 %r3, %r1, 16, 8;
; CHECK-NEXT: bfe.u32 %r4, %r1, 8, 8;
; CHECK-NEXT: bfe.u32 %r5, %r1, 0, 8;
-; CHECK-NEXT: st.param.v4.b32 [func_retval0+0], {%r5, %r4, %r3, %r2};
+; CHECK-NEXT: st.param.v4.b32 [func_retval0], {%r5, %r4, %r3, %r2};
; CHECK-NEXT: ret;
%r = zext <4 x i8> %a to <4 x i32>
ret <4 x i32> %r
@@ -1116,7 +1116,7 @@ define <4 x i64> @test_zext_2xi64(<4 x i8> %a) #0 {
; CHECK-NEXT: bfe.u32 %r5, %r1, 0, 8;
; CHECK-NEXT: cvt.u64.u32 %rd7, %r5;
; CHECK-NEXT: and.b64 %rd8, %rd7, 255;
-; CHECK-NEXT: st.param.v2.b64 [func_retval0+0], {%rd8, %rd6};
+; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd8, %rd6};
; CHECK-NEXT: st.param.v2.b64 [func_retval0+16], {%rd4, %rd2};
; CHECK-NEXT: ret;
%r = zext <4 x i8> %a to <4 x i64>
@@ -1130,7 +1130,7 @@ define <4 x i8> @test_bitcast_i32_to_4xi8(i32 %a) #0 {
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u32 %r1, [test_bitcast_i32_to_4xi8_param_0];
-; CHECK-NEXT: st.param.b32 [func_retval0+0], %r1;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
; CHECK-NEXT: ret;
%r = bitcast i32 %a to <4 x i8>
ret <4 x i8> %r
@@ -1145,7 +1145,7 @@ define <4 x i8> @test_bitcast_float_to_4xi8(float %a) #0 {
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.f32 %f1, [test_bitcast_float_to_4xi8_param_0];
; CHECK-NEXT: mov.b32 %r1, %f1;
-; CHECK-NEXT: st.param.b32 [func_retval0+0], %r1;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
; CHECK-NEXT: ret;
%r = bitcast float %a to <4 x i8>
ret <4 x i8> %r
@@ -1158,7 +1158,7 @@ define i32 @test_bitcast_4xi8_to_i32(<4 x i8> %a) #0 {
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u32 %r2, [test_bitcast_4xi8_to_i32_param_0];
-; CHECK-NEXT: st.param.b32 [func_retval0+0], %r2;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r2;
; CHECK-NEXT: ret;
%r = bitcast <4 x i8> %a to i32
ret i32 %r
@@ -1173,7 +1173,7 @@ define float @test_bitcast_4xi8_to_float(<4 x i8> %a) #0 {
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u32 %r2, [test_bitcast_4xi8_to_float_param_0];
; CHECK-NEXT: mov.b32 %f1, %r2;
-; CHECK-NEXT: st.param.f32 [func_retval0+0], %f1;
+; CHECK-NEXT: st.param.f32 [func_retval0], %f1;
; CHECK-NEXT: ret;
%r = bitcast <4 x i8> %a to float
ret float %r
@@ -1192,7 +1192,7 @@ define <2 x half> @test_bitcast_4xi8_to_2xhalf(i8 %a) #0 {
; CHECK-NEXT: bfi.b32 %r2, 5, %r1, 8, 8;
; CHECK-NEXT: bfi.b32 %r3, 6, %r2, 16, 8;
; CHECK-NEXT: bfi.b32 %r4, 7, %r3, 24, 8;
-; CHECK-NEXT: st.param.b32 [func_retval0+0], %r4;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r4;
; CHECK-NEXT: ret;
%ins.0 = insertelement <4 x i8> undef, i8 %a, i32 0
%ins.1 = insertelement <4 x i8> %ins.0, i8 5, i32 1
@@ -1212,7 +1212,7 @@ define <4 x i8> @test_shufflevector(<4 x i8> %a) #0 {
; CHECK-NEXT: ld.param.u32 %r1, [test_shufflevector_param_0];
; CHECK-NEXT: // implicit-def: %r3
; CHECK-NEXT: prmt.b32 %r2, %r1, %r3, 291;
-; CHECK-NEXT: st.param.b32 [func_retval0+0], %r2;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r2;
; CHECK-NEXT: ret;
%s = shufflevector <4 x i8> %a, <4 x i8> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
ret <4 x i8> %s
@@ -1227,7 +1227,7 @@ define <4 x i8> @test_shufflevector_2(<4 x i8> %a, <4 x i8> %b) #0 {
; CHECK-NEXT: ld.param.u32 %r2, [test_shufflevector_2_param_1];
; CHECK-NEXT: ld.param.u32 %r1, [test_shufflevector_2_param_0];
; CHECK-NEXT: prmt.b32 %r3, %r1, %r2, 9527;
-; CHECK-NEXT: st.param.b32 [func_retval0+0], %r3;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
; CHECK-NEXT: ret;
%s = shufflevector <4 x i8> %a, <4 x i8> %b, <4 x i32> <i32 7, i32 3, i32 5, i32 2>
ret <4 x i8> %s
@@ -1245,7 +1245,7 @@ define <4 x i8> @test_insertelement(<4 x i8> %a, i8 %x) #0 {
; CHECK-NEXT: ld.param.u32 %r1, [test_insertelement_param_0];
; CHECK-NEXT: cvt.u32.u16 %r2, %rs1;
; CHECK-NEXT: bfi.b32 %r3, %r2, %r1, 8, 8;
-; CHECK-NEXT: st.param.b32 [func_retval0+0], %r3;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
; CHECK-NEXT: ret;
%i = insertelement <4 x i8> %a, i8 %x, i64 1
ret <4 x i8> %i
@@ -1276,7 +1276,7 @@ define <4 x i8> @test_fptosi_4xhalf_to_4xi8(<4 x half> %a) #0 {
; CHECK-NEXT: bfi.b32 %r11, %r10, %r8, 16, 8;
; CHECK-NEXT: cvt.u32.u16 %r12, %rs12;
; CHECK-NEXT: bfi.b32 %r13, %r12, %r11, 24, 8;
-; CHECK-NEXT: st.param.b32 [func_retval0+0], %r13;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r13;
; CHECK-NEXT: ret;
%r = fptosi <4 x half> %a to <4 x i8>
ret <4 x i8> %r
@@ -1307,7 +1307,7 @@ define <4 x i8> @test_fptoui_4xhalf_to_4xi8(<4 x half> %a) #0 {
; CHECK-NEXT: bfi.b32 %r11, %r10, %r8, 16, 8;
; CHECK-NEXT: cvt.u32.u16 %r12, %rs12;
; CHECK-NEXT: bfi.b32 %r13, %r12, %r11, 24, 8;
-; CHECK-NEXT: st.param.b32 [func_retval0+0], %r13;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r13;
; CHECK-NEXT: ret;
%r = fptoui <4 x half> %a to <4 x i8>
ret <4 x i8> %r
diff --git a/llvm/test/CodeGen/NVPTX/indirect_byval.ll b/llvm/test/CodeGen/NVPTX/indirect_byval.ll
index ac6c4e262fd60e..1799c86deda76d 100644
--- a/llvm/test/CodeGen/NVPTX/indirect_byval.ll
+++ b/llvm/test/CodeGen/NVPTX/indirect_byval.ll
@@ -27,9 +27,9 @@ define internal i32 @foo() {
; CHECK-NEXT: add.u64 %rd2, %SP, 0;
; CHECK-NEXT: { // callseq 0, 0
; CHECK-NEXT: .param .align 1 .b8 param0[1];
-; CHECK-NEXT: st.param.b8 [param0+0], %rs1;
+; CHECK-NEXT: st.param.b8 [param0], %rs1;
; CHECK-NEXT: .param .b64 param1;
-; CHECK-NEXT: st.param.b64 [param1+0], %rd2;
+; CHECK-NEXT: st.param.b64 [param1], %rd2;
; CHECK-NEXT: .param .b32 retval0;
; CHECK-NEXT: prototype_0 : .callprototype (.param .b32 _) _ (.param .align 1 .b8 _[1], .param .b64 _);
; CHECK-NEXT: call (retval0),
@@ -39,9 +39,9 @@ define internal i32 @foo() {
; CHECK-NEXT: param1
; CHECK-NEXT: )
; CHECK-NEXT: , prototype_0;
-; CHECK-NEXT: ld.param.b32 %r1, [retval0+0];
+; CHECK-NEXT: ld.param.b32 %r1, [retval0];
; CHECK-NEXT: } // callseq 0
-; CHECK-NEXT: st.param.b32 [func_retval0+0], %r1;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
; CHECK-NEXT: ret;
entry:
%s = alloca %struct.S, align 1
@@ -69,9 +69,9 @@ define internal i32 @bar() {
; CHECK-NEXT: add.u64 %rd3, %SP, 0;
; CHECK-NEXT: { // callseq 1, 0
; CHECK-NEXT: .param .align 8 .b8 param0[8];
-; CHECK-NEXT: st.param.b64 [param0+0], %rd2;
+; CHECK-NEXT: st.param.b64 [param0], %rd2;
; CHECK-NEXT: .param .b64 param1;
-; CHECK-NEXT: st.param.b64 [param1+0], %rd3;
+; CHECK-NEXT: st.param.b64 [param1], %rd3;
; CHECK-NEXT: .param .b32 retval0;
; CHECK-NEXT: prototype_1 : .callprototype (.param .b32 _) _ (.param .align 8 .b8 _[8], .param .b64 _);
; CHECK-NEXT: call (retval0),
@@ -81,9 +81,9 @@ define internal i32 @bar() {
; CHECK-NEXT: param1
; CHECK-NEXT: )
; CHECK-NEXT: , prototype_1;
-; CHECK-NEXT: ld.param.b32 %r1, [retval0+0];
+; CHECK-NEXT: ld.param.b32 %r1, [retval0];
; CHECK-NEXT: } // callseq 1
-; CHECK-NEXT: st.param.b32 [func_retval0+0], %r1;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
; CHECK-NEXT: ret;
entry:
%s = alloca %struct.U, align 8
diff --git a/llvm/test/CodeGen/NVPTX/jump-table.ll b/llvm/test/CodeGen/NVPTX/jump-table.ll
index b201fb98f3e6bb..dbd4f8a55facfd 100644
--- a/llvm/test/CodeGen/NVPTX/jump-table.ll
+++ b/llvm/test/CodeGen/NVPTX/jump-table.ll
@@ -101,7 +101,7 @@ define i32 @test2(i32 %tmp158) {
; CHECK-NEXT: brx.idx %r2, $L_brx_0;
; CHECK-NEXT: $L__BB1_7: // %bb339
; CHECK-NEXT: mov.b32 %r7, 12;
-; CHECK-NEXT: st.param.b32 [func_retval0+0], %r7;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r7;
; CHECK-NEXT: ret;
; CHECK-NEXT: $L__BB1_5: // %entry
; CHECK-NEXT: setp.eq.s32 %p3, %r1, 1024;
@@ -109,27 +109,27 @@ define i32 @test2(i32 %tmp158) {
; CHECK-NEXT: bra.uni $L__BB1_6;
; CHECK-NEXT: $L__BB1_3: // %bb338
; CHECK-NEXT: mov.b32 %r8, 11;
-; CHECK-NEXT: st.param.b32 [func_retval0+0], %r8;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r8;
; CHECK-NEXT: ret;
; CHECK-NEXT: $L__BB1_10: // %bb342
; CHECK-NEXT: mov.b32 %r4, 15;
-; CHECK-NEXT: st.param.b32 [func_retval0+0], %r4;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r4;
; CHECK-NEXT: ret;
; CHECK-NEXT: $L__BB1_6: // %bb336
; CHECK-NEXT: mov.b32 %r9, 10;
-; CHECK-NEXT: st.param.b32 [func_retval0+0], %r9;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r9;
; CHECK-NEXT: ret;
; CHECK-NEXT: $L__BB1_8: // %bb340
; CHECK-NEXT: mov.b32 %r6, 13;
-; CHECK-NEXT: st.param.b32 [func_retval0+0], %r6;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r6;
; CHECK-NEXT: ret;
; CHECK-NEXT: $L__BB1_9: // %bb341
; CHECK-NEXT: mov.b32 %r5, 14;
-; CHECK-NEXT: st.param.b32 [func_retval0+0], %r5;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r5;
; CHECK-NEXT: ret;
; CHECK-NEXT: $L__BB1_11: // %bb343
; CHECK-NEXT: mov.b32 %r3, 18;
-; CHECK-NEXT: st.param.b32 [func_retval0+0], %r3;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
; CHECK-NEXT: ret;
entry:
switch i32 %tmp158, label %bb336 [
diff --git a/llvm/test/CodeGen/NVPTX/ldparam-v4.ll b/llvm/test/CodeGen/NVPTX/ldparam-v4.ll
index dc20441a67a8bf..47f65ecbcfa6d5 100644
--- a/llvm/test/CodeGen/NVPTX/ldparam-v4.ll
+++ b/llvm/test/CodeGen/NVPTX/ldparam-v4.ll
@@ -6,7 +6,7 @@ declare <4 x float> @bar()
; CHECK-LABEL: .func foo(
define void @foo(ptr %ptr) {
; CHECK: ld.param.u64 %[[PTR:rd[0-9]+]], [foo_param_0];
-; CHECK: ld.param.v4.f32 {[[E0:%f[0-9]+]], [[E1:%f[0-9]+]], [[E2:%f[0-9]+]], [[E3:%f[0-9]+]]}, [retval0+0];
+; CHECK: ld.param.v4.f32 {[[E0:%f[0-9]+]], [[E1:%f[0-9]+]], [[E2:%f[0-9]+]], [[E3:%f[0-9]+]]}, [retval0];
; CHECK: st.v4.f32 [%[[PTR]]], {[[E0]], [[E1]], [[E2]], [[E3]]}
%val = tail call <4 x float> @bar()
store <4 x float> %val, ptr %ptr
diff --git a/llvm/test/CodeGen/NVPTX/local-stack-frame.ll b/llvm/test/CodeGen/NVPTX/local-stack-frame.ll
index d702ede61addf4..cac49b49970b73 100644
--- a/llvm/test/CodeGen/NVPTX/local-stack-frame.ll
+++ b/llvm/test/CodeGen/NVPTX/local-stack-frame.ll
@@ -8,11 +8,11 @@
; PTX32: mov.u32 %SPL, __local_depot{{[0-9]+}};
; PTX32: cvta.local.u32 %SP, %SPL;
; PTX32: ld.param.u32 %r{{[0-9]+}}, [foo_param_0];
-; PTX32: st.volatile.u32 [%SP+0], %r{{[0-9]+}};
+; PTX32: st.volatile.u32 [%SP], %r{{[0-9]+}};
; PTX64: mov.u64 %SPL, __local_depot{{[0-9]+}};
; PTX64: cvta.local.u64 %SP, %SPL;
; PTX64: ld.param.u32 %r{{[0-9]+}}, [foo_param_0];
-; PTX64: st.volatile.u32 [%SP+0], %r{{[0-9]+}};
+; PTX64: st.volatile.u32 [%SP], %r{{[0-9]+}};
define void @foo(i32 %a) {
%local = alloca i32, align 4
store volatile i32 %a, ptr %local
diff --git a/llvm/test/CodeGen/NVPTX/lower-alloca.ll b/llvm/test/CodeGen/NVPTX/lower-alloca.ll
index 400184aaefb211..e09fb938ef0864 100644
--- a/llvm/test/CodeGen/NVPTX/lower-alloca.ll
+++ b/llvm/test/CodeGen/NVPTX/lower-alloca.ll
@@ -26,7 +26,7 @@ define void @alloca_in_explicit_local_as() {
; PTX-LABEL: .visible .func alloca_in_explicit_local_as(
%A = alloca i32, addrspace(5)
; CHECK: store i32 0, ptr addrspace(5) {{%.+}}
-; PTX: st.local.u32 [%SP+0], {{%r[0-9]+}}
+; PTX: st.local.u32 [%SP], {{%r[0-9]+}}
; LOWERALLOCAONLY: [[V1:%.*]] = addrspacecast ptr addrspace(5) %A to ptr
; LOWERALLOCAONLY: store i32 0, ptr [[V1]], align 4
store i32 0, ptr addrspace(5) %A
diff --git a/llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll b/llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll
index 33fa3afc94b89d..9cfe9192772b89 100644
--- a/llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll
+++ b/llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll
@@ -43,7 +43,7 @@ define dso_local noundef i32 @non_kernel_function(ptr nocapture noundef readonly
; PTX-NEXT: ld.param.u64 %rd4, [non_kernel_function_param_0+8];
; PTX-NEXT: st.u64 [%rd3], %rd4;
; PTX-NEXT: ld.param.u64 %rd5, [non_kernel_function_param_0];
-; PTX-NEXT: st.u64 [%SP+0], %rd5;
+; PTX-NEXT: st.u64 [%SP], %rd5;
; PTX-NEXT: mov.u64 %rd6, gi;
; PTX-NEXT: cvta.global.u64 %rd7, %rd6;
; PTX-NEXT: selp.b64 %rd8, %rd2, %rd7, %p1;
@@ -58,7 +58,7 @@ define dso_local noundef i32 @non_kernel_function(ptr nocapture noundef readonly
; PTX-NEXT: shl.b32 %r8, %r7, 24;
; PTX-NEXT: or.b32 %r9, %r8, %r6;
; PTX-NEXT: or.b32 %r10, %r9, %r4;
-; PTX-NEXT: st.param.b32 [func_retval0+0], %r10;
+; PTX-NEXT: st.param.b32 [func_retval0], %r10;
; PTX-NEXT: ret;
entry:
%a. = select i1 %b, ptr %a, ptr addrspacecast (ptr addrspace(1) @gi to ptr), !dbg !17
@@ -147,7 +147,7 @@ define void @grid_const_escape(ptr byval(%struct.s) align 4 %input) {
; PTX-NEXT: mov.u64 %rd1, escape;
; PTX-NEXT: { // callseq 0, 0
; PTX-NEXT: .param .b64 param0;
-; PTX-NEXT: st.param.b64 [param0+0], %rd4;
+; PTX-NEXT: st.param.b64 [param0], %rd4;
; PTX-NEXT: .param .b32 retval0;
; PTX-NEXT: prototype_0 : .callprototype (.param .b32 _) _ (.param .b64 _);
; PTX-NEXT: call (retval0),
@@ -156,7 +156,7 @@ define void @grid_const_escape(ptr byval(%struct.s) align 4 %input) {
; PTX-NEXT: param0
; PTX-NEXT: )
; PTX-NEXT: , prototype_0;
-; PTX-NEXT: ld.param.b32 %r1, [retval0+0];
+; PTX-NEXT: ld.param.b32 %r1, [retval0];
; PTX-NEXT: } // callseq 0
; PTX-NEXT: ret;
; OPT-LABEL: define void @grid_const_escape(
@@ -194,11 +194,11 @@ define void @multiple_grid_const_escape(ptr byval(%struct.s) align 4 %input, i32
; PTX-NEXT: mov.u64 %rd1, escape3;
; PTX-NEXT: { // callseq 1, 0
; PTX-NEXT: .param .b64 param0;
-; PTX-NEXT: st.param.b64 [param0+0], %rd7;
+; PTX-NEXT: st.param.b64 [param0], %rd7;
; PTX-NEXT: .param .b64 param1;
-; PTX-NEXT: st.param.b64 [param1+0], %rd8;
+; PTX-NEXT: st.param.b64 [param1], %rd8;
; PTX-NEXT: .param .b64 param2;
-; PTX-NEXT: st.param.b64 [param2+0], %rd5;
+; PTX-NEXT: st.param.b64 [param2], %rd5;
; PTX-NEXT: .param .b32 retval0;
; PTX-NEXT: prototype_1 : .callprototype (.param .b32 _) _ (.param .b64 _, .param .b64 _, .param .b64 _);
; PTX-NEXT: call (retval0),
@@ -209,7 +209,7 @@ define void @multiple_grid_const_escape(ptr byval(%struct.s) align 4 %input, i32
; PTX-NEXT: param2
; PTX-NEXT: )
; PTX-NEXT: , prototype_1;
-; PTX-NEXT: ld.param.b32 %r2, [retval0+0];
+; PTX-NEXT: ld.param.b32 %r2, [retval0];
; PTX-NEXT: } // callseq 1
; PTX-NEXT: ret;
; OPT-LABEL: define void @multiple_grid_const_escape(
@@ -307,7 +307,7 @@ define void @grid_const_partial_escape(ptr byval(i32) %input, ptr %output) {
; PTX-NEXT: mov.u64 %rd1, escape;
; PTX-NEXT: { // callseq 2, 0
; PTX-NEXT: .param .b64 param0;
-; PTX-NEXT: st.param.b64 [param0+0], %rd6;
+; PTX-NEXT: st.param.b64 [param0], %rd6;
; PTX-NEXT: .param .b32 retval0;
; PTX-NEXT: prototype_2 : .callprototype (.param .b32 _) _ (.param .b64 _);
; PTX-NEXT: call (retval0),
@@ -316,7 +316,7 @@ define void @grid_const_partial_escape(ptr byval(i32) %input, ptr %output) {
; PTX-NEXT: param0
; PTX-NEXT: )
; PTX-NEXT: , prototype_2;
-; PTX-NEXT: ld.param.b32 %r3, [retval0+0];
+; PTX-NEXT: ld.param.b32 %r3, [retval0];
; PTX-NEXT: } // callseq 2
; PTX-NEXT: ret;
; OPT-LABEL: define void @grid_const_partial_escape(
@@ -356,7 +356,7 @@ define i32 @grid_const_partial_escapemem(ptr byval(%struct.s) %input, ptr %outpu
; PTX-NEXT: mov.u64 %rd1, escape;
; PTX-NEXT: { // callseq 3, 0
; PTX-NEXT: .param .b64 param0;
-; PTX-NEXT: st.param.b64 [param0+0], %rd6;
+; PTX-NEXT: st.param.b64 [param0], %rd6;
; PTX-NEXT: .param .b32 retval0;
; PTX-NEXT: prototype_3 : .callprototype (.param .b32 _) _ (.param .b64 _);
; PTX-NEXT: call (retval0),
@@ -365,9 +365,9 @@ define i32 @grid_const_partial_escapemem(ptr byval(%struct.s) %input, ptr %outpu
; PTX-NEXT: param0
; PTX-NEXT: )
; PTX-NEXT: , prototype_3;
-; PTX-NEXT: ld.param.b32 %r4, [retval0+0];
+; PTX-NEXT: ld.param.b32 %r4, [retval0];
; PTX-NEXT: } // callseq 3
-; PTX-NEXT: st.param.b32 [func_retval0+0], %r3;
+; PTX-NEXT: st.param.b32 [func_retval0], %r3;
; PTX-NEXT: ret;
; OPT-LABEL: define i32 @grid_const_partial_escapemem(
; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) [[INPUT:%.*]], ptr [[OUTPUT:%.*]]) #[[ATTR0]] {
@@ -574,7 +574,7 @@ define i32 @grid_const_ptrtoint(ptr byval(i32) %input) {
; PTX-NEXT: cvta.param.u64 %rd3, %rd2;
; PTX-NEXT: cvt.u32.u64 %r2, %rd3;
; PTX-NEXT: add.s32 %r3, %r1, %r2;
-; PTX-NEXT: st.param.b32 [func_retval0+0], %r3;
+; PTX-NEXT: st.param.b32 [func_retval0], %r3;
; PTX-NEXT: ret;
; OPT-LABEL: define i32 @grid_const_ptrtoint(
; OPT-SAME: ptr byval(i32) align 4 [[INPUT:%.*]]) #[[ATTR0]] {
diff --git a/llvm/test/CodeGen/NVPTX/lower-args.ll b/llvm/test/CodeGen/NVPTX/lower-args.ll
index d1bec032ec3a98..eba4f273fa709d 100644
--- a/llvm/test/CodeGen/NVPTX/lower-args.ll
+++ b/llvm/test/CodeGen/NVPTX/lower-args.ll
@@ -46,18 +46,18 @@ define void @load_padding(ptr nocapture readonly byval(%class.padded) %arg) {
; PTX-NEXT: mov.u64 %SPL, __local_depot1;
; PTX-NEXT: cvta.local.u64 %SP, %SPL;
; PTX-NEXT: ld.param.u64 %rd1, [load_padding_param_0];
-; PTX-NEXT: st.u64 [%SP+0], %rd1;
+; PTX-NEXT: st.u64 [%SP], %rd1;
; PTX-NEXT: add.u64 %rd2, %SP, 0;
; PTX-NEXT: { // callseq 1, 0
; PTX-NEXT: .param .b64 param0;
-; PTX-NEXT: st.param.b64 [param0+0], %rd2;
+; PTX-NEXT: st.param.b64 [param0], %rd2;
; PTX-NEXT: .param .b64 retval0;
; PTX-NEXT: call.uni (retval0),
; PTX-NEXT: escape,
; PTX-NEXT: (
; PTX-NEXT: param0
; PTX-NEXT: );
-; PTX-NEXT: ld.param.b64 %rd3, [retval0+0];
+; PTX-NEXT: ld.param.b64 %rd3, [retval0];
; PTX-NEXT: } // callseq 1
; PTX-NEXT: ret;
%tmp = call ptr @escape(ptr nonnull align 16 %arg)
diff --git a/llvm/test/CodeGen/NVPTX/math-intrins.ll b/llvm/test/CodeGen/NVPTX/math-intrins.ll
index bdd6c914384601..5161e5d029777e 100644
--- a/llvm/test/CodeGen/NVPTX/math-intrins.ll
+++ b/llvm/test/CodeGen/NVPTX/math-intrins.ll
@@ -55,7 +55,7 @@ define float @ceil_float(float %a) {
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.f32 %f1, [ceil_float_param_0];
; CHECK-NEXT: cvt.rpi.f32.f32 %f2, %f1;
-; CHECK-NEXT: st.param.f32 [func_retval0+0], %f2;
+; CHECK-NEXT: st.param.f32 [func_retval0], %f2;
; CHECK-NEXT: ret;
%b = call float @llvm.ceil.f32(float %a)
ret float %b
@@ -69,7 +69,7 @@ define float @ceil_float_ftz(float %a) #1 {
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.f32 %f1, [ceil_float_ftz_param_0];
; CHECK-NEXT: cvt.rpi.ftz.f32.f32 %f2, %f1;
-; CHECK-NEXT: st.param.f32 [func_retval0+0], %f2;
+; CHECK-NEXT: st.param.f32 [func_retval0], %f2;
; CHECK-NEXT: ret;
%b = call float @llvm.ceil.f32(float %a)
ret float %b
@@ -83,7 +83,7 @@ define double @ceil_double(double %a) {
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.f64 %fd1, [ceil_double_param_0];
; CHECK-NEXT: cvt.rpi.f64.f64 %fd2, %fd1;
-; CHECK-NEXT: st.param.f64 [func_retval0+0], %fd2;
+; CHECK-NEXT: st.param.f64 [func_retval0], %fd2;
; CHECK-NEXT: ret;
%b = call double @llvm.ceil.f64(double %a)
ret double %b
@@ -99,7 +99,7 @@ define float @floor_float(float %a) {
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.f32 %f1, [floor_float_param_0];
; CHECK-NEXT: cvt.rmi.f32.f32 %f2, %f1;
-; CHECK-NEXT: st.param.f32 [func_retval0+0], %f2;
+; CHECK-NEXT: st.param.f32 [func_retval0], %f2;
; CHECK-NEXT: ret;
%b = call float @llvm.floor.f32(float %a)
ret float %b
@@ -113,7 +113,7 @@ define float @floor_float_ftz(float %a) #1 {
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.f32 %f1, [floor_float_ftz_param_0];
; CHECK-NEXT: cvt.rmi.ftz.f32.f32 %f2, %f1;
-; CHECK-NEXT: st.param.f32 [func_retval0+0], %f2;
+; CHECK-NEXT: st.param.f32 [func_retval0], %f2;
; CHECK-NEXT: ret;
%b = call float @llvm.floor.f32(float %a)
ret float %b
@@ -127,7 +127,7 @@ define double @floor_double(double %a) {
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.f64 %fd1, [floor_double_param_0];
; CHECK-NEXT: cvt.rmi.f64.f64 %fd2, %fd1;
-; CHECK-NEXT: st.param.f64 [func_retval0+0], %fd2;
+; CHECK-NEXT: st.param.f64 [func_retval0], %fd2;
; CHECK-NEXT: ret;
%b = call double @llvm.floor.f64(double %a)
ret double %b
@@ -157,7 +157,7 @@ define float @round_float(float %a) {
; CHECK-NEXT: cvt.rzi.f32.f32 %f7, %f1;
; CHECK-NEXT: setp.lt.f32 %p2, %f5, 0f3F000000;
; CHECK-NEXT: selp.f32 %f8, %f7, %f6, %p2;
-; CHECK-NEXT: st.param.f32 [func_retval0+0], %f8;
+; CHECK-NEXT: st.param.f32 [func_retval0], %f8;
; CHECK-NEXT: ret;
%b = call float @llvm.round.f32(float %a)
ret float %b
@@ -185,7 +185,7 @@ define float @round_float_ftz(float %a) #1 {
; CHECK-NEXT: cvt.rzi.ftz.f32.f32 %f7, %f1;
; CHECK-NEXT: setp.lt.ftz.f32 %p2, %f5, 0f3F000000;
; CHECK-NEXT: selp.f32 %f8, %f7, %f6, %p2;
-; CHECK-NEXT: st.param.f32 [func_retval0+0], %f8;
+; CHECK-NEXT: st.param.f32 [func_retval0], %f8;
; CHECK-NEXT: ret;
%b = call float @llvm.round.f32(float %a)
ret float %b
@@ -208,7 +208,7 @@ define double @round_double(double %a) {
; CHECK-NEXT: copysign.f64 %fd6, %fd1, %fd5;
; CHECK-NEXT: setp.gt.f64 %p2, %fd2, 0d4330000000000000;
; CHECK-NEXT: selp.f64 %fd7, %fd1, %fd6, %p2;
-; CHECK-NEXT: st.param.f64 [func_retval0+0], %fd7;
+; CHECK-NEXT: st.param.f64 [func_retval0], %fd7;
; CHECK-NEXT: ret;
%b = call double @llvm.round.f64(double %a)
ret double %b
@@ -224,7 +224,7 @@ define float @nearbyint_float(float %a) {
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.f32 %f1, [nearbyint_float_param_0];
; CHECK-NEXT: cvt.rni.f32.f32 %f2, %f1;
-; CHECK-NEXT: st.param.f32 [func_retval0+0], %f2;
+; CHECK-NEXT: st.param.f32 [func_retval0], %f2;
; CHECK-NEXT: ret;
%b = call float @llvm.nearbyint.f32(float %a)
ret float %b
@@ -238,7 +238,7 @@ define float @nearbyint_float_ftz(float %a) #1 {
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.f32 %f1, [nearbyint_float_ftz_param_0];
; CHECK-NEXT: cvt.rni.ftz.f32.f32 %f2, %f1;
-; CHECK-NEXT: st.param.f32 [func_retval0+0], %f2;
+; CHECK-NEXT: st.param.f32 [func_retval0], %f2;
; CHECK-NEXT: ret;
%b = call float @llvm.nearbyint.f32(float %a)
ret float %b
@@ -252,7 +252,7 @@ define double @nearbyint_double(double %a) {
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.f64 %fd1, [nearbyint_double_param_0];
; CHECK-NEXT: cvt.rni.f64.f64 %fd2, %fd1;
-; CHECK-NEXT: st.param.f64 [func_retval0+0], %fd2;
+; CHECK-NEXT: st.param.f64 [func_retval0], %fd2;
; CHECK-NEXT: ret;
%b = call double @llvm.nearbyint.f64(double %a)
ret double %b
@@ -268,7 +268,7 @@ define float @rint_float(float %a) {
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.f32 %f1, [rint_float_param_0];
; CHECK-NEXT: cvt.rni.f32.f32 %f2, %f1;
-; CHECK-NEXT: st.param.f32 [func_retval0+0], %f2;
+; CHECK-NEXT: st.param.f32 [func_retval0], %f2;
; CHECK-NEXT: ret;
%b = call float @llvm.rint.f32(float %a)
ret float %b
@@ -282,7 +282,7 @@ define float @rint_float_ftz(float %a) #1 {
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.f32 %f1, [rint_float_ftz_param_0];
; CHECK-NEXT: cvt.rni.ftz.f32.f32 %f2, %f1;
-; CHECK-NEXT: st.param.f32 [func_retval0+0], %f2;
+; CHECK-NEXT: st.param.f32 [func_retval0], %f2;
; CHECK-NEXT: ret;
%b = call float @llvm.rint.f32(float %a)
ret float %b
@@ -296,7 +296,7 @@ define double @rint_double(double %a) {
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.f64 %fd1, [rint_double_param_0];
; CHECK-NEXT: cvt.rni.f64.f64 %fd2, %fd1;
-; CHECK-NEXT: st.param.f64 [func_retval0+0], %fd2;
+; CHECK-NEXT: st.param.f64 [func_retval0], %fd2;
; CHECK-NEXT: ret;
%b = call double @llvm.rint.f64(double %a)
ret double %b
@@ -312,7 +312,7 @@ define float @roundeven_float(float %a) {
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.f32 %f1, [roundeven_float_param_0];
; CHECK-NEXT: cvt.rni.f32.f32 %f2, %f1;
-; CHECK-NEXT: st.param.f32 [func_retval0+0], %f2;
+; CHECK-NEXT: st.param.f32 [func_retval0], %f2;
; CHECK-NEXT: ret;
%b = call float @llvm.roundeven.f32(float %a)
ret float %b
@@ -326,7 +326,7 @@ define float @roundeven_float_ftz(float %a) #1 {
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.f32 %f1, [roundeven_float_ftz_param_0];
; CHECK-NEXT: cvt.rni.ftz.f32.f32 %f2, %f1;
-; CHECK-NEXT: st.param.f32 [func_retval0+0], %f2;
+; CHECK-NEXT: st.param.f32 [func_retval0], %f2;
; CHECK-NEXT: ret;
%b = call float @llvm.roundeven.f32(float %a)
ret float %b
@@ -340,7 +340,7 @@ define double @roundeven_double(double %a) {
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.f64 %fd1, [roundeven_double_param_0];
; CHECK-NEXT: cvt.rni.f64.f64 %fd2, %fd1;
-; CHECK-NEXT: st.param.f64 [func_retval0+0], %fd2;
+; CHECK-NEXT: st.param.f64 [func_retval0], %fd2;
; CHECK-NEXT: ret;
%b = call double @llvm.roundeven.f64(double %a)
ret double %b
@@ -356,7 +356,7 @@ define float @trunc_float(float %a) {
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.f32 %f1, [trunc_float_param_0];
; CHECK-NEXT: cvt.rzi.f32.f32 %f2, %f1;
-; CHECK-NEXT: st.param.f32 [func_retval0+0], %f2;
+; CHECK-NEXT: st.param.f32 [func_retval0], %f2;
; CHECK-NEXT: ret;
%b = call float @llvm.trunc.f32(float %a)
ret float %b
@@ -370,7 +370,7 @@ define float @trunc_float_ftz(float %a) #1 {
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.f32 %f1, [trunc_float_ftz_param_0];
; CHECK-NEXT: cvt.rzi.ftz.f32.f32 %f2, %f1;
-; CHECK-NEXT: st.param.f32 [func_retval0+0], %f2;
+; CHECK-NEXT: st.param.f32 [func_retval0], %f2;
; CHECK-NEXT: ret;
%b = call float @llvm.trunc.f32(float %a)
ret float %b
@@ -384,7 +384,7 @@ define double @trunc_double(double %a) {
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.f64 %fd1, [trunc_double_param_0];
; CHECK-NEXT: cvt.rzi.f64.f64 %fd2, %fd1;
-; CHECK-NEXT: st.param.f64 [func_retval0+0], %fd2;
+; CHECK-NEXT: st.param.f64 [func_retval0], %fd2;
; CHECK-NEXT: ret;
%b = call double @llvm.trunc.f64(double %a)
ret double %b
@@ -400,7 +400,7 @@ define float @abs_float(float %a) {
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.f32 %f1, [abs_float_param_0];
; CHECK-NEXT: abs.f32 %f2, %f1;
-; CHECK-NEXT: st.param.f32 [func_retval0+0], %f2;
+; CHECK-NEXT: st.param.f32 [func_retval0], %f2;
; CHECK-NEXT: ret;
%b = call float @llvm.fabs.f32(float %a)
ret float %b
@@ -414,7 +414,7 @@ define float @abs_float_ftz(float %a) #1 {
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.f32 %f1, [abs_float_ftz_param_0];
; CHECK-NEXT: abs.ftz.f32 %f2, %f1;
-; CHECK-NEXT: st.param.f32 [func_retval0+0], %f2;
+; CHECK-NEXT: st.param.f32 [func_retval0], %f2;
; CHECK-NEXT: ret;
%b = call float @llvm.fabs.f32(float %a)
ret float %b
@@ -428,7 +428,7 @@ define double @abs_double(double %a) {
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.f64 %fd1, [abs_double_param_0];
; CHECK-NEXT: abs.f64 %fd2, %fd1;
-; CHECK-NEXT: st.param.f64 [func_retval0+0], %fd2;
+; CHECK-NEXT: st.param.f64 [func_retval0], %fd2;
; CHECK-NEXT: ret;
%b = call double @llvm.fabs.f64(double %a)
ret double %b
@@ -449,7 +449,7 @@ define half @minnum_half(half %a, half %b) {
; CHECK-NOF16-NEXT: cvt.f32.f16 %f2, %rs1;
; CHECK-NOF16-NEXT: min.f32 %f3, %f2, %f1;
; CHECK-NOF16-NEXT: cvt.rn.f16.f32 %rs3, %f3;
-; CHECK-NOF16-NEXT: st.param.b16 [func_retval0+0], %rs3;
+; CHECK-NOF16-NEXT: st.param.b16 [func_retval0], %rs3;
[func_retval0], %rs3; ; CHECK-NOF16-NEXT: ret; ; ; CHECK-F16-LABEL: minnum_half( @@ -460,7 +460,7 @@ define half @minnum_half(half %a, half %b) { ; CHECK-F16-NEXT: ld.param.b16 %rs1, [minnum_half_param_0]; ; CHECK-F16-NEXT: ld.param.b16 %rs2, [minnum_half_param_1]; ; CHECK-F16-NEXT: min.f16 %rs3, %rs1, %rs2; -; CHECK-F16-NEXT: st.param.b16 [func_retval0+0], %rs3; +; CHECK-F16-NEXT: st.param.b16 [func_retval0], %rs3; ; CHECK-F16-NEXT: ret; ; ; CHECK-SM80-NOF16-LABEL: minnum_half( @@ -475,7 +475,7 @@ define half @minnum_half(half %a, half %b) { ; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %f2, %rs1; ; CHECK-SM80-NOF16-NEXT: min.f32 %f3, %f2, %f1; ; CHECK-SM80-NOF16-NEXT: cvt.rn.f16.f32 %rs3, %f3; -; CHECK-SM80-NOF16-NEXT: st.param.b16 [func_retval0+0], %rs3; +; CHECK-SM80-NOF16-NEXT: st.param.b16 [func_retval0], %rs3; ; CHECK-SM80-NOF16-NEXT: ret; %x = call half @llvm.minnum.f16(half %a, half %b) ret half %x @@ -490,7 +490,7 @@ define float @minnum_float(float %a, float %b) { ; CHECK-NEXT: ld.param.f32 %f1, [minnum_float_param_0]; ; CHECK-NEXT: ld.param.f32 %f2, [minnum_float_param_1]; ; CHECK-NEXT: min.f32 %f3, %f1, %f2; -; CHECK-NEXT: st.param.f32 [func_retval0+0], %f3; +; CHECK-NEXT: st.param.f32 [func_retval0], %f3; ; CHECK-NEXT: ret; %x = call float @llvm.minnum.f32(float %a, float %b) ret float %x @@ -504,7 +504,7 @@ define float @minnum_imm1(float %a) { ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.f32 %f1, [minnum_imm1_param_0]; ; CHECK-NEXT: min.f32 %f2, %f1, 0f00000000; -; CHECK-NEXT: st.param.f32 [func_retval0+0], %f2; +; CHECK-NEXT: st.param.f32 [func_retval0], %f2; ; CHECK-NEXT: ret; %x = call float @llvm.minnum.f32(float %a, float 0.0) ret float %x @@ -518,7 +518,7 @@ define float @minnum_imm2(float %a) { ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.f32 %f1, [minnum_imm2_param_0]; ; CHECK-NEXT: min.f32 %f2, %f1, 0f00000000; -; CHECK-NEXT: st.param.f32 [func_retval0+0], %f2; +; CHECK-NEXT: st.param.f32 [func_retval0], %f2; ; CHECK-NEXT: ret; %x = call float @llvm.minnum.f32(float 0.0, float %a) ret float %x @@ -533,7 +533,7 @@ define float @minnum_float_ftz(float %a, float %b) #1 { ; CHECK-NEXT: ld.param.f32 %f1, [minnum_float_ftz_param_0]; ; CHECK-NEXT: ld.param.f32 %f2, [minnum_float_ftz_param_1]; ; CHECK-NEXT: min.ftz.f32 %f3, %f1, %f2; -; CHECK-NEXT: st.param.f32 [func_retval0+0], %f3; +; CHECK-NEXT: st.param.f32 [func_retval0], %f3; ; CHECK-NEXT: ret; %x = call float @llvm.minnum.f32(float %a, float %b) ret float %x @@ -548,7 +548,7 @@ define double @minnum_double(double %a, double %b) { ; CHECK-NEXT: ld.param.f64 %fd1, [minnum_double_param_0]; ; CHECK-NEXT: ld.param.f64 %fd2, [minnum_double_param_1]; ; CHECK-NEXT: min.f64 %fd3, %fd1, %fd2; -; CHECK-NEXT: st.param.f64 [func_retval0+0], %fd3; +; CHECK-NEXT: st.param.f64 [func_retval0], %fd3; ; CHECK-NEXT: ret; %x = call double @llvm.minnum.f64(double %a, double %b) ret double %x @@ -575,7 +575,7 @@ define <2 x half> @minnum_v2half(<2 x half> %a, <2 x half> %b) { ; CHECK-NOF16-NEXT: min.f32 %f6, %f5, %f4; ; CHECK-NOF16-NEXT: cvt.rn.f16.f32 %rs6, %f6; ; CHECK-NOF16-NEXT: mov.b32 %r3, {%rs6, %rs5}; -; CHECK-NOF16-NEXT: st.param.b32 [func_retval0+0], %r3; +; CHECK-NOF16-NEXT: st.param.b32 [func_retval0], %r3; ; CHECK-NOF16-NEXT: ret; ; ; CHECK-F16-LABEL: minnum_v2half( @@ -586,7 +586,7 @@ define <2 x half> @minnum_v2half(<2 x half> %a, <2 x half> %b) { ; CHECK-F16-NEXT: ld.param.b32 %r1, [minnum_v2half_param_1]; ; CHECK-F16-NEXT: ld.param.b32 %r2, [minnum_v2half_param_0]; ; CHECK-F16-NEXT: min.f16x2 %r3, %r2, %r1; -; CHECK-F16-NEXT: 
st.param.b32 [func_retval0+0], %r3; +; CHECK-F16-NEXT: st.param.b32 [func_retval0], %r3; ; CHECK-F16-NEXT: ret; ; ; CHECK-SM80-NOF16-LABEL: minnum_v2half( @@ -609,7 +609,7 @@ define <2 x half> @minnum_v2half(<2 x half> %a, <2 x half> %b) { ; CHECK-SM80-NOF16-NEXT: min.f32 %f6, %f5, %f4; ; CHECK-SM80-NOF16-NEXT: cvt.rn.f16.f32 %rs6, %f6; ; CHECK-SM80-NOF16-NEXT: mov.b32 %r3, {%rs6, %rs5}; -; CHECK-SM80-NOF16-NEXT: st.param.b32 [func_retval0+0], %r3; +; CHECK-SM80-NOF16-NEXT: st.param.b32 [func_retval0], %r3; ; CHECK-SM80-NOF16-NEXT: ret; %x = call <2 x half> @llvm.minnum.v2f16(<2 x half> %a, <2 x half> %b) ret <2 x half> %x @@ -640,7 +640,7 @@ define half @minimum_half(half %a, half %b) { ; CHECK-NOF16-NEXT: cvt.f32.f16 %f3, %rs5; ; CHECK-NOF16-NEXT: setp.eq.f32 %p5, %f3, 0f00000000; ; CHECK-NOF16-NEXT: selp.b16 %rs9, %rs8, %rs5, %p5; -; CHECK-NOF16-NEXT: st.param.b16 [func_retval0+0], %rs9; +; CHECK-NOF16-NEXT: st.param.b16 [func_retval0], %rs9; ; CHECK-NOF16-NEXT: ret; ; ; CHECK-F16-LABEL: minimum_half( @@ -651,7 +651,7 @@ define half @minimum_half(half %a, half %b) { ; CHECK-F16-NEXT: ld.param.b16 %rs1, [minimum_half_param_0]; ; CHECK-F16-NEXT: ld.param.b16 %rs2, [minimum_half_param_1]; ; CHECK-F16-NEXT: min.NaN.f16 %rs3, %rs1, %rs2; -; CHECK-F16-NEXT: st.param.b16 [func_retval0+0], %rs3; +; CHECK-F16-NEXT: st.param.b16 [func_retval0], %rs3; ; CHECK-F16-NEXT: ret; ; ; CHECK-SM80-NOF16-LABEL: minimum_half( @@ -676,7 +676,7 @@ define half @minimum_half(half %a, half %b) { ; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %f3, %rs5; ; CHECK-SM80-NOF16-NEXT: setp.eq.f32 %p5, %f3, 0f00000000; ; CHECK-SM80-NOF16-NEXT: selp.b16 %rs9, %rs8, %rs5, %p5; -; CHECK-SM80-NOF16-NEXT: st.param.b16 [func_retval0+0], %rs9; +; CHECK-SM80-NOF16-NEXT: st.param.b16 [func_retval0], %rs9; ; CHECK-SM80-NOF16-NEXT: ret; %x = call half @llvm.minimum.f16(half %a, half %b) ret half %x @@ -703,7 +703,7 @@ define float @minimum_float(float %a, float %b) { ; CHECK-NOF16-NEXT: selp.f32 %f6, %f2, %f5, %p3; ; CHECK-NOF16-NEXT: setp.eq.f32 %p4, %f4, 0f00000000; ; CHECK-NOF16-NEXT: selp.f32 %f7, %f6, %f4, %p4; -; CHECK-NOF16-NEXT: st.param.f32 [func_retval0+0], %f7; +; CHECK-NOF16-NEXT: st.param.f32 [func_retval0], %f7; ; CHECK-NOF16-NEXT: ret; ; ; CHECK-F16-LABEL: minimum_float( @@ -714,7 +714,7 @@ define float @minimum_float(float %a, float %b) { ; CHECK-F16-NEXT: ld.param.f32 %f1, [minimum_float_param_0]; ; CHECK-F16-NEXT: ld.param.f32 %f2, [minimum_float_param_1]; ; CHECK-F16-NEXT: min.NaN.f32 %f3, %f1, %f2; -; CHECK-F16-NEXT: st.param.f32 [func_retval0+0], %f3; +; CHECK-F16-NEXT: st.param.f32 [func_retval0], %f3; ; CHECK-F16-NEXT: ret; ; ; CHECK-SM80-NOF16-LABEL: minimum_float( @@ -725,7 +725,7 @@ define float @minimum_float(float %a, float %b) { ; CHECK-SM80-NOF16-NEXT: ld.param.f32 %f1, [minimum_float_param_0]; ; CHECK-SM80-NOF16-NEXT: ld.param.f32 %f2, [minimum_float_param_1]; ; CHECK-SM80-NOF16-NEXT: min.NaN.f32 %f3, %f1, %f2; -; CHECK-SM80-NOF16-NEXT: st.param.f32 [func_retval0+0], %f3; +; CHECK-SM80-NOF16-NEXT: st.param.f32 [func_retval0], %f3; ; CHECK-SM80-NOF16-NEXT: ret; %x = call float @llvm.minimum.f32(float %a, float %b) ret float %x @@ -748,7 +748,7 @@ define float @minimum_imm1(float %a) { ; CHECK-NOF16-NEXT: selp.f32 %f4, %f1, %f3, %p2; ; CHECK-NOF16-NEXT: setp.eq.f32 %p3, %f3, 0f00000000; ; CHECK-NOF16-NEXT: selp.f32 %f5, %f4, %f3, %p3; -; CHECK-NOF16-NEXT: st.param.f32 [func_retval0+0], %f5; +; CHECK-NOF16-NEXT: st.param.f32 [func_retval0], %f5; ; CHECK-NOF16-NEXT: ret; ; ; CHECK-F16-LABEL: minimum_imm1( @@ -758,7 
+758,7 @@ define float @minimum_imm1(float %a) { ; CHECK-F16-NEXT: // %bb.0: ; CHECK-F16-NEXT: ld.param.f32 %f1, [minimum_imm1_param_0]; ; CHECK-F16-NEXT: min.NaN.f32 %f2, %f1, 0f00000000; -; CHECK-F16-NEXT: st.param.f32 [func_retval0+0], %f2; +; CHECK-F16-NEXT: st.param.f32 [func_retval0], %f2; ; CHECK-F16-NEXT: ret; ; ; CHECK-SM80-NOF16-LABEL: minimum_imm1( @@ -768,7 +768,7 @@ define float @minimum_imm1(float %a) { ; CHECK-SM80-NOF16-NEXT: // %bb.0: ; CHECK-SM80-NOF16-NEXT: ld.param.f32 %f1, [minimum_imm1_param_0]; ; CHECK-SM80-NOF16-NEXT: min.NaN.f32 %f2, %f1, 0f00000000; -; CHECK-SM80-NOF16-NEXT: st.param.f32 [func_retval0+0], %f2; +; CHECK-SM80-NOF16-NEXT: st.param.f32 [func_retval0], %f2; ; CHECK-SM80-NOF16-NEXT: ret; %x = call float @llvm.minimum.f32(float %a, float 0.0) ret float %x @@ -791,7 +791,7 @@ define float @minimum_imm2(float %a) { ; CHECK-NOF16-NEXT: selp.f32 %f4, %f1, %f3, %p2; ; CHECK-NOF16-NEXT: setp.eq.f32 %p3, %f3, 0f00000000; ; CHECK-NOF16-NEXT: selp.f32 %f5, %f4, %f3, %p3; -; CHECK-NOF16-NEXT: st.param.f32 [func_retval0+0], %f5; +; CHECK-NOF16-NEXT: st.param.f32 [func_retval0], %f5; ; CHECK-NOF16-NEXT: ret; ; ; CHECK-F16-LABEL: minimum_imm2( @@ -801,7 +801,7 @@ define float @minimum_imm2(float %a) { ; CHECK-F16-NEXT: // %bb.0: ; CHECK-F16-NEXT: ld.param.f32 %f1, [minimum_imm2_param_0]; ; CHECK-F16-NEXT: min.NaN.f32 %f2, %f1, 0f00000000; -; CHECK-F16-NEXT: st.param.f32 [func_retval0+0], %f2; +; CHECK-F16-NEXT: st.param.f32 [func_retval0], %f2; ; CHECK-F16-NEXT: ret; ; ; CHECK-SM80-NOF16-LABEL: minimum_imm2( @@ -811,7 +811,7 @@ define float @minimum_imm2(float %a) { ; CHECK-SM80-NOF16-NEXT: // %bb.0: ; CHECK-SM80-NOF16-NEXT: ld.param.f32 %f1, [minimum_imm2_param_0]; ; CHECK-SM80-NOF16-NEXT: min.NaN.f32 %f2, %f1, 0f00000000; -; CHECK-SM80-NOF16-NEXT: st.param.f32 [func_retval0+0], %f2; +; CHECK-SM80-NOF16-NEXT: st.param.f32 [func_retval0], %f2; ; CHECK-SM80-NOF16-NEXT: ret; %x = call float @llvm.minimum.f32(float 0.0, float %a) ret float %x @@ -838,7 +838,7 @@ define float @minimum_float_ftz(float %a, float %b) #1 { ; CHECK-NOF16-NEXT: selp.f32 %f6, %f2, %f5, %p3; ; CHECK-NOF16-NEXT: setp.eq.ftz.f32 %p4, %f4, 0f00000000; ; CHECK-NOF16-NEXT: selp.f32 %f7, %f6, %f4, %p4; -; CHECK-NOF16-NEXT: st.param.f32 [func_retval0+0], %f7; +; CHECK-NOF16-NEXT: st.param.f32 [func_retval0], %f7; ; CHECK-NOF16-NEXT: ret; ; ; CHECK-F16-LABEL: minimum_float_ftz( @@ -849,7 +849,7 @@ define float @minimum_float_ftz(float %a, float %b) #1 { ; CHECK-F16-NEXT: ld.param.f32 %f1, [minimum_float_ftz_param_0]; ; CHECK-F16-NEXT: ld.param.f32 %f2, [minimum_float_ftz_param_1]; ; CHECK-F16-NEXT: min.NaN.ftz.f32 %f3, %f1, %f2; -; CHECK-F16-NEXT: st.param.f32 [func_retval0+0], %f3; +; CHECK-F16-NEXT: st.param.f32 [func_retval0], %f3; ; CHECK-F16-NEXT: ret; ; ; CHECK-SM80-NOF16-LABEL: minimum_float_ftz( @@ -860,7 +860,7 @@ define float @minimum_float_ftz(float %a, float %b) #1 { ; CHECK-SM80-NOF16-NEXT: ld.param.f32 %f1, [minimum_float_ftz_param_0]; ; CHECK-SM80-NOF16-NEXT: ld.param.f32 %f2, [minimum_float_ftz_param_1]; ; CHECK-SM80-NOF16-NEXT: min.NaN.ftz.f32 %f3, %f1, %f2; -; CHECK-SM80-NOF16-NEXT: st.param.f32 [func_retval0+0], %f3; +; CHECK-SM80-NOF16-NEXT: st.param.f32 [func_retval0], %f3; ; CHECK-SM80-NOF16-NEXT: ret; %x = call float @llvm.minimum.f32(float %a, float %b) ret float %x @@ -887,7 +887,7 @@ define double @minimum_double(double %a, double %b) { ; CHECK-NEXT: selp.f64 %fd6, %fd2, %fd5, %p3; ; CHECK-NEXT: setp.eq.f64 %p4, %fd4, 0d0000000000000000; ; CHECK-NEXT: selp.f64 %fd7, %fd6, %fd4, 
%p4; -; CHECK-NEXT: st.param.f64 [func_retval0+0], %fd7; +; CHECK-NEXT: st.param.f64 [func_retval0], %fd7; ; CHECK-NEXT: ret; %x = call double @llvm.minimum.f64(double %a, double %b) ret double %x @@ -933,7 +933,7 @@ define <2 x half> @minimum_v2half(<2 x half> %a, <2 x half> %b) { ; CHECK-NOF16-NEXT: setp.eq.f32 %p10, %f6, 0f00000000; ; CHECK-NOF16-NEXT: selp.b16 %rs18, %rs17, %rs13, %p10; ; CHECK-NOF16-NEXT: mov.b32 %r3, {%rs18, %rs11}; -; CHECK-NOF16-NEXT: st.param.b32 [func_retval0+0], %r3; +; CHECK-NOF16-NEXT: st.param.b32 [func_retval0], %r3; ; CHECK-NOF16-NEXT: ret; ; ; CHECK-F16-LABEL: minimum_v2half( @@ -944,7 +944,7 @@ define <2 x half> @minimum_v2half(<2 x half> %a, <2 x half> %b) { ; CHECK-F16-NEXT: ld.param.b32 %r1, [minimum_v2half_param_1]; ; CHECK-F16-NEXT: ld.param.b32 %r2, [minimum_v2half_param_0]; ; CHECK-F16-NEXT: min.NaN.f16x2 %r3, %r2, %r1; -; CHECK-F16-NEXT: st.param.b32 [func_retval0+0], %r3; +; CHECK-F16-NEXT: st.param.b32 [func_retval0], %r3; ; CHECK-F16-NEXT: ret; ; ; CHECK-SM80-NOF16-LABEL: minimum_v2half( @@ -986,7 +986,7 @@ define <2 x half> @minimum_v2half(<2 x half> %a, <2 x half> %b) { ; CHECK-SM80-NOF16-NEXT: setp.eq.f32 %p10, %f6, 0f00000000; ; CHECK-SM80-NOF16-NEXT: selp.b16 %rs18, %rs17, %rs13, %p10; ; CHECK-SM80-NOF16-NEXT: mov.b32 %r3, {%rs18, %rs11}; -; CHECK-SM80-NOF16-NEXT: st.param.b32 [func_retval0+0], %r3; +; CHECK-SM80-NOF16-NEXT: st.param.b32 [func_retval0], %r3; ; CHECK-SM80-NOF16-NEXT: ret; %x = call <2 x half> @llvm.minimum.v2f16(<2 x half> %a, <2 x half> %b) ret <2 x half> %x @@ -1007,7 +1007,7 @@ define half @maxnum_half(half %a, half %b) { ; CHECK-NOF16-NEXT: cvt.f32.f16 %f2, %rs1; ; CHECK-NOF16-NEXT: max.f32 %f3, %f2, %f1; ; CHECK-NOF16-NEXT: cvt.rn.f16.f32 %rs3, %f3; -; CHECK-NOF16-NEXT: st.param.b16 [func_retval0+0], %rs3; +; CHECK-NOF16-NEXT: st.param.b16 [func_retval0], %rs3; ; CHECK-NOF16-NEXT: ret; ; ; CHECK-F16-LABEL: maxnum_half( @@ -1018,7 +1018,7 @@ define half @maxnum_half(half %a, half %b) { ; CHECK-F16-NEXT: ld.param.b16 %rs1, [maxnum_half_param_0]; ; CHECK-F16-NEXT: ld.param.b16 %rs2, [maxnum_half_param_1]; ; CHECK-F16-NEXT: max.f16 %rs3, %rs1, %rs2; -; CHECK-F16-NEXT: st.param.b16 [func_retval0+0], %rs3; +; CHECK-F16-NEXT: st.param.b16 [func_retval0], %rs3; ; CHECK-F16-NEXT: ret; ; ; CHECK-SM80-NOF16-LABEL: maxnum_half( @@ -1033,7 +1033,7 @@ define half @maxnum_half(half %a, half %b) { ; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %f2, %rs1; ; CHECK-SM80-NOF16-NEXT: max.f32 %f3, %f2, %f1; ; CHECK-SM80-NOF16-NEXT: cvt.rn.f16.f32 %rs3, %f3; -; CHECK-SM80-NOF16-NEXT: st.param.b16 [func_retval0+0], %rs3; +; CHECK-SM80-NOF16-NEXT: st.param.b16 [func_retval0], %rs3; ; CHECK-SM80-NOF16-NEXT: ret; %x = call half @llvm.maxnum.f16(half %a, half %b) ret half %x @@ -1047,7 +1047,7 @@ define float @maxnum_imm1(float %a) { ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.f32 %f1, [maxnum_imm1_param_0]; ; CHECK-NEXT: max.f32 %f2, %f1, 0f00000000; -; CHECK-NEXT: st.param.f32 [func_retval0+0], %f2; +; CHECK-NEXT: st.param.f32 [func_retval0], %f2; ; CHECK-NEXT: ret; %x = call float @llvm.maxnum.f32(float %a, float 0.0) ret float %x @@ -1061,7 +1061,7 @@ define float @maxnum_imm2(float %a) { ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.f32 %f1, [maxnum_imm2_param_0]; ; CHECK-NEXT: max.f32 %f2, %f1, 0f00000000; -; CHECK-NEXT: st.param.f32 [func_retval0+0], %f2; +; CHECK-NEXT: st.param.f32 [func_retval0], %f2; ; CHECK-NEXT: ret; %x = call float @llvm.maxnum.f32(float 0.0, float %a) ret float %x @@ -1076,7 +1076,7 @@ define float @maxnum_float(float %a, 
float %b) { ; CHECK-NEXT: ld.param.f32 %f1, [maxnum_float_param_0]; ; CHECK-NEXT: ld.param.f32 %f2, [maxnum_float_param_1]; ; CHECK-NEXT: max.f32 %f3, %f1, %f2; -; CHECK-NEXT: st.param.f32 [func_retval0+0], %f3; +; CHECK-NEXT: st.param.f32 [func_retval0], %f3; ; CHECK-NEXT: ret; %x = call float @llvm.maxnum.f32(float %a, float %b) ret float %x @@ -1091,7 +1091,7 @@ define float @maxnum_float_ftz(float %a, float %b) #1 { ; CHECK-NEXT: ld.param.f32 %f1, [maxnum_float_ftz_param_0]; ; CHECK-NEXT: ld.param.f32 %f2, [maxnum_float_ftz_param_1]; ; CHECK-NEXT: max.ftz.f32 %f3, %f1, %f2; -; CHECK-NEXT: st.param.f32 [func_retval0+0], %f3; +; CHECK-NEXT: st.param.f32 [func_retval0], %f3; ; CHECK-NEXT: ret; %x = call float @llvm.maxnum.f32(float %a, float %b) ret float %x @@ -1106,7 +1106,7 @@ define double @maxnum_double(double %a, double %b) { ; CHECK-NEXT: ld.param.f64 %fd1, [maxnum_double_param_0]; ; CHECK-NEXT: ld.param.f64 %fd2, [maxnum_double_param_1]; ; CHECK-NEXT: max.f64 %fd3, %fd1, %fd2; -; CHECK-NEXT: st.param.f64 [func_retval0+0], %fd3; +; CHECK-NEXT: st.param.f64 [func_retval0], %fd3; ; CHECK-NEXT: ret; %x = call double @llvm.maxnum.f64(double %a, double %b) ret double %x @@ -1133,7 +1133,7 @@ define <2 x half> @maxnum_v2half(<2 x half> %a, <2 x half> %b) { ; CHECK-NOF16-NEXT: max.f32 %f6, %f5, %f4; ; CHECK-NOF16-NEXT: cvt.rn.f16.f32 %rs6, %f6; ; CHECK-NOF16-NEXT: mov.b32 %r3, {%rs6, %rs5}; -; CHECK-NOF16-NEXT: st.param.b32 [func_retval0+0], %r3; +; CHECK-NOF16-NEXT: st.param.b32 [func_retval0], %r3; ; CHECK-NOF16-NEXT: ret; ; ; CHECK-F16-LABEL: maxnum_v2half( @@ -1144,7 +1144,7 @@ define <2 x half> @maxnum_v2half(<2 x half> %a, <2 x half> %b) { ; CHECK-F16-NEXT: ld.param.b32 %r1, [maxnum_v2half_param_1]; ; CHECK-F16-NEXT: ld.param.b32 %r2, [maxnum_v2half_param_0]; ; CHECK-F16-NEXT: max.f16x2 %r3, %r2, %r1; -; CHECK-F16-NEXT: st.param.b32 [func_retval0+0], %r3; +; CHECK-F16-NEXT: st.param.b32 [func_retval0], %r3; ; CHECK-F16-NEXT: ret; ; ; CHECK-SM80-NOF16-LABEL: maxnum_v2half( @@ -1167,7 +1167,7 @@ define <2 x half> @maxnum_v2half(<2 x half> %a, <2 x half> %b) { ; CHECK-SM80-NOF16-NEXT: max.f32 %f6, %f5, %f4; ; CHECK-SM80-NOF16-NEXT: cvt.rn.f16.f32 %rs6, %f6; ; CHECK-SM80-NOF16-NEXT: mov.b32 %r3, {%rs6, %rs5}; -; CHECK-SM80-NOF16-NEXT: st.param.b32 [func_retval0+0], %r3; +; CHECK-SM80-NOF16-NEXT: st.param.b32 [func_retval0], %r3; ; CHECK-SM80-NOF16-NEXT: ret; %x = call <2 x half> @llvm.maxnum.v2f16(<2 x half> %a, <2 x half> %b) ret <2 x half> %x @@ -1198,7 +1198,7 @@ define half @maximum_half(half %a, half %b) { ; CHECK-NOF16-NEXT: cvt.f32.f16 %f3, %rs5; ; CHECK-NOF16-NEXT: setp.eq.f32 %p5, %f3, 0f00000000; ; CHECK-NOF16-NEXT: selp.b16 %rs9, %rs8, %rs5, %p5; -; CHECK-NOF16-NEXT: st.param.b16 [func_retval0+0], %rs9; +; CHECK-NOF16-NEXT: st.param.b16 [func_retval0], %rs9; ; CHECK-NOF16-NEXT: ret; ; ; CHECK-F16-LABEL: maximum_half( @@ -1209,7 +1209,7 @@ define half @maximum_half(half %a, half %b) { ; CHECK-F16-NEXT: ld.param.b16 %rs1, [maximum_half_param_0]; ; CHECK-F16-NEXT: ld.param.b16 %rs2, [maximum_half_param_1]; ; CHECK-F16-NEXT: max.NaN.f16 %rs3, %rs1, %rs2; -; CHECK-F16-NEXT: st.param.b16 [func_retval0+0], %rs3; +; CHECK-F16-NEXT: st.param.b16 [func_retval0], %rs3; ; CHECK-F16-NEXT: ret; ; ; CHECK-SM80-NOF16-LABEL: maximum_half( @@ -1234,7 +1234,7 @@ define half @maximum_half(half %a, half %b) { ; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %f3, %rs5; ; CHECK-SM80-NOF16-NEXT: setp.eq.f32 %p5, %f3, 0f00000000; ; CHECK-SM80-NOF16-NEXT: selp.b16 %rs9, %rs8, %rs5, %p5; -; CHECK-SM80-NOF16-NEXT: 
st.param.b16 [func_retval0+0], %rs9; +; CHECK-SM80-NOF16-NEXT: st.param.b16 [func_retval0], %rs9; ; CHECK-SM80-NOF16-NEXT: ret; %x = call half @llvm.maximum.f16(half %a, half %b) ret half %x @@ -1253,7 +1253,7 @@ define float @maximum_imm1(float %a) { ; CHECK-NOF16-NEXT: selp.f32 %f3, 0f7FC00000, %f2, %p1; ; CHECK-NOF16-NEXT: setp.eq.f32 %p2, %f3, 0f00000000; ; CHECK-NOF16-NEXT: selp.f32 %f4, 0f00000000, %f3, %p2; -; CHECK-NOF16-NEXT: st.param.f32 [func_retval0+0], %f4; +; CHECK-NOF16-NEXT: st.param.f32 [func_retval0], %f4; ; CHECK-NOF16-NEXT: ret; ; ; CHECK-F16-LABEL: maximum_imm1( @@ -1263,7 +1263,7 @@ define float @maximum_imm1(float %a) { ; CHECK-F16-NEXT: // %bb.0: ; CHECK-F16-NEXT: ld.param.f32 %f1, [maximum_imm1_param_0]; ; CHECK-F16-NEXT: max.NaN.f32 %f2, %f1, 0f00000000; -; CHECK-F16-NEXT: st.param.f32 [func_retval0+0], %f2; +; CHECK-F16-NEXT: st.param.f32 [func_retval0], %f2; ; CHECK-F16-NEXT: ret; ; ; CHECK-SM80-NOF16-LABEL: maximum_imm1( @@ -1273,7 +1273,7 @@ define float @maximum_imm1(float %a) { ; CHECK-SM80-NOF16-NEXT: // %bb.0: ; CHECK-SM80-NOF16-NEXT: ld.param.f32 %f1, [maximum_imm1_param_0]; ; CHECK-SM80-NOF16-NEXT: max.NaN.f32 %f2, %f1, 0f00000000; -; CHECK-SM80-NOF16-NEXT: st.param.f32 [func_retval0+0], %f2; +; CHECK-SM80-NOF16-NEXT: st.param.f32 [func_retval0], %f2; ; CHECK-SM80-NOF16-NEXT: ret; %x = call float @llvm.maximum.f32(float %a, float 0.0) ret float %x @@ -1292,7 +1292,7 @@ define float @maximum_imm2(float %a) { ; CHECK-NOF16-NEXT: selp.f32 %f3, 0f7FC00000, %f2, %p1; ; CHECK-NOF16-NEXT: setp.eq.f32 %p2, %f3, 0f00000000; ; CHECK-NOF16-NEXT: selp.f32 %f4, 0f00000000, %f3, %p2; -; CHECK-NOF16-NEXT: st.param.f32 [func_retval0+0], %f4; +; CHECK-NOF16-NEXT: st.param.f32 [func_retval0], %f4; ; CHECK-NOF16-NEXT: ret; ; ; CHECK-F16-LABEL: maximum_imm2( @@ -1302,7 +1302,7 @@ define float @maximum_imm2(float %a) { ; CHECK-F16-NEXT: // %bb.0: ; CHECK-F16-NEXT: ld.param.f32 %f1, [maximum_imm2_param_0]; ; CHECK-F16-NEXT: max.NaN.f32 %f2, %f1, 0f00000000; -; CHECK-F16-NEXT: st.param.f32 [func_retval0+0], %f2; +; CHECK-F16-NEXT: st.param.f32 [func_retval0], %f2; ; CHECK-F16-NEXT: ret; ; ; CHECK-SM80-NOF16-LABEL: maximum_imm2( @@ -1312,7 +1312,7 @@ define float @maximum_imm2(float %a) { ; CHECK-SM80-NOF16-NEXT: // %bb.0: ; CHECK-SM80-NOF16-NEXT: ld.param.f32 %f1, [maximum_imm2_param_0]; ; CHECK-SM80-NOF16-NEXT: max.NaN.f32 %f2, %f1, 0f00000000; -; CHECK-SM80-NOF16-NEXT: st.param.f32 [func_retval0+0], %f2; +; CHECK-SM80-NOF16-NEXT: st.param.f32 [func_retval0], %f2; ; CHECK-SM80-NOF16-NEXT: ret; %x = call float @llvm.maximum.f32(float 0.0, float %a) ret float %x @@ -1339,7 +1339,7 @@ define float @maximum_float(float %a, float %b) { ; CHECK-NOF16-NEXT: selp.f32 %f6, %f2, %f5, %p3; ; CHECK-NOF16-NEXT: setp.eq.f32 %p4, %f4, 0f00000000; ; CHECK-NOF16-NEXT: selp.f32 %f7, %f6, %f4, %p4; -; CHECK-NOF16-NEXT: st.param.f32 [func_retval0+0], %f7; +; CHECK-NOF16-NEXT: st.param.f32 [func_retval0], %f7; ; CHECK-NOF16-NEXT: ret; ; ; CHECK-F16-LABEL: maximum_float( @@ -1350,7 +1350,7 @@ define float @maximum_float(float %a, float %b) { ; CHECK-F16-NEXT: ld.param.f32 %f1, [maximum_float_param_0]; ; CHECK-F16-NEXT: ld.param.f32 %f2, [maximum_float_param_1]; ; CHECK-F16-NEXT: max.NaN.f32 %f3, %f1, %f2; -; CHECK-F16-NEXT: st.param.f32 [func_retval0+0], %f3; +; CHECK-F16-NEXT: st.param.f32 [func_retval0], %f3; ; CHECK-F16-NEXT: ret; ; ; CHECK-SM80-NOF16-LABEL: maximum_float( @@ -1361,7 +1361,7 @@ define float @maximum_float(float %a, float %b) { ; CHECK-SM80-NOF16-NEXT: ld.param.f32 %f1, 
[maximum_float_param_0]; ; CHECK-SM80-NOF16-NEXT: ld.param.f32 %f2, [maximum_float_param_1]; ; CHECK-SM80-NOF16-NEXT: max.NaN.f32 %f3, %f1, %f2; -; CHECK-SM80-NOF16-NEXT: st.param.f32 [func_retval0+0], %f3; +; CHECK-SM80-NOF16-NEXT: st.param.f32 [func_retval0], %f3; ; CHECK-SM80-NOF16-NEXT: ret; %x = call float @llvm.maximum.f32(float %a, float %b) ret float %x @@ -1388,7 +1388,7 @@ define float @maximum_float_ftz(float %a, float %b) #1 { ; CHECK-NOF16-NEXT: selp.f32 %f6, %f2, %f5, %p3; ; CHECK-NOF16-NEXT: setp.eq.ftz.f32 %p4, %f4, 0f00000000; ; CHECK-NOF16-NEXT: selp.f32 %f7, %f6, %f4, %p4; -; CHECK-NOF16-NEXT: st.param.f32 [func_retval0+0], %f7; +; CHECK-NOF16-NEXT: st.param.f32 [func_retval0], %f7; ; CHECK-NOF16-NEXT: ret; ; ; CHECK-F16-LABEL: maximum_float_ftz( @@ -1399,7 +1399,7 @@ define float @maximum_float_ftz(float %a, float %b) #1 { ; CHECK-F16-NEXT: ld.param.f32 %f1, [maximum_float_ftz_param_0]; ; CHECK-F16-NEXT: ld.param.f32 %f2, [maximum_float_ftz_param_1]; ; CHECK-F16-NEXT: max.NaN.ftz.f32 %f3, %f1, %f2; -; CHECK-F16-NEXT: st.param.f32 [func_retval0+0], %f3; +; CHECK-F16-NEXT: st.param.f32 [func_retval0], %f3; ; CHECK-F16-NEXT: ret; ; ; CHECK-SM80-NOF16-LABEL: maximum_float_ftz( @@ -1410,7 +1410,7 @@ define float @maximum_float_ftz(float %a, float %b) #1 { ; CHECK-SM80-NOF16-NEXT: ld.param.f32 %f1, [maximum_float_ftz_param_0]; ; CHECK-SM80-NOF16-NEXT: ld.param.f32 %f2, [maximum_float_ftz_param_1]; ; CHECK-SM80-NOF16-NEXT: max.NaN.ftz.f32 %f3, %f1, %f2; -; CHECK-SM80-NOF16-NEXT: st.param.f32 [func_retval0+0], %f3; +; CHECK-SM80-NOF16-NEXT: st.param.f32 [func_retval0], %f3; ; CHECK-SM80-NOF16-NEXT: ret; %x = call float @llvm.maximum.f32(float %a, float %b) ret float %x @@ -1437,7 +1437,7 @@ define double @maximum_double(double %a, double %b) { ; CHECK-NEXT: selp.f64 %fd6, %fd2, %fd5, %p3; ; CHECK-NEXT: setp.eq.f64 %p4, %fd4, 0d0000000000000000; ; CHECK-NEXT: selp.f64 %fd7, %fd6, %fd4, %p4; -; CHECK-NEXT: st.param.f64 [func_retval0+0], %fd7; +; CHECK-NEXT: st.param.f64 [func_retval0], %fd7; ; CHECK-NEXT: ret; %x = call double @llvm.maximum.f64(double %a, double %b) ret double %x @@ -1483,7 +1483,7 @@ define <2 x half> @maximum_v2half(<2 x half> %a, <2 x half> %b) { ; CHECK-NOF16-NEXT: setp.eq.f32 %p10, %f6, 0f00000000; ; CHECK-NOF16-NEXT: selp.b16 %rs18, %rs17, %rs13, %p10; ; CHECK-NOF16-NEXT: mov.b32 %r3, {%rs18, %rs11}; -; CHECK-NOF16-NEXT: st.param.b32 [func_retval0+0], %r3; +; CHECK-NOF16-NEXT: st.param.b32 [func_retval0], %r3; ; CHECK-NOF16-NEXT: ret; ; ; CHECK-F16-LABEL: maximum_v2half( @@ -1494,7 +1494,7 @@ define <2 x half> @maximum_v2half(<2 x half> %a, <2 x half> %b) { ; CHECK-F16-NEXT: ld.param.b32 %r1, [maximum_v2half_param_1]; ; CHECK-F16-NEXT: ld.param.b32 %r2, [maximum_v2half_param_0]; ; CHECK-F16-NEXT: max.NaN.f16x2 %r3, %r2, %r1; -; CHECK-F16-NEXT: st.param.b32 [func_retval0+0], %r3; +; CHECK-F16-NEXT: st.param.b32 [func_retval0], %r3; ; CHECK-F16-NEXT: ret; ; ; CHECK-SM80-NOF16-LABEL: maximum_v2half( @@ -1536,7 +1536,7 @@ define <2 x half> @maximum_v2half(<2 x half> %a, <2 x half> %b) { ; CHECK-SM80-NOF16-NEXT: setp.eq.f32 %p10, %f6, 0f00000000; ; CHECK-SM80-NOF16-NEXT: selp.b16 %rs18, %rs17, %rs13, %p10; ; CHECK-SM80-NOF16-NEXT: mov.b32 %r3, {%rs18, %rs11}; -; CHECK-SM80-NOF16-NEXT: st.param.b32 [func_retval0+0], %r3; +; CHECK-SM80-NOF16-NEXT: st.param.b32 [func_retval0], %r3; ; CHECK-SM80-NOF16-NEXT: ret; %x = call <2 x half> @llvm.maximum.v2f16(<2 x half> %a, <2 x half> %b) ret <2 x half> %x @@ -1554,7 +1554,7 @@ define float @fma_float(float %a, float %b, 
float %c) { ; CHECK-NEXT: ld.param.f32 %f2, [fma_float_param_1]; ; CHECK-NEXT: ld.param.f32 %f3, [fma_float_param_2]; ; CHECK-NEXT: fma.rn.f32 %f4, %f1, %f2, %f3; -; CHECK-NEXT: st.param.f32 [func_retval0+0], %f4; +; CHECK-NEXT: st.param.f32 [func_retval0], %f4; ; CHECK-NEXT: ret; %x = call float @llvm.fma.f32(float %a, float %b, float %c) ret float %x @@ -1570,7 +1570,7 @@ define float @fma_float_ftz(float %a, float %b, float %c) #1 { ; CHECK-NEXT: ld.param.f32 %f2, [fma_float_ftz_param_1]; ; CHECK-NEXT: ld.param.f32 %f3, [fma_float_ftz_param_2]; ; CHECK-NEXT: fma.rn.ftz.f32 %f4, %f1, %f2, %f3; -; CHECK-NEXT: st.param.f32 [func_retval0+0], %f4; +; CHECK-NEXT: st.param.f32 [func_retval0], %f4; ; CHECK-NEXT: ret; %x = call float @llvm.fma.f32(float %a, float %b, float %c) ret float %x @@ -1586,7 +1586,7 @@ define double @fma_double(double %a, double %b, double %c) { ; CHECK-NEXT: ld.param.f64 %fd2, [fma_double_param_1]; ; CHECK-NEXT: ld.param.f64 %fd3, [fma_double_param_2]; ; CHECK-NEXT: fma.rn.f64 %fd4, %fd1, %fd2, %fd3; -; CHECK-NEXT: st.param.f64 [func_retval0+0], %fd4; +; CHECK-NEXT: st.param.f64 [func_retval0], %fd4; ; CHECK-NEXT: ret; %x = call double @llvm.fma.f64(double %a, double %b, double %c) ret double %x diff --git a/llvm/test/CodeGen/NVPTX/mulhi-intrins.ll b/llvm/test/CodeGen/NVPTX/mulhi-intrins.ll index efa99462b9b11c..21fce55fcbc242 100644 --- a/llvm/test/CodeGen/NVPTX/mulhi-intrins.ll +++ b/llvm/test/CodeGen/NVPTX/mulhi-intrins.ll @@ -13,7 +13,7 @@ define i16 @test_mulhi_i16(i16 %x, i16 %y) { ; CHECK-NEXT: ld.param.u16 %rs2, [test_mulhi_i16_param_1]; ; CHECK-NEXT: mul.hi.s16 %rs3, %rs1, %rs2; ; CHECK-NEXT: cvt.u32.u16 %r1, %rs3; -; CHECK-NEXT: st.param.b32 [func_retval0+0], %r1; +; CHECK-NEXT: st.param.b32 [func_retval0], %r1; ; CHECK-NEXT: ret; %1 = call i16 @llvm.nvvm.mulhi.s(i16 %x, i16 %y) ret i16 %1 @@ -30,7 +30,7 @@ define i16 @test_mulhi_u16(i16 %x, i16 %y) { ; CHECK-NEXT: ld.param.u16 %rs2, [test_mulhi_u16_param_1]; ; CHECK-NEXT: mul.hi.u16 %rs3, %rs1, %rs2; ; CHECK-NEXT: cvt.u32.u16 %r1, %rs3; -; CHECK-NEXT: st.param.b32 [func_retval0+0], %r1; +; CHECK-NEXT: st.param.b32 [func_retval0], %r1; ; CHECK-NEXT: ret; %1 = call i16 @llvm.nvvm.mulhi.us(i16 %x, i16 %y) ret i16 %1 @@ -45,7 +45,7 @@ define i32 @test_mulhi_i32(i32 %x, i32 %y) { ; CHECK-NEXT: ld.param.u32 %r1, [test_mulhi_i32_param_0]; ; CHECK-NEXT: ld.param.u32 %r2, [test_mulhi_i32_param_1]; ; CHECK-NEXT: mul.hi.s32 %r3, %r1, %r2; -; CHECK-NEXT: st.param.b32 [func_retval0+0], %r3; +; CHECK-NEXT: st.param.b32 [func_retval0], %r3; ; CHECK-NEXT: ret; %1 = call i32 @llvm.nvvm.mulhi.i(i32 %x, i32 %y) ret i32 %1 @@ -60,7 +60,7 @@ define i32 @test_mulhi_u32(i32 %x, i32 %y) { ; CHECK-NEXT: ld.param.u32 %r1, [test_mulhi_u32_param_0]; ; CHECK-NEXT: ld.param.u32 %r2, [test_mulhi_u32_param_1]; ; CHECK-NEXT: mul.hi.u32 %r3, %r1, %r2; -; CHECK-NEXT: st.param.b32 [func_retval0+0], %r3; +; CHECK-NEXT: st.param.b32 [func_retval0], %r3; ; CHECK-NEXT: ret; %1 = call i32 @llvm.nvvm.mulhi.ui(i32 %x, i32 %y) ret i32 %1 @@ -75,7 +75,7 @@ define i64 @test_mulhi_i64(i64 %x, i64 %y) { ; CHECK-NEXT: ld.param.u64 %rd1, [test_mulhi_i64_param_0]; ; CHECK-NEXT: ld.param.u64 %rd2, [test_mulhi_i64_param_1]; ; CHECK-NEXT: mul.hi.s64 %rd3, %rd1, %rd2; -; CHECK-NEXT: st.param.b64 [func_retval0+0], %rd3; +; CHECK-NEXT: st.param.b64 [func_retval0], %rd3; ; CHECK-NEXT: ret; %1 = call i64 @llvm.nvvm.mulhi.ll(i64 %x, i64 %y) ret i64 %1 @@ -90,7 +90,7 @@ define i64 @test_mulhi_u64(i64 %x, i64 %y) { ; CHECK-NEXT: ld.param.u64 %rd1, 
[test_mulhi_u64_param_0]; ; CHECK-NEXT: ld.param.u64 %rd2, [test_mulhi_u64_param_1]; ; CHECK-NEXT: mul.hi.u64 %rd3, %rd1, %rd2; -; CHECK-NEXT: st.param.b64 [func_retval0+0], %rd3; +; CHECK-NEXT: st.param.b64 [func_retval0], %rd3; ; CHECK-NEXT: ret; %1 = call i64 @llvm.nvvm.mulhi.ull(i64 %x, i64 %y) ret i64 %1 diff --git a/llvm/test/CodeGen/NVPTX/nvvm-reflect-arch-O0.ll b/llvm/test/CodeGen/NVPTX/nvvm-reflect-arch-O0.ll index 0088d6c64205d2..1e45df5efcf538 100644 --- a/llvm/test/CodeGen/NVPTX/nvvm-reflect-arch-O0.ll +++ b/llvm/test/CodeGen/NVPTX/nvvm-reflect-arch-O0.ll @@ -9,17 +9,17 @@ declare i32 @__nvvm_reflect(ptr) ; SM_52: .visible .func (.param .b32 func_retval0) foo() ; SM_52: mov.b32 %[[REG:.+]], 3; -; SM_52-NEXT: st.param.b32 [func_retval0+0], %[[REG:.+]]; +; SM_52-NEXT: st.param.b32 [func_retval0], %[[REG:.+]]; ; SM_52-NEXT: ret; ; ; SM_70: .visible .func (.param .b32 func_retval0) foo() ; SM_70: mov.b32 %[[REG:.+]], 2; -; SM_70-NEXT: st.param.b32 [func_retval0+0], %[[REG:.+]]; +; SM_70-NEXT: st.param.b32 [func_retval0], %[[REG:.+]]; ; SM_70-NEXT: ret; ; ; SM_90: .visible .func (.param .b32 func_retval0) foo() ; SM_90: mov.b32 %[[REG:.+]], 1; -; SM_90-NEXT: st.param.b32 [func_retval0+0], %[[REG:.+]]; +; SM_90-NEXT: st.param.b32 [func_retval0], %[[REG:.+]]; ; SM_90-NEXT: ret; define i32 @foo() { entry: @@ -56,17 +56,17 @@ return: ; SM_52: .visible .func (.param .b32 func_retval0) bar() ; SM_52: mov.b32 %[[REG:.+]], 2; -; SM_52-NEXT: st.param.b32 [func_retval0+0], %[[REG:.+]]; +; SM_52-NEXT: st.param.b32 [func_retval0], %[[REG:.+]]; ; SM_52-NEXT: ret; ; ; SM_70: .visible .func (.param .b32 func_retval0) bar() ; SM_70: mov.b32 %[[REG:.+]], 1; -; SM_70-NEXT: st.param.b32 [func_retval0+0], %[[REG:.+]]; +; SM_70-NEXT: st.param.b32 [func_retval0], %[[REG:.+]]; ; SM_70-NEXT: ret; ; ; SM_90: .visible .func (.param .b32 func_retval0) bar() ; SM_90: mov.b32 %[[REG:.+]], 1; -; SM_90-NEXT: st.param.b32 [func_retval0+0], %[[REG:.+]]; +; SM_90-NEXT: st.param.b32 [func_retval0], %[[REG:.+]]; ; SM_90-NEXT: ret; define i32 @bar() { entry: @@ -104,17 +104,17 @@ if.end: ; SM_52: .visible .func (.param .b32 func_retval0) qux() ; SM_52: mov.b32 %[[REG:.+]], 3; -; SM_52-NEXT: st.param.b32 [func_retval0+0], %[[REG:.+]]; +; SM_52-NEXT: st.param.b32 [func_retval0], %[[REG:.+]]; ; SM_52-NEXT: ret; ; ; SM_70: .visible .func (.param .b32 func_retval0) qux() ; SM_70: mov.b32 %[[REG:.+]], 2; -; SM_70-NEXT: st.param.b32 [func_retval0+0], %[[REG:.+]]; +; SM_70-NEXT: st.param.b32 [func_retval0], %[[REG:.+]]; ; SM_70-NEXT: ret; ; ; SM_90: .visible .func (.param .b32 func_retval0) qux() ; SM_90: mov.b32 %[[REG:.+]], 1; -; SM_90-NEXT: st.param.b32 [func_retval0+0], %[[REG:.+]]; +; SM_90-NEXT: st.param.b32 [func_retval0], %[[REG:.+]]; ; SM_90-NEXT: ret; define i32 @qux() { entry: @@ -144,15 +144,15 @@ return: ; SM_52: .visible .func (.param .b32 func_retval0) phi() ; SM_52: mov.f32 %[[REG:.+]], 0f00000000; -; SM_52-NEXT: st.param.f32 [func_retval0+0], %[[REG]]; +; SM_52-NEXT: st.param.f32 [func_retval0], %[[REG]]; ; SM_52-NEXT: ret; ; SM_70: .visible .func (.param .b32 func_retval0) phi() ; SM_70: mov.f32 %[[REG:.+]], 0f00000000; -; SM_70-NEXT: st.param.f32 [func_retval0+0], %[[REG]]; +; SM_70-NEXT: st.param.f32 [func_retval0], %[[REG]]; ; SM_70-NEXT: ret; ; SM_90: .visible .func (.param .b32 func_retval0) phi() ; SM_90: mov.f32 %[[REG:.+]], 0f00000000; -; SM_90-NEXT: st.param.f32 [func_retval0+0], %[[REG]]; +; SM_90-NEXT: st.param.f32 [func_retval0], %[[REG]]; ; SM_90-NEXT: ret; define float @phi() { entry: @@ -177,17 
+177,17 @@ exit: ; SM_52: .visible .func (.param .b32 func_retval0) prop() ; SM_52: mov.b32 %[[REG:.+]], 3; -; SM_52-NEXT: st.param.b32 [func_retval0+0], %[[REG:.+]]; +; SM_52-NEXT: st.param.b32 [func_retval0], %[[REG:.+]]; ; SM_52-NEXT: ret; ; ; SM_70: .visible .func (.param .b32 func_retval0) prop() ; SM_70: mov.b32 %[[REG:.+]], 2; -; SM_70-NEXT: st.param.b32 [func_retval0+0], %[[REG:.+]]; +; SM_70-NEXT: st.param.b32 [func_retval0], %[[REG:.+]]; ; SM_70-NEXT: ret; ; ; SM_90: .visible .func (.param .b32 func_retval0) prop() ; SM_90: mov.b32 %[[REG:.+]], 1; -; SM_90-NEXT: st.param.b32 [func_retval0+0], %[[REG:.+]]; +; SM_90-NEXT: st.param.b32 [func_retval0], %[[REG:.+]]; ; SM_90-NEXT: ret; define i32 @prop() { entry: diff --git a/llvm/test/CodeGen/NVPTX/param-load-store.ll b/llvm/test/CodeGen/NVPTX/param-load-store.ll index a29d4e1875cd7b..bb95f88e999d29 100644 --- a/llvm/test/CodeGen/NVPTX/param-load-store.ll +++ b/llvm/test/CodeGen/NVPTX/param-load-store.ll @@ -30,13 +30,13 @@ ; CHECK: cvt.u32.u16 [[B:%r[0-9]+]], [[A8]] ; CHECK: and.b32 [[C:%r[0-9]+]], [[B]], 1; ; CHECK: .param .b32 param0; -; CHECK: st.param.b32 [param0+0], [[C]] +; CHECK: st.param.b32 [param0], [[C]] ; CHECK: .param .b32 retval0; ; CHECK: call.uni ; CHECK-NEXT: test_i1, -; CHECK: ld.param.b32 [[R8:%r[0-9]+]], [retval0+0]; +; CHECK: ld.param.b32 [[R8:%r[0-9]+]], [retval0]; ; CHECK: and.b32 [[R:%r[0-9]+]], [[R8]], 1; -; CHECK: st.param.b32 [func_retval0+0], [[R]]; +; CHECK: st.param.b32 [func_retval0], [[R]]; ; CHECK: ret; define i1 @test_i1(i1 %a) { %r = tail call i1 @test_i1(i1 %a); @@ -53,13 +53,13 @@ define i1 @test_i1(i1 %a) { ; CHECK: and.b32 [[A1:%r[0-9]+]], [[A32]], 1; ; CHECK: neg.s32 [[A:%r[0-9]+]], [[A1]]; ; CHECK: .param .b32 param0; -; CHECK: st.param.b32 [param0+0], [[A]]; +; CHECK: st.param.b32 [param0], [[A]]; ; CHECK: .param .b32 retval0; ; CHECK: call.uni -; CHECK: ld.param.b32 [[R8:%r[0-9]+]], [retval0+0]; +; CHECK: ld.param.b32 [[R8:%r[0-9]+]], [retval0]; ; CHECK: and.b32 [[R1:%r[0-9]+]], [[R8]], 1; ; CHECK: neg.s32 [[R:%r[0-9]+]], [[R1]]; -; CHECK: st.param.b32 [func_retval0+0], [[R]]; +; CHECK: st.param.b32 [func_retval0], [[R]]; ; CHECK-NEXT: ret; define signext i1 @test_i1s(i1 signext %a) { %r = tail call signext i1 @test_i1s(i1 signext %a); @@ -73,14 +73,14 @@ define signext i1 @test_i1s(i1 signext %a) { ; CHECK-DAG: ld.param.u8 [[E2:%rs[0-9]+]], [test_v3i1_param_0+2]; ; CHECK-DAG: ld.param.u8 [[E0:%rs[0-9]+]], [test_v3i1_param_0] ; CHECK: .param .align 1 .b8 param0[1]; -; CHECK-DAG: st.param.b8 [param0+0], [[E0]]; +; CHECK-DAG: st.param.b8 [param0], [[E0]]; ; CHECK-DAG: st.param.b8 [param0+2], [[E2]]; ; CHECK: .param .align 1 .b8 retval0[1]; ; CHECK: call.uni (retval0), ; CHECK-NEXT: test_v3i1, -; CHECK-DAG: ld.param.b8 [[RE0:%rs[0-9]+]], [retval0+0]; +; CHECK-DAG: ld.param.b8 [[RE0:%rs[0-9]+]], [retval0]; ; CHECK-DAG: ld.param.b8 [[RE2:%rs[0-9]+]], [retval0+2]; -; CHECK-DAG: st.param.b8 [func_retval0+0], [[RE0]] +; CHECK-DAG: st.param.b8 [func_retval0], [[RE0]] ; CHECK-DAG: st.param.b8 [func_retval0+2], [[RE2]]; ; CHECK-NEXT: ret; define <3 x i1> @test_v3i1(<3 x i1> %a) { @@ -93,15 +93,15 @@ define <3 x i1> @test_v3i1(<3 x i1> %a) { ; CHECK-NEXT: .param .align 1 .b8 test_v4i1_param_0[1] ; CHECK: ld.param.u8 [[E0:%rs[0-9]+]], [test_v4i1_param_0] ; CHECK: .param .align 1 .b8 param0[1]; -; CHECK: st.param.b8 [param0+0], [[E0]]; +; CHECK: st.param.b8 [param0], [[E0]]; ; CHECK: .param .align 1 .b8 retval0[1]; ; CHECK: call.uni (retval0), ; CHECK: test_v4i1, -; CHECK: ld.param.b8 [[RE0:%rs[0-9]+]], 
[retval0+0]; +; CHECK: ld.param.b8 [[RE0:%rs[0-9]+]], [retval0]; ; CHECK: ld.param.b8 [[RE1:%rs[0-9]+]], [retval0+1]; ; CHECK: ld.param.b8 [[RE2:%rs[0-9]+]], [retval0+2]; ; CHECK: ld.param.b8 [[RE3:%rs[0-9]+]], [retval0+3]; -; CHECK: st.param.b8 [func_retval0+0], [[RE0]]; +; CHECK: st.param.b8 [func_retval0], [[RE0]]; ; CHECK: st.param.b8 [func_retval0+1], [[RE1]]; ; CHECK: st.param.b8 [func_retval0+2], [[RE2]]; ; CHECK: st.param.b8 [func_retval0+3], [[RE3]]; @@ -117,14 +117,14 @@ define <4 x i1> @test_v4i1(<4 x i1> %a) { ; CHECK-DAG: ld.param.u8 [[E4:%rs[0-9]+]], [test_v5i1_param_0+4]; ; CHECK-DAG: ld.param.u8 [[E0:%rs[0-9]+]], [test_v5i1_param_0] ; CHECK: .param .align 1 .b8 param0[1]; -; CHECK-DAG: st.param.b8 [param0+0], [[E0]]; +; CHECK-DAG: st.param.b8 [param0], [[E0]]; ; CHECK-DAG: st.param.b8 [param0+4], [[E4]]; ; CHECK: .param .align 1 .b8 retval0[1]; ; CHECK: call.uni (retval0), ; CHECK-NEXT: test_v5i1, -; CHECK-DAG: ld.param.b8 [[RE0:%rs[0-9]+]], [retval0+0]; +; CHECK-DAG: ld.param.b8 [[RE0:%rs[0-9]+]], [retval0]; ; CHECK-DAG: ld.param.b8 [[RE4:%rs[0-9]+]], [retval0+4]; -; CHECK-DAG: st.param.b8 [func_retval0+0], [[RE0]] +; CHECK-DAG: st.param.b8 [func_retval0], [[RE0]] ; CHECK-DAG: st.param.b8 [func_retval0+4], [[RE4]]; ; CHECK-NEXT: ret; define <5 x i1> @test_v5i1(<5 x i1> %a) { @@ -137,12 +137,12 @@ define <5 x i1> @test_v5i1(<5 x i1> %a) { ; CHECK-NEXT: .param .b32 test_i2_param_0 ; CHECK: ld.param.u8 {{%rs[0-9]+}}, [test_i2_param_0]; ; CHECK: .param .b32 param0; -; CHECK: st.param.b32 [param0+0], {{%r[0-9]+}}; +; CHECK: st.param.b32 [param0], {{%r[0-9]+}}; ; CHECK: .param .b32 retval0; ; CHECK: call.uni (retval0), ; CHECK: test_i2, -; CHECK: ld.param.b32 {{%r[0-9]+}}, [retval0+0]; -; CHECK: st.param.b32 [func_retval0+0], {{%r[0-9]+}}; +; CHECK: ld.param.b32 {{%r[0-9]+}}, [retval0]; +; CHECK: st.param.b32 [func_retval0], {{%r[0-9]+}}; ; CHECK-NEXT: ret; define i2 @test_i2(i2 %a) { %r = tail call i2 @test_i2(i2 %a); @@ -154,12 +154,12 @@ define i2 @test_i2(i2 %a) { ; CHECK-NEXT: .param .b32 test_i3_param_0 ; CHECK: ld.param.u8 {{%rs[0-9]+}}, [test_i3_param_0]; ; CHECK: .param .b32 param0; -; CHECK: st.param.b32 [param0+0], {{%r[0-9]+}}; +; CHECK: st.param.b32 [param0], {{%r[0-9]+}}; ; CHECK: .param .b32 retval0; ; CHECK: call.uni (retval0), ; CHECK: test_i3, -; CHECK: ld.param.b32 {{%r[0-9]+}}, [retval0+0]; -; CHECK: st.param.b32 [func_retval0+0], {{%r[0-9]+}}; +; CHECK: ld.param.b32 {{%r[0-9]+}}, [retval0]; +; CHECK: st.param.b32 [func_retval0], {{%r[0-9]+}}; ; CHECK-NEXT: ret; define i3 @test_i3(i3 %a) { %r = tail call i3 @test_i3(i3 %a); @@ -174,13 +174,13 @@ define i3 @test_i3(i3 %a) { ; CHECK: cvt.u32.u16 [[A32:%r[0-9]+]], [[A8]]; ; CHECK: and.b32 [[A:%r[0-9]+]], [[A32]], 255; ; CHECK: .param .b32 param0; -; CHECK: st.param.b32 [param0+0], [[A]]; +; CHECK: st.param.b32 [param0], [[A]]; ; CHECK: .param .b32 retval0; ; CHECK: call.uni (retval0), ; CHECK: test_i8, -; CHECK: ld.param.b32 [[R32:%r[0-9]+]], [retval0+0]; +; CHECK: ld.param.b32 [[R32:%r[0-9]+]], [retval0]; ; CHECK: and.b32 [[R:%r[0-9]+]], [[R32]], 255; -; CHECK: st.param.b32 [func_retval0+0], [[R]]; +; CHECK: st.param.b32 [func_retval0], [[R]]; ; CHECK-NEXT: ret; define i8 @test_i8(i8 %a) { %r = tail call i8 @test_i8(i8 %a); @@ -194,15 +194,15 @@ define i8 @test_i8(i8 %a) { ; CHECK: ld.param.s8 [[A8:%rs[0-9]+]], [test_i8s_param_0]; ; CHECK: cvt.s32.s16 [[A:%r[0-9]+]], [[A8]]; ; CHECK: .param .b32 param0; -; CHECK: st.param.b32 [param0+0], [[A]]; +; CHECK: st.param.b32 [param0], [[A]]; ; CHECK: .param .b32 
retval0; ; CHECK: call.uni (retval0), ; CHECK: test_i8s, -; CHECK: ld.param.b32 [[R32:%r[0-9]+]], [retval0+0]; +; CHECK: ld.param.b32 [[R32:%r[0-9]+]], [retval0]; ; -- This is suspicious (though correct) -- why not cvt.u8.u32, cvt.s8.s32 ? ; CHECK: cvt.u16.u32 [[R16:%rs[0-9]+]], [[R32]]; ; CHECK: cvt.s32.s16 [[R:%r[0-9]+]], [[R16]]; -; CHECK: st.param.b32 [func_retval0+0], [[R]]; +; CHECK: st.param.b32 [func_retval0], [[R]]; ; CHECK-NEXT: ret; define signext i8 @test_i8s(i8 signext %a) { %r = tail call signext i8 @test_i8s(i8 signext %a); @@ -214,14 +214,14 @@ define signext i8 @test_i8s(i8 signext %a) { ; CHECK-NEXT: .param .align 4 .b8 test_v3i8_param_0[4] ; CHECK: ld.param.u32 [[R:%r[0-9]+]], [test_v3i8_param_0]; ; CHECK: .param .align 4 .b8 param0[4]; -; CHECK: st.param.b32 [param0+0], [[R]] +; CHECK: st.param.b32 [param0], [[R]] ; CHECK: .param .align 4 .b8 retval0[4]; ; CHECK: call.uni (retval0), ; CHECK-NEXT: test_v3i8, -; CHECK: ld.param.b32 [[RE:%r[0-9]+]], [retval0+0]; +; CHECK: ld.param.b32 [[RE:%r[0-9]+]], [retval0]; ; v4i8/i32->{v3i8 elements}->v4i8/i32 conversion is messy and not very ; interesting here, so it's skipped. -; CHECK: st.param.b32 [func_retval0+0], +; CHECK: st.param.b32 [func_retval0], ; CHECK-NEXT: ret; define <3 x i8> @test_v3i8(<3 x i8> %a) { %r = tail call <3 x i8> @test_v3i8(<3 x i8> %a); @@ -233,12 +233,12 @@ define <3 x i8> @test_v3i8(<3 x i8> %a) { ; CHECK-NEXT: .param .align 4 .b8 test_v4i8_param_0[4] ; CHECK: ld.param.u32 [[R:%r[0-9]+]], [test_v4i8_param_0] ; CHECK: .param .align 4 .b8 param0[4]; -; CHECK: st.param.b32 [param0+0], [[R]]; +; CHECK: st.param.b32 [param0], [[R]]; ; CHECK: .param .align 4 .b8 retval0[4]; ; CHECK: call.uni (retval0), ; CHECK-NEXT: test_v4i8, -; CHECK: ld.param.b32 [[RET:%r[0-9]+]], [retval0+0]; -; CHECK: st.param.b32 [func_retval0+0], [[RET]]; +; CHECK: ld.param.b32 [[RET:%r[0-9]+]], [retval0]; +; CHECK: st.param.b32 [func_retval0], [[RET]]; ; CHECK-NEXT: ret; define <4 x i8> @test_v4i8(<4 x i8> %a) { %r = tail call <4 x i8> @test_v4i8(<4 x i8> %a); @@ -251,14 +251,14 @@ define <4 x i8> @test_v4i8(<4 x i8> %a) { ; CHECK-DAG: ld.param.u32 [[E0:%r[0-9]+]], [test_v5i8_param_0] ; CHECK-DAG: ld.param.u8 [[E4:%rs[0-9]+]], [test_v5i8_param_0+4]; ; CHECK: .param .align 8 .b8 param0[8]; -; CHECK-DAG: st.param.v4.b8 [param0+0], +; CHECK-DAG: st.param.v4.b8 [param0], ; CHECK-DAG: st.param.b8 [param0+4], [[E4]]; ; CHECK: .param .align 8 .b8 retval0[8]; ; CHECK: call.uni (retval0), ; CHECK-NEXT: test_v5i8, -; CHECK-DAG: ld.param.v4.b8 {[[RE0:%rs[0-9]+]], [[RE1:%rs[0-9]+]], [[RE2:%rs[0-9]+]], [[RE3:%rs[0-9]+]]}, [retval0+0]; +; CHECK-DAG: ld.param.v4.b8 {[[RE0:%rs[0-9]+]], [[RE1:%rs[0-9]+]], [[RE2:%rs[0-9]+]], [[RE3:%rs[0-9]+]]}, [retval0]; ; CHECK-DAG: ld.param.b8 [[RE4:%rs[0-9]+]], [retval0+4]; -; CHECK-DAG: st.param.v4.b8 [func_retval0+0], {[[RE0]], [[RE1]], [[RE2]], [[RE3]]} +; CHECK-DAG: st.param.v4.b8 [func_retval0], {[[RE0]], [[RE1]], [[RE2]], [[RE3]]} ; CHECK-DAG: st.param.b8 [func_retval0+4], [[RE4]]; ; CHECK-NEXT: ret; define <5 x i8> @test_v5i8(<5 x i8> %a) { @@ -270,12 +270,12 @@ define <5 x i8> @test_v5i8(<5 x i8> %a) { ; CHECK-LABEL: test_i11( ; CHECK-NEXT: .param .b32 test_i11_param_0 ; CHECK: ld.param.u16 {{%rs[0-9]+}}, [test_i11_param_0]; -; CHECK: st.param.b32 [param0+0], {{%r[0-9]+}}; +; CHECK: st.param.b32 [param0], {{%r[0-9]+}}; ; CHECK: .param .b32 retval0; ; CHECK: call.uni (retval0), ; CHECK-NEXT: test_i11, -; CHECK: ld.param.b32 {{%r[0-9]+}}, [retval0+0]; -; CHECK: st.param.b32 [func_retval0+0], {{%r[0-9]+}}; +; 
CHECK: ld.param.b32 {{%r[0-9]+}}, [retval0]; +; CHECK: st.param.b32 [func_retval0], {{%r[0-9]+}}; ; CHECK-NEXT: ret; define i11 @test_i11(i11 %a) { %r = tail call i11 @test_i11(i11 %a); @@ -288,13 +288,13 @@ define i11 @test_i11(i11 %a) { ; CHECK: ld.param.u16 [[E16:%rs[0-9]+]], [test_i16_param_0]; ; CHECK: cvt.u32.u16 [[E32:%r[0-9]+]], [[E16]]; ; CHECK: .param .b32 param0; -; CHECK: st.param.b32 [param0+0], [[E32]]; +; CHECK: st.param.b32 [param0], [[E32]]; ; CHECK: .param .b32 retval0; ; CHECK: call.uni (retval0), ; CHECK-NEXT: test_i16, -; CHECK: ld.param.b32 [[RE32:%r[0-9]+]], [retval0+0]; +; CHECK: ld.param.b32 [[RE32:%r[0-9]+]], [retval0]; ; CHECK: and.b32 [[R:%r[0-9]+]], [[RE32]], 65535; -; CHECK: st.param.b32 [func_retval0+0], [[R]]; +; CHECK: st.param.b32 [func_retval0], [[R]]; ; CHECK-NEXT: ret; define i16 @test_i16(i16 %a) { %r = tail call i16 @test_i16(i16 %a); @@ -307,13 +307,13 @@ define i16 @test_i16(i16 %a) { ; CHECK: ld.param.u16 [[E16:%rs[0-9]+]], [test_i16s_param_0]; ; CHECK: cvt.s32.s16 [[E32:%r[0-9]+]], [[E16]]; ; CHECK: .param .b32 param0; -; CHECK: st.param.b32 [param0+0], [[E32]]; +; CHECK: st.param.b32 [param0], [[E32]]; ; CHECK: .param .b32 retval0; ; CHECK: call.uni (retval0), ; CHECK-NEXT: test_i16s, -; CHECK: ld.param.b32 [[RE32:%r[0-9]+]], [retval0+0]; +; CHECK: ld.param.b32 [[RE32:%r[0-9]+]], [retval0]; ; CHECK: cvt.s32.s16 [[R:%r[0-9]+]], [[RE32]]; -; CHECK: st.param.b32 [func_retval0+0], [[R]]; +; CHECK: st.param.b32 [func_retval0], [[R]]; ; CHECK-NEXT: ret; define signext i16 @test_i16s(i16 signext %a) { %r = tail call signext i16 @test_i16s(i16 signext %a); @@ -327,14 +327,14 @@ define signext i16 @test_i16s(i16 signext %a) { ; CHECK-DAG: ld.param.u32 [[R:%r[0-9]+]], [test_v3i16_param_0]; ; CHECK-DAG: mov.b32 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]]}, [[R]]; ; CHECK: .param .align 8 .b8 param0[8]; -; CHECK: st.param.v2.b16 [param0+0], {[[E0]], [[E1]]}; +; CHECK: st.param.v2.b16 [param0], {[[E0]], [[E1]]}; ; CHECK: st.param.b16 [param0+4], [[E2]]; ; CHECK: .param .align 8 .b8 retval0[8]; ; CHECK: call.uni (retval0), ; CHECK-NEXT: test_v3i16, -; CHECK: ld.param.v2.b16 {[[RE0:%rs[0-9]+]], [[RE1:%rs[0-9]+]]}, [retval0+0]; +; CHECK: ld.param.v2.b16 {[[RE0:%rs[0-9]+]], [[RE1:%rs[0-9]+]]}, [retval0]; ; CHECK: ld.param.b16 [[RE2:%rs[0-9]+]], [retval0+4]; -; CHECK-DAG: st.param.v2.b16 [func_retval0+0], {[[RE0]], [[RE1]]}; +; CHECK-DAG: st.param.v2.b16 [func_retval0], {[[RE0]], [[RE1]]}; ; CHECK-DAG: st.param.b16 [func_retval0+4], [[RE2]]; ; CHECK-NEXT: ret; define <3 x i16> @test_v3i16(<3 x i16> %a) { @@ -347,12 +347,12 @@ define <3 x i16> @test_v3i16(<3 x i16> %a) { ; CHECK-NEXT: .param .align 8 .b8 test_v4i16_param_0[8] ; CHECK: ld.param.v2.u32 {[[E0:%r[0-9]+]], [[E1:%r[0-9]+]]}, [test_v4i16_param_0] ; CHECK: .param .align 8 .b8 param0[8]; -; CHECK: st.param.v2.b32 [param0+0], {[[E0]], [[E1]]}; +; CHECK: st.param.v2.b32 [param0], {[[E0]], [[E1]]}; ; CHECK: .param .align 8 .b8 retval0[8]; ; CHECK: call.uni (retval0), ; CHECK-NEXT: test_v4i16, -; CHECK: ld.param.v2.b32 {[[RE0:%r[0-9]+]], [[RE1:%r[0-9]+]]}, [retval0+0]; -; CHECK: st.param.v2.b32 [func_retval0+0], {[[RE0]], [[RE1]]} +; CHECK: ld.param.v2.b32 {[[RE0:%r[0-9]+]], [[RE1:%r[0-9]+]]}, [retval0]; +; CHECK: st.param.v2.b32 [func_retval0], {[[RE0]], [[RE1]]} ; CHECK-NEXT: ret; define <4 x i16> @test_v4i16(<4 x i16> %a) { %r = tail call <4 x i16> @test_v4i16(<4 x i16> %a); @@ -365,14 +365,14 @@ define <4 x i16> @test_v4i16(<4 x i16> %a) { ; CHECK-DAG: ld.param.u16 [[E4:%rs[0-9]+]], [test_v5i16_param_0+8]; ; 
CHECK-DAG: ld.param.v4.u16 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]], [[E2:%rs[0-9]+]], [[E3:%rs[0-9]+]]}, [test_v5i16_param_0] ; CHECK: .param .align 16 .b8 param0[16]; -; CHECK-DAG: st.param.v4.b16 [param0+0], {[[E0]], [[E1]], [[E2]], [[E3]]}; +; CHECK-DAG: st.param.v4.b16 [param0], {[[E0]], [[E1]], [[E2]], [[E3]]}; ; CHECK-DAG: st.param.b16 [param0+8], [[E4]]; ; CHECK: .param .align 16 .b8 retval0[16]; ; CHECK: call.uni (retval0), ; CHECK-NEXT: test_v5i16, -; CHECK-DAG: ld.param.v4.b16 {[[RE0:%rs[0-9]+]], [[RE1:%rs[0-9]+]], [[RE2:%rs[0-9]+]], [[RE3:%rs[0-9]+]]}, [retval0+0]; +; CHECK-DAG: ld.param.v4.b16 {[[RE0:%rs[0-9]+]], [[RE1:%rs[0-9]+]], [[RE2:%rs[0-9]+]], [[RE3:%rs[0-9]+]]}, [retval0]; ; CHECK-DAG: ld.param.b16 [[RE4:%rs[0-9]+]], [retval0+8]; -; CHECK-DAG: st.param.v4.b16 [func_retval0+0], {[[RE0]], [[RE1]], [[RE2]], [[RE3]]} +; CHECK-DAG: st.param.v4.b16 [func_retval0], {[[RE0]], [[RE1]], [[RE2]], [[RE3]]} ; CHECK-DAG: st.param.b16 [func_retval0+8], [[RE4]]; ; CHECK-NEXT: ret; define <5 x i16> @test_v5i16(<5 x i16> %a) { @@ -385,12 +385,12 @@ define <5 x i16> @test_v5i16(<5 x i16> %a) { ; CHECK-NEXT: .param .align 2 .b8 test_f16_param_0[2] ; CHECK: ld.param.b16 [[E:%rs[0-9]+]], [test_f16_param_0]; ; CHECK: .param .align 2 .b8 param0[2]; -; CHECK: st.param.b16 [param0+0], [[E]]; +; CHECK: st.param.b16 [param0], [[E]]; ; CHECK: .param .align 2 .b8 retval0[2]; ; CHECK: call.uni (retval0), ; CHECK-NEXT: test_f16, -; CHECK: ld.param.b16 [[R:%rs[0-9]+]], [retval0+0]; -; CHECK: st.param.b16 [func_retval0+0], [[R]] +; CHECK: ld.param.b16 [[R:%rs[0-9]+]], [retval0]; +; CHECK: st.param.b16 [func_retval0], [[R]] ; CHECK-NEXT: ret; define half @test_f16(half %a) { %r = tail call half @test_f16(half %a); @@ -402,12 +402,12 @@ define half @test_f16(half %a) { ; CHECK-NEXT: .param .align 4 .b8 test_v2f16_param_0[4] ; CHECK: ld.param.b32 [[E:%r[0-9]+]], [test_v2f16_param_0]; ; CHECK: .param .align 4 .b8 param0[4]; -; CHECK: st.param.b32 [param0+0], [[E]]; +; CHECK: st.param.b32 [param0], [[E]]; ; CHECK: .param .align 4 .b8 retval0[4]; ; CHECK: call.uni (retval0), ; CHECK-NEXT: test_v2f16, -; CHECK: ld.param.b32 [[R:%r[0-9]+]], [retval0+0]; -; CHECK: st.param.b32 [func_retval0+0], [[R]] +; CHECK: ld.param.b32 [[R:%r[0-9]+]], [retval0]; +; CHECK: st.param.b32 [func_retval0], [[R]] ; CHECK-NEXT: ret; define <2 x half> @test_v2f16(<2 x half> %a) { %r = tail call <2 x half> @test_v2f16(<2 x half> %a); @@ -419,12 +419,12 @@ define <2 x half> @test_v2f16(<2 x half> %a) { ; CHECK-NEXT: .param .align 2 .b8 test_bf16_param_0[2] ; CHECK: ld.param.b16 [[E:%rs[0-9]+]], [test_bf16_param_0]; ; CHECK: .param .align 2 .b8 param0[2]; -; CHECK: st.param.b16 [param0+0], [[E]]; +; CHECK: st.param.b16 [param0], [[E]]; ; CHECK: .param .align 2 .b8 retval0[2]; ; CHECK: call.uni (retval0), ; CHECK-NEXT: test_bf16, -; CHECK: ld.param.b16 [[R:%rs[0-9]+]], [retval0+0]; -; CHECK: st.param.b16 [func_retval0+0], [[R]] +; CHECK: ld.param.b16 [[R:%rs[0-9]+]], [retval0]; +; CHECK: st.param.b16 [func_retval0], [[R]] ; CHECK-NEXT: ret; define bfloat @test_bf16(bfloat %a) { %r = tail call bfloat @test_bf16(bfloat %a); @@ -436,12 +436,12 @@ define bfloat @test_bf16(bfloat %a) { ; CHECK-NEXT: .param .align 4 .b8 test_v2bf16_param_0[4] ; CHECK: ld.param.b32 [[E:%r[0-9]+]], [test_v2bf16_param_0]; ; CHECK: .param .align 4 .b8 param0[4]; -; CHECK: st.param.b32 [param0+0], [[E]]; +; CHECK: st.param.b32 [param0], [[E]]; ; CHECK: .param .align 4 .b8 retval0[4]; ; CHECK: call.uni (retval0), ; CHECK-NEXT: test_v2bf16, -; CHECK: ld.param.b32 
[[R:%r[0-9]+]], [retval0+0]; -; CHECK: st.param.b32 [func_retval0+0], [[R]] +; CHECK: ld.param.b32 [[R:%r[0-9]+]], [retval0]; +; CHECK: st.param.b32 [func_retval0], [[R]] ; CHECK-NEXT: ret; define <2 x bfloat> @test_v2bf16(<2 x bfloat> %a) { %r = tail call <2 x bfloat> @test_v2bf16(<2 x bfloat> %a); @@ -456,14 +456,14 @@ define <2 x bfloat> @test_v2bf16(<2 x bfloat> %a) { ; CHECK-DAG: mov.b32 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]]}, [[HH01]]; ; CHECK-DAG: ld.param.b16 [[E2:%rs[0-9]+]], [test_v3f16_param_0+4]; ; CHECK: .param .align 8 .b8 param0[8]; -; CHECK-DAG: st.param.v2.b16 [param0+0], {[[E0]], [[E1]]}; +; CHECK-DAG: st.param.v2.b16 [param0], {[[E0]], [[E1]]}; ; CHECK-DAG: st.param.b16 [param0+4], [[E2]]; ; CHECK: .param .align 8 .b8 retval0[8]; ; CHECK: call.uni (retval0), ; CHECK: test_v3f16, -; CHECK-DAG: ld.param.v2.b16 {[[R0:%rs[0-9]+]], [[R1:%rs[0-9]+]]}, [retval0+0]; +; CHECK-DAG: ld.param.v2.b16 {[[R0:%rs[0-9]+]], [[R1:%rs[0-9]+]]}, [retval0]; ; CHECK-DAG: ld.param.b16 [[R2:%rs[0-9]+]], [retval0+4]; -; CHECK-DAG: st.param.v2.b16 [func_retval0+0], {[[R0]], [[R1]]}; +; CHECK-DAG: st.param.v2.b16 [func_retval0], {[[R0]], [[R1]]}; ; CHECK-DAG: st.param.b16 [func_retval0+4], [[R2]]; ; CHECK: ret; define <3 x half> @test_v3f16(<3 x half> %a) { @@ -476,12 +476,12 @@ define <3 x half> @test_v3f16(<3 x half> %a) { ; CHECK: .param .align 8 .b8 test_v4f16_param_0[8] ; CHECK: ld.param.v2.u32 {[[R01:%r[0-9]+]], [[R23:%r[0-9]+]]}, [test_v4f16_param_0]; ; CHECK: .param .align 8 .b8 param0[8]; -; CHECK: st.param.v2.b32 [param0+0], {[[R01]], [[R23]]}; +; CHECK: st.param.v2.b32 [param0], {[[R01]], [[R23]]}; ; CHECK: .param .align 8 .b8 retval0[8]; ; CHECK: call.uni (retval0), ; CHECK: test_v4f16, -; CHECK: ld.param.v2.b32 {[[RH01:%r[0-9]+]], [[RH23:%r[0-9]+]]}, [retval0+0]; -; CHECK: st.param.v2.b32 [func_retval0+0], {[[RH01]], [[RH23]]}; +; CHECK: ld.param.v2.b32 {[[RH01:%r[0-9]+]], [[RH23:%r[0-9]+]]}, [retval0]; +; CHECK: st.param.v2.b32 [func_retval0], {[[RH01]], [[RH23]]}; ; CHECK: ret; define <4 x half> @test_v4f16(<4 x half> %a) { %r = tail call <4 x half> @test_v4f16(<4 x half> %a); @@ -494,14 +494,14 @@ define <4 x half> @test_v4f16(<4 x half> %a) { ; CHECK-DAG: ld.param.v4.b16 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]], [[E2:%rs[0-9]+]], [[E3:%rs[0-9]+]]}, [test_v5f16_param_0]; ; CHECK-DAG: ld.param.b16 [[E4:%rs[0-9]+]], [test_v5f16_param_0+8]; ; CHECK: .param .align 16 .b8 param0[16]; -; CHECK-DAG: st.param.v4.b16 [param0+0], +; CHECK-DAG: st.param.v4.b16 [param0], ; CHECK-DAG: st.param.b16 [param0+8], [[E4]]; ; CHECK: .param .align 16 .b8 retval0[16]; ; CHECK: call.uni (retval0), ; CHECK: test_v5f16, -; CHECK-DAG: ld.param.v4.b16 {[[R0:%rs[0-9]+]], [[R1:%rs[0-9]+]], [[R2:%rs[0-9]+]], [[R3:%rs[0-9]+]]}, [retval0+0]; +; CHECK-DAG: ld.param.v4.b16 {[[R0:%rs[0-9]+]], [[R1:%rs[0-9]+]], [[R2:%rs[0-9]+]], [[R3:%rs[0-9]+]]}, [retval0]; ; CHECK-DAG: ld.param.b16 [[R4:%rs[0-9]+]], [retval0+8]; -; CHECK-DAG: st.param.v4.b16 [func_retval0+0], {[[R0]], [[R1]], [[R2]], [[R3]]}; +; CHECK-DAG: st.param.v4.b16 [func_retval0], {[[R0]], [[R1]], [[R2]], [[R3]]}; ; CHECK-DAG: st.param.b16 [func_retval0+8], [[R4]]; ; CHECK: ret; define <5 x half> @test_v5f16(<5 x half> %a) { @@ -514,12 +514,12 @@ define <5 x half> @test_v5f16(<5 x half> %a) { ; CHECK: .param .align 16 .b8 test_v8f16_param_0[16] ; CHECK: ld.param.v4.u32 {[[R01:%r[0-9]+]], [[R23:%r[0-9]+]], [[R45:%r[0-9]+]], [[R67:%r[0-9]+]]}, [test_v8f16_param_0]; ; CHECK: .param .align 16 .b8 param0[16]; -; CHECK: st.param.v4.b32 [param0+0], {[[R01]], [[R23]], 
[[R45]], [[R67]]}; +; CHECK: st.param.v4.b32 [param0], {[[R01]], [[R23]], [[R45]], [[R67]]}; ; CHECK: .param .align 16 .b8 retval0[16]; ; CHECK: call.uni (retval0), ; CHECK: test_v8f16, -; CHECK: ld.param.v4.b32 {[[RH01:%r[0-9]+]], [[RH23:%r[0-9]+]], [[RH45:%r[0-9]+]], [[RH67:%r[0-9]+]]}, [retval0+0]; -; CHECK: st.param.v4.b32 [func_retval0+0], {[[RH01]], [[RH23]], [[RH45]], [[RH67]]}; +; CHECK: ld.param.v4.b32 {[[RH01:%r[0-9]+]], [[RH23:%r[0-9]+]], [[RH45:%r[0-9]+]], [[RH67:%r[0-9]+]]}, [retval0]; +; CHECK: st.param.v4.b32 [func_retval0], {[[RH01]], [[RH23]], [[RH45]], [[RH67]]}; ; CHECK: ret; define <8 x half> @test_v8f16(<8 x half> %a) { %r = tail call <8 x half> @test_v8f16(<8 x half> %a); @@ -533,16 +533,16 @@ define <8 x half> @test_v8f16(<8 x half> %a) { ; CHECK-DAG: ld.param.v4.b16 {[[E4:%rs[0-9]+]], [[E5:%rs[0-9]+]], [[E6:%rs[0-9]+]], [[E7:%rs[0-9]+]]}, [test_v9f16_param_0+8]; ; CHECK-DAG: ld.param.b16 [[E8:%rs[0-9]+]], [test_v9f16_param_0+16]; ; CHECK: .param .align 32 .b8 param0[32]; -; CHECK-DAG: st.param.v4.b16 [param0+0], +; CHECK-DAG: st.param.v4.b16 [param0], ; CHECK-DAG: st.param.v4.b16 [param0+8], ; CHECK-DAG: st.param.b16 [param0+16], [[E8]]; ; CHECK: .param .align 32 .b8 retval0[32]; ; CHECK: call.uni (retval0), ; CHECK: test_v9f16, -; CHECK-DAG: ld.param.v4.b16 {[[R0:%rs[0-9]+]], [[R1:%rs[0-9]+]], [[R2:%rs[0-9]+]], [[R3:%rs[0-9]+]]}, [retval0+0]; +; CHECK-DAG: ld.param.v4.b16 {[[R0:%rs[0-9]+]], [[R1:%rs[0-9]+]], [[R2:%rs[0-9]+]], [[R3:%rs[0-9]+]]}, [retval0]; ; CHECK-DAG: ld.param.v4.b16 {[[R4:%rs[0-9]+]], [[R5:%rs[0-9]+]], [[R6:%rs[0-9]+]], [[R7:%rs[0-9]+]]}, [retval0+8]; ; CHECK-DAG: ld.param.b16 [[R8:%rs[0-9]+]], [retval0+16]; -; CHECK-DAG: st.param.v4.b16 [func_retval0+0], {[[R0]], [[R1]], [[R2]], [[R3]]}; +; CHECK-DAG: st.param.v4.b16 [func_retval0], {[[R0]], [[R1]], [[R2]], [[R3]]}; ; CHECK-DAG: st.param.v4.b16 [func_retval0+8], {[[R4]], [[R5]], [[R6]], [[R7]]}; ; CHECK-DAG: st.param.b16 [func_retval0+16], [[R8]]; ; CHECK: ret; @@ -557,12 +557,12 @@ define <9 x half> @test_v9f16(<9 x half> %a) { ; CHECK-DAG: ld.param.u16 {{%r[0-9]+}}, [test_i19_param_0]; ; CHECK-DAG: ld.param.u8 {{%r[0-9]+}}, [test_i19_param_0+2]; ; CHECK: .param .b32 param0; -; CHECK: st.param.b32 [param0+0], {{%r[0-9]+}}; +; CHECK: st.param.b32 [param0], {{%r[0-9]+}}; ; CHECK: .param .b32 retval0; ; CHECK: call.uni (retval0), ; CHECK-NEXT: test_i19, -; CHECK: ld.param.b32 {{%r[0-9]+}}, [retval0+0]; -; CHECK: st.param.b32 [func_retval0+0], {{%r[0-9]+}}; +; CHECK: ld.param.b32 {{%r[0-9]+}}, [retval0]; +; CHECK: st.param.b32 [func_retval0], {{%r[0-9]+}}; ; CHECK-NEXT: ret; define i19 @test_i19(i19 %a) { %r = tail call i19 @test_i19(i19 %a); @@ -575,12 +575,12 @@ define i19 @test_i19(i19 %a) { ; CHECK-DAG: ld.param.u16 {{%r[0-9]+}}, [test_i23_param_0]; ; CHECK-DAG: ld.param.u8 {{%r[0-9]+}}, [test_i23_param_0+2]; ; CHECK: .param .b32 param0; -; CHECK: st.param.b32 [param0+0], {{%r[0-9]+}}; +; CHECK: st.param.b32 [param0], {{%r[0-9]+}}; ; CHECK: .param .b32 retval0; ; CHECK: call.uni (retval0), ; CHECK-NEXT: test_i23, -; CHECK: ld.param.b32 {{%r[0-9]+}}, [retval0+0]; -; CHECK: st.param.b32 [func_retval0+0], {{%r[0-9]+}}; +; CHECK: ld.param.b32 {{%r[0-9]+}}, [retval0]; +; CHECK: st.param.b32 [func_retval0], {{%r[0-9]+}}; ; CHECK-NEXT: ret; define i23 @test_i23(i23 %a) { %r = tail call i23 @test_i23(i23 %a); @@ -593,12 +593,12 @@ define i23 @test_i23(i23 %a) { ; CHECK-DAG: ld.param.u8 {{%r[0-9]+}}, [test_i24_param_0+2]; ; CHECK-DAG: ld.param.u16 {{%r[0-9]+}}, [test_i24_param_0]; ; CHECK: .param .b32 
param0; -; CHECK: st.param.b32 [param0+0], {{%r[0-9]+}}; +; CHECK: st.param.b32 [param0], {{%r[0-9]+}}; ; CHECK: .param .b32 retval0; ; CHECK: call.uni (retval0), ; CHECK-NEXT: test_i24, -; CHECK: ld.param.b32 {{%r[0-9]+}}, [retval0+0]; -; CHECK: st.param.b32 [func_retval0+0], {{%r[0-9]+}}; +; CHECK: ld.param.b32 {{%r[0-9]+}}, [retval0]; +; CHECK: st.param.b32 [func_retval0], {{%r[0-9]+}}; ; CHECK-NEXT: ret; define i24 @test_i24(i24 %a) { %r = tail call i24 @test_i24(i24 %a); @@ -610,12 +610,12 @@ define i24 @test_i24(i24 %a) { ; CHECK-NEXT: .param .b32 test_i29_param_0 ; CHECK: ld.param.u32 {{%r[0-9]+}}, [test_i29_param_0]; ; CHECK: .param .b32 param0; -; CHECK: st.param.b32 [param0+0], {{%r[0-9]+}}; +; CHECK: st.param.b32 [param0], {{%r[0-9]+}}; ; CHECK: .param .b32 retval0; ; CHECK: call.uni (retval0), ; CHECK-NEXT: test_i29, -; CHECK: ld.param.b32 {{%r[0-9]+}}, [retval0+0]; -; CHECK: st.param.b32 [func_retval0+0], {{%r[0-9]+}}; +; CHECK: ld.param.b32 {{%r[0-9]+}}, [retval0]; +; CHECK: st.param.b32 [func_retval0], {{%r[0-9]+}}; ; CHECK-NEXT: ret; define i29 @test_i29(i29 %a) { %r = tail call i29 @test_i29(i29 %a); @@ -627,12 +627,12 @@ define i29 @test_i29(i29 %a) { ; CHECK-NEXT: .param .b32 test_i32_param_0 ; CHECK: ld.param.u32 [[E:%r[0-9]+]], [test_i32_param_0]; ; CHECK: .param .b32 param0; -; CHECK: st.param.b32 [param0+0], [[E]]; +; CHECK: st.param.b32 [param0], [[E]]; ; CHECK: .param .b32 retval0; ; CHECK: call.uni (retval0), ; CHECK-NEXT: test_i32, -; CHECK: ld.param.b32 [[R:%r[0-9]+]], [retval0+0]; -; CHECK: st.param.b32 [func_retval0+0], [[R]]; +; CHECK: ld.param.b32 [[R:%r[0-9]+]], [retval0]; +; CHECK: st.param.b32 [func_retval0], [[R]]; ; CHECK-NEXT: ret; define i32 @test_i32(i32 %a) { %r = tail call i32 @test_i32(i32 %a); @@ -645,14 +645,14 @@ define i32 @test_i32(i32 %a) { ; CHECK-DAG: ld.param.u32 [[E2:%r[0-9]+]], [test_v3i32_param_0+8]; ; CHECK-DAG: ld.param.v2.u32 {[[E0:%r[0-9]+]], [[E1:%r[0-9]+]]}, [test_v3i32_param_0]; ; CHECK: .param .align 16 .b8 param0[16]; -; CHECK: st.param.v2.b32 [param0+0], {[[E0]], [[E1]]}; +; CHECK: st.param.v2.b32 [param0], {[[E0]], [[E1]]}; ; CHECK: st.param.b32 [param0+8], [[E2]]; ; CHECK: .param .align 16 .b8 retval0[16]; ; CHECK: call.uni (retval0), ; CHECK-NEXT: test_v3i32, -; CHECK: ld.param.v2.b32 {[[RE0:%r[0-9]+]], [[RE1:%r[0-9]+]]}, [retval0+0]; +; CHECK: ld.param.v2.b32 {[[RE0:%r[0-9]+]], [[RE1:%r[0-9]+]]}, [retval0]; ; CHECK: ld.param.b32 [[RE2:%r[0-9]+]], [retval0+8]; -; CHECK-DAG: st.param.v2.b32 [func_retval0+0], {[[RE0]], [[RE1]]}; +; CHECK-DAG: st.param.v2.b32 [func_retval0], {[[RE0]], [[RE1]]}; ; CHECK-DAG: st.param.b32 [func_retval0+8], [[RE2]]; ; CHECK-NEXT: ret; define <3 x i32> @test_v3i32(<3 x i32> %a) { @@ -665,12 +665,12 @@ define <3 x i32> @test_v3i32(<3 x i32> %a) { ; CHECK-NEXT: .param .align 16 .b8 test_v4i32_param_0[16] ; CHECK: ld.param.v4.u32 {[[E0:%r[0-9]+]], [[E1:%r[0-9]+]], [[E2:%r[0-9]+]], [[E3:%r[0-9]+]]}, [test_v4i32_param_0] ; CHECK: .param .align 16 .b8 param0[16]; -; CHECK: st.param.v4.b32 [param0+0], {[[E0]], [[E1]], [[E2]], [[E3]]}; +; CHECK: st.param.v4.b32 [param0], {[[E0]], [[E1]], [[E2]], [[E3]]}; ; CHECK: .param .align 16 .b8 retval0[16]; ; CHECK: call.uni (retval0), ; CHECK-NEXT: test_v4i32, -; CHECK: ld.param.v4.b32 {[[RE0:%r[0-9]+]], [[RE1:%r[0-9]+]], [[RE2:%r[0-9]+]], [[RE3:%r[0-9]+]]}, [retval0+0]; -; CHECK: st.param.v4.b32 [func_retval0+0], {[[RE0]], [[RE1]], [[RE2]], [[RE3]]} +; CHECK: ld.param.v4.b32 {[[RE0:%r[0-9]+]], [[RE1:%r[0-9]+]], [[RE2:%r[0-9]+]], [[RE3:%r[0-9]+]]}, [retval0]; +; 
CHECK: st.param.v4.b32 [func_retval0], {[[RE0]], [[RE1]], [[RE2]], [[RE3]]} ; CHECK-NEXT: ret; define <4 x i32> @test_v4i32(<4 x i32> %a) { %r = tail call <4 x i32> @test_v4i32(<4 x i32> %a); @@ -683,14 +683,14 @@ define <4 x i32> @test_v4i32(<4 x i32> %a) { ; CHECK-DAG: ld.param.u32 [[E4:%r[0-9]+]], [test_v5i32_param_0+16]; ; CHECK-DAG: ld.param.v4.u32 {[[E0:%r[0-9]+]], [[E1:%r[0-9]+]], [[E2:%r[0-9]+]], [[E3:%r[0-9]+]]}, [test_v5i32_param_0] ; CHECK: .param .align 32 .b8 param0[32]; -; CHECK-DAG: st.param.v4.b32 [param0+0], {[[E0]], [[E1]], [[E2]], [[E3]]}; +; CHECK-DAG: st.param.v4.b32 [param0], {[[E0]], [[E1]], [[E2]], [[E3]]}; ; CHECK-DAG: st.param.b32 [param0+16], [[E4]]; ; CHECK: .param .align 32 .b8 retval0[32]; ; CHECK: call.uni (retval0), ; CHECK-NEXT: test_v5i32, -; CHECK-DAG: ld.param.v4.b32 {[[RE0:%r[0-9]+]], [[RE1:%r[0-9]+]], [[RE2:%r[0-9]+]], [[RE3:%r[0-9]+]]}, [retval0+0]; +; CHECK-DAG: ld.param.v4.b32 {[[RE0:%r[0-9]+]], [[RE1:%r[0-9]+]], [[RE2:%r[0-9]+]], [[RE3:%r[0-9]+]]}, [retval0]; ; CHECK-DAG: ld.param.b32 [[RE4:%r[0-9]+]], [retval0+16]; -; CHECK-DAG: st.param.v4.b32 [func_retval0+0], {[[RE0]], [[RE1]], [[RE2]], [[RE3]]} +; CHECK-DAG: st.param.v4.b32 [func_retval0], {[[RE0]], [[RE1]], [[RE2]], [[RE3]]} ; CHECK-DAG: st.param.b32 [func_retval0+16], [[RE4]]; ; CHECK-NEXT: ret; define <5 x i32> @test_v5i32(<5 x i32> %a) { @@ -703,12 +703,12 @@ define <5 x i32> @test_v5i32(<5 x i32> %a) { ; CHECK-NEXT: .param .b32 test_f32_param_0 ; CHECK: ld.param.f32 [[E:%f[0-9]+]], [test_f32_param_0]; ; CHECK: .param .b32 param0; -; CHECK: st.param.f32 [param0+0], [[E]]; +; CHECK: st.param.f32 [param0], [[E]]; ; CHECK: .param .b32 retval0; ; CHECK: call.uni (retval0), ; CHECK-NEXT: test_f32, -; CHECK: ld.param.f32 [[R:%f[0-9]+]], [retval0+0]; -; CHECK: st.param.f32 [func_retval0+0], [[R]]; +; CHECK: ld.param.f32 [[R:%f[0-9]+]], [retval0]; +; CHECK: st.param.f32 [func_retval0], [[R]]; ; CHECK-NEXT: ret; define float @test_f32(float %a) { %r = tail call float @test_f32(float %a); @@ -721,12 +721,12 @@ define float @test_f32(float %a) { ; CHECK-DAG: ld.param.u8 {{%rd[0-9]+}}, [test_i40_param_0+4]; ; CHECK-DAG: ld.param.u32 {{%rd[0-9]+}}, [test_i40_param_0]; ; CHECK: .param .b64 param0; -; CHECK: st.param.b64 [param0+0], {{%rd[0-9]+}}; +; CHECK: st.param.b64 [param0], {{%rd[0-9]+}}; ; CHECK: .param .b64 retval0; ; CHECK: call.uni (retval0), ; CHECK-NEXT: test_i40, -; CHECK: ld.param.b64 {{%rd[0-9]+}}, [retval0+0]; -; CHECK: st.param.b64 [func_retval0+0], {{%rd[0-9]+}}; +; CHECK: ld.param.b64 {{%rd[0-9]+}}, [retval0]; +; CHECK: st.param.b64 [func_retval0], {{%rd[0-9]+}}; ; CHECK-NEXT: ret; define i40 @test_i40(i40 %a) { %r = tail call i40 @test_i40(i40 %a); @@ -739,12 +739,12 @@ define i40 @test_i40(i40 %a) { ; CHECK-DAG: ld.param.u16 {{%rd[0-9]+}}, [test_i47_param_0+4]; ; CHECK-DAG: ld.param.u32 {{%rd[0-9]+}}, [test_i47_param_0]; ; CHECK: .param .b64 param0; -; CHECK: st.param.b64 [param0+0], {{%rd[0-9]+}}; +; CHECK: st.param.b64 [param0], {{%rd[0-9]+}}; ; CHECK: .param .b64 retval0; ; CHECK: call.uni (retval0), ; CHECK-NEXT: test_i47, -; CHECK: ld.param.b64 {{%rd[0-9]+}}, [retval0+0]; -; CHECK: st.param.b64 [func_retval0+0], {{%rd[0-9]+}}; +; CHECK: ld.param.b64 {{%rd[0-9]+}}, [retval0]; +; CHECK: st.param.b64 [func_retval0], {{%rd[0-9]+}}; ; CHECK-NEXT: ret; define i47 @test_i47(i47 %a) { %r = tail call i47 @test_i47(i47 %a); @@ -757,12 +757,12 @@ define i47 @test_i47(i47 %a) { ; CHECK-DAG: ld.param.u16 {{%rd[0-9]+}}, [test_i48_param_0+4]; ; CHECK-DAG: ld.param.u32 {{%rd[0-9]+}}, 
[test_i48_param_0]; ; CHECK: .param .b64 param0; -; CHECK: st.param.b64 [param0+0], {{%rd[0-9]+}}; +; CHECK: st.param.b64 [param0], {{%rd[0-9]+}}; ; CHECK: .param .b64 retval0; ; CHECK: call.uni (retval0), ; CHECK-NEXT: test_i48, -; CHECK: ld.param.b64 {{%rd[0-9]+}}, [retval0+0]; -; CHECK: st.param.b64 [func_retval0+0], {{%rd[0-9]+}}; +; CHECK: ld.param.b64 {{%rd[0-9]+}}, [retval0]; +; CHECK: st.param.b64 [func_retval0], {{%rd[0-9]+}}; ; CHECK-NEXT: ret; define i48 @test_i48(i48 %a) { %r = tail call i48 @test_i48(i48 %a); @@ -776,12 +776,12 @@ define i48 @test_i48(i48 %a) { ; CHECK-DAG: ld.param.u16 {{%rd[0-9]+}}, [test_i51_param_0+4]; ; CHECK-DAG: ld.param.u32 {{%rd[0-9]+}}, [test_i51_param_0]; ; CHECK: .param .b64 param0; -; CHECK: st.param.b64 [param0+0], {{%rd[0-9]+}}; +; CHECK: st.param.b64 [param0], {{%rd[0-9]+}}; ; CHECK: .param .b64 retval0; ; CHECK: call.uni (retval0), ; CHECK-NEXT: test_i51, -; CHECK: ld.param.b64 {{%rd[0-9]+}}, [retval0+0]; -; CHECK: st.param.b64 [func_retval0+0], {{%rd[0-9]+}}; +; CHECK: ld.param.b64 {{%rd[0-9]+}}, [retval0]; +; CHECK: st.param.b64 [func_retval0], {{%rd[0-9]+}}; ; CHECK-NEXT: ret; define i51 @test_i51(i51 %a) { %r = tail call i51 @test_i51(i51 %a); @@ -795,12 +795,12 @@ define i51 @test_i51(i51 %a) { ; CHECK-DAG: ld.param.u16 {{%rd[0-9]+}}, [test_i56_param_0+4]; ; CHECK-DAG: ld.param.u32 {{%rd[0-9]+}}, [test_i56_param_0]; ; CHECK: .param .b64 param0; -; CHECK: st.param.b64 [param0+0], {{%rd[0-9]+}}; +; CHECK: st.param.b64 [param0], {{%rd[0-9]+}}; ; CHECK: .param .b64 retval0; ; CHECK: call.uni (retval0), ; CHECK-NEXT: test_i56, -; CHECK: ld.param.b64 {{%rd[0-9]+}}, [retval0+0]; -; CHECK: st.param.b64 [func_retval0+0], {{%rd[0-9]+}}; +; CHECK: ld.param.b64 {{%rd[0-9]+}}, [retval0]; +; CHECK: st.param.b64 [func_retval0], {{%rd[0-9]+}}; ; CHECK-NEXT: ret; define i56 @test_i56(i56 %a) { %r = tail call i56 @test_i56(i56 %a); @@ -812,12 +812,12 @@ define i56 @test_i56(i56 %a) { ; CHECK-NEXT: .param .b64 test_i57_param_0 ; CHECK: ld.param.u64 {{%rd[0-9]+}}, [test_i57_param_0]; ; CHECK: .param .b64 param0; -; CHECK: st.param.b64 [param0+0], {{%rd[0-9]+}}; +; CHECK: st.param.b64 [param0], {{%rd[0-9]+}}; ; CHECK: .param .b64 retval0; ; CHECK: call.uni (retval0), ; CHECK-NEXT: test_i57, -; CHECK: ld.param.b64 {{%rd[0-9]+}}, [retval0+0]; -; CHECK: st.param.b64 [func_retval0+0], {{%rd[0-9]+}}; +; CHECK: ld.param.b64 {{%rd[0-9]+}}, [retval0]; +; CHECK: st.param.b64 [func_retval0], {{%rd[0-9]+}}; ; CHECK-NEXT: ret; define i57 @test_i57(i57 %a) { %r = tail call i57 @test_i57(i57 %a); @@ -829,12 +829,12 @@ define i57 @test_i57(i57 %a) { ; CHECK-NEXT: .param .b64 test_i64_param_0 ; CHECK: ld.param.u64 [[E:%rd[0-9]+]], [test_i64_param_0]; ; CHECK: .param .b64 param0; -; CHECK: st.param.b64 [param0+0], [[E]]; +; CHECK: st.param.b64 [param0], [[E]]; ; CHECK: .param .b64 retval0; ; CHECK: call.uni (retval0), ; CHECK-NEXT: test_i64, -; CHECK: ld.param.b64 [[R:%rd[0-9]+]], [retval0+0]; -; CHECK: st.param.b64 [func_retval0+0], [[R]]; +; CHECK: ld.param.b64 [[R:%rd[0-9]+]], [retval0]; +; CHECK: st.param.b64 [func_retval0], [[R]]; ; CHECK-NEXT: ret; define i64 @test_i64(i64 %a) { %r = tail call i64 @test_i64(i64 %a); @@ -847,16 +847,16 @@ define i64 @test_i64(i64 %a) { ; CHECK-DAG: ld.param.u64 [[E2:%rd[0-9]+]], [test_v3i64_param_0+16]; ; CHECK-DAG: ld.param.v2.u64 {[[E0:%rd[0-9]+]], [[E1:%rd[0-9]+]]}, [test_v3i64_param_0]; ; CHECK: .param .align 32 .b8 param0[32]; -; CHECK: st.param.v2.b64 [param0+0], {[[E0]], [[E1]]}; +; CHECK: st.param.v2.b64 [param0], {[[E0]], 
[[E1]]}; ; CHECK: st.param.b64 [param0+16], [[E2]]; ; CHECK: .param .align 32 .b8 retval0[32]; ; CHECK: call.uni (retval0), ; CHECK-NEXT: test_v3i64, -; CHECK: ld.param.v2.b64 {[[RE0:%rd[0-9]+]], [[RE1:%rd[0-9]+]]}, [retval0+0]; +; CHECK: ld.param.v2.b64 {[[RE0:%rd[0-9]+]], [[RE1:%rd[0-9]+]]}, [retval0]; ; CHECK: ld.param.b64 [[RE2:%rd[0-9]+]], [retval0+16]; -; CHECK-DAG: st.param.v2.b64 [func_retval0+0], {[[RE0]], [[RE1]]}; +; CHECK-DAG: st.param.v2.b64 [func_retval0], {[[RE0]], [[RE1]]}; ; CHECK-DAG: st.param.b64 [func_retval0+16], [[RE2]]; -; CHECK-DAG: st.param.v2.b64 [func_retval0+0], {[[RE0]], [[RE1]]}; +; CHECK-DAG: st.param.v2.b64 [func_retval0], {[[RE0]], [[RE1]]}; ; CHECK-DAG: st.param.b64 [func_retval0+16], [[RE2]]; ; CHECK-NEXT: ret; define <3 x i64> @test_v3i64(<3 x i64> %a) { @@ -871,15 +871,15 @@ define <3 x i64> @test_v3i64(<3 x i64> %a) { ; CHECK-DAG: ld.param.v2.u64 {[[E2:%rd[0-9]+]], [[E3:%rd[0-9]+]]}, [test_v4i64_param_0+16]; ; CHECK-DAG: ld.param.v2.u64 {[[E0:%rd[0-9]+]], [[E1:%rd[0-9]+]]}, [test_v4i64_param_0]; ; CHECK: .param .align 32 .b8 param0[32]; -; CHECK: st.param.v2.b64 [param0+0], {[[E0]], [[E1]]}; +; CHECK: st.param.v2.b64 [param0], {[[E0]], [[E1]]}; ; CHECK: st.param.v2.b64 [param0+16], {[[E2]], [[E3]]}; ; CHECK: .param .align 32 .b8 retval0[32]; ; CHECK: call.uni (retval0), ; CHECK-NEXT: test_v4i64, -; CHECK: ld.param.v2.b64 {[[RE0:%rd[0-9]+]], [[RE1:%rd[0-9]+]]}, [retval0+0]; +; CHECK: ld.param.v2.b64 {[[RE0:%rd[0-9]+]], [[RE1:%rd[0-9]+]]}, [retval0]; ; CHECK: ld.param.v2.b64 {[[RE2:%rd[0-9]+]], [[RE3:%rd[0-9]+]]}, [retval0+16]; ; CHECK-DAG: st.param.v2.b64 [func_retval0+16], {[[RE2]], [[RE3]]}; -; CHECK-DAG: st.param.v2.b64 [func_retval0+0], {[[RE0]], [[RE1]]}; +; CHECK-DAG: st.param.v2.b64 [func_retval0], {[[RE0]], [[RE1]]}; ; CHECK-NEXT: ret; define <4 x i64> @test_v4i64(<4 x i64> %a) { %r = tail call <4 x i64> @test_v4i64(<4 x i64> %a); @@ -893,12 +893,12 @@ define <4 x i64> @test_v4i64(<4 x i64> %a) { ; CHECK-NEXT: .align 1 .b8 test_s_i1_param_0[1] ; CHECK: ld.param.u8 [[A:%rs[0-9]+]], [test_s_i1_param_0]; ; CHECK: .param .align 1 .b8 param0[1]; -; CHECK: st.param.b8 [param0+0], [[A]] +; CHECK: st.param.b8 [param0], [[A]] ; CHECK: .param .align 1 .b8 retval0[1]; ; CHECK: call.uni ; CHECK-NEXT: test_s_i1, -; CHECK: ld.param.b8 [[R:%rs[0-9]+]], [retval0+0]; -; CHECK: st.param.b8 [func_retval0+0], [[R]]; +; CHECK: ld.param.b8 [[R:%rs[0-9]+]], [retval0]; +; CHECK: st.param.b8 [func_retval0], [[R]]; ; CHECK-NEXT: ret; define %s_i1 @test_s_i1(%s_i1 %a) { %r = tail call %s_i1 @test_s_i1(%s_i1 %a); @@ -910,12 +910,12 @@ define %s_i1 @test_s_i1(%s_i1 %a) { ; CHECK-NEXT: .param .align 1 .b8 test_s_i8_param_0[1] ; CHECK: ld.param.u8 [[A:%rs[0-9]+]], [test_s_i8_param_0]; ; CHECK: .param .align 1 .b8 param0[1]; -; CHECK: st.param.b8 [param0+0], [[A]] +; CHECK: st.param.b8 [param0], [[A]] ; CHECK: .param .align 1 .b8 retval0[1]; ; CHECK: call.uni ; CHECK-NEXT: test_s_i8, -; CHECK: ld.param.b8 [[R:%rs[0-9]+]], [retval0+0]; -; CHECK: st.param.b8 [func_retval0+0], [[R]]; +; CHECK: ld.param.b8 [[R:%rs[0-9]+]], [retval0]; +; CHECK: st.param.b8 [func_retval0], [[R]]; ; CHECK-NEXT: ret; define %s_i8 @test_s_i8(%s_i8 %a) { %r = tail call %s_i8 @test_s_i8(%s_i8 %a); @@ -927,12 +927,12 @@ define %s_i8 @test_s_i8(%s_i8 %a) { ; CHECK-NEXT: .param .align 2 .b8 test_s_i16_param_0[2] ; CHECK: ld.param.u16 [[A:%rs[0-9]+]], [test_s_i16_param_0]; ; CHECK: .param .align 2 .b8 param0[2]; -; CHECK: st.param.b16 [param0+0], [[A]] +; CHECK: st.param.b16 [param0], [[A]] ; CHECK: .param 
.align 2 .b8 retval0[2]; ; CHECK: call.uni ; CHECK-NEXT: test_s_i16, -; CHECK: ld.param.b16 [[R:%rs[0-9]+]], [retval0+0]; -; CHECK: st.param.b16 [func_retval0+0], [[R]]; +; CHECK: ld.param.b16 [[R:%rs[0-9]+]], [retval0]; +; CHECK: st.param.b16 [func_retval0], [[R]]; ; CHECK-NEXT: ret; define %s_i16 @test_s_i16(%s_i16 %a) { %r = tail call %s_i16 @test_s_i16(%s_i16 %a); @@ -944,12 +944,12 @@ define %s_i16 @test_s_i16(%s_i16 %a) { ; CHECK-NEXT: .param .align 2 .b8 test_s_f16_param_0[2] ; CHECK: ld.param.b16 [[A:%rs[0-9]+]], [test_s_f16_param_0]; ; CHECK: .param .align 2 .b8 param0[2]; -; CHECK: st.param.b16 [param0+0], [[A]] +; CHECK: st.param.b16 [param0], [[A]] ; CHECK: .param .align 2 .b8 retval0[2]; ; CHECK: call.uni ; CHECK-NEXT: test_s_f16, -; CHECK: ld.param.b16 [[R:%rs[0-9]+]], [retval0+0]; -; CHECK: st.param.b16 [func_retval0+0], [[R]]; +; CHECK: ld.param.b16 [[R:%rs[0-9]+]], [retval0]; +; CHECK: st.param.b16 [func_retval0], [[R]]; ; CHECK-NEXT: ret; define %s_f16 @test_s_f16(%s_f16 %a) { %r = tail call %s_f16 @test_s_f16(%s_f16 %a); @@ -961,12 +961,12 @@ define %s_f16 @test_s_f16(%s_f16 %a) { ; CHECK-NEXT: .param .align 4 .b8 test_s_i32_param_0[4] ; CHECK: ld.param.u32 [[E:%r[0-9]+]], [test_s_i32_param_0]; ; CHECK: .param .align 4 .b8 param0[4] -; CHECK: st.param.b32 [param0+0], [[E]]; +; CHECK: st.param.b32 [param0], [[E]]; ; CHECK: .param .align 4 .b8 retval0[4]; ; CHECK: call.uni (retval0), ; CHECK-NEXT: test_s_i32, -; CHECK: ld.param.b32 [[R:%r[0-9]+]], [retval0+0]; -; CHECK: st.param.b32 [func_retval0+0], [[R]]; +; CHECK: ld.param.b32 [[R:%r[0-9]+]], [retval0]; +; CHECK: st.param.b32 [func_retval0], [[R]]; ; CHECK-NEXT: ret; define %s_i32 @test_s_i32(%s_i32 %a) { %r = tail call %s_i32 @test_s_i32(%s_i32 %a); @@ -978,12 +978,12 @@ define %s_i32 @test_s_i32(%s_i32 %a) { ; CHECK-NEXT: .param .align 4 .b8 test_s_f32_param_0[4] ; CHECK: ld.param.f32 [[E:%f[0-9]+]], [test_s_f32_param_0]; ; CHECK: .param .align 4 .b8 param0[4] -; CHECK: st.param.f32 [param0+0], [[E]]; +; CHECK: st.param.f32 [param0], [[E]]; ; CHECK: .param .align 4 .b8 retval0[4]; ; CHECK: call.uni (retval0), ; CHECK-NEXT: test_s_f32, -; CHECK: ld.param.f32 [[R:%f[0-9]+]], [retval0+0]; -; CHECK: st.param.f32 [func_retval0+0], [[R]]; +; CHECK: ld.param.f32 [[R:%f[0-9]+]], [retval0]; +; CHECK: st.param.f32 [func_retval0], [[R]]; ; CHECK-NEXT: ret; define %s_f32 @test_s_f32(%s_f32 %a) { %r = tail call %s_f32 @test_s_f32(%s_f32 %a); @@ -995,12 +995,12 @@ define %s_f32 @test_s_f32(%s_f32 %a) { ; CHECK-NEXT: .param .align 8 .b8 test_s_i64_param_0[8] ; CHECK: ld.param.u64 [[E:%rd[0-9]+]], [test_s_i64_param_0]; ; CHECK: .param .align 8 .b8 param0[8]; -; CHECK: st.param.b64 [param0+0], [[E]]; +; CHECK: st.param.b64 [param0], [[E]]; ; CHECK: .param .align 8 .b8 retval0[8]; ; CHECK: call.uni (retval0), ; CHECK-NEXT: test_s_i64, -; CHECK: ld.param.b64 [[R:%rd[0-9]+]], [retval0+0]; -; CHECK: st.param.b64 [func_retval0+0], [[R]]; +; CHECK: ld.param.b64 [[R:%rd[0-9]+]], [retval0]; +; CHECK: st.param.b64 [func_retval0], [[R]]; ; CHECK-NEXT: ret; define %s_i64 @test_s_i64(%s_i64 %a) { %r = tail call %s_i64 @test_s_i64(%s_i64 %a); @@ -1017,7 +1017,7 @@ define %s_i64 @test_s_i64(%s_i64 %a) { ; CHECK-DAG: ld.param.f32 [[E1:%f[0-9]+]], [test_s_i32f32_param_0+4]; ; CHECK-DAG: ld.param.u32 [[E0:%r[0-9]+]], [test_s_i32f32_param_0]; ; CHECK: .param .align 8 .b8 param0[24]; -; CHECK-DAG: st.param.b32 [param0+0], [[E0]]; +; CHECK-DAG: st.param.b32 [param0], [[E0]]; ; CHECK-DAG: st.param.f32 [param0+4], [[E1]]; ; CHECK-DAG: st.param.b32 
[param0+8], [[E2]]; ; CHECK-DAG: st.param.f32 [param0+12], [[E3]]; @@ -1025,12 +1025,12 @@ define %s_i64 @test_s_i64(%s_i64 %a) { ; CHECK: .param .align 8 .b8 retval0[24]; ; CHECK: call.uni (retval0), ; CHECK-NEXT: test_s_i32f32, -; CHECK-DAG: ld.param.b32 [[RE0:%r[0-9]+]], [retval0+0]; +; CHECK-DAG: ld.param.b32 [[RE0:%r[0-9]+]], [retval0]; ; CHECK-DAG: ld.param.f32 [[RE1:%f[0-9]+]], [retval0+4]; ; CHECK-DAG: ld.param.b32 [[RE2:%r[0-9]+]], [retval0+8]; ; CHECK-DAG: ld.param.f32 [[RE3:%f[0-9]+]], [retval0+12]; ; CHECK-DAG: ld.param.b64 [[RE4:%rd[0-9]+]], [retval0+16]; -; CHECK-DAG: st.param.b32 [func_retval0+0], [[RE0]]; +; CHECK-DAG: st.param.b32 [func_retval0], [[RE0]]; ; CHECK-DAG: st.param.f32 [func_retval0+4], [[RE1]]; ; CHECK-DAG: st.param.b32 [func_retval0+8], [[RE2]]; ; CHECK-DAG: st.param.f32 [func_retval0+12], [[RE3]]; @@ -1049,16 +1049,16 @@ define %s_i32f32 @test_s_i32f32(%s_i32f32 %a) { ; CHECK-DAG: ld.param.v2.u32 {[[E2:%r[0-9]+]], [[E3:%r[0-9]+]]}, [test_s_i32x4_param_0+8]; ; CHECK-DAG: ld.param.v2.u32 {[[E0:%r[0-9]+]], [[E1:%r[0-9]+]]}, [test_s_i32x4_param_0]; ; CHECK: .param .align 8 .b8 param0[24]; -; CHECK: st.param.v2.b32 [param0+0], {[[E0]], [[E1]]}; +; CHECK: st.param.v2.b32 [param0], {[[E0]], [[E1]]}; ; CHECK: st.param.v2.b32 [param0+8], {[[E2]], [[E3]]}; ; CHECK: st.param.b64 [param0+16], [[E4]]; ; CHECK: .param .align 8 .b8 retval0[24]; ; CHECK: call.uni (retval0), ; CHECK-NEXT: test_s_i32x4, -; CHECK: ld.param.v2.b32 {[[RE0:%r[0-9]+]], [[RE1:%r[0-9]+]]}, [retval0+0]; +; CHECK: ld.param.v2.b32 {[[RE0:%r[0-9]+]], [[RE1:%r[0-9]+]]}, [retval0]; ; CHECK: ld.param.v2.b32 {[[RE2:%r[0-9]+]], [[RE3:%r[0-9]+]]}, [retval0+8]; ; CHECK: ld.param.b64 [[RE4:%rd[0-9]+]], [retval0+16]; -; CHECK-DAG: st.param.v2.b32 [func_retval0+0], {[[RE0]], [[RE1]]}; +; CHECK-DAG: st.param.v2.b32 [func_retval0], {[[RE0]], [[RE1]]}; ; CHECK-DAG: st.param.v2.b32 [func_retval0+8], {[[RE2]], [[RE3]]}; ; CHECK-DAG: st.param.b64 [func_retval0+16], [[RE4]]; ; CHECK: ret; @@ -1077,7 +1077,7 @@ define %s_i32x4 @test_s_i32x4(%s_i32x4 %a) { ; CHECK: ld.param.u8 [[E2:%rs[0-9]+]], [test_s_i1i32x4_param_0+8]; ; CHECK: ld.param.v2.u32 {[[E0:%r[0-9]+]], [[E1:%r[0-9]+]]}, [test_s_i1i32x4_param_0]; ; CHECK: .param .align 8 .b8 param0[32]; -; CHECK: st.param.v2.b32 [param0+0], {[[E0]], [[E1]]}; +; CHECK: st.param.v2.b32 [param0], {[[E0]], [[E1]]}; ; CHECK: st.param.b8 [param0+8], [[E2]]; ; CHECK: st.param.b32 [param0+12], [[E3]]; ; CHECK: st.param.b32 [param0+16], [[E4]]; @@ -1088,12 +1088,12 @@ define %s_i32x4 @test_s_i32x4(%s_i32x4 %a) { ; CHECK: ( ; CHECK: param0 ; CHECK: ); -; CHECK: ld.param.v2.b32 {[[RE0:%r[0-9]+]], [[RE1:%r[0-9]+]]}, [retval0+0]; +; CHECK: ld.param.v2.b32 {[[RE0:%r[0-9]+]], [[RE1:%r[0-9]+]]}, [retval0]; ; CHECK: ld.param.b8 [[RE2:%rs[0-9]+]], [retval0+8]; ; CHECK: ld.param.b32 [[RE3:%r[0-9]+]], [retval0+12]; ; CHECK: ld.param.b32 [[RE4:%r[0-9]+]], [retval0+16]; ; CHECK: ld.param.b64 [[RE5:%rd[0-9]+]], [retval0+24]; -; CHECK: st.param.v2.b32 [func_retval0+0], {[[RE0]], [[RE1]]}; +; CHECK: st.param.v2.b32 [func_retval0], {[[RE0]], [[RE1]]}; ; CHECK: st.param.b8 [func_retval0+8], [[RE2]]; ; CHECK: st.param.b32 [func_retval0+12], [[RE3]]; ; CHECK: st.param.b32 [func_retval0+16], [[RE4]]; @@ -1136,7 +1136,7 @@ define %s_i8i32x4 @test_s_i1i32x4(%s_i8i32x4 %a) { ; CHECK-DAG: ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+1]; ; CHECK-DAG: ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0]; ; CHECK: .param .align 1 .b8 param0[25]; -; CHECK-DAG: st.param.b8 [param0+0], +; CHECK-DAG: st.param.b8 
[param0], ; CHECK-DAG: st.param.b8 [param0+1], ; CHECK-DAG: st.param.b8 [param0+2], ; CHECK-DAG: st.param.b8 [param0+3], @@ -1164,7 +1164,7 @@ define %s_i8i32x4 @test_s_i1i32x4(%s_i8i32x4 %a) { ; CHECK: .param .align 1 .b8 retval0[25]; ; CHECK: call.uni (retval0), ; CHECK-NEXT: test_s_i1i32x4p, -; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+0]; +; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0]; ; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+1]; ; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+2]; ; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+3]; @@ -1190,7 +1190,7 @@ define %s_i8i32x4 @test_s_i1i32x4(%s_i8i32x4 %a) { ; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+23]; ; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+24]; ; CHECK: } // callseq -; CHECK-DAG: st.param.b8 [func_retval0+0], +; CHECK-DAG: st.param.b8 [func_retval0], ; CHECK-DAG: st.param.b8 [func_retval0+1], ; CHECK-DAG: st.param.b8 [func_retval0+2], ; CHECK-DAG: st.param.b8 [func_retval0+3], @@ -1232,7 +1232,7 @@ define %s_i8i32x4p @test_s_i1i32x4p(%s_i8i32x4p %a) { ; CHECK: ld.param.u32 [[E2:%r[0-9]+]], [test_s_crossfield_param_0+8]; ; CHECK: ld.param.v2.u32 {[[E0:%r[0-9]+]], [[E1:%r[0-9]+]]}, [test_s_crossfield_param_0]; ; CHECK: .param .align 16 .b8 param0[80]; -; CHECK: st.param.v2.b32 [param0+0], {[[E0]], [[E1]]}; +; CHECK: st.param.v2.b32 [param0], {[[E0]], [[E1]]}; ; CHECK: st.param.b32 [param0+8], [[E2]]; ; CHECK: st.param.v4.b32 [param0+16], {[[E3]], [[E4]], [[E5]], [[E6]]}; ; CHECK: st.param.v4.b32 [param0+32], {[[E7]], [[E8]], [[E9]], [[E10]]}; @@ -1241,13 +1241,13 @@ define %s_i8i32x4p @test_s_i1i32x4p(%s_i8i32x4p %a) { ; CHECK: .param .align 16 .b8 retval0[80]; ; CHECK: call.uni (retval0), ; CHECK: test_s_crossfield, -; CHECK: ld.param.v2.b32 {[[RE0:%r[0-9]+]], [[RE1:%r[0-9]+]]}, [retval0+0]; +; CHECK: ld.param.v2.b32 {[[RE0:%r[0-9]+]], [[RE1:%r[0-9]+]]}, [retval0]; ; CHECK: ld.param.b32 [[RE2:%r[0-9]+]], [retval0+8]; ; CHECK: ld.param.v4.b32 {[[RE3:%r[0-9]+]], [[RE4:%r[0-9]+]], [[RE5:%r[0-9]+]], [[RE6:%r[0-9]+]]}, [retval0+16]; ; CHECK: ld.param.v4.b32 {[[RE7:%r[0-9]+]], [[RE8:%r[0-9]+]], [[RE9:%r[0-9]+]], [[RE10:%r[0-9]+]]}, [retval0+32]; ; CHECK: ld.param.v4.b32 {[[RE11:%r[0-9]+]], [[RE12:%r[0-9]+]], [[RE13:%r[0-9]+]], [[RE14:%r[0-9]+]]}, [retval0+48]; ; CHECK: ld.param.b32 [[RE15:%r[0-9]+]], [retval0+64]; -; CHECK: st.param.v2.b32 [func_retval0+0], {[[RE0]], [[RE1]]}; +; CHECK: st.param.v2.b32 [func_retval0], {[[RE0]], [[RE1]]}; ; CHECK: st.param.b32 [func_retval0+8], [[RE2]]; ; CHECK: st.param.v4.b32 [func_retval0+16], {[[RE3]], [[RE4]], [[RE5]], [[RE6]]}; ; CHECK: st.param.v4.b32 [func_retval0+32], {[[RE7]], [[RE8]], [[RE9]], [[RE10]]}; diff --git a/llvm/test/CodeGen/NVPTX/param-overalign.ll b/llvm/test/CodeGen/NVPTX/param-overalign.ll index 5c09bb8e1a5d72..8c506fb0f75abe 100644 --- a/llvm/test/CodeGen/NVPTX/param-overalign.ll +++ b/llvm/test/CodeGen/NVPTX/param-overalign.ll @@ -28,16 +28,16 @@ define float @caller_md(float %a, float %b) { ; CHECK-NEXT: ld.param.f32 %f2, [caller_md_param_1]; ; CHECK-NEXT: { ; CHECK-NEXT: .param .align 8 .b8 param0[8]; -; CHECK-NEXT: st.param.v2.f32 [param0+0], {%f1, %f2}; +; CHECK-NEXT: st.param.v2.f32 [param0], {%f1, %f2}; ; CHECK-NEXT: .param .b32 retval0; ; CHECK-NEXT: call.uni (retval0), ; CHECK-NEXT: callee_md, ; CHECK-NEXT: ( ; CHECK-NEXT: param0 ; CHECK-NEXT: ); -; CHECK-NEXT: ld.param.f32 %f3, [retval0+0]; +; CHECK-NEXT: ld.param.f32 %f3, [retval0]; ; CHECK-NEXT: } -; CHECK-NEXT: st.param.f32 [func_retval0+0], %f3; +; CHECK-NEXT: st.param.f32 
[func_retval0], %f3; ; CHECK-NEXT: ret; %s1 = insertvalue %struct.float2 poison, float %a, 0 %s2 = insertvalue %struct.float2 %s1, float %b, 1 @@ -53,7 +53,7 @@ define float @callee_md(%struct.float2 %a) { ; CHECK: ld.param.v2.f32 {%f1, %f2}, [callee_md_param_0]; ; CHECK-NEXT: add.rn.f32 %f3, %f1, %f2; -; CHECK-NEXT: st.param.f32 [func_retval0+0], %f3; +; CHECK-NEXT: st.param.f32 [func_retval0], %f3; ; CHECK-NEXT: ret; %v0 = extractvalue %struct.float2 %a, 0 %v1 = extractvalue %struct.float2 %a, 1 @@ -72,16 +72,16 @@ define float @caller(float %a, float %b) { ; CHECK-NEXT: ld.param.f32 %f2, [caller_param_1]; ; CHECK-NEXT: { ; CHECK-NEXT: .param .align 8 .b8 param0[8]; -; CHECK-NEXT: st.param.v2.f32 [param0+0], {%f1, %f2}; +; CHECK-NEXT: st.param.v2.f32 [param0], {%f1, %f2}; ; CHECK-NEXT: .param .b32 retval0; ; CHECK-NEXT: call.uni (retval0), ; CHECK-NEXT: callee, ; CHECK-NEXT: ( ; CHECK-NEXT: param0 ; CHECK-NEXT: ); -; CHECK-NEXT: ld.param.f32 %f3, [retval0+0]; +; CHECK-NEXT: ld.param.f32 %f3, [retval0]; ; CHECK-NEXT: } -; CHECK-NEXT: st.param.f32 [func_retval0+0], %f3; +; CHECK-NEXT: st.param.f32 [func_retval0], %f3; ; CHECK-NEXT: ret; %s1 = insertvalue %struct.float2 poison, float %a, 0 %s2 = insertvalue %struct.float2 %s1, float %b, 1 @@ -97,7 +97,7 @@ define float @callee(%struct.float2 alignstack(8) %a ) { ; CHECK: ld.param.v2.f32 {%f1, %f2}, [callee_param_0]; ; CHECK-NEXT: add.rn.f32 %f3, %f1, %f2; -; CHECK-NEXT: st.param.f32 [func_retval0+0], %f3; +; CHECK-NEXT: st.param.f32 [func_retval0], %f3; ; CHECK-NEXT: ret; %v0 = extractvalue %struct.float2 %a, 0 %v1 = extractvalue %struct.float2 %a, 1 diff --git a/llvm/test/CodeGen/NVPTX/param-vectorize-device.ll b/llvm/test/CodeGen/NVPTX/param-vectorize-device.ll index 55fadf10f8d6d3..db8b1a6f53d13c 100644 --- a/llvm/test/CodeGen/NVPTX/param-vectorize-device.ll +++ b/llvm/test/CodeGen/NVPTX/param-vectorize-device.ll @@ -84,14 +84,14 @@ define dso_local void @caller_St4x1(ptr nocapture noundef readonly byval(%struct ; CHECK: .param .b64 caller_St4x1_param_1 ; CHECK: ) ; CHECK: .param .b32 param0; - ; CHECK: st.param.b32 [param0+0], {{%r[0-9]+}}; + ; CHECK: st.param.b32 [param0], {{%r[0-9]+}}; ; CHECK: .param .align 16 .b8 retval0[4]; ; CHECK: call.uni (retval0), ; CHECK-NEXT: callee_St4x1, ; CHECK-NEXT: ( ; CHECK-NEXT: param0 ; CHECK-NEXT: ); - ; CHECK: ld.param.b32 {{%r[0-9]+}}, [retval0+0]; + ; CHECK: ld.param.b32 {{%r[0-9]+}}, [retval0]; %1 = load i32, ptr %in, align 4 %call = tail call fastcc [1 x i32] @callee_St4x1(i32 %1) #2 %.fca.0.extract = extractvalue [1 x i32] %call, 0 @@ -104,7 +104,7 @@ define internal fastcc [1 x i32] @callee_St4x1(i32 %in.0.val) { ; CHECK-LABEL: callee_St4x1( ; CHECK-NEXT: .param .b32 callee_St4x1_param_0 ; CHECK: ld.param.u32 [[R1:%r[0-9]+]], [callee_St4x1_param_0]; - ; CHECK: st.param.b32 [func_retval0+0], [[R1]]; + ; CHECK: st.param.b32 [func_retval0], [[R1]]; ; CHECK-NEXT: ret; %oldret = insertvalue [1 x i32] poison, i32 %in.0.val, 0 ret [1 x i32] %oldret @@ -116,14 +116,14 @@ define dso_local void @caller_St4x2(ptr nocapture noundef readonly byval(%struct ; CHECK: .param .b64 caller_St4x2_param_1 ; CHECK: ) ; CHECK: .param .align 16 .b8 param0[8]; - ; CHECK: st.param.v2.b32 [param0+0], {{{%r[0-9]+}}, {{%r[0-9]+}}}; + ; CHECK: st.param.v2.b32 [param0], {{{%r[0-9]+}}, {{%r[0-9]+}}}; ; CHECK: .param .align 16 .b8 retval0[8]; ; CHECK: call.uni (retval0), ; CHECK-NEXT: callee_St4x2, ; CHECK-NEXT: ( ; CHECK-NEXT: param0 ; CHECK-NEXT: ); - ; CHECK: ld.param.v2.b32 {{{%r[0-9]+}}, {{%r[0-9]+}}}, [retval0+0]; + ; 
CHECK: ld.param.v2.b32 {{{%r[0-9]+}}, {{%r[0-9]+}}}, [retval0]; %agg.tmp = alloca %struct.St4x2, align 8 %1 = load i64, ptr %in, align 4 store i64 %1, ptr %agg.tmp, align 8 @@ -141,7 +141,7 @@ define internal fastcc [2 x i32] @callee_St4x2(ptr nocapture noundef readonly by ; CHECK-LABEL: callee_St4x2( ; CHECK-NEXT: .param .align 16 .b8 callee_St4x2_param_0[8] ; CHECK: ld.param.v2.u32 {[[R1:%r[0-9]+]], [[R2:%r[0-9]+]]}, [callee_St4x2_param_0]; - ; CHECK: st.param.v2.b32 [func_retval0+0], {[[R1]], [[R2]]}; + ; CHECK: st.param.v2.b32 [func_retval0], {[[R1]], [[R2]]}; ; CHECK-NEXT: ret; %1 = load i32, ptr %in, align 4 %arrayidx.1 = getelementptr inbounds [2 x i32], ptr %in, i64 0, i64 1 @@ -157,7 +157,7 @@ define dso_local void @caller_St4x3(ptr nocapture noundef readonly byval(%struct ; CHECK: .param .b64 caller_St4x3_param_1 ; CHECK: ) ; CHECK: .param .align 16 .b8 param0[12]; - ; CHECK: st.param.v2.b32 [param0+0], {{{%r[0-9]+}}, {{%r[0-9]+}}}; + ; CHECK: st.param.v2.b32 [param0], {{{%r[0-9]+}}, {{%r[0-9]+}}}; ; CHECK: st.param.b32 [param0+8], {{%r[0-9]+}}; ; CHECK: .param .align 16 .b8 retval0[12]; ; CHECK: call.uni (retval0), @@ -165,7 +165,7 @@ define dso_local void @caller_St4x3(ptr nocapture noundef readonly byval(%struct ; CHECK-NEXT: ( ; CHECK-NEXT: param0 ; CHECK-NEXT: ); - ; CHECK: ld.param.v2.b32 {{{%r[0-9]+}}, {{%r[0-9]+}}}, [retval0+0]; + ; CHECK: ld.param.v2.b32 {{{%r[0-9]+}}, {{%r[0-9]+}}}, [retval0]; ; CHECK: ld.param.b32 {{%r[0-9]+}}, [retval0+8]; %call = tail call fastcc [3 x i32] @callee_St4x3(ptr noundef nonnull byval(%struct.St4x3) align 4 %in) #2 %.fca.0.extract = extractvalue [3 x i32] %call, 0 @@ -185,7 +185,7 @@ define internal fastcc [3 x i32] @callee_St4x3(ptr nocapture noundef readonly by ; CHECK-NEXT: .param .align 16 .b8 callee_St4x3_param_0[12] ; CHECK: ld.param.v2.u32 {[[R1:%r[0-9]+]], [[R2:%r[0-9]+]]}, [callee_St4x3_param_0]; ; CHECK: ld.param.u32 [[R3:%r[0-9]+]], [callee_St4x3_param_0+8]; - ; CHECK: st.param.v2.b32 [func_retval0+0], {[[R1]], [[R2]]}; + ; CHECK: st.param.v2.b32 [func_retval0], {[[R1]], [[R2]]}; ; CHECK: st.param.b32 [func_retval0+8], [[R3]]; ; CHECK-NEXT: ret; %1 = load i32, ptr %in, align 4 @@ -205,14 +205,14 @@ define dso_local void @caller_St4x4(ptr nocapture noundef readonly byval(%struct ; CHECK: .param .b64 caller_St4x4_param_1 ; CHECK: ) ; CHECK: .param .align 16 .b8 param0[16]; - ; CHECK: st.param.v4.b32 [param0+0], {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}}; + ; CHECK: st.param.v4.b32 [param0], {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}}; ; CHECK: .param .align 16 .b8 retval0[16]; ; CHECK: call.uni (retval0), ; CHECK-NEXT: callee_St4x4, ; CHECK-NEXT: ( ; CHECK-NEXT: param0 ; CHECK-NEXT: ); - ; CHECK: ld.param.v4.b32 {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}}, [retval0+0]; + ; CHECK: ld.param.v4.b32 {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}}, [retval0]; %call = tail call fastcc [4 x i32] @callee_St4x4(ptr noundef nonnull byval(%struct.St4x4) align 4 %in) #2 %.fca.0.extract = extractvalue [4 x i32] %call, 0 %.fca.1.extract = extractvalue [4 x i32] %call, 1 @@ -233,7 +233,7 @@ define internal fastcc [4 x i32] @callee_St4x4(ptr nocapture noundef readonly by ; CHECK-LABEL: callee_St4x4( ; CHECK-NEXT: .param .align 16 .b8 callee_St4x4_param_0[16] ; CHECK: ld.param.v4.u32 {[[R1:%r[0-9]+]], [[R2:%r[0-9]+]], [[R3:%r[0-9]+]], [[R4:%r[0-9]+]]}, [callee_St4x4_param_0]; - ; CHECK: st.param.v4.b32 [func_retval0+0], {[[R1]], [[R2]], [[R3]], [[R4]]}; + ; CHECK: st.param.v4.b32 [func_retval0], 
{[[R1]], [[R2]], [[R3]], [[R4]]}; ; CHECK-NEXT: ret; %1 = load i32, ptr %in, align 4 %arrayidx.1 = getelementptr inbounds [4 x i32], ptr %in, i64 0, i64 1 @@ -255,7 +255,7 @@ define dso_local void @caller_St4x5(ptr nocapture noundef readonly byval(%struct ; CHECK: .param .b64 caller_St4x5_param_1 ; CHECK: ) ; CHECK: .param .align 16 .b8 param0[20]; - ; CHECK: st.param.v4.b32 [param0+0], {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}}; + ; CHECK: st.param.v4.b32 [param0], {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}}; ; CHECK: st.param.b32 [param0+16], {{%r[0-9]+}}; ; CHECK: .param .align 16 .b8 retval0[20]; ; CHECK: call.uni (retval0), @@ -263,7 +263,7 @@ define dso_local void @caller_St4x5(ptr nocapture noundef readonly byval(%struct ; CHECK-NEXT: ( ; CHECK-NEXT: param0 ; CHECK-NEXT: ); - ; CHECK: ld.param.v4.b32 {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}}, [retval0+0]; + ; CHECK: ld.param.v4.b32 {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}}, [retval0]; ; CHECK: ld.param.b32 {{%r[0-9]+}}, [retval0+16]; %call = tail call fastcc [5 x i32] @callee_St4x5(ptr noundef nonnull byval(%struct.St4x5) align 4 %in) #2 %.fca.0.extract = extractvalue [5 x i32] %call, 0 @@ -289,7 +289,7 @@ define internal fastcc [5 x i32] @callee_St4x5(ptr nocapture noundef readonly by ; CHECK-NEXT: .param .align 16 .b8 callee_St4x5_param_0[20] ; CHECK: ld.param.v4.u32 {[[R1:%r[0-9]+]], [[R2:%r[0-9]+]], [[R3:%r[0-9]+]], [[R4:%r[0-9]+]]}, [callee_St4x5_param_0]; ; CHECK: ld.param.u32 [[R5:%r[0-9]+]], [callee_St4x5_param_0+16]; - ; CHECK: st.param.v4.b32 [func_retval0+0], {[[R1]], [[R2]], [[R3]], [[R4]]}; + ; CHECK: st.param.v4.b32 [func_retval0], {[[R1]], [[R2]], [[R3]], [[R4]]}; ; CHECK: st.param.b32 [func_retval0+16], [[R5]]; ; CHECK-NEXT: ret; %1 = load i32, ptr %in, align 4 @@ -315,7 +315,7 @@ define dso_local void @caller_St4x6(ptr nocapture noundef readonly byval(%struct ; CHECK: .param .b64 caller_St4x6_param_1 ; CHECK: ) ; CHECK: .param .align 16 .b8 param0[24]; - ; CHECK: st.param.v4.b32 [param0+0], {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}}; + ; CHECK: st.param.v4.b32 [param0], {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}}; ; CHECK: st.param.v2.b32 [param0+16], {{{%r[0-9]+}}, {{%r[0-9]+}}}; ; CHECK: .param .align 16 .b8 retval0[24]; ; CHECK: call.uni (retval0), @@ -323,7 +323,7 @@ define dso_local void @caller_St4x6(ptr nocapture noundef readonly byval(%struct ; CHECK-NEXT: ( ; CHECK-NEXT: param0 ; CHECK-NEXT: ); - ; CHECK: ld.param.v4.b32 {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}}, [retval0+0]; + ; CHECK: ld.param.v4.b32 {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}}, [retval0]; ; CHECK: ld.param.v2.b32 {{{%r[0-9]+}}, {{%r[0-9]+}}}, [retval0+16]; %call = tail call fastcc [6 x i32] @callee_St4x6(ptr noundef nonnull byval(%struct.St4x6) align 4 %in) #2 %.fca.0.extract = extractvalue [6 x i32] %call, 0 @@ -352,7 +352,7 @@ define internal fastcc [6 x i32] @callee_St4x6(ptr nocapture noundef readonly by ; CHECK-NEXT: .param .align 16 .b8 callee_St4x6_param_0[24] ; CHECK: ld.param.v4.u32 {[[R1:%r[0-9]+]], [[R2:%r[0-9]+]], [[R3:%r[0-9]+]], [[R4:%r[0-9]+]]}, [callee_St4x6_param_0]; ; CHECK: ld.param.v2.u32 {[[R5:%r[0-9]+]], [[R6:%r[0-9]+]]}, [callee_St4x6_param_0+16]; - ; CHECK: st.param.v4.b32 [func_retval0+0], {[[R1]], [[R2]], [[R3]], [[R4]]}; + ; CHECK: st.param.v4.b32 [func_retval0], {[[R1]], [[R2]], [[R3]], [[R4]]}; ; CHECK: st.param.v2.b32 [func_retval0+16], {[[R5]], [[R6]]}; ; CHECK-NEXT: ret; %1 = load i32, ptr %in, 
align 4 @@ -381,7 +381,7 @@ define dso_local void @caller_St4x7(ptr nocapture noundef readonly byval(%struct ; CHECK: .param .b64 caller_St4x7_param_1 ; CHECK: ) ; CHECK: .param .align 16 .b8 param0[28]; - ; CHECK: st.param.v4.b32 [param0+0], {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}}; + ; CHECK: st.param.v4.b32 [param0], {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}}; ; CHECK: st.param.v2.b32 [param0+16], {{{%r[0-9]+}}, {{%r[0-9]+}}}; ; CHECK: st.param.b32 [param0+24], {{%r[0-9]+}}; ; CHECK: .param .align 16 .b8 retval0[28]; @@ -390,7 +390,7 @@ define dso_local void @caller_St4x7(ptr nocapture noundef readonly byval(%struct ; CHECK-NEXT: ( ; CHECK-NEXT: param0 ; CHECK-NEXT: ); - ; CHECK: ld.param.v4.b32 {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}}, [retval0+0]; + ; CHECK: ld.param.v4.b32 {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}}, [retval0]; ; CHECK: ld.param.v2.b32 {{{%r[0-9]+}}, {{%r[0-9]+}}}, [retval0+16]; ; CHECK: ld.param.b32 {{%r[0-9]+}}, [retval0+24]; %call = tail call fastcc [7 x i32] @callee_St4x7(ptr noundef nonnull byval(%struct.St4x7) align 4 %in) #2 @@ -424,7 +424,7 @@ define internal fastcc [7 x i32] @callee_St4x7(ptr nocapture noundef readonly by ; CHECK: ld.param.v4.u32 {[[R1:%r[0-9]+]], [[R2:%r[0-9]+]], [[R3:%r[0-9]+]], [[R4:%r[0-9]+]]}, [callee_St4x7_param_0]; ; CHECK: ld.param.v2.u32 {[[R5:%r[0-9]+]], [[R6:%r[0-9]+]]}, [callee_St4x7_param_0+16]; ; CHECK: ld.param.u32 [[R7:%r[0-9]+]], [callee_St4x7_param_0+24]; - ; CHECK: st.param.v4.b32 [func_retval0+0], {[[R1]], [[R2]], [[R3]], [[R4]]}; + ; CHECK: st.param.v4.b32 [func_retval0], {[[R1]], [[R2]], [[R3]], [[R4]]}; ; CHECK: st.param.v2.b32 [func_retval0+16], {[[R5]], [[R6]]}; ; CHECK: st.param.b32 [func_retval0+24], [[R7]]; ; CHECK-NEXT: ret; @@ -457,7 +457,7 @@ define dso_local void @caller_St4x8(ptr nocapture noundef readonly byval(%struct ; CHECK: .param .b64 caller_St4x8_param_1 ; CHECK: ) ; CHECK: .param .align 16 .b8 param0[32]; - ; CHECK: st.param.v4.b32 [param0+0], {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}}; + ; CHECK: st.param.v4.b32 [param0], {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}}; ; CHECK: st.param.v4.b32 [param0+16], {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}}; ; CHECK: .param .align 16 .b8 retval0[32]; ; CHECK: call.uni (retval0), @@ -465,7 +465,7 @@ define dso_local void @caller_St4x8(ptr nocapture noundef readonly byval(%struct ; CHECK-NEXT: ( ; CHECK-NEXT: param0 ; CHECK-NEXT: ); - ; CHECK: ld.param.v4.b32 {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}}, [retval0+0]; + ; CHECK: ld.param.v4.b32 {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}}, [retval0]; ; CHECK: ld.param.v4.b32 {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}}, [retval0+16]; %call = tail call fastcc [8 x i32] @callee_St4x8(ptr noundef nonnull byval(%struct.St4x8) align 4 %in) #2 %.fca.0.extract = extractvalue [8 x i32] %call, 0 @@ -500,7 +500,7 @@ define internal fastcc [8 x i32] @callee_St4x8(ptr nocapture noundef readonly by ; CHECK-NEXT: .param .align 16 .b8 callee_St4x8_param_0[32] ; CHECK: ld.param.v4.u32 {[[R1:%r[0-9]+]], [[R2:%r[0-9]+]], [[R3:%r[0-9]+]], [[R4:%r[0-9]+]]}, [callee_St4x8_param_0]; ; CHECK: ld.param.v4.u32 {[[R5:%r[0-9]+]], [[R6:%r[0-9]+]], [[R7:%r[0-9]+]], [[R8:%r[0-9]+]]}, [callee_St4x8_param_0+16]; - ; CHECK: st.param.v4.b32 [func_retval0+0], {[[R1]], [[R2]], [[R3]], [[R4]]}; + ; CHECK: st.param.v4.b32 [func_retval0], {[[R1]], [[R2]], [[R3]], [[R4]]}; ; CHECK: st.param.v4.b32 [func_retval0+16], 
{[[R5]], [[R6]], [[R7]], [[R8]]}; ; CHECK-NEXT: ret; %1 = load i32, ptr %in, align 4 @@ -535,14 +535,14 @@ define dso_local void @caller_St8x1(ptr nocapture noundef readonly byval(%struct ; CHECK: .param .b64 caller_St8x1_param_1 ; CHECK: ) ; CHECK: .param .b64 param0; - ; CHECK: st.param.b64 [param0+0], {{%rd[0-9]+}}; + ; CHECK: st.param.b64 [param0], {{%rd[0-9]+}}; ; CHECK: .param .align 16 .b8 retval0[8]; ; CHECK: call.uni (retval0), ; CHECK-NEXT: callee_St8x1, ; CHECK-NEXT: ( ; CHECK-NEXT: param0 ; CHECK-NEXT: ); - ; CHECK: ld.param.b64 {{%rd[0-9]+}}, [retval0+0]; + ; CHECK: ld.param.b64 {{%rd[0-9]+}}, [retval0]; %1 = load i64, ptr %in, align 8 %call = tail call fastcc [1 x i64] @callee_St8x1(i64 %1) #2 %.fca.0.extract = extractvalue [1 x i64] %call, 0 @@ -555,7 +555,7 @@ define internal fastcc [1 x i64] @callee_St8x1(i64 %in.0.val) { ; CHECK-LABEL: callee_St8x1( ; CHECK-NEXT: .param .b64 callee_St8x1_param_0 ; CHECK: ld.param.u64 [[RD1:%rd[0-9]+]], [callee_St8x1_param_0]; - ; CHECK: st.param.b64 [func_retval0+0], [[RD1]]; + ; CHECK: st.param.b64 [func_retval0], [[RD1]]; ; CHECK-NEXT: ret; %oldret = insertvalue [1 x i64] poison, i64 %in.0.val, 0 ret [1 x i64] %oldret @@ -567,14 +567,14 @@ define dso_local void @caller_St8x2(ptr nocapture noundef readonly byval(%struct ; CHECK: .param .b64 caller_St8x2_param_1 ; CHECK: ) ; CHECK: .param .align 16 .b8 param0[16]; - ; CHECK: st.param.v2.b64 [param0+0], {{{%rd[0-9]+}}, {{%rd[0-9]+}}}; + ; CHECK: st.param.v2.b64 [param0], {{{%rd[0-9]+}}, {{%rd[0-9]+}}}; ; CHECK: .param .align 16 .b8 retval0[16]; ; CHECK: call.uni (retval0), ; CHECK-NEXT: callee_St8x2, ; CHECK-NEXT: ( ; CHECK-NEXT: param0 ; CHECK-NEXT: ); - ; CHECK: ld.param.v2.b64 {{{%rd[0-9]+}}, {{%rd[0-9]+}}}, [retval0+0]; + ; CHECK: ld.param.v2.b64 {{{%rd[0-9]+}}, {{%rd[0-9]+}}}, [retval0]; %call = tail call fastcc [2 x i64] @callee_St8x2(ptr noundef nonnull byval(%struct.St8x2) align 8 %in) #2 %.fca.0.extract = extractvalue [2 x i64] %call, 0 %.fca.1.extract = extractvalue [2 x i64] %call, 1 @@ -589,7 +589,7 @@ define internal fastcc [2 x i64] @callee_St8x2(ptr nocapture noundef readonly by ; CHECK-LABEL: callee_St8x2( ; CHECK-NEXT: .param .align 16 .b8 callee_St8x2_param_0[16] ; CHECK: ld.param.v2.u64 {[[RD1:%rd[0-9]+]], [[RD2:%rd[0-9]+]]}, [callee_St8x2_param_0]; - ; CHECK: st.param.v2.b64 [func_retval0+0], {[[RD1]], [[RD2]]}; + ; CHECK: st.param.v2.b64 [func_retval0], {[[RD1]], [[RD2]]}; ; CHECK-NEXT: ret; %1 = load i64, ptr %in, align 8 %arrayidx.1 = getelementptr inbounds [2 x i64], ptr %in, i64 0, i64 1 @@ -605,7 +605,7 @@ define dso_local void @caller_St8x3(ptr nocapture noundef readonly byval(%struct ; CHECK: .param .b64 caller_St8x3_param_1 ; CHECK: ) ; CHECK: .param .align 16 .b8 param0[24]; - ; CHECK: st.param.v2.b64 [param0+0], {{{%rd[0-9]+}}, {{%rd[0-9]+}}}; + ; CHECK: st.param.v2.b64 [param0], {{{%rd[0-9]+}}, {{%rd[0-9]+}}}; ; CHECK: st.param.b64 [param0+16], {{%rd[0-9]+}}; ; CHECK: .param .align 16 .b8 retval0[24]; ; CHECK: call.uni (retval0), @@ -613,7 +613,7 @@ define dso_local void @caller_St8x3(ptr nocapture noundef readonly byval(%struct ; CHECK-NEXT: ( ; CHECK-NEXT: param0 ; CHECK-NEXT: ); - ; CHECK: ld.param.v2.b64 {{{%rd[0-9]+}}, {{%rd[0-9]+}}}, [retval0+0]; + ; CHECK: ld.param.v2.b64 {{{%rd[0-9]+}}, {{%rd[0-9]+}}}, [retval0]; ; CHECK: ld.param.b64 {{%rd[0-9]+}}, [retval0+16]; %call = tail call fastcc [3 x i64] @callee_St8x3(ptr noundef nonnull byval(%struct.St8x3) align 8 %in) #2 %.fca.0.extract = extractvalue [3 x i64] %call, 0 @@ -633,7 +633,7 @@ define 
internal fastcc [3 x i64] @callee_St8x3(ptr nocapture noundef readonly by ; CHECK-NEXT: .param .align 16 .b8 callee_St8x3_param_0[24] ; CHECK: ld.param.v2.u64 {[[RD1:%rd[0-9]+]], [[RD2:%rd[0-9]+]]}, [callee_St8x3_param_0]; ; CHECK: ld.param.u64 [[RD3:%rd[0-9]+]], [callee_St8x3_param_0+16]; - ; CHECK: st.param.v2.b64 [func_retval0+0], {[[RD1]], [[RD2]]}; + ; CHECK: st.param.v2.b64 [func_retval0], {[[RD1]], [[RD2]]}; ; CHECK: st.param.b64 [func_retval0+16], [[RD3]]; ; CHECK-NEXT: ret; %1 = load i64, ptr %in, align 8 @@ -653,7 +653,7 @@ define dso_local void @caller_St8x4(ptr nocapture noundef readonly byval(%struct ; CHECK: .param .b64 caller_St8x4_param_1 ; CHECK: ) ; CHECK: .param .align 16 .b8 param0[32]; - ; CHECK: st.param.v2.b64 [param0+0], {{{%rd[0-9]+}}, {{%rd[0-9]+}}}; + ; CHECK: st.param.v2.b64 [param0], {{{%rd[0-9]+}}, {{%rd[0-9]+}}}; ; CHECK: st.param.v2.b64 [param0+16], {{{%rd[0-9]+}}, {{%rd[0-9]+}}}; ; CHECK: .param .align 16 .b8 retval0[32]; ; CHECK: call.uni (retval0), @@ -661,7 +661,7 @@ define dso_local void @caller_St8x4(ptr nocapture noundef readonly byval(%struct ; CHECK-NEXT: ( ; CHECK-NEXT: param0 ; CHECK-NEXT: ); - ; CHECK: ld.param.v2.b64 {{{%rd[0-9]+}}, {{%rd[0-9]+}}}, [retval0+0]; + ; CHECK: ld.param.v2.b64 {{{%rd[0-9]+}}, {{%rd[0-9]+}}}, [retval0]; ; CHECK: ld.param.v2.b64 {{{%rd[0-9]+}}, {{%rd[0-9]+}}}, [retval0+16]; %call = tail call fastcc [4 x i64] @callee_St8x4(ptr noundef nonnull byval(%struct.St8x4) align 8 %in) #2 %.fca.0.extract = extractvalue [4 x i64] %call, 0 @@ -684,7 +684,7 @@ define internal fastcc [4 x i64] @callee_St8x4(ptr nocapture noundef readonly by ; CHECK-NEXT: .param .align 16 .b8 callee_St8x4_param_0[32] ; CHECK: ld.param.v2.u64 {[[RD1:%rd[0-9]+]], [[RD2:%rd[0-9]+]]}, [callee_St8x4_param_0]; ; CHECK: ld.param.v2.u64 {[[RD3:%rd[0-9]+]], [[RD4:%rd[0-9]+]]}, [callee_St8x4_param_0+16]; - ; CHECK: st.param.v2.b64 [func_retval0+0], {[[RD1]], [[RD2]]}; + ; CHECK: st.param.v2.b64 [func_retval0], {[[RD1]], [[RD2]]}; ; CHECK: st.param.v2.b64 [func_retval0+16], {[[RD3]], [[RD4]]}; ; CHECK-NEXT: ret; %1 = load i64, ptr %in, align 8 @@ -708,7 +708,7 @@ define private fastcc [4 x i32] @callee_St4x4_private(ptr nocapture noundef read ; CHECK-LABEL: callee_St4x4_private( ; CHECK-NEXT: .param .align 16 .b8 callee_St4x4_private_param_0[16] ; CHECK: ld.param.v4.u32 {[[R1:%r[0-9]+]], [[R2:%r[0-9]+]], [[R3:%r[0-9]+]], [[R4:%r[0-9]+]]}, [callee_St4x4_private_param_0]; - ; CHECK: st.param.v4.b32 [func_retval0+0], {[[R1]], [[R2]], [[R3]], [[R4]]}; + ; CHECK: st.param.v4.b32 [func_retval0], {[[R1]], [[R2]], [[R3]], [[R4]]}; ; CHECK-NEXT: ret; %1 = load i32, ptr %in, align 4 %arrayidx.1 = getelementptr inbounds [4 x i32], ptr %in, i64 0, i64 1 @@ -735,7 +735,7 @@ define external fastcc [4 x i32] @callee_St4x4_external(ptr nocapture noundef re ; CHECK: ld.param.u32 [[R2:%r[0-9]+]], [callee_St4x4_external_param_0+4]; ; CHECK: ld.param.u32 [[R3:%r[0-9]+]], [callee_St4x4_external_param_0+8]; ; CHECK: ld.param.u32 [[R4:%r[0-9]+]], [callee_St4x4_external_param_0+12]; - ; CHECK: st.param.b32 [func_retval0+0], [[R1]]; + ; CHECK: st.param.b32 [func_retval0], [[R1]]; ; CHECK: st.param.b32 [func_retval0+4], [[R2]]; ; CHECK: st.param.b32 [func_retval0+8], [[R3]]; ; CHECK: st.param.b32 [func_retval0+12], [[R4]]; diff --git a/llvm/test/CodeGen/NVPTX/proxy-reg-erasure-ptx.ll b/llvm/test/CodeGen/NVPTX/proxy-reg-erasure-ptx.ll index fa138f3d0936e9..4c9a2ee80c251f 100644 --- a/llvm/test/CodeGen/NVPTX/proxy-reg-erasure-ptx.ll +++ b/llvm/test/CodeGen/NVPTX/proxy-reg-erasure-ptx.ll @@ 
-12,14 +12,14 @@ declare i1 @callee_i1() define i1 @check_i1() { ; PTX-LABEL: check_i1 ; PTX-DAG: { // callseq {{[0-9]+}}, {{[0-9]+}} - ; PTX-DAG: ld.param.b32 [[LD:%r[0-9]+]], [retval0+0]; + ; PTX-DAG: ld.param.b32 [[LD:%r[0-9]+]], [retval0]; ; PTX-DAG: } // callseq {{[0-9]+}} ; PTX-WITHOUT-DAG: mov.b32 [[PROXY:%r[0-9]+]], [[LD]]; ; PTX-WITHOUT-DAG: and.b32 [[RES:%r[0-9]+]], [[PROXY]], 1; ; PTX-WITH-DAG: and.b32 [[RES:%r[0-9]+]], [[LD]], 1; - ; PTX-DAG: st.param.b32 [func_retval0+0], [[RES]]; + ; PTX-DAG: st.param.b32 [func_retval0], [[RES]]; %ret = call i1 @callee_i1() ret i1 %ret @@ -29,14 +29,14 @@ declare i16 @callee_i16() define i16 @check_i16() { ; PTX-LABEL: check_i16 ; PTX-DAG: { // callseq {{[0-9]+}}, {{[0-9]+}} - ; PTX-DAG: ld.param.b32 [[LD:%r[0-9]+]], [retval0+0]; + ; PTX-DAG: ld.param.b32 [[LD:%r[0-9]+]], [retval0]; ; PTX-DAG: } // callseq {{[0-9]+}} ; PTX-WITHOUT-DAG: mov.b32 [[PROXY:%r[0-9]+]], [[LD]]; ; PTX-WITHOUT-DAG: and.b32 [[RES:%r[0-9]+]], [[PROXY]], 65535; ; PTX-WITH-DAG: and.b32 [[RES:%r[0-9]+]], [[LD]], 65535; - ; PTX-DAG: st.param.b32 [func_retval0+0], [[RES]]; + ; PTX-DAG: st.param.b32 [func_retval0], [[RES]]; %ret = call i16 @callee_i16() ret i16 %ret @@ -46,12 +46,12 @@ declare i32 @callee_i32() define i32 @check_i32() { ; PTX-LABEL: check_i32 ; PTX-DAG: { // callseq {{[0-9]+}}, {{[0-9]+}} - ; PTX-DAG: ld.param.b32 [[LD:%r[0-9]+]], [retval0+0]; + ; PTX-DAG: ld.param.b32 [[LD:%r[0-9]+]], [retval0]; ; PTX-DAG: } // callseq {{[0-9]+}} ; PTX-WITHOUT-DAG: mov.b32 [[PROXY:%r[0-9]+]], [[LD]]; - ; PTX-WITHOUT-DAG: st.param.b32 [func_retval0+0], [[PROXY]]; - ; PTX-WITH-DAG: st.param.b32 [func_retval0+0], [[LD]]; + ; PTX-WITHOUT-DAG: st.param.b32 [func_retval0], [[PROXY]]; + ; PTX-WITH-DAG: st.param.b32 [func_retval0], [[LD]]; %ret = call i32 @callee_i32() ret i32 %ret @@ -61,12 +61,12 @@ declare i64 @callee_i64() define i64 @check_i64() { ; PTX-LABEL: check_i64 ; PTX-DAG: { // callseq {{[0-9]+}}, {{[0-9]+}} - ; PTX-DAG: ld.param.b64 [[LD:%rd[0-9]+]], [retval0+0]; + ; PTX-DAG: ld.param.b64 [[LD:%rd[0-9]+]], [retval0]; ; PTX-DAG: } // callseq {{[0-9]+}} ; PTX-WITHOUT-DAG: mov.b64 [[PROXY:%rd[0-9]+]], [[LD]]; - ; PTX-WITHOUT-DAG: st.param.b64 [func_retval0+0], [[PROXY]]; - ; PTX-WITH-DAG: st.param.b64 [func_retval0+0], [[LD]]; + ; PTX-WITHOUT-DAG: st.param.b64 [func_retval0], [[PROXY]]; + ; PTX-WITH-DAG: st.param.b64 [func_retval0], [[LD]]; %ret = call i64 @callee_i64() ret i64 %ret @@ -76,13 +76,13 @@ declare i128 @callee_i128() define i128 @check_i128() { ; PTX-LABEL: check_i128 ; PTX-DAG: { // callseq {{[0-9]+}}, {{[0-9]+}} - ; PTX-DAG: ld.param.v2.b64 {[[LD0:%rd[0-9]+]], [[LD1:%rd[0-9]+]]}, [retval0+0]; + ; PTX-DAG: ld.param.v2.b64 {[[LD0:%rd[0-9]+]], [[LD1:%rd[0-9]+]]}, [retval0]; ; PTX-DAG: } // callseq {{[0-9]+}} ; PTX-WITHOUT-DAG: mov.b64 [[PROXY0:%rd[0-9]+]], [[LD0]]; ; PTX-WITHOUT-DAG: mov.b64 [[PROXY1:%rd[0-9]+]], [[LD1]]; - ; PTX-WITHOUT-DAG: st.param.v2.b64 [func_retval0+0], {[[PROXY0]], [[PROXY1]]}; - ; PTX-WITH-DAG: st.param.v2.b64 [func_retval0+0], {[[LD0]], [[LD1]]}; + ; PTX-WITHOUT-DAG: st.param.v2.b64 [func_retval0], {[[PROXY0]], [[PROXY1]]}; + ; PTX-WITH-DAG: st.param.v2.b64 [func_retval0], {[[LD0]], [[LD1]]}; %ret = call i128 @callee_i128() ret i128 %ret @@ -92,12 +92,12 @@ declare half @callee_f16() define half @check_f16() { ; PTX-LABEL: check_f16 ; PTX-DAG: { // callseq {{[0-9]+}}, {{[0-9]+}} - ; PTX-DAG: ld.param.b16 [[LD:%rs[0-9]+]], [retval0+0]; + ; PTX-DAG: ld.param.b16 [[LD:%rs[0-9]+]], [retval0]; ; PTX-DAG: } // callseq {{[0-9]+}} ; 
PTX-WITHOUT-DAG: mov.b16 [[PROXY:%rs[0-9]+]], [[LD]]; - ; PTX-WITHOUT-DAG: st.param.b16 [func_retval0+0], [[PROXY]]; - ; PTX-WITH-DAG: st.param.b16 [func_retval0+0], [[LD]]; + ; PTX-WITHOUT-DAG: st.param.b16 [func_retval0], [[PROXY]]; + ; PTX-WITH-DAG: st.param.b16 [func_retval0], [[LD]]; %ret = call half @callee_f16() ret half %ret @@ -107,12 +107,12 @@ declare float @callee_f32() define float @check_f32() { ; PTX-LABEL: check_f32 ; PTX-DAG: { // callseq {{[0-9]+}}, {{[0-9]+}} - ; PTX-DAG: ld.param.f32 [[LD:%f[0-9]+]], [retval0+0]; + ; PTX-DAG: ld.param.f32 [[LD:%f[0-9]+]], [retval0]; ; PTX-DAG: } // callseq {{[0-9]+}} ; PTX-WITHOUT-DAG: mov.f32 [[PROXY:%f[0-9]+]], [[LD]]; - ; PTX-WITHOUT-DAG: st.param.f32 [func_retval0+0], [[PROXY]]; - ; PTX-WITH-DAG: st.param.f32 [func_retval0+0], [[LD]]; + ; PTX-WITHOUT-DAG: st.param.f32 [func_retval0], [[PROXY]]; + ; PTX-WITH-DAG: st.param.f32 [func_retval0], [[LD]]; %ret = call float @callee_f32() ret float %ret @@ -122,12 +122,12 @@ declare double @callee_f64() define double @check_f64() { ; PTX-LABEL: check_f64 ; PTX-DAG: { // callseq {{[0-9]+}}, {{[0-9]+}} - ; PTX-DAG: ld.param.f64 [[LD:%fd[0-9]+]], [retval0+0]; + ; PTX-DAG: ld.param.f64 [[LD:%fd[0-9]+]], [retval0]; ; PTX-DAG: } // callseq {{[0-9]+}} ; PTX-WITHOUT-DAG: mov.f64 [[PROXY:%fd[0-9]+]], [[LD]]; - ; PTX-WITHOUT-DAG: st.param.f64 [func_retval0+0], [[PROXY]]; - ; PTX-WITH-DAG: st.param.f64 [func_retval0+0], [[LD]]; + ; PTX-WITHOUT-DAG: st.param.f64 [func_retval0], [[PROXY]]; + ; PTX-WITH-DAG: st.param.f64 [func_retval0], [[LD]]; %ret = call double @callee_f64() ret double %ret @@ -137,15 +137,15 @@ declare <4 x i32> @callee_vec_i32() define <4 x i32> @check_vec_i32() { ; PTX-LABEL: check_vec_i32 ; PTX-DAG: { // callseq {{[0-9]+}}, {{[0-9]+}} - ; PTX-DAG: ld.param.v4.b32 {[[LD0:%r[0-9]+]], [[LD1:%r[0-9]+]], [[LD2:%r[0-9]+]], [[LD3:%r[0-9]+]]}, [retval0+0]; + ; PTX-DAG: ld.param.v4.b32 {[[LD0:%r[0-9]+]], [[LD1:%r[0-9]+]], [[LD2:%r[0-9]+]], [[LD3:%r[0-9]+]]}, [retval0]; ; PTX-DAG: } // callseq {{[0-9]+}} ; PTX-WITHOUT-DAG: mov.b32 [[PROXY0:%r[0-9]+]], [[LD0]]; ; PTX-WITHOUT-DAG: mov.b32 [[PROXY1:%r[0-9]+]], [[LD1]]; ; PTX-WITHOUT-DAG: mov.b32 [[PROXY2:%r[0-9]+]], [[LD2]]; ; PTX-WITHOUT-DAG: mov.b32 [[PROXY3:%r[0-9]+]], [[LD3]]; - ; PTX-WITHOUT-DAG: st.param.v4.b32 [func_retval0+0], {[[PROXY0]], [[PROXY1]], [[PROXY2]], [[PROXY3]]}; - ; PTX-WITH-DAG: st.param.v4.b32 [func_retval0+0], {[[LD0]], [[LD1]], [[LD2]], [[LD3]]}; + ; PTX-WITHOUT-DAG: st.param.v4.b32 [func_retval0], {[[PROXY0]], [[PROXY1]], [[PROXY2]], [[PROXY3]]}; + ; PTX-WITH-DAG: st.param.v4.b32 [func_retval0], {[[LD0]], [[LD1]], [[LD2]], [[LD3]]}; %ret = call <4 x i32> @callee_vec_i32() ret <4 x i32> %ret @@ -155,12 +155,12 @@ declare <2 x half> @callee_vec_f16() define <2 x half> @check_vec_f16() { ; PTX-LABEL: check_vec_f16 ; PTX-DAG: { // callseq {{[0-9]+}}, {{[0-9]+}} - ; PTX-DAG: ld.param.b32 [[LD:%r[0-9]+]], [retval0+0]; + ; PTX-DAG: ld.param.b32 [[LD:%r[0-9]+]], [retval0]; ; PTX-DAG: } // callseq {{[0-9]+}} ; PTX-WITHOUT-DAG: mov.b32 [[PROXY:%r[0-9]+]], [[LD]]; - ; PTX-WITHOUT-DAG: st.param.b32 [func_retval0+0], [[PROXY]]; - ; PTX-WITH-DAG: st.param.b32 [func_retval0+0], [[LD]]; + ; PTX-WITHOUT-DAG: st.param.b32 [func_retval0], [[PROXY]]; + ; PTX-WITH-DAG: st.param.b32 [func_retval0], [[LD]]; %ret = call <2 x half> @callee_vec_f16() ret <2 x half> %ret @@ -170,13 +170,13 @@ declare <2 x double> @callee_vec_f64() define <2 x double> @check_vec_f64() { ; PTX-LABEL: check_vec_f64 ; PTX-DAG: { // callseq {{[0-9]+}}, {{[0-9]+}} - ; 
PTX-DAG: ld.param.v2.f64 {[[LD0:%fd[0-9]+]], [[LD1:%fd[0-9]+]]}, [retval0+0]; + ; PTX-DAG: ld.param.v2.f64 {[[LD0:%fd[0-9]+]], [[LD1:%fd[0-9]+]]}, [retval0]; ; PTX-DAG: } // callseq {{[0-9]+}} ; PTX-WITHOUT-DAG: mov.f64 [[PROXY0:%fd[0-9]+]], [[LD0]]; ; PTX-WITHOUT-DAG: mov.f64 [[PROXY1:%fd[0-9]+]], [[LD1]]; - ; PTX-WITHOUT-DAG: st.param.v2.f64 [func_retval0+0], {[[PROXY0]], [[PROXY1]]}; - ; PTX-WITH-DAG: st.param.v2.f64 [func_retval0+0], {[[LD0]], [[LD1]]}; + ; PTX-WITHOUT-DAG: st.param.v2.f64 [func_retval0], {[[PROXY0]], [[PROXY1]]}; + ; PTX-WITH-DAG: st.param.v2.f64 [func_retval0], {[[LD0]], [[LD1]]}; %ret = call <2 x double> @callee_vec_f64() ret <2 x double> %ret diff --git a/llvm/test/CodeGen/NVPTX/rcp-opt.ll b/llvm/test/CodeGen/NVPTX/rcp-opt.ll index e2443c27e8490a..ccc3db54009785 100644 --- a/llvm/test/CodeGen/NVPTX/rcp-opt.ll +++ b/llvm/test/CodeGen/NVPTX/rcp-opt.ll @@ -15,7 +15,7 @@ define double @test1(double %in) { ; CHECK-NEXT: ld.param.f64 %fd1, [test1_param_0]; ; CHECK-NEXT: rcp.rn.f64 %fd2, %fd1; ; CHECK-NEXT: neg.f64 %fd3, %fd2; -; CHECK-NEXT: st.param.f64 [func_retval0+0], %fd3; +; CHECK-NEXT: st.param.f64 [func_retval0], %fd3; ; CHECK-NEXT: ret; %div = fdiv double 1.000000e+00, %in %neg = fsub double -0.000000e+00, %div @@ -33,7 +33,7 @@ define double @test2(double %in) { ; CHECK-NEXT: ld.param.f64 %fd1, [test2_param_0]; ; CHECK-NEXT: rcp.rn.f64 %fd2, %fd1; ; CHECK-NEXT: neg.f64 %fd3, %fd2; -; CHECK-NEXT: st.param.f64 [func_retval0+0], %fd3; +; CHECK-NEXT: st.param.f64 [func_retval0], %fd3; ; CHECK-NEXT: ret; %div = fdiv double -1.000000e+00, %in ret double %div @@ -50,7 +50,7 @@ define double @test3(double %in) { ; CHECK-NEXT: ld.param.f64 %fd1, [test3_param_0]; ; CHECK-NEXT: rcp.rn.f64 %fd2, %fd1; ; CHECK-NEXT: neg.f64 %fd3, %fd2; -; CHECK-NEXT: st.param.f64 [func_retval0+0], %fd3; +; CHECK-NEXT: st.param.f64 [func_retval0], %fd3; ; CHECK-NEXT: ret; %neg = fsub double -0.000000e+00, %in %div = fdiv double 1.000000e+00, %neg diff --git a/llvm/test/CodeGen/NVPTX/rotate.ll b/llvm/test/CodeGen/NVPTX/rotate.ll index 6586393f83d440..4174fd2f3ec2cc 100644 --- a/llvm/test/CodeGen/NVPTX/rotate.ll +++ b/llvm/test/CodeGen/NVPTX/rotate.ll @@ -31,7 +31,7 @@ define i32 @rotate32(i32 %a, i32 %b) { ; SM20-NEXT: and.b32 %r6, %r5, 31; ; SM20-NEXT: shr.u32 %r7, %r1, %r6; ; SM20-NEXT: or.b32 %r8, %r4, %r7; -; SM20-NEXT: st.param.b32 [func_retval0+0], %r8; +; SM20-NEXT: st.param.b32 [func_retval0], %r8; ; SM20-NEXT: ret; ; ; SM35-LABEL: rotate32( @@ -42,7 +42,7 @@ define i32 @rotate32(i32 %a, i32 %b) { ; SM35-NEXT: ld.param.u32 %r1, [rotate32_param_0]; ; SM35-NEXT: ld.param.u32 %r2, [rotate32_param_1]; ; SM35-NEXT: shf.l.wrap.b32 %r3, %r1, %r1, %r2; -; SM35-NEXT: st.param.b32 [func_retval0+0], %r3; +; SM35-NEXT: st.param.b32 [func_retval0], %r3; ; SM35-NEXT: ret; %val = tail call i32 @llvm.nvvm.rotate.b32(i32 %a, i32 %b) ret i32 %val @@ -65,7 +65,7 @@ define i64 @rotate64(i64 %a, i32 %b) { ; SM20-NEXT: and.b32 %r4, %r3, 63; ; SM20-NEXT: shr.u64 %rd3, %rd1, %r4; ; SM20-NEXT: or.b64 %rd4, %rd2, %rd3; -; SM20-NEXT: st.param.b64 [func_retval0+0], %rd4; +; SM20-NEXT: st.param.b64 [func_retval0], %rd4; ; SM20-NEXT: ret; ; ; SM35-LABEL: rotate64( @@ -82,7 +82,7 @@ define i64 @rotate64(i64 %a, i32 %b) { ; SM35-NEXT: and.b32 %r4, %r3, 63; ; SM35-NEXT: shr.u64 %rd3, %rd1, %r4; ; SM35-NEXT: or.b64 %rd4, %rd2, %rd3; -; SM35-NEXT: st.param.b64 [func_retval0+0], %rd4; +; SM35-NEXT: st.param.b64 [func_retval0], %rd4; ; SM35-NEXT: ret; %val = tail call i64 @llvm.nvvm.rotate.b64(i64 %a, i32 %b) ret i64 %val 
@@ -105,7 +105,7 @@ define i64 @rotateright64(i64 %a, i32 %b) {
; SM20-NEXT: and.b32 %r4, %r3, 63;
; SM20-NEXT: shl.b64 %rd3, %rd1, %r4;
; SM20-NEXT: or.b64 %rd4, %rd2, %rd3;
-; SM20-NEXT: st.param.b64 [func_retval0+0], %rd4;
+; SM20-NEXT: st.param.b64 [func_retval0], %rd4;
; SM20-NEXT: ret;
;
; SM35-LABEL: rotateright64(
@@ -122,7 +122,7 @@ define i64 @rotateright64(i64 %a, i32 %b) {
; SM35-NEXT: and.b32 %r4, %r3, 63;
; SM35-NEXT: shl.b64 %rd3, %rd1, %r4;
; SM35-NEXT: or.b64 %rd4, %rd2, %rd3;
-; SM35-NEXT: st.param.b64 [func_retval0+0], %rd4;
+; SM35-NEXT: st.param.b64 [func_retval0], %rd4;
; SM35-NEXT: ret;
%val = tail call i64 @llvm.nvvm.rotate.right.b64(i64 %a, i32 %b)
ret i64 %val
@@ -140,7 +140,7 @@ define i32 @rotl0(i32 %x) {
; SM20-NEXT: shr.u32 %r2, %r1, 24;
; SM20-NEXT: shl.b32 %r3, %r1, 8;
; SM20-NEXT: or.b32 %r4, %r3, %r2;
-; SM20-NEXT: st.param.b32 [func_retval0+0], %r4;
+; SM20-NEXT: st.param.b32 [func_retval0], %r4;
; SM20-NEXT: ret;
;
; SM35-LABEL: rotl0(
@@ -150,7 +150,7 @@ define i32 @rotl0(i32 %x) {
; SM35-NEXT: // %bb.0:
; SM35-NEXT: ld.param.u32 %r1, [rotl0_param_0];
; SM35-NEXT: shf.l.wrap.b32 %r2, %r1, %r1, 8;
-; SM35-NEXT: st.param.b32 [func_retval0+0], %r2;
+; SM35-NEXT: st.param.b32 [func_retval0], %r2;
; SM35-NEXT: ret;
%t0 = shl i32 %x, 8
%t1 = lshr i32 %x, 24
@@ -174,7 +174,7 @@ define i64 @rotl64(i64 %a, i64 %n) {
; SM20-NEXT: and.b32 %r4, %r3, 63;
; SM20-NEXT: shr.u64 %rd3, %rd1, %r4;
; SM20-NEXT: or.b64 %rd4, %rd2, %rd3;
-; SM20-NEXT: st.param.b64 [func_retval0+0], %rd4;
+; SM20-NEXT: st.param.b64 [func_retval0], %rd4;
; SM20-NEXT: ret;
;
; SM35-LABEL: rotl64(
@@ -191,7 +191,7 @@ define i64 @rotl64(i64 %a, i64 %n) {
; SM35-NEXT: and.b32 %r4, %r3, 63;
; SM35-NEXT: shr.u64 %rd3, %rd1, %r4;
; SM35-NEXT: or.b64 %rd4, %rd2, %rd3;
-; SM35-NEXT: st.param.b64 [func_retval0+0], %rd4;
+; SM35-NEXT: st.param.b64 [func_retval0], %rd4;
; SM35-NEXT: ret;
%val = tail call i64 @llvm.fshl.i64(i64 %a, i64 %a, i64 %n)
ret i64 %val
@@ -208,7 +208,7 @@ define i64 @rotl64_imm(i64 %a) {
; SM20-NEXT: shr.u64 %rd2, %rd1, 62;
; SM20-NEXT: shl.b64 %rd3, %rd1, 2;
; SM20-NEXT: or.b64 %rd4, %rd3, %rd2;
-; SM20-NEXT: st.param.b64 [func_retval0+0], %rd4;
+; SM20-NEXT: st.param.b64 [func_retval0], %rd4;
; SM20-NEXT: ret;
;
; SM35-LABEL: rotl64_imm(
@@ -220,7 +220,7 @@ define i64 @rotl64_imm(i64 %a) {
; SM35-NEXT: shr.u64 %rd2, %rd1, 62;
; SM35-NEXT: shl.b64 %rd3, %rd1, 2;
; SM35-NEXT: or.b64 %rd4, %rd3, %rd2;
-; SM35-NEXT: st.param.b64 [func_retval0+0], %rd4;
+; SM35-NEXT: st.param.b64 [func_retval0], %rd4;
; SM35-NEXT: ret;
%val = tail call i64 @llvm.fshl.i64(i64 %a, i64 %a, i64 66)
ret i64 %val
@@ -242,7 +242,7 @@ define i64 @rotr64(i64 %a, i64 %n) {
; SM20-NEXT: and.b32 %r4, %r3, 63;
; SM20-NEXT: shl.b64 %rd3, %rd1, %r4;
; SM20-NEXT: or.b64 %rd4, %rd2, %rd3;
-; SM20-NEXT: st.param.b64 [func_retval0+0], %rd4;
+; SM20-NEXT: st.param.b64 [func_retval0], %rd4;
; SM20-NEXT: ret;
;
; SM35-LABEL: rotr64(
@@ -259,7 +259,7 @@ define i64 @rotr64(i64 %a, i64 %n) {
; SM35-NEXT: and.b32 %r4, %r3, 63;
; SM35-NEXT: shl.b64 %rd3, %rd1, %r4;
; SM35-NEXT: or.b64 %rd4, %rd2, %rd3;
-; SM35-NEXT: st.param.b64 [func_retval0+0], %rd4;
+; SM35-NEXT: st.param.b64 [func_retval0], %rd4;
; SM35-NEXT: ret;
%val = tail call i64 @llvm.fshr.i64(i64 %a, i64 %a, i64 %n)
ret i64 %val
@@ -276,7 +276,7 @@ define i64 @rotr64_imm(i64 %a) {
; SM20-NEXT: shl.b64 %rd2, %rd1, 62;
; SM20-NEXT: shr.u64 %rd3, %rd1, 2;
; SM20-NEXT: or.b64 %rd4, %rd3, %rd2;
-; SM20-NEXT: st.param.b64 [func_retval0+0], %rd4;
+; SM20-NEXT: st.param.b64 [func_retval0], %rd4;
; SM20-NEXT: ret;
;
; SM35-LABEL: rotr64_imm(
@@ -288,7 +288,7 @@ define i64 @rotr64_imm(i64 %a) {
; SM35-NEXT: shl.b64 %rd2, %rd1, 62;
; SM35-NEXT: shr.u64 %rd3, %rd1, 2;
; SM35-NEXT: or.b64 %rd4, %rd3, %rd2;
-; SM35-NEXT: st.param.b64 [func_retval0+0], %rd4;
+; SM35-NEXT: st.param.b64 [func_retval0], %rd4;
; SM35-NEXT: ret;
%val = tail call i64 @llvm.fshr.i64(i64 %a, i64 %a, i64 66)
ret i64 %val
@@ -310,7 +310,7 @@ define i32 @funnel_shift_right_32(i32 %a, i32 %b, i32 %c) {
; SM20-NEXT: and.b32 %r8, %r7, 31;
; SM20-NEXT: shl.b32 %r9, %r6, %r8;
; SM20-NEXT: or.b32 %r10, %r9, %r5;
-; SM20-NEXT: st.param.b32 [func_retval0+0], %r10;
+; SM20-NEXT: st.param.b32 [func_retval0], %r10;
; SM20-NEXT: ret;
;
; SM35-LABEL: funnel_shift_right_32(
@@ -322,7 +322,7 @@ define i32 @funnel_shift_right_32(i32 %a, i32 %b, i32 %c) {
; SM35-NEXT: ld.param.u32 %r2, [funnel_shift_right_32_param_1];
; SM35-NEXT: ld.param.u32 %r3, [funnel_shift_right_32_param_2];
; SM35-NEXT: shf.r.wrap.b32 %r4, %r2, %r1, %r3;
-; SM35-NEXT: st.param.b32 [func_retval0+0], %r4;
+; SM35-NEXT: st.param.b32 [func_retval0], %r4;
; SM35-NEXT: ret;
%val = call i32 @llvm.fshr.i32(i32 %a, i32 %b, i32 %c)
ret i32 %val
@@ -344,7 +344,7 @@ define i32 @funnel_shift_left_32(i32 %a, i32 %b, i32 %c) {
; SM20-NEXT: and.b32 %r8, %r7, 31;
; SM20-NEXT: shr.u32 %r9, %r6, %r8;
; SM20-NEXT: or.b32 %r10, %r4, %r9;
-; SM20-NEXT: st.param.b32 [func_retval0+0], %r10;
+; SM20-NEXT: st.param.b32 [func_retval0], %r10;
; SM20-NEXT: ret;
;
; SM35-LABEL: funnel_shift_left_32(
@@ -356,7 +356,7 @@ define i32 @funnel_shift_left_32(i32 %a, i32 %b, i32 %c) {
; SM35-NEXT: ld.param.u32 %r2, [funnel_shift_left_32_param_1];
; SM35-NEXT: ld.param.u32 %r3, [funnel_shift_left_32_param_2];
; SM35-NEXT: shf.l.wrap.b32 %r4, %r2, %r1, %r3;
-; SM35-NEXT: st.param.b32 [func_retval0+0], %r4;
+; SM35-NEXT: st.param.b32 [func_retval0], %r4;
; SM35-NEXT: ret;
%val = call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %c)
ret i32 %val
@@ -379,7 +379,7 @@ define i64 @funnel_shift_right_64(i64 %a, i64 %b, i64 %c) {
; SM20-NEXT: and.b32 %r4, %r3, 63;
; SM20-NEXT: shl.b64 %rd5, %rd4, %r4;
; SM20-NEXT: or.b64 %rd6, %rd5, %rd3;
-; SM20-NEXT: st.param.b64 [func_retval0+0], %rd6;
+; SM20-NEXT: st.param.b64 [func_retval0], %rd6;
; SM20-NEXT: ret;
;
; SM35-LABEL: funnel_shift_right_64(
@@ -398,7 +398,7 @@ define i64 @funnel_shift_right_64(i64 %a, i64 %b, i64 %c) {
; SM35-NEXT: and.b32 %r4, %r3, 63;
; SM35-NEXT: shl.b64 %rd5, %rd4, %r4;
; SM35-NEXT: or.b64 %rd6, %rd5, %rd3;
-; SM35-NEXT: st.param.b64 [func_retval0+0], %rd6;
+; SM35-NEXT: st.param.b64 [func_retval0], %rd6;
; SM35-NEXT: ret;
%val = call i64 @llvm.fshr.i64(i64 %a, i64 %b, i64 %c)
ret i64 %val
@@ -421,7 +421,7 @@ define i64 @funnel_shift_left_64(i64 %a, i64 %b, i64 %c) {
; SM20-NEXT: and.b32 %r4, %r3, 63;
; SM20-NEXT: shr.u64 %rd5, %rd4, %r4;
; SM20-NEXT: or.b64 %rd6, %rd2, %rd5;
-; SM20-NEXT: st.param.b64 [func_retval0+0], %rd6;
+; SM20-NEXT: st.param.b64 [func_retval0], %rd6;
; SM20-NEXT: ret;
;
; SM35-LABEL: funnel_shift_left_64(
@@ -440,7 +440,7 @@ define i64 @funnel_shift_left_64(i64 %a, i64 %b, i64 %c) {
; SM35-NEXT: and.b32 %r4, %r3, 63;
; SM35-NEXT: shr.u64 %rd5, %rd4, %r4;
; SM35-NEXT: or.b64 %rd6, %rd2, %rd5;
-; SM35-NEXT: st.param.b64 [func_retval0+0], %rd6;
+; SM35-NEXT: st.param.b64 [func_retval0], %rd6;
; SM35-NEXT: ret;
%val = call i64 @llvm.fshl.i64(i64 %a, i64 %b, i64 %c)
ret i64 %val
diff --git a/llvm/test/CodeGen/NVPTX/rotate_64.ll b/llvm/test/CodeGen/NVPTX/rotate_64.ll
index 05fdb02ac74794..d4851f55d93c99 100644
--- a/llvm/test/CodeGen/NVPTX/rotate_64.ll
+++ b/llvm/test/CodeGen/NVPTX/rotate_64.ll
@@ -15,7 +15,7 @@ define i64 @rotate64(i64 %a, i32 %b) {
; CHECK-NEXT: shr.u64 %rd2, %rd1, 61;
; CHECK-NEXT: shl.b64 %rd3, %rd1, 3;
; CHECK-NEXT: or.b64 %rd4, %rd3, %rd2;
-; CHECK-NEXT: st.param.b64 [func_retval0+0], %rd4;
+; CHECK-NEXT: st.param.b64 [func_retval0], %rd4;
; CHECK-NEXT: ret;
%val = tail call i64 @llvm.nvvm.rotate.b64(i64 %a, i32 3)
ret i64 %val
@@ -31,7 +31,7 @@ define i64 @rotateright64(i64 %a, i32 %b) {
; CHECK-NEXT: shl.b64 %rd2, %rd1, 61;
; CHECK-NEXT: shr.u64 %rd3, %rd1, 3;
; CHECK-NEXT: or.b64 %rd4, %rd3, %rd2;
-; CHECK-NEXT: st.param.b64 [func_retval0+0], %rd4;
+; CHECK-NEXT: st.param.b64 [func_retval0], %rd4;
; CHECK-NEXT: ret;
%val = tail call i64 @llvm.nvvm.rotate.right.b64(i64 %a, i32 3)
ret i64 %val
diff --git a/llvm/test/CodeGen/NVPTX/sad-intrins.ll b/llvm/test/CodeGen/NVPTX/sad-intrins.ll
index a09413bc4e5242..8258dca605e9ef 100644
--- a/llvm/test/CodeGen/NVPTX/sad-intrins.ll
+++ b/llvm/test/CodeGen/NVPTX/sad-intrins.ll
@@ -14,7 +14,7 @@ define i16 @test_sad_i16(i16 %x, i16 %y, i16 %z) {
; CHECK-NEXT: ld.param.u16 %rs3, [test_sad_i16_param_2];
; CHECK-NEXT: sad.s16 %rs4, %rs1, %rs2, %rs3;
; CHECK-NEXT: cvt.u32.u16 %r1, %rs4;
-; CHECK-NEXT: st.param.b32 [func_retval0+0], %r1;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
; CHECK-NEXT: ret;
%1 = call i16 @llvm.nvvm.sad.s(i16 %x, i16 %y, i16 %z)
ret i16 %1
@@ -32,7 +32,7 @@ define i16 @test_sad_u16(i16 %x, i16 %y, i16 %z) {
; CHECK-NEXT: ld.param.u16 %rs3, [test_sad_u16_param_2];
; CHECK-NEXT: sad.u16 %rs4, %rs1, %rs2, %rs3;
; CHECK-NEXT: cvt.u32.u16 %r1, %rs4;
-; CHECK-NEXT: st.param.b32 [func_retval0+0], %r1;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
; CHECK-NEXT: ret;
%1 = call i16 @llvm.nvvm.sad.us(i16 %x, i16 %y, i16 %z)
ret i16 %1
@@ -48,7 +48,7 @@ define i32 @test_sad_i32(i32 %x, i32 %y, i32 %z) {
; CHECK-NEXT: ld.param.u32 %r2, [test_sad_i32_param_1];
; CHECK-NEXT: ld.param.u32 %r3, [test_sad_i32_param_2];
; CHECK-NEXT: sad.s32 %r4, %r1, %r2, %r3;
-; CHECK-NEXT: st.param.b32 [func_retval0+0], %r4;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r4;
; CHECK-NEXT: ret;
%1 = call i32 @llvm.nvvm.sad.i(i32 %x, i32 %y, i32 %z)
ret i32 %1
@@ -64,7 +64,7 @@ define i32 @test_sad_u32(i32 %x, i32 %y, i32 %z) {
; CHECK-NEXT: ld.param.u32 %r2, [test_sad_u32_param_1];
; CHECK-NEXT: ld.param.u32 %r3, [test_sad_u32_param_2];
; CHECK-NEXT: sad.u32 %r4, %r1, %r2, %r3;
-; CHECK-NEXT: st.param.b32 [func_retval0+0], %r4;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r4;
; CHECK-NEXT: ret;
%1 = call i32 @llvm.nvvm.sad.ui(i32 %x, i32 %y, i32 %z)
ret i32 %1
@@ -80,7 +80,7 @@ define i64 @test_sad_i64(i64 %x, i64 %y, i64 %z) {
; CHECK-NEXT: ld.param.u64 %rd2, [test_sad_i64_param_1];
; CHECK-NEXT: ld.param.u64 %rd3, [test_sad_i64_param_2];
; CHECK-NEXT: sad.s64 %rd4, %rd1, %rd2, %rd3;
-; CHECK-NEXT: st.param.b64 [func_retval0+0], %rd4;
+; CHECK-NEXT: st.param.b64 [func_retval0], %rd4;
; CHECK-NEXT: ret;
%1 = call i64 @llvm.nvvm.sad.ll(i64 %x, i64 %y, i64 %z)
ret i64 %1
@@ -96,7 +96,7 @@ define i64 @test_sad_u64(i64 %x, i64 %y, i64 %z) {
; CHECK-NEXT: ld.param.u64 %rd2, [test_sad_u64_param_1];
; CHECK-NEXT: ld.param.u64 %rd3, [test_sad_u64_param_2];
; CHECK-NEXT: sad.u64 %rd4, %rd1, %rd2, %rd3;
-; CHECK-NEXT: st.param.b64 [func_retval0+0], %rd4;
+; CHECK-NEXT: st.param.b64 [func_retval0], %rd4;
; CHECK-NEXT: ret;
%1 = call i64 @llvm.nvvm.sad.ull(i64 %x, i64 %y, i64 %z)
ret i64 %1
diff --git a/llvm/test/CodeGen/NVPTX/sext-setcc.ll b/llvm/test/CodeGen/NVPTX/sext-setcc.ll
index f471d47077cf0d..0cb0c1ba8c6bd0 100644
--- a/llvm/test/CodeGen/NVPTX/sext-setcc.ll
+++ b/llvm/test/CodeGen/NVPTX/sext-setcc.ll
@@ -19,7 +19,7 @@ define <2 x i16> @sext_setcc_v2i1_to_v2i16(ptr %p) {
; CHECK-NEXT: selp.s16 %rs3, -1, 0, %p2;
; CHECK-NEXT: selp.s16 %rs4, -1, 0, %p1;
; CHECK-NEXT: mov.b32 %r2, {%rs4, %rs3};
-; CHECK-NEXT: st.param.b32 [func_retval0+0], %r2;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r2;
; CHECK-NEXT: ret;
entry:
%v = load <2 x i16>, ptr %p, align 4
@@ -62,7 +62,7 @@ define <4 x i8> @sext_setcc_v4i1_to_v4i8(ptr %p) {
; CHECK-NEXT: bfi.b32 %r10, %r9, %r8, 16, 8;
; CHECK-NEXT: selp.s32 %r11, -1, 0, %p1;
; CHECK-NEXT: bfi.b32 %r12, %r11, %r10, 24, 8;
-; CHECK-NEXT: st.param.b32 [func_retval0+0], %r12;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r12;
; CHECK-NEXT: ret;
entry:
%v = load <4 x i8>, ptr %p, align 4
diff --git a/llvm/test/CodeGen/NVPTX/st-param-imm.ll b/llvm/test/CodeGen/NVPTX/st-param-imm.ll
index 29f27c1ba6cdcf..b178f5e05296cf 100644
--- a/llvm/test/CodeGen/NVPTX/st-param-imm.ll
+++ b/llvm/test/CodeGen/NVPTX/st-param-imm.ll
@@ -26,7 +26,7 @@ define void @st_param_i8_i16() {
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: { // callseq 0, 0
; CHECK-NEXT: .param .align 2 .b8 param0[4];
-; CHECK-NEXT: st.param.b8 [param0+0], 1;
+; CHECK-NEXT: st.param.b8 [param0], 1;
; CHECK-NEXT: st.param.b16 [param0+2], 2;
; CHECK-NEXT: call.uni
; CHECK-NEXT: call_i8_i16,
@@ -47,7 +47,7 @@ define void @st_param_i32() {
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: { // callseq 1, 0
; CHECK-NEXT: .param .b32 param0;
-; CHECK-NEXT: st.param.b32 [param0+0], 3;
+; CHECK-NEXT: st.param.b32 [param0], 3;
; CHECK-NEXT: call.uni
; CHECK-NEXT: call_i32,
; CHECK-NEXT: (
@@ -67,7 +67,7 @@ define void @st_param_i64() {
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: { // callseq 2, 0
; CHECK-NEXT: .param .b64 param0;
-; CHECK-NEXT: st.param.b64 [param0+0], 4;
+; CHECK-NEXT: st.param.b64 [param0], 4;
; CHECK-NEXT: call.uni
; CHECK-NEXT: call_i64,
; CHECK-NEXT: (
@@ -87,7 +87,7 @@ define void @st_param_f32() {
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: { // callseq 3, 0
; CHECK-NEXT: .param .b32 param0;
-; CHECK-NEXT: st.param.f32 [param0+0], 0f40A00000;
+; CHECK-NEXT: st.param.f32 [param0], 0f40A00000;
; CHECK-NEXT: call.uni
; CHECK-NEXT: call_f32,
; CHECK-NEXT: (
@@ -107,7 +107,7 @@ define void @st_param_f64() {
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: { // callseq 4, 0
; CHECK-NEXT: .param .b64 param0;
-; CHECK-NEXT: st.param.f64 [param0+0], 0d4018000000000000;
+; CHECK-NEXT: st.param.f64 [param0], 0d4018000000000000;
; CHECK-NEXT: call.uni
; CHECK-NEXT: call_f64,
; CHECK-NEXT: (
@@ -133,7 +133,7 @@ define void @st_param_v2_i8_ii() {
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: { // callseq 5, 0
; CHECK-NEXT: .param .align 2 .b8 param0[2];
-; CHECK-NEXT: st.param.v2.b8 [param0+0], {1, 2};
+; CHECK-NEXT: st.param.v2.b8 [param0], {1, 2};
; CHECK-NEXT: call.uni
; CHECK-NEXT: call_v2_i8,
; CHECK-NEXT: (
@@ -153,7 +153,7 @@ define void @st_param_v2_i8_ir(i8 %val) {
; CHECK-NEXT: ld.param.u8 %rs1, [st_param_v2_i8_ir_param_0];
; CHECK-NEXT: { // callseq 6, 0
; CHECK-NEXT: .param .align 2 .b8 param0[2];
-; CHECK-NEXT: st.param.v2.b8 [param0+0], {1, %rs1};
+; CHECK-NEXT: st.param.v2.b8 [param0], {1, %rs1};
; CHECK-NEXT: call.uni
; CHECK-NEXT: call_v2_i8,
; CHECK-NEXT: (
@@ -175,7 +175,7 @@ define void @st_param_v2_i8_ri(i8 %val) {
; CHECK-NEXT: ld.param.u8 %rs1, [st_param_v2_i8_ri_param_0];
; CHECK-NEXT: { // callseq 7, 0
; CHECK-NEXT: .param .align 2 .b8 param0[2];
-; CHECK-NEXT: st.param.v2.b8 [param0+0], {%rs1, 2};
+; CHECK-NEXT: st.param.v2.b8 [param0], {%rs1, 2};
; CHECK-NEXT: call.uni
; CHECK-NEXT: call_v2_i8,
; CHECK-NEXT: (
@@ -197,7 +197,7 @@ define void @st_param_v2_i16_ii() {
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: { // callseq 8, 0
; CHECK-NEXT: .param .align 4 .b8 param0[4];
-; CHECK-NEXT: st.param.v2.b16 [param0+0], {1, 2};
+; CHECK-NEXT: st.param.v2.b16 [param0], {1, 2};
; CHECK-NEXT: call.uni
; CHECK-NEXT: call_v2_i16,
; CHECK-NEXT: (
@@ -217,7 +217,7 @@ define void @st_param_v2_i16_ir(i16 %val) {
; CHECK-NEXT: ld.param.u16 %rs1, [st_param_v2_i16_ir_param_0];
; CHECK-NEXT: { // callseq 9, 0
; CHECK-NEXT: .param .align 4 .b8 param0[4];
-; CHECK-NEXT: st.param.v2.b16 [param0+0], {1, %rs1};
+; CHECK-NEXT: st.param.v2.b16 [param0], {1, %rs1};
; CHECK-NEXT: call.uni
; CHECK-NEXT: call_v2_i16,
; CHECK-NEXT: (
@@ -239,7 +239,7 @@ define void @st_param_v2_i16_ri(i16 %val) {
; CHECK-NEXT: ld.param.u16 %rs1, [st_param_v2_i16_ri_param_0];
; CHECK-NEXT: { // callseq 10, 0
; CHECK-NEXT: .param .align 4 .b8 param0[4];
-; CHECK-NEXT: st.param.v2.b16 [param0+0], {%rs1, 2};
+; CHECK-NEXT: st.param.v2.b16 [param0], {%rs1, 2};
; CHECK-NEXT: call.uni
; CHECK-NEXT: call_v2_i16,
; CHECK-NEXT: (
@@ -261,7 +261,7 @@ define void @st_param_v2_i32_ii() {
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: { // callseq 11, 0
; CHECK-NEXT: .param .align 8 .b8 param0[8];
-; CHECK-NEXT: st.param.v2.b32 [param0+0], {1, 2};
+; CHECK-NEXT: st.param.v2.b32 [param0], {1, 2};
; CHECK-NEXT: call.uni
; CHECK-NEXT: call_v2_i32,
; CHECK-NEXT: (
@@ -281,7 +281,7 @@ define void @st_param_v2_i32_ir(i32 %val) {
; CHECK-NEXT: ld.param.u32 %r1, [st_param_v2_i32_ir_param_0];
; CHECK-NEXT: { // callseq 12, 0
; CHECK-NEXT: .param .align 8 .b8 param0[8];
-; CHECK-NEXT: st.param.v2.b32 [param0+0], {1, %r1};
+; CHECK-NEXT: st.param.v2.b32 [param0], {1, %r1};
; CHECK-NEXT: call.uni
; CHECK-NEXT: call_v2_i32,
; CHECK-NEXT: (
@@ -303,7 +303,7 @@ define void @st_param_v2_i32_ri(i32 %val) {
; CHECK-NEXT: ld.param.u32 %r1, [st_param_v2_i32_ri_param_0];
; CHECK-NEXT: { // callseq 13, 0
; CHECK-NEXT: .param .align 8 .b8 param0[8];
-; CHECK-NEXT: st.param.v2.b32 [param0+0], {%r1, 2};
+; CHECK-NEXT: st.param.v2.b32 [param0], {%r1, 2};
; CHECK-NEXT: call.uni
; CHECK-NEXT: call_v2_i32,
; CHECK-NEXT: (
@@ -325,7 +325,7 @@ define void @st_param_v2_i64_ii() {
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: { // callseq 14, 0
; CHECK-NEXT: .param .align 16 .b8 param0[16];
-; CHECK-NEXT: st.param.v2.b64 [param0+0], {1, 2};
+; CHECK-NEXT: st.param.v2.b64 [param0], {1, 2};
; CHECK-NEXT: call.uni
; CHECK-NEXT: call_v2_i64,
; CHECK-NEXT: (
@@ -345,7 +345,7 @@ define void @st_param_v2_i64_ir(i64 %val) {
; CHECK-NEXT: ld.param.u64 %rd1, [st_param_v2_i64_ir_param_0];
; CHECK-NEXT: { // callseq 15, 0
; CHECK-NEXT: .param .align 16 .b8 param0[16];
-; CHECK-NEXT: st.param.v2.b64 [param0+0], {1, %rd1};
+; CHECK-NEXT: st.param.v2.b64 [param0], {1, %rd1};
; CHECK-NEXT: call.uni
; CHECK-NEXT: call_v2_i64,
; CHECK-NEXT: (
@@ -367,7 +367,7 @@ define void @st_param_v2_i64_ri(i64 %val) {
; CHECK-NEXT: ld.param.u64 %rd1, [st_param_v2_i64_ri_param_0];
; CHECK-NEXT: { // callseq 16, 0
; CHECK-NEXT: .param .align 16 .b8 param0[16];
-; CHECK-NEXT: st.param.v2.b64 [param0+0], {%rd1, 2};
+; CHECK-NEXT: st.param.v2.b64 [param0], {%rd1, 2};
; CHECK-NEXT: call.uni
; CHECK-NEXT: call_v2_i64,
; CHECK-NEXT: (
@@ -389,7 +389,7 @@ define void @st_param_v2_f32_ii(float %val) {
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: { // callseq 17, 0
; CHECK-NEXT: .param .align 8 .b8 param0[8];
-; CHECK-NEXT: st.param.v2.f32 [param0+0], {0f3F800000, 0f40000000};
+; CHECK-NEXT: st.param.v2.f32 [param0], {0f3F800000, 0f40000000};
; CHECK-NEXT: call.uni
; CHECK-NEXT: call_v2_f32,
; CHECK-NEXT: (
@@ -409,7 +409,7 @@ define void @st_param_v2_f32_ir(float %val) {
; CHECK-NEXT: ld.param.f32 %f1, [st_param_v2_f32_ir_param_0];
; CHECK-NEXT: { // callseq 18, 0
; CHECK-NEXT: .param .align 8 .b8 param0[8];
-; CHECK-NEXT: st.param.v2.f32 [param0+0], {0f3F800000, %f1};
+; CHECK-NEXT: st.param.v2.f32 [param0], {0f3F800000, %f1};
; CHECK-NEXT: call.uni
; CHECK-NEXT: call_v2_f32,
; CHECK-NEXT: (
@@ -431,7 +431,7 @@ define void @st_param_v2_f32_ri(float %val) {
; CHECK-NEXT: ld.param.f32 %f1, [st_param_v2_f32_ri_param_0];
; CHECK-NEXT: { // callseq 19, 0
; CHECK-NEXT: .param .align 8 .b8 param0[8];
-; CHECK-NEXT: st.param.v2.f32 [param0+0], {%f1, 0f40000000};
+; CHECK-NEXT: st.param.v2.f32 [param0], {%f1, 0f40000000};
; CHECK-NEXT: call.uni
; CHECK-NEXT: call_v2_f32,
; CHECK-NEXT: (
@@ -453,7 +453,7 @@ define void @st_param_v2_f64_ii(double %val) {
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: { // callseq 20, 0
; CHECK-NEXT: .param .align 16 .b8 param0[16];
-; CHECK-NEXT: st.param.v2.f64 [param0+0], {0d3FF0000000000000, 0d4000000000000000};
+; CHECK-NEXT: st.param.v2.f64 [param0], {0d3FF0000000000000, 0d4000000000000000};
; CHECK-NEXT: call.uni
; CHECK-NEXT: call_v2_f64,
; CHECK-NEXT: (
@@ -473,7 +473,7 @@ define void @st_param_v2_f64_ir(double %val) {
; CHECK-NEXT: ld.param.f64 %fd1, [st_param_v2_f64_ir_param_0];
; CHECK-NEXT: { // callseq 21, 0
; CHECK-NEXT: .param .align 16 .b8 param0[16];
-; CHECK-NEXT: st.param.v2.f64 [param0+0], {0d3FF0000000000000, %fd1};
+; CHECK-NEXT: st.param.v2.f64 [param0], {0d3FF0000000000000, %fd1};
; CHECK-NEXT: call.uni
; CHECK-NEXT: call_v2_f64,
; CHECK-NEXT: (
@@ -495,7 +495,7 @@ define void @st_param_v2_f64_ri(double %val) {
; CHECK-NEXT: ld.param.f64 %fd1, [st_param_v2_f64_ri_param_0];
; CHECK-NEXT: { // callseq 22, 0
; CHECK-NEXT: .param .align 16 .b8 param0[16];
-; CHECK-NEXT: st.param.v2.f64 [param0+0], {%fd1, 0d4000000000000000};
+; CHECK-NEXT: st.param.v2.f64 [param0], {%fd1, 0d4000000000000000};
; CHECK-NEXT: call.uni
; CHECK-NEXT: call_v2_f64,
; CHECK-NEXT: (
@@ -524,7 +524,7 @@ define void @st_param_v4_i8_iiii() {
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: { // callseq 23, 0
; CHECK-NEXT: .param .align 4 .b8 param0[4];
-; CHECK-NEXT: st.param.v4.b8 [param0+0], {1, 2, 3, 4};
+; CHECK-NEXT: st.param.v4.b8 [param0], {1, 2, 3, 4};
; CHECK-NEXT: call.uni
; CHECK-NEXT: call_v4_i8,
; CHECK-NEXT: (
@@ -546,7 +546,7 @@ define void @st_param_v4_i8_irrr(i8 %b, i8 %c, i8 %d) {
; CHECK-NEXT: ld.param.u8 %rs3, [st_param_v4_i8_irrr_param_2];
; CHECK-NEXT: { // callseq 24, 0
; CHECK-NEXT: .param .align 4 .b8 param0[4];
-; CHECK-NEXT: st.param.v4.b8 [param0+0], {1, %rs1, %rs2, %rs3};
+; CHECK-NEXT: st.param.v4.b8 [param0], {1, %rs1, %rs2, %rs3};
; CHECK-NEXT: call.uni
; CHECK-NEXT: call_v4_i8,
; CHECK-NEXT: (
@@ -572,7 +572,7 @@ define void @st_param_v4_i8_rirr(i8 %a, i8 %c, i8 %d) {
; CHECK-NEXT: ld.param.u8 %rs3, [st_param_v4_i8_rirr_param_2];
; CHECK-NEXT: { // callseq 25, 0
; CHECK-NEXT: .param .align 4 .b8 param0[4];
-; CHECK-NEXT: st.param.v4.b8 [param0+0], {%rs1, 2, %rs2, %rs3};
+; CHECK-NEXT: st.param.v4.b8 [param0], {%rs1, 2, %rs2, %rs3};
; CHECK-NEXT: call.uni
; CHECK-NEXT: call_v4_i8,
; CHECK-NEXT: (
@@ -598,7 +598,7 @@ define void @st_param_v4_i8_rrir(i8 %a, i8 %b, i8 %d) {
; CHECK-NEXT: ld.param.u8 %rs3, [st_param_v4_i8_rrir_param_2];
; CHECK-NEXT: { // callseq 26, 0
; CHECK-NEXT: .param .align 4 .b8 param0[4];
-; CHECK-NEXT: st.param.v4.b8 [param0+0], {%rs1, %rs2, 3, %rs3};
+; CHECK-NEXT: st.param.v4.b8 [param0], {%rs1, %rs2, 3, %rs3};
; CHECK-NEXT: call.uni
; CHECK-NEXT: call_v4_i8,
; CHECK-NEXT: (
@@ -624,7 +624,7 @@ define void @st_param_v4_i8_rrri(i8 %a, i8 %b, i8 %c) {
; CHECK-NEXT: ld.param.u8 %rs3, [st_param_v4_i8_rrri_param_2];
; CHECK-NEXT: { // callseq 27, 0
; CHECK-NEXT: .param .align 4 .b8 param0[4];
-; CHECK-NEXT: st.param.v4.b8 [param0+0], {%rs1, %rs2, %rs3, 4};
+; CHECK-NEXT: st.param.v4.b8 [param0], {%rs1, %rs2, %rs3, 4};
; CHECK-NEXT: call.uni
; CHECK-NEXT: call_v4_i8,
; CHECK-NEXT: (
@@ -649,7 +649,7 @@ define void @st_param_v4_i8_iirr(i8 %c, i8 %d) {
; CHECK-NEXT: ld.param.u8 %rs2, [st_param_v4_i8_iirr_param_1];
; CHECK-NEXT: { // callseq 28, 0
; CHECK-NEXT: .param .align 4 .b8 param0[4];
-; CHECK-NEXT: st.param.v4.b8 [param0+0], {1, 2, %rs1, %rs2};
+; CHECK-NEXT: st.param.v4.b8 [param0], {1, 2, %rs1, %rs2};
; CHECK-NEXT: call.uni
; CHECK-NEXT: call_v4_i8,
; CHECK-NEXT: (
@@ -674,7 +674,7 @@ define void @st_param_v4_i8_irir(i8 %b, i8 %d) {
; CHECK-NEXT: ld.param.u8 %rs2, [st_param_v4_i8_irir_param_1];
; CHECK-NEXT: { // callseq 29, 0
; CHECK-NEXT: .param .align 4 .b8 param0[4];
-; CHECK-NEXT: st.param.v4.b8 [param0+0], {1, %rs1, 3, %rs2};
+; CHECK-NEXT: st.param.v4.b8 [param0], {1, %rs1, 3, %rs2};
; CHECK-NEXT: call.uni
; CHECK-NEXT: call_v4_i8,
; CHECK-NEXT: (
@@ -699,7 +699,7 @@ define void @st_param_v4_i8_irri(i8 %b, i8 %c) {
; CHECK-NEXT: ld.param.u8 %rs2, [st_param_v4_i8_irri_param_1];
; CHECK-NEXT: { // callseq 30, 0
; CHECK-NEXT: .param .align 4 .b8 param0[4];
-; CHECK-NEXT: st.param.v4.b8 [param0+0], {1, %rs1, %rs2, 4};
+; CHECK-NEXT: st.param.v4.b8 [param0], {1, %rs1, %rs2, 4};
; CHECK-NEXT: call.uni
; CHECK-NEXT: call_v4_i8,
; CHECK-NEXT: (
@@ -724,7 +724,7 @@ define void @st_param_v4_i8_riir(i8 %a, i8 %d) {
; CHECK-NEXT: ld.param.u8 %rs2, [st_param_v4_i8_riir_param_1];
; CHECK-NEXT: { // callseq 31, 0
; CHECK-NEXT: .param .align 4 .b8 param0[4];
-; CHECK-NEXT: st.param.v4.b8 [param0+0], {%rs1, 2, 3, %rs2};
+; CHECK-NEXT: st.param.v4.b8 [param0], {%rs1, 2, 3, %rs2};
; CHECK-NEXT: call.uni
; CHECK-NEXT: call_v4_i8,
; CHECK-NEXT: (
@@ -749,7 +749,7 @@ define void @st_param_v4_i8_riri(i8 %a, i8 %c) {
; CHECK-NEXT: ld.param.u8 %rs2, [st_param_v4_i8_riri_param_1];
; CHECK-NEXT: { // callseq 32, 0
; CHECK-NEXT: .param .align 4 .b8 param0[4];
-; CHECK-NEXT: st.param.v4.b8 [param0+0], {%rs1, 2, %rs2, 4};
+; CHECK-NEXT: st.param.v4.b8 [param0], {%rs1, 2, %rs2, 4};
; CHECK-NEXT: call.uni
; CHECK-NEXT: call_v4_i8,
; CHECK-NEXT: (
@@ -774,7 +774,7 @@ define void @st_param_v4_i8_rrii(i8 %a, i8 %b) {
; CHECK-NEXT: ld.param.u8 %rs2, [st_param_v4_i8_rrii_param_1];
; CHECK-NEXT: { // callseq 33, 0
; CHECK-NEXT: .param .align 4 .b8 param0[4];
-; CHECK-NEXT: st.param.v4.b8 [param0+0], {%rs1, %rs2, 3, 4};
+; CHECK-NEXT: st.param.v4.b8 [param0], {%rs1, %rs2, 3, 4};
; CHECK-NEXT: call.uni
; CHECK-NEXT: call_v4_i8,
; CHECK-NEXT: (
@@ -798,7 +798,7 @@ define void @st_param_v4_i8_iiir(i8 %d) {
; CHECK-NEXT: ld.param.u8 %rs1, [st_param_v4_i8_iiir_param_0];
; CHECK-NEXT: { // callseq 34, 0
; CHECK-NEXT: .param .align 4 .b8 param0[4];
-; CHECK-NEXT: st.param.v4.b8 [param0+0], {1, 2, 3, %rs1};
+; CHECK-NEXT: st.param.v4.b8 [param0], {1, 2, 3, %rs1};
; CHECK-NEXT: call.uni
; CHECK-NEXT: call_v4_i8,
; CHECK-NEXT: (
@@ -822,7 +822,7 @@ define void @st_param_v4_i8_iiri(i8 %c) {
; CHECK-NEXT: ld.param.u8 %rs1, [st_param_v4_i8_iiri_param_0];
; CHECK-NEXT: { // callseq 35, 0
; CHECK-NEXT: .param .align 4 .b8 param0[4];
-; CHECK-NEXT: st.param.v4.b8 [param0+0], {1, 2, %rs1, 4};
+; CHECK-NEXT: st.param.v4.b8 [param0], {1, 2, %rs1, 4};
; CHECK-NEXT: call.uni
; CHECK-NEXT: call_v4_i8,
; CHECK-NEXT: (
@@ -846,7 +846,7 @@ define void @st_param_v4_i8_irii(i8 %b) {
; CHECK-NEXT: ld.param.u8 %rs1, [st_param_v4_i8_irii_param_0];
; CHECK-NEXT: { // callseq 36, 0
; CHECK-NEXT: .param .align 4 .b8 param0[4];
-; CHECK-NEXT: st.param.v4.b8 [param0+0], {1, %rs1, 3, 4};
+; CHECK-NEXT: st.param.v4.b8 [param0], {1, %rs1, 3, 4};
; CHECK-NEXT: call.uni
; CHECK-NEXT: call_v4_i8,
; CHECK-NEXT: (
@@ -870,7 +870,7 @@ define void @st_param_v4_i8_riii(i8 %a) {
; CHECK-NEXT: ld.param.u8 %rs1, [st_param_v4_i8_riii_param_0];
; CHECK-NEXT: { // callseq 37, 0
; CHECK-NEXT: .param .align 4 .b8 param0[4];
-; CHECK-NEXT: st.param.v4.b8 [param0+0], {%rs1, 2, 3, 4};
+; CHECK-NEXT: st.param.v4.b8 [param0], {%rs1, 2, 3, 4};
; CHECK-NEXT: call.uni
; CHECK-NEXT: call_v4_i8,
; CHECK-NEXT: (
@@ -894,7 +894,7 @@ define void @st_param_v4_i16_iiii() {
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: { // callseq 38, 0
; CHECK-NEXT: .param .align 8 .b8 param0[8];
-; CHECK-NEXT: st.param.v4.b16 [param0+0], {1, 2, 3, 4};
+; CHECK-NEXT: st.param.v4.b16 [param0], {1, 2, 3, 4};
; CHECK-NEXT: call.uni
; CHECK-NEXT: call_v4_i16,
; CHECK-NEXT: (
@@ -916,7 +916,7 @@ define void @st_param_v4_i16_irrr(i16 %b, i16 %c, i16 %d) {
; CHECK-NEXT: ld.param.u16 %rs3, [st_param_v4_i16_irrr_param_2];
; CHECK-NEXT: { // callseq 39, 0
; CHECK-NEXT: .param .align 8 .b8 param0[8];
-; CHECK-NEXT: st.param.v4.b16 [param0+0], {1, %rs1, %rs2, %rs3};
+; CHECK-NEXT: st.param.v4.b16 [param0], {1, %rs1, %rs2, %rs3};
; CHECK-NEXT: call.uni
; CHECK-NEXT: call_v4_i16,
; CHECK-NEXT: (
@@ -942,7 +942,7 @@ define void @st_param_v4_i16_rirr(i16 %a, i16 %c, i16 %d) {
; CHECK-NEXT: ld.param.u16 %rs3, [st_param_v4_i16_rirr_param_2];
; CHECK-NEXT: { // callseq 40, 0
; CHECK-NEXT: .param .align 8 .b8 param0[8];
-; CHECK-NEXT: st.param.v4.b16 [param0+0], {%rs1, 2, %rs2, %rs3};
+; CHECK-NEXT: st.param.v4.b16 [param0], {%rs1, 2, %rs2, %rs3};
; CHECK-NEXT: call.uni
; CHECK-NEXT: call_v4_i16,
; CHECK-NEXT: (
@@ -968,7 +968,7 @@ define void @st_param_v4_i16_rrir(i16 %a, i16 %b, i16 %d) {
; CHECK-NEXT: ld.param.u16 %rs3, [st_param_v4_i16_rrir_param_2];
; CHECK-NEXT: { // callseq 41, 0
; CHECK-NEXT: .param .align 8 .b8 param0[8];
-; CHECK-NEXT: st.param.v4.b16 [param0+0], {%rs1, %rs2, 3, %rs3};
+; CHECK-NEXT: st.param.v4.b16 [param0], {%rs1, %rs2, 3, %rs3};
; CHECK-NEXT: call.uni
; CHECK-NEXT: call_v4_i16,
; CHECK-NEXT: (
@@ -994,7 +994,7 @@ define void @st_param_v4_i16_rrri(i16 %a, i16 %b, i16 %c) {
; CHECK-NEXT: ld.param.u16 %rs3, [st_param_v4_i16_rrri_param_2];
; CHECK-NEXT: { // callseq 42, 0
; CHECK-NEXT: .param .align 8 .b8 param0[8];
-; CHECK-NEXT: st.param.v4.b16 [param0+0], {%rs1, %rs2, %rs3, 4};
+; CHECK-NEXT: st.param.v4.b16 [param0], {%rs1, %rs2, %rs3, 4};
; CHECK-NEXT: call.uni
; CHECK-NEXT: call_v4_i16,
; CHECK-NEXT: (
@@ -1019,7 +1019,7 @@ define void @st_param_v4_i16_iirr(i16 %c, i16 %d) {
; CHECK-NEXT: ld.param.u16 %rs2, [st_param_v4_i16_iirr_param_1];
; CHECK-NEXT: { // callseq 43, 0
; CHECK-NEXT: .param .align 8 .b8 param0[8];
-; CHECK-NEXT: st.param.v4.b16 [param0+0], {1, 2, %rs1, %rs2};
+; CHECK-NEXT: st.param.v4.b16 [param0], {1, 2, %rs1, %rs2};
; CHECK-NEXT: call.uni
; CHECK-NEXT: call_v4_i16,
; CHECK-NEXT: (
@@ -1044,7 +1044,7 @@ define void @st_param_v4_i16_irir(i16 %b, i16 %d) {
; CHECK-NEXT: ld.param.u16 %rs2, [st_param_v4_i16_irir_param_1];
; CHECK-NEXT: { // callseq 44, 0
; CHECK-NEXT: .param .align 8 .b8 param0[8];
-; CHECK-NEXT: st.param.v4.b16 [param0+0], {1, %rs1, 3, %rs2};
+; CHECK-NEXT: st.param.v4.b16 [param0], {1, %rs1, 3, %rs2};
; CHECK-NEXT: call.uni
; CHECK-NEXT: call_v4_i16,
; CHECK-NEXT: (
@@ -1069,7 +1069,7 @@ define void @st_param_v4_i16_irri(i16 %b, i16 %c) {
; CHECK-NEXT: ld.param.u16 %rs2, [st_param_v4_i16_irri_param_1];
; CHECK-NEXT: { // callseq 45, 0
; CHECK-NEXT: .param .align 8 .b8 param0[8];
-; CHECK-NEXT: st.param.v4.b16 [param0+0], {1, %rs1, %rs2, 4};
+; CHECK-NEXT: st.param.v4.b16 [param0], {1, %rs1, %rs2, 4};
; CHECK-NEXT: call.uni
; CHECK-NEXT: call_v4_i16,
; CHECK-NEXT: (
@@ -1094,7 +1094,7 @@ define void @st_param_v4_i16_riir(i16 %a, i16 %d) {
; CHECK-NEXT: ld.param.u16 %rs2, [st_param_v4_i16_riir_param_1];
; CHECK-NEXT: { // callseq 46, 0
; CHECK-NEXT: .param .align 8 .b8 param0[8];
-; CHECK-NEXT: st.param.v4.b16 [param0+0], {%rs1, 2, 3, %rs2};
+; CHECK-NEXT: st.param.v4.b16 [param0], {%rs1, 2, 3, %rs2};
; CHECK-NEXT: call.uni
; CHECK-NEXT: call_v4_i16,
; CHECK-NEXT: (
@@ -1119,7 +1119,7 @@ define void @st_param_v4_i16_riri(i16 %a, i16 %c) {
; CHECK-NEXT: ld.param.u16 %rs2, [st_param_v4_i16_riri_param_1];
; CHECK-NEXT: { // callseq 47, 0
; CHECK-NEXT: .param .align 8 .b8 param0[8];
-; CHECK-NEXT: st.param.v4.b16 [param0+0], {%rs1, 2, %rs2, 4};
+; CHECK-NEXT: st.param.v4.b16 [param0], {%rs1, 2, %rs2, 4};
; CHECK-NEXT: call.uni
; CHECK-NEXT: call_v4_i16,
; CHECK-NEXT: (
@@ -1144,7 +1144,7 @@ define void @st_param_v4_i16_rrii(i16 %a, i16 %b) {
; CHECK-NEXT: ld.param.u16 %rs2, [st_param_v4_i16_rrii_param_1];
; CHECK-NEXT: { // callseq 48, 0
; CHECK-NEXT: .param .align 8 .b8 param0[8];
-; CHECK-NEXT: st.param.v4.b16 [param0+0], {%rs1, %rs2, 3, 4};
+; CHECK-NEXT: st.param.v4.b16 [param0], {%rs1, %rs2, 3, 4};
; CHECK-NEXT: call.uni
; CHECK-NEXT: call_v4_i16,
; CHECK-NEXT: (
@@ -1168,7 +1168,7 @@ define void @st_param_v4_i16_iiir(i16 %d) {
; CHECK-NEXT: ld.param.u16 %rs1, [st_param_v4_i16_iiir_param_0];
; CHECK-NEXT: { // callseq 49, 0
; CHECK-NEXT: .param .align 8 .b8 param0[8];
-; CHECK-NEXT: st.param.v4.b16 [param0+0], {1, 2, 3, %rs1};
+; CHECK-NEXT: st.param.v4.b16 [param0], {1, 2, 3, %rs1};
; CHECK-NEXT: call.uni
; CHECK-NEXT: call_v4_i16,
; CHECK-NEXT: (
@@ -1192,7 +1192,7 @@ define void @st_param_v4_i16_iiri(i16 %c) {
; CHECK-NEXT: ld.param.u16 %rs1, [st_param_v4_i16_iiri_param_0];
; CHECK-NEXT: { // callseq 50, 0
; CHECK-NEXT: .param .align 8 .b8 param0[8];
-; CHECK-NEXT: st.param.v4.b16 [param0+0], {1, 2, %rs1, 4};
+; CHECK-NEXT: st.param.v4.b16 [param0], {1, 2, %rs1, 4};
; CHECK-NEXT: call.uni
; CHECK-NEXT: call_v4_i16,
; CHECK-NEXT: (
@@ -1216,7 +1216,7 @@ define void @st_param_v4_i16_irii(i16 %b) {
; CHECK-NEXT: ld.param.u16 %rs1, [st_param_v4_i16_irii_param_0];
; CHECK-NEXT: { // callseq 51, 0
; CHECK-NEXT: .param .align 8 .b8 param0[8];
-; CHECK-NEXT: st.param.v4.b16 [param0+0], {1, %rs1, 3, 4};
+; CHECK-NEXT: st.param.v4.b16 [param0], {1, %rs1, 3, 4};
; CHECK-NEXT: call.uni
; CHECK-NEXT: call_v4_i16,
; CHECK-NEXT: (
@@ -1240,7 +1240,7 @@ define void @st_param_v4_i16_riii(i16 %a) {
; CHECK-NEXT: ld.param.u16 %rs1, [st_param_v4_i16_riii_param_0];
; CHECK-NEXT: { // callseq 52, 0
; CHECK-NEXT: .param .align 8 .b8 param0[8];
-; CHECK-NEXT: st.param.v4.b16 [param0+0], {%rs1, 2, 3, 4};
+; CHECK-NEXT: st.param.v4.b16 [param0], {%rs1, 2, 3, 4};
; CHECK-NEXT: call.uni
; CHECK-NEXT: call_v4_i16,
; CHECK-NEXT: (
@@ -1264,7 +1264,7 @@ define void @st_param_v4_i32_iiii() {
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: { // callseq 53, 0
; CHECK-NEXT: .param .align 16 .b8 param0[16];
-; CHECK-NEXT: st.param.v4.b32 [param0+0], {1, 2, 3, 4};
+; CHECK-NEXT: st.param.v4.b32 [param0], {1, 2, 3, 4};
; CHECK-NEXT: call.uni
; CHECK-NEXT: call_v4_i32,
; CHECK-NEXT: (
@@ -1286,7 +1286,7 @@ define void @st_param_v4_i32_irrr(i32 %b, i32 %c, i32 %d) {
; CHECK-NEXT: ld.param.u32 %r3, [st_param_v4_i32_irrr_param_2];
; CHECK-NEXT: { // callseq 54, 0
; CHECK-NEXT: .param .align 16 .b8 param0[16];
-; CHECK-NEXT: st.param.v4.b32 [param0+0], {1, %r1, %r2, %r3};
+; CHECK-NEXT: st.param.v4.b32 [param0], {1, %r1, %r2, %r3};
; CHECK-NEXT: call.uni
; CHECK-NEXT: call_v4_i32,
; CHECK-NEXT: (
@@ -1312,7 +1312,7 @@ define void @st_param_v4_i32_rirr(i32 %a, i32 %c, i32 %d) {
; CHECK-NEXT: ld.param.u32 %r3, [st_param_v4_i32_rirr_param_2];
; CHECK-NEXT: { // callseq 55, 0
; CHECK-NEXT: .param .align 16 .b8 param0[16];
-; CHECK-NEXT: st.param.v4.b32 [param0+0], {%r1, 2, %r2, %r3};
+; CHECK-NEXT: st.param.v4.b32 [param0], {%r1, 2, %r2, %r3};
; CHECK-NEXT: call.uni
; CHECK-NEXT: call_v4_i32,
; CHECK-NEXT: (
@@ -1338,7 +1338,7 @@ define void @st_param_v4_i32_rrir(i32 %a, i32 %b, i32 %d) {
; CHECK-NEXT: ld.param.u32 %r3, [st_param_v4_i32_rrir_param_2];
; CHECK-NEXT: { // callseq 56, 0
; CHECK-NEXT: .param .align 16 .b8 param0[16];
-; CHECK-NEXT: st.param.v4.b32 [param0+0], {%r1, %r2, 3, %r3};
+; CHECK-NEXT: st.param.v4.b32 [param0], {%r1, %r2, 3, %r3};
; CHECK-NEXT: call.uni
; CHECK-NEXT: call_v4_i32,
; CHECK-NEXT: (
@@ -1364,7 +1364,7 @@ define void @st_param_v4_i32_rrri(i32 %a, i32 %b, i32 %c) {
; CHECK-NEXT: ld.param.u32 %r3, [st_param_v4_i32_rrri_param_2];
; CHECK-NEXT: { // callseq 57, 0
; CHECK-NEXT: .param .align 16 .b8 param0[16];
-; CHECK-NEXT: st.param.v4.b32 [param0+0], {%r1, %r2, %r3, 4};
+; CHECK-NEXT: st.param.v4.b32 [param0], {%r1, %r2, %r3, 4};
; CHECK-NEXT: call.uni
; CHECK-NEXT: call_v4_i32,
; CHECK-NEXT: (
@@ -1389,7 +1389,7 @@ define void @st_param_v4_i32_iirr(i32 %c, i32 %d) {
; CHECK-NEXT: ld.param.u32 %r2, [st_param_v4_i32_iirr_param_1];
; CHECK-NEXT: { // callseq 58, 0
; CHECK-NEXT: .param .align 16 .b8 param0[16];
-; CHECK-NEXT: st.param.v4.b32 [param0+0], {1, 2, %r1, %r2};
+; CHECK-NEXT: st.param.v4.b32 [param0], {1, 2, %r1, %r2};
; CHECK-NEXT: call.uni
; CHECK-NEXT: call_v4_i32,
; CHECK-NEXT: (
@@ -1414,7 +1414,7 @@ define void @st_param_v4_i32_irir(i32 %b, i32 %d) {
; CHECK-NEXT: ld.param.u32 %r2, [st_param_v4_i32_irir_param_1];
; CHECK-NEXT: { // callseq 59, 0
; CHECK-NEXT: .param .align 16 .b8 param0[16];
-; CHECK-NEXT: st.param.v4.b32 [param0+0], {1, %r1, 3, %r2};
+; CHECK-NEXT: st.param.v4.b32 [param0], {1, %r1, 3, %r2};
; CHECK-NEXT: call.uni
; CHECK-NEXT: call_v4_i32,
; CHECK-NEXT: (
@@ -1439,7 +1439,7 @@ define void @st_param_v4_i32_irri(i32 %b, i32 %c) {
; CHECK-NEXT: ld.param.u32 %r2, [st_param_v4_i32_irri_param_1];
; CHECK-NEXT: { // callseq 60, 0
; CHECK-NEXT: .param .align 16 .b8 param0[16];
-; CHECK-NEXT: st.param.v4.b32 [param0+0], {1, %r1, %r2, 4};
+; CHECK-NEXT: st.param.v4.b32 [param0], {1, %r1, %r2, 4};
; CHECK-NEXT: call.uni
; CHECK-NEXT: call_v4_i32,
; CHECK-NEXT: (
@@ -1464,7 +1464,7 @@ define void @st_param_v4_i32_riir(i32 %a, i32 %d) {
; CHECK-NEXT: ld.param.u32 %r2, [st_param_v4_i32_riir_param_1];
; CHECK-NEXT: { // callseq 61, 0
; CHECK-NEXT: .param .align 16 .b8 param0[16];
-; CHECK-NEXT: st.param.v4.b32 [param0+0], {%r1, 2, 3, %r2};
+; CHECK-NEXT: st.param.v4.b32 [param0], {%r1, 2, 3, %r2};
; CHECK-NEXT: call.uni
; CHECK-NEXT: call_v4_i32,
; CHECK-NEXT: (
@@ -1489,7 +1489,7 @@ define void @st_param_v4_i32_riri(i32 %a, i32 %c) {
; CHECK-NEXT: ld.param.u32 %r2, [st_param_v4_i32_riri_param_1];
; CHECK-NEXT: { // callseq 62, 0
; CHECK-NEXT: .param .align 16 .b8 param0[16];
-; CHECK-NEXT: st.param.v4.b32 [param0+0], {%r1, 2, %r2, 4};
+; CHECK-NEXT: st.param.v4.b32 [param0], {%r1, 2, %r2, 4};
; CHECK-NEXT: call.uni
; CHECK-NEXT: call_v4_i32,
; CHECK-NEXT: (
@@ -1514,7 +1514,7 @@ define void @st_param_v4_i32_rrii(i32 %a, i32 %b) {
; CHECK-NEXT: ld.param.u32 %r2, [st_param_v4_i32_rrii_param_1];
; CHECK-NEXT: { // callseq 63, 0
; CHECK-NEXT: .param .align 16 .b8 param0[16];
-; CHECK-NEXT: st.param.v4.b32 [param0+0], {%r1, %r2, 3, 4};
+; CHECK-NEXT: st.param.v4.b32 [param0], {%r1, %r2, 3, 4};
; CHECK-NEXT: call.uni
; CHECK-NEXT: call_v4_i32,
; CHECK-NEXT: (
@@ -1538,7 +1538,7 @@ define void @st_param_v4_i32_iiir(i32 %d) {
; CHECK-NEXT: ld.param.u32 %r1, [st_param_v4_i32_iiir_param_0];
; CHECK-NEXT: { // callseq 64, 0
; CHECK-NEXT: .param .align 16 .b8 param0[16];
-; CHECK-NEXT: st.param.v4.b32 [param0+0], {1, 2, 3, %r1};
+; CHECK-NEXT: st.param.v4.b32 [param0], {1, 2, 3, %r1};
; CHECK-NEXT: call.uni
; CHECK-NEXT: call_v4_i32,
; CHECK-NEXT: (
@@ -1562,7 +1562,7 @@ define void @st_param_v4_i32_iiri(i32 %c) {
; CHECK-NEXT: ld.param.u32 %r1, [st_param_v4_i32_iiri_param_0];
; CHECK-NEXT: { // callseq 65, 0
; CHECK-NEXT: .param .align 16 .b8 param0[16];
-; CHECK-NEXT: st.param.v4.b32 [param0+0], {1, 2, %r1, 4};
+; CHECK-NEXT: st.param.v4.b32 [param0], {1, 2, %r1, 4};
; CHECK-NEXT: call.uni
; CHECK-NEXT: call_v4_i32,
; CHECK-NEXT: (
@@ -1586,7 +1586,7 @@ define void @st_param_v4_i32_irii(i32 %b) {
; CHECK-NEXT: ld.param.u32 %r1, [st_param_v4_i32_irii_param_0];
; CHECK-NEXT: { // callseq 66, 0
; CHECK-NEXT: .param .align 16 .b8 param0[16];
-; CHECK-NEXT: st.param.v4.b32 [param0+0], {1, %r1, 3, 4};
+; CHECK-NEXT: st.param.v4.b32 [param0], {1, %r1, 3, 4};
; CHECK-NEXT: call.uni
; CHECK-NEXT: call_v4_i32,
; CHECK-NEXT: (
@@ -1610,7 +1610,7 @@ define void @st_param_v4_i32_riii(i32 %a) {
; CHECK-NEXT: ld.param.u32 %r1, [st_param_v4_i32_riii_param_0];
; CHECK-NEXT: { // callseq 67, 0
; CHECK-NEXT: .param .align 16 .b8 param0[16];
-; CHECK-NEXT: st.param.v4.b32 [param0+0], {%r1, 2, 3, 4};
+; CHECK-NEXT: st.param.v4.b32 [param0], {%r1, 2, 3, 4};
; CHECK-NEXT: call.uni
; CHECK-NEXT: call_v4_i32,
; CHECK-NEXT: (
@@ -1634,7 +1634,7 @@ define void @st_param_v4_f32_iiii() {
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: { // callseq 68, 0
; CHECK-NEXT: .param .align 16 .b8 param0[16];
-; CHECK-NEXT: st.param.v4.f32 [param0+0], {0f3F800000, 0f40000000, 0f40400000, 0f40800000};
+; CHECK-NEXT: st.param.v4.f32 [param0], {0f3F800000, 0f40000000, 0f40400000, 0f40800000};
; CHECK-NEXT: call.uni
; CHECK-NEXT: call_v4_f32,
; CHECK-NEXT: (
@@ -1656,7 +1656,7 @@ define void @st_param_v4_f32_irrr(float %b, float %c, float %d) {
; CHECK-NEXT: ld.param.f32 %f3, [st_param_v4_f32_irrr_param_2];
; CHECK-NEXT: { // callseq 69, 0
; CHECK-NEXT: .param .align 16 .b8 param0[16];
-; CHECK-NEXT: st.param.v4.f32 [param0+0], {0f3F800000, %f1, %f2, %f3};
+; CHECK-NEXT: st.param.v4.f32 [param0], {0f3F800000, %f1, %f2, %f3};
; CHECK-NEXT: call.uni
; CHECK-NEXT: call_v4_f32,
; CHECK-NEXT: (
@@ -1682,7 +1682,7 @@ define void @st_param_v4_f32_rirr(float %a, float %c, float %d) {
; CHECK-NEXT: ld.param.f32 %f3, [st_param_v4_f32_rirr_param_2];
; CHECK-NEXT: { // callseq 70, 0
; CHECK-NEXT: .param .align 16 .b8 param0[16];
-; CHECK-NEXT: st.param.v4.f32 [param0+0], {%f1, 0f40000000, %f2, %f3};
+; CHECK-NEXT: st.param.v4.f32 [param0], {%f1, 0f40000000, %f2, %f3};
; CHECK-NEXT: call.uni
; CHECK-NEXT: call_v4_f32,
; CHECK-NEXT: (
@@ -1708,7 +1708,7 @@ define void @st_param_v4_f32_rrir(float %a, float %b, float %d) {
; CHECK-NEXT: ld.param.f32 %f3, [st_param_v4_f32_rrir_param_2];
; CHECK-NEXT: { // callseq 71, 0
; CHECK-NEXT: .param .align 16 .b8 param0[16];
-; CHECK-NEXT: st.param.v4.f32 [param0+0], {%f1, %f2, 0f40400000, %f3};
+; CHECK-NEXT: st.param.v4.f32 [param0], {%f1, %f2, 0f40400000, %f3};
; CHECK-NEXT: call.uni
; CHECK-NEXT: call_v4_f32,
; CHECK-NEXT: (
@@ -1734,7 +1734,7 @@ define void @st_param_v4_f32_rrri(float %a, float %b, float %c) {
; CHECK-NEXT: ld.param.f32 %f3, [st_param_v4_f32_rrri_param_2];
; CHECK-NEXT: { // callseq 72, 0
; CHECK-NEXT: .param .align 16 .b8 param0[16];
-; CHECK-NEXT: st.param.v4.f32 [param0+0], {%f1, %f2, %f3, 0f40800000};
+; CHECK-NEXT: st.param.v4.f32 [param0], {%f1, %f2, %f3, 0f40800000};
; CHECK-NEXT: call.uni
; CHECK-NEXT: call_v4_f32,
; CHECK-NEXT: (
@@ -1759,7 +1759,7 @@ define void @st_param_v4_f32_iirr(float %c, float %d) {
; CHECK-NEXT: ld.param.f32 %f2, [st_param_v4_f32_iirr_param_1];
; CHECK-NEXT: { // callseq 73, 0
; CHECK-NEXT: .param .align 16 .b8 param0[16];
-; CHECK-NEXT: st.param.v4.f32 [param0+0], {0f3F800000, 0f40000000, %f1, %f2};
+; CHECK-NEXT: st.param.v4.f32 [param0], {0f3F800000, 0f40000000, %f1, %f2};
; CHECK-NEXT: call.uni
; CHECK-NEXT: call_v4_f32,
; CHECK-NEXT: (
@@ -1784,7 +1784,7 @@ define void @st_param_v4_f32_irir(float %b, float %d) {
; CHECK-NEXT: ld.param.f32 %f2, [st_param_v4_f32_irir_param_1];
; CHECK-NEXT: { // callseq 74, 0
; CHECK-NEXT: .param .align 16 .b8 param0[16];
-; CHECK-NEXT: st.param.v4.f32 [param0+0], {0f3F800000, %f1, 0f40400000, %f2};
+; CHECK-NEXT: st.param.v4.f32 [param0], {0f3F800000, %f1, 0f40400000, %f2};
; CHECK-NEXT: call.uni
; CHECK-NEXT: call_v4_f32,
; CHECK-NEXT: (
@@ -1809,7 +1809,7 @@ define void @st_param_v4_f32_irri(float %b, float %c) {
; CHECK-NEXT: ld.param.f32 %f2, [st_param_v4_f32_irri_param_1];
; CHECK-NEXT: { // callseq 75, 0
; CHECK-NEXT: .param .align 16 .b8 param0[16];
-; CHECK-NEXT: st.param.v4.f32 [param0+0], {0f3F800000, %f1, %f2, 0f40800000};
+; CHECK-NEXT: st.param.v4.f32 [param0], {0f3F800000, %f1, %f2, 0f40800000};
; CHECK-NEXT: call.uni
; CHECK-NEXT: call_v4_f32,
; CHECK-NEXT: (
@@ -1834,7 +1834,7 @@ define void @st_param_v4_f32_riir(float %a, float %d) {
; CHECK-NEXT: ld.param.f32 %f2, [st_param_v4_f32_riir_param_1];
; CHECK-NEXT: { // callseq 76, 0
; CHECK-NEXT: .param .align 16 .b8 param0[16];
-; CHECK-NEXT: st.param.v4.f32 [param0+0], {%f1, 0f40000000, 0f40400000, %f2};
+; CHECK-NEXT: st.param.v4.f32 [param0], {%f1, 0f40000000, 0f40400000, %f2};
; CHECK-NEXT: call.uni
; CHECK-NEXT: call_v4_f32,
; CHECK-NEXT: (
@@ -1859,7 +1859,7 @@ define void @st_param_v4_f32_riri(float %a, float %c) {
; CHECK-NEXT: ld.param.f32 %f2, [st_param_v4_f32_riri_param_1];
; CHECK-NEXT: { // callseq 77, 0
; CHECK-NEXT: .param .align 16 .b8 param0[16];
-; CHECK-NEXT: st.param.v4.f32 [param0+0], {%f1, 0f40000000, %f2, 0f40800000};
+; CHECK-NEXT: st.param.v4.f32 [param0], {%f1, 0f40000000, %f2, 0f40800000};
; CHECK-NEXT: call.uni
; CHECK-NEXT: call_v4_f32,
; CHECK-NEXT: (
@@ -1884,7 +1884,7 @@ define void @st_param_v4_f32_rrii(float %a, float %b) {
; CHECK-NEXT: ld.param.f32 %f2, [st_param_v4_f32_rrii_param_1];
; CHECK-NEXT: { // callseq 78, 0
; CHECK-NEXT: .param .align 16 .b8 param0[16];
-; CHECK-NEXT: st.param.v4.f32 [param0+0], {%f1, %f2, 0f40400000, 0f40800000};
+; CHECK-NEXT: st.param.v4.f32 [param0], {%f1, %f2, 0f40400000, 0f40800000};
; CHECK-NEXT: call.uni
; CHECK-NEXT: call_v4_f32,
; CHECK-NEXT: (
@@ -1908,7 +1908,7 @@ define void @st_param_v4_f32_iiir(float %d) {
; CHECK-NEXT: ld.param.f32 %f1, [st_param_v4_f32_iiir_param_0];
; CHECK-NEXT: { // callseq 79, 0
; CHECK-NEXT: .param .align 16 .b8 param0[16];
-; CHECK-NEXT: st.param.v4.f32 [param0+0], {0f3F800000, 0f40000000, 0f40400000, %f1};
+; CHECK-NEXT: st.param.v4.f32 [param0], {0f3F800000, 0f40000000, 0f40400000, %f1};
; CHECK-NEXT: call.uni
; CHECK-NEXT: call_v4_f32,
; CHECK-NEXT: (
@@ -1932,7 +1932,7 @@ define void @st_param_v4_f32_iiri(float %c) {
; CHECK-NEXT: ld.param.f32 %f1, [st_param_v4_f32_iiri_param_0];
; CHECK-NEXT: { // callseq 80, 0
; CHECK-NEXT: .param .align 16 .b8 param0[16];
-; CHECK-NEXT: st.param.v4.f32 [param0+0], {0f3F800000, 0f40000000, %f1, 0f40800000};
+; CHECK-NEXT: st.param.v4.f32 [param0], {0f3F800000, 0f40000000, %f1, 0f40800000};
; CHECK-NEXT: call.uni
; CHECK-NEXT: call_v4_f32,
; CHECK-NEXT: (
@@ -1956,7 +1956,7 @@ define void @st_param_v4_f32_irii(float %b) {
; CHECK-NEXT: ld.param.f32 %f1, [st_param_v4_f32_irii_param_0];
; CHECK-NEXT: { // callseq 81, 0
; CHECK-NEXT: .param .align 16 .b8 param0[16];
-; CHECK-NEXT: st.param.v4.f32 [param0+0], {0f3F800000, %f1, 0f40400000, 0f40800000};
+; CHECK-NEXT: st.param.v4.f32 [param0], {0f3F800000, %f1, 0f40400000, 0f40800000};
; CHECK-NEXT: call.uni
; CHECK-NEXT: call_v4_f32,
; CHECK-NEXT: (
@@ -1980,7 +1980,7 @@ define void @st_param_v4_f32_riii(float %a) {
; CHECK-NEXT: ld.param.f32 %f1, [st_param_v4_f32_riii_param_0];
; CHECK-NEXT: { // callseq 82, 0
; CHECK-NEXT: .param .align 16 .b8 param0[16];
-; CHECK-NEXT: st.param.v4.f32 [param0+0], {%f1, 0f40000000, 0f40400000, 0f40800000};
+; CHECK-NEXT: st.param.v4.f32 [param0], {%f1, 0f40000000, 0f40400000, 0f40800000};
; CHECK-NEXT: call.uni
; CHECK-NEXT: call_v4_f32,
; CHECK-NEXT: (
diff --git a/llvm/test/CodeGen/NVPTX/store-undef.ll b/llvm/test/CodeGen/NVPTX/store-undef.ll
index 109d28a3e3c597..1b991ab82db8f4 100644
--- a/llvm/test/CodeGen/NVPTX/store-undef.ll
+++ b/llvm/test/CodeGen/NVPTX/store-undef.ll
@@ -38,7 +38,7 @@ define void @test_store_param_def(i64 %param0, i32 %param1) {
; CHECK-NEXT: ld.param.u32 %r1, [test_store_param_def_param_1];
; CHECK-NEXT: { // callseq 1, 0
; CHECK-NEXT: .param .align 16 .b8 param0[32];
-; CHECK-NEXT: st.param.b64 [param0+0], %rd1;
+; CHECK-NEXT: st.param.b64 [param0], %rd1;
; CHECK-NEXT: st.param.v2.b32 [param0+8], {%r2, %r1};
; CHECK-NEXT: st.param.v4.b32 [param0+16], {%r3, %r1, %r4, %r5};
; CHECK-NEXT: call.uni
diff --git a/llvm/test/CodeGen/NVPTX/tex-read-cuda.ll b/llvm/test/CodeGen/NVPTX/tex-read-cuda.ll
index 107671d1d1f399..473bc28ed4ee7c 100644
--- a/llvm/test/CodeGen/NVPTX/tex-read-cuda.ll
+++ b/llvm/test/CodeGen/NVPTX/tex-read-cuda.ll
@@ -58,8 +58,8 @@ define void @baz(ptr %red, i32 %idx) {
; SM20: texfunc,
; SM30: texfunc,
%texcall = tail call float @texfunc(i64 %texHandle)
-; SM20: ld.param.f32 %f[[TEXCALL:[0-9]+]], [[[RETVAL]]+0]
-; SM30: ld.param.f32 %f[[TEXCALL:[0-9]+]], [[[RETVAL]]+0]
+; SM20: ld.param.f32 %f[[TEXCALL:[0-9]+]], [[[RETVAL]]]
+; SM30: ld.param.f32 %f[[TEXCALL:[0-9]+]], [[[RETVAL]]]
; SM20: add.rn.f32 %f[[RET2:[0-9]+]], %f[[RED]], %f[[TEXCALL]]
; SM30: add.rn.f32 %f[[RET2:[0-9]+]], %f[[RED]], %f[[TEXCALL]]
%ret2 = fadd float %ret, %texcall
diff --git a/llvm/test/CodeGen/NVPTX/tid-range.ll b/llvm/test/CodeGen/NVPTX/tid-range.ll
index c4dd33960d44ac..4af4cc3845353f 100644
--- a/llvm/test/CodeGen/NVPTX/tid-range.ll
+++ b/llvm/test/CodeGen/NVPTX/tid-range.ll
@@ -13,7 +13,7 @@ entry:
; CHECK-LABEL: test1(
; CHECK: setp.eq.s32 %p1, %r1, 1;
; CHECK: selp.u32 %[[R:.+]], 1, 0, %p1;
-; CHECK: st.param.b32 [func_retval0+0], %[[R]];
+; CHECK: st.param.b32 [func_retval0], %[[R]];
declare i32 @llvm.nvvm.read.ptx.sreg.tid.x()
diff --git a/llvm/test/CodeGen/NVPTX/unaligned-param-load-store.ll b/llvm/test/CodeGen/NVPTX/unaligned-param-load-store.ll
index 40a3e9e945a23e..7dd751cab630b0 100644
--- a/llvm/test/CodeGen/NVPTX/unaligned-param-load-store.ll
+++ b/llvm/test/CodeGen/NVPTX/unaligned-param-load-store.ll
@@ -29,7 +29,7 @@
; CHECK-DAG: or.b16 [[P2_1_or:%rs[0-9]+]], [[P2_1_shl]], [[P2_0]];
; CHECK: { // callseq
; CHECK: .param .align 8 .b8 param0[16];
-; CHECK-DAG: st.param.b16 [param0+0], [[P0]];
+; CHECK-DAG: st.param.b16 [param0], [[P0]];
; CHECK-DAG: st.param.b8 [param0+3], [[P2_1_or]];
; CHECK-DAG: st.param.b8 [param0+4], [[P2_1]];
; CHECK: .param .align 8 .b8 retval0[16];
@@ -38,11 +38,11 @@
; CHECK-NEXT: (
; CHECK-NEXT: param0
; CHECK-NEXT: );
-; CHECK-DAG: ld.param.b16 [[R0:%rs[0-9]+]], [retval0+0];
+; CHECK-DAG: ld.param.b16 [[R0:%rs[0-9]+]], [retval0];
; CHECK-DAG: ld.param.b8 [[R2_0:%rs[0-9]+]], [retval0+3];
; CHECK-DAG: ld.param.b8 [[R2_1:%rs[0-9]+]], [retval0+4];
; CHECK: } // callseq
-; CHECK-DAG: st.param.b16 [func_retval0+0], [[R0]];
+; CHECK-DAG: st.param.b16 [func_retval0], [[R0]];
; CHECK-DAG: shl.b16 [[R2_1_shl:%rs[0-9]+]], [[R2_1]], 8;
; CHECK-DAG: and.b16 [[R2_0_and:%rs[0-9]+]], [[R2_0]], 255;
; CHECK-DAG: or.b16 [[R2:%rs[0-9]+]], [[R2_0_and]], [[R2_1_shl]];
@@ -74,7 +74,7 @@ define %s_i8i16p @test_s_i8i16p(%s_i8i16p %a) {
; CHECK-DAG: shr.u32 [[P2_2_shr:%r[0-9]+]], [[P2_or_1]], 16;
; CHECK: { // callseq
; CHECK-DAG: .param .align 8 .b8 param0[24];
-; CHECK-DAG: st.param.b32 [param0+0], [[P0]];
+; CHECK-DAG: st.param.b32 [param0], [[P0]];
; CHECK-DAG: st.param.b8 [param0+5], [[P2]];
; CHECK-DAG: st.param.b8 [param0+6], [[P2_1_shr]];
; CHECK-DAG: st.param.b8 [param0+7], [[P2_2_shr]];
@@ -85,13 +85,13 @@ define %s_i8i16p @test_s_i8i16p(%s_i8i16p %a) {
; CHECK-NEXT: (
; CHECK-NEXT: param0
; CHECK-NEXT: );
-; CHECK-DAG: ld.param.b32 [[R0:%r[0-9]+]], [retval0+0];
+; CHECK-DAG: ld.param.b32 [[R0:%r[0-9]+]], [retval0];
; CHECK-DAG: ld.param.b8 [[R2_0:%rs[0-9]+]], [retval0+5];
; CHECK-DAG: ld.param.b8 [[R2_1:%rs[0-9]+]], [retval0+6];
; CHECK-DAG: ld.param.b8 [[R2_2:%rs[0-9]+]], [retval0+7];
; CHECK-DAG: ld.param.b8 [[R2_3:%rs[0-9]+]], [retval0+8];
; CHECK: } // callseq
-; CHECK-DAG: st.param.b32 [func_retval0+0], [[R0]];
+; CHECK-DAG: st.param.b32 [func_retval0], [[R0]];
; CHECK-DAG: st.param.b8 [func_retval0+5],
; CHECK-DAG: st.param.b8 [func_retval0+6],
; CHECK-DAG: st.param.b8 [func_retval0+7],
@@ -137,7 +137,7 @@ define %s_i8i32p @test_s_i8i32p(%s_i8i32p %a) {
; CHECK-DAG: bfe.u64 [[P2_bfe_6:%rd[0-9]+]], [[P2_or_5]], 24, 8;
; CHECK: { // callseq
; CHECK: .param .align 8 .b8 param0[32];
-; CHECK-DAG: st.param.b64 [param0+0], [[P0]];
+; CHECK-DAG: st.param.b64 [param0], [[P0]];
; CHECK-DAG: st.param.b8 [param0+9], [[P2]];
; CHECK-DAG: st.param.b8 [param0+10], [[P2_shr_1]];
; CHECK-DAG: st.param.b8 [param0+11], [[P2_shr_2]];
@@ -152,7 +152,7 @@ define %s_i8i32p @test_s_i8i32p(%s_i8i32p %a) {
; CHECK-NEXT: (
; CHECK-NEXT: param0
; CHECK-NEXT: );
-; CHECK-DAG: ld.param.b64 [[R0:%rd[0-9]+]], [retval0+0];
+; CHECK-DAG: ld.param.b64 [[R0:%rd[0-9]+]], [retval0];
; CHECK-DAG: ld.param.b8 [[R2_0:%rs[0-9]+]], [retval0+9];
; CHECK-DAG: ld.param.b8 [[R2_1:%rs[0-9]+]], [retval0+10];
; CHECK-DAG: ld.param.b8 [[R2_2:%rs[0-9]+]], [retval0+11];
@@ -162,7 +162,7 @@ define %s_i8i32p @test_s_i8i32p(%s_i8i32p %a) {
; CHECK-DAG: ld.param.b8 [[R2_6:%rs[0-9]+]], [retval0+15];
; CHECK-DAG: ld.param.b8 [[R2_7:%rs[0-9]+]], [retval0+16];
; CHECK: } // callseq
-; CHECK-DAG: st.param.b64 [func_retval0+0], [[R0]];
+; CHECK-DAG: st.param.b64 [func_retval0], [[R0]];
; CHECK-DAG: st.param.b8 [func_retval0+9],
; CHECK-DAG: st.param.b8 [func_retval0+10],
; CHECK-DAG: st.param.b8 [func_retval0+11],
@@ -188,7 +188,7 @@ define %s_i8i64p @test_s_i8i64p(%s_i8i64p %a) {
; CHECK-DAG: or.b16 [[P2_1_or:%rs[0-9]+]], [[P2_1_shl]], [[P2_0]];
; CHECK: { // callseq
; CHECK: .param .align 8 .b8 param0[16];
-; CHECK-DAG: st.param.b16 [param0+0], [[P0]];
+; CHECK-DAG: st.param.b16 [param0], [[P0]];
; CHECK-DAG: st.param.b8 [param0+3], [[P2_1_or]];
; CHECK-DAG: st.param.b8 [param0+4], [[P2_1]];
; CHECK: .param .align 8 .b8 retval0[16];
@@ -197,11 +197,11 @@ define %s_i8i64p @test_s_i8i64p(%s_i8i64p %a) {
; CHECK-NEXT: (
; CHECK-NEXT: param0
; CHECK-NEXT: );
-; CHECK-DAG: ld.param.b16 [[R0:%rs[0-9]+]], [retval0+0];
+; CHECK-DAG: ld.param.b16 [[R0:%rs[0-9]+]], [retval0];
; CHECK-DAG: ld.param.b8 [[R2I_0:%rs[0-9]+]], [retval0+3];
; CHECK-DAG: ld.param.b8 [[R2I_1:%rs[0-9]+]], [retval0+4];
; CHECK: } // callseq
-; CHECK-DAG: st.param.b16 [func_retval0+0], [[R0]];
+; CHECK-DAG: st.param.b16 [func_retval0], [[R0]];
; CHECK-DAG: shl.b16 [[R2I_1_shl:%rs[0-9]+]], [[R2I_1]], 8;
; CHECK-DAG: and.b16 [[R2I_0_and:%rs[0-9]+]], [[R2I_0]], 255;
; CHECK-DAG: or.b16 [[R2I:%rs[0-9]+]], [[R2I_0_and]], [[R2I_1_shl]];
@@ -233,7 +233,7 @@ define %s_i8f16p @test_s_i8f16p(%s_i8f16p %a) {
; CHECK-DAG: shr.u32 [[P2_2_shr:%r[0-9]+]], [[P2_or_1]], 16;
; CHECK: { // callseq
; CHECK-DAG: .param .align 8 .b8 param0[24];
-; CHECK-DAG: st.param.b32 [param0+0], [[P0]];
+; CHECK-DAG: st.param.b32 [param0], [[P0]];
; CHECK-DAG: st.param.b8 [param0+5], [[P2]];
; CHECK-DAG: st.param.b8 [param0+6], [[P2_1_shr]];
; CHECK-DAG: st.param.b8 [param0+7], [[P2_2_shr]];
@@ -244,13 +244,13 @@ define %s_i8f16p @test_s_i8f16p(%s_i8f16p %a) {
; CHECK-NEXT: (
; CHECK-NEXT: param0
; CHECK-NEXT: );
-; CHECK-DAG: ld.param.b32 [[R0:%r[0-9]+]], [retval0+0];
+; CHECK-DAG: ld.param.b32 [[R0:%r[0-9]+]], [retval0];
; CHECK-DAG: ld.param.b8 [[R2_0:%rs[0-9]+]], [retval0+5];
; CHECK-DAG: ld.param.b8 [[R2_1:%rs[0-9]+]], [retval0+6];
; CHECK-DAG: ld.param.b8 [[R2_2:%rs[0-9]+]], [retval0+7];
; CHECK-DAG: ld.param.b8 [[R2_3:%rs[0-9]+]], [retval0+8];
; CHECK: } // callseq
-; CHECK-DAG: st.param.b32 [func_retval0+0], [[R0]];
+; CHECK-DAG: st.param.b32 [func_retval0], [[R0]];
; CHECK-DAG: st.param.b8 [func_retval0+5],
; CHECK-DAG: st.param.b8 [func_retval0+6],
; CHECK-DAG: st.param.b8 [func_retval0+7],
@@ -280,7 +280,7 @@ define %s_i8f16x2p @test_s_i8f16x2p(%s_i8f16x2p %a) {
; CHECK-DAG: shr.u32 [[P2_2_shr:%r[0-9]+]], [[P2_or_1]], 16;
; CHECK: { // callseq
; CHECK-DAG: .param .align 8 .b8 param0[24];
-; CHECK-DAG: st.param.f32 [param0+0], [[P0]];
+; CHECK-DAG: st.param.f32 [param0], [[P0]];
; CHECK-DAG: st.param.b8 [param0+5], [[P2]];
; CHECK-DAG: st.param.b8 [param0+6], [[P2_1_shr]];
; CHECK-DAG: st.param.b8 [param0+7], [[P2_2_shr]];
@@ -291,13 +291,13 @@ define %s_i8f16x2p @test_s_i8f16x2p(%s_i8f16x2p %a) {
; CHECK-NEXT: (
; CHECK-NEXT: param0
; CHECK-NEXT: );
-; CHECK-DAG: ld.param.f32 [[R0:%f[0-9]+]], [retval0+0];
+; CHECK-DAG: ld.param.f32 [[R0:%f[0-9]+]], [retval0];
; CHECK-DAG: ld.param.b8 [[R2_0:%rs[0-9]+]], [retval0+5];
; CHECK-DAG: ld.param.b8 [[R2_1:%rs[0-9]+]], [retval0+6];
; CHECK-DAG: ld.param.b8 [[R2_2:%rs[0-9]+]], [retval0+7];
; CHECK-DAG: ld.param.b8 [[R2_3:%rs[0-9]+]], [retval0+8];
; CHECK: } // callseq
-; CHECK-DAG: st.param.f32 [func_retval0+0], [[R0]];
+; CHECK-DAG: st.param.f32 [func_retval0], [[R0]];
; CHECK-DAG: st.param.b8 [func_retval0+5],
; CHECK-DAG: st.param.b8 [func_retval0+6],
; CHECK-DAG: st.param.b8 [func_retval0+7],
@@ -343,7 +343,7 @@ define %s_i8f32p @test_s_i8f32p(%s_i8f32p %a) {
; CHECK-DAG: bfe.u64 [[P2_bfe_6:%rd[0-9]+]], [[P2_or_5]], 24, 8;
; CHECK: { // callseq
; CHECK: .param .align 8 .b8 param0[32];
-; CHECK-DAG: st.param.f64 [param0+0], [[P0]];
+; CHECK-DAG: st.param.f64 [param0], [[P0]];
; CHECK-DAG: st.param.b8 [param0+9], [[P2]];
; CHECK-DAG: st.param.b8 [param0+10], [[P2_shr_1]];
; CHECK-DAG: st.param.b8 [param0+11], [[P2_shr_2]];
@@ -358,7 +358,7 @@ define %s_i8f32p @test_s_i8f32p(%s_i8f32p %a) {
; CHECK-NEXT: (
; CHECK-NEXT: param0
; CHECK-NEXT: );
-; CHECK-DAG: ld.param.f64 [[R0:%fd[0-9]+]], [retval0+0];
+; CHECK-DAG: ld.param.f64 [[R0:%fd[0-9]+]], [retval0];
; CHECK-DAG: ld.param.b8 [[R2_0:%rs[0-9]+]], [retval0+9];
; CHECK-DAG: ld.param.b8 [[R2_1:%rs[0-9]+]], [retval0+10];
; CHECK-DAG: ld.param.b8 [[R2_2:%rs[0-9]+]], [retval0+11];
@@ -368,7 +368,7 @@ define %s_i8f32p @test_s_i8f32p(%s_i8f32p %a) {
; CHECK-DAG: ld.param.b8 [[R2_6:%rs[0-9]+]], [retval0+15];
; CHECK-DAG: ld.param.b8 [[R2_7:%rs[0-9]+]], [retval0+16];
; CHECK: } // callseq
-; CHECK-DAG: st.param.f64 [func_retval0+0], [[R0]];
+; CHECK-DAG: st.param.f64 [func_retval0], [[R0]];
; CHECK-DAG: st.param.b8 [func_retval0+9],
; CHECK-DAG: st.param.b8 [func_retval0+10],
; CHECK-DAG: st.param.b8 [func_retval0+11],
diff --git a/llvm/test/CodeGen/NVPTX/unfold-masked-merge-vector-variablemask.ll b/llvm/test/CodeGen/NVPTX/unfold-masked-merge-vector-variablemask.ll
index 8633b09af04873..044d21643ed9d0 100644
--- a/llvm/test/CodeGen/NVPTX/unfold-masked-merge-vector-variablemask.ll
+++ b/llvm/test/CodeGen/NVPTX/unfold-masked-merge-vector-variablemask.ll
@@ -18,7 +18,7 @@ define <1 x i8> @out_v1i8(<1 x i8> %x, <1 x i8> %y, <1 x i8> %mask) nounwind {
; CHECK-NEXT: not.b16 %rs5, %rs2;
; CHECK-NEXT: and.b16 %rs6, %rs4, %rs5;
; CHECK-NEXT: or.b16 %rs7, %rs3, %rs6;
-; CHECK-NEXT: st.param.b8 [func_retval0+0], %rs7;
+; CHECK-NEXT: st.param.b8 [func_retval0], %rs7;
; CHECK-NEXT: ret;
%mx = and <1 x i8> %x, %mask
%notmask = xor <1 x i8> %mask,
@@ -44,7 +44,7 @@ define <1 x i16> @out_v1i16(<1 x i16> %x, <1 x i16> %y, <1 x i16> %mask) nounwin
; CHECK-NEXT: not.b16 %rs5, %rs2;
; CHECK-NEXT: and.b16 %rs6, %rs4, %rs5;
; CHECK-NEXT: or.b16 %rs7, %rs3, %rs6;
-; CHECK-NEXT: st.param.b16 [func_retval0+0], %rs7;
+; CHECK-NEXT: st.param.b16 [func_retval0], %rs7;
; CHECK-NEXT: ret;
%mx = and <1 x i16> %x, %mask
%notmask = xor <1 x i16> %mask,
@@ -70,7 +70,7 @@ define <4 x i8> @out_v4i8(<4 x i8> %x, <4 x i8> %y, <4 x i8> %mask) nounwind {
; CHECK-NEXT: xor.b32 %r7, %r1, -1;
; CHECK-NEXT: and.b32 %r8, %r3, %r7;
; CHECK-NEXT: or.b32 %r9, %r5, %r8;
-; CHECK-NEXT: st.param.b32 [func_retval0+0], %r9;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r9;
; CHECK-NEXT: ret;
%mx = and <4 x i8> %x, %mask
%notmask = xor <4 x i8> %mask,
@@ -92,7 +92,7 @@ define <4 x i8> @out_v4i8_undef(<4 x i8> %x, <4 x i8> %y, <4 x i8> %mask) nounwi
; CHECK-NEXT: xor.b32 %r7, %r1, -16711681;
; CHECK-NEXT: and.b32 %r8, %r3, %r7;
; CHECK-NEXT: or.b32 %r9, %r5, %r8;
-; CHECK-NEXT: st.param.b32 [func_retval0+0], %r9;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r9;
; CHECK-NEXT: ret;
%mx = and <4 x i8> %x, %mask
%notmask = xor <4 x i8> %mask,
@@ -114,7 +114,7 @@ define <2 x i16> @out_v2i16(<2 x i16> %x, <2 x i16> %y, <2 x i16> %mask) nounwin
; CHECK-NEXT: xor.b32 %r7, %r1, -1;
; CHECK-NEXT: and.b32 %r8, %r3, %r7;
; CHECK-NEXT: or.b32 %r9, %r5, %r8;
-; CHECK-NEXT: st.param.b32 [func_retval0+0], %r9;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r9;
; CHECK-NEXT: ret;
%mx = and <2 x i16> %x, %mask
%notmask = xor <2 x i16> %mask,
@@ -136,7 +136,7 @@ define <1 x i32> @out_v1i32(<1 x i32> %x, <1 x i32> %y, <1 x i32> %mask) nounwin
; CHECK-NEXT: not.b32 %r5, %r2;
; CHECK-NEXT: and.b32 %r6, %r4, %r5;
; CHECK-NEXT: or.b32 %r7, %r3, %r6;
-; CHECK-NEXT: st.param.b32 [func_retval0+0], %r7;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r7;
; CHECK-NEXT: ret;
%mx = and <1 x i32> %x, %mask
%notmask = xor <1 x i32> %mask,
@@ -166,7 +166,7 @@ define <8 x i8> @out_v8i8(<8 x i8> %x, <8 x i8> %y, <8 x i8> %mask) nounwind {
; CHECK-NEXT: and.b32 %r18, %r2, %r15;
; CHECK-NEXT: or.b32 %r19, %r13, %r18;
; CHECK-NEXT: or.b32 %r20, %r11, %r17;
-; CHECK-NEXT: st.param.v2.b32 [func_retval0+0], {%r20, %r19};
+; CHECK-NEXT: st.param.v2.b32 [func_retval0], {%r20, %r19};
; CHECK-NEXT: ret;
%mx = and <8 x i8> %x, %mask
%notmask = xor <8 x i8> %mask,
@@ -192,7 +192,7 @@ define <4 x i16> @out_v4i16(<4 x i16> %x, <4 x i16> %y, <4 x i16> %mask) nounwin
; CHECK-NEXT: and.b32 %r18, %r2, %r15;
; CHECK-NEXT: or.b32 %r19, %r13, %r18;
; CHECK-NEXT: or.b32 %r20, %r11, %r17;
-; CHECK-NEXT: st.param.v2.b32 [func_retval0+0], {%r20, %r19};
+; CHECK-NEXT: st.param.v2.b32 [func_retval0], {%r20, %r19};
; CHECK-NEXT: ret;
%mx = and <4 x i16> %x, %mask
%notmask = xor <4 x i16> %mask,
@@ -218,7 +218,7 @@ define <4 x i16> @out_v4i16_undef(<4 x i16> %x, <4 x i16> %y, <4 x i16> %mask) n
; CHECK-NEXT: and.b32 %r18, %r2, %r15;
; CHECK-NEXT: or.b32 %r19, %r13, %r18;
; CHECK-NEXT: or.b32 %r20, %r11, %r17;
-; CHECK-NEXT: st.param.v2.b32 [func_retval0+0], {%r20, %r19};
+; CHECK-NEXT: st.param.v2.b32 [func_retval0], {%r20, %r19};
; CHECK-NEXT: ret;
%mx = and <4 x i16> %x, %mask
%notmask = xor <4 x i16> %mask,
@@ -244,7 +244,7 @@ define <2 x i32> @out_v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> %mask) nounwin
; CHECK-NEXT: and.b32 %r12, %r8, %r9;
; CHECK-NEXT: or.b32 %r13, %r6, %r12;
; CHECK-NEXT: or.b32 %r14, %r5, %r11;
-; CHECK-NEXT: st.param.v2.b32 [func_retval0+0], {%r14, %r13};
+; CHECK-NEXT: st.param.v2.b32 [func_retval0], {%r14, %r13};
; CHECK-NEXT: ret;
%mx = and <2 x i32> %x, %mask
%notmask = xor <2 x i32> %mask,
@@ -266,7 +266,7 @@ define <1 x i64> @out_v1i64(<1 x i64> %x, <1 x i64> %y, <1 x i64> %mask) nounwin
; CHECK-NEXT: not.b64 %rd5, %rd2;
; CHECK-NEXT: and.b64 %rd6, %rd4, %rd5;
; CHECK-NEXT: or.b64 %rd7, %rd3, %rd6;
-; CHECK-NEXT: st.param.b64 [func_retval0+0], %rd7;
+; CHECK-NEXT: st.param.b64 [func_retval0], %rd7;
; CHECK-NEXT: ret;
%mx = and <1 x i64> %x, %mask
%notmask = xor <1 x i64> %mask,
@@ -304,7 +304,7 @@ define <16 x i8> @out_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> %mask) nounwin
; CHECK-NEXT: or.b32 %r38, %r25, %r35;
; CHECK-NEXT: or.b32 %r39, %r23, %r34;
; CHECK-NEXT: or.b32 %r40, %r21, %r33;
-; CHECK-NEXT: st.param.v4.b32 [func_retval0+0], {%r40, %r39, %r38, %r37};
+; CHECK-NEXT: st.param.v4.b32 [func_retval0], {%r40, %r39, %r38, %r37};
; CHECK-NEXT: ret;
%mx = and <16 x i8> %x, %mask
%notmask = xor <16 x i8> %mask,
@@ -338,7 +338,7 @@ define <8 x i16> @out_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> %mask) nounwin
; CHECK-NEXT: or.b32 %r38, %r25, %r35;
; CHECK-NEXT: or.b32 %r39, %r23, %r34;
; CHECK-NEXT: or.b32 %r40, %r21, %r33;
-; CHECK-NEXT: st.param.v4.b32 [func_retval0+0], {%r40, %r39, %r38, %r37};
+; CHECK-NEXT: st.param.v4.b32 [func_retval0], {%r40, %r39, %r38, %r37};
; CHECK-NEXT: ret;
%mx = and <8 x i16> %x, %mask
%notmask = xor <8 x i16> %mask,
@@ -372,7 +372,7 @@ define <4 x i32> @out_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> %mask) nounwin
; CHECK-NEXT: or.b32 %r26, %r11, %r23;
; CHECK-NEXT: or.b32 %r27, %r10, %r22;
; CHECK-NEXT: or.b32 %r28, %r9, %r21;
-; CHECK-NEXT: st.param.v4.b32 [func_retval0+0], {%r28, %r27, %r26, %r25};
+; CHECK-NEXT: st.param.v4.b32 [func_retval0], {%r28, %r27, %r26, %r25};
; CHECK-NEXT: ret;
%mx = and <4 x i32> %x, %mask
%notmask = xor <4 x i32> %mask,
@@ -403,7 +403,7 @@ define <4 x i32> @out_v4i32_undef(<4 x i32> %x, <4 x i32> %y, <4 x i32> %mask) n
; CHECK-NEXT: or.b32 %r23, %r12, %r22;
; CHECK-NEXT: or.b32 %r24, %r11, %r21;
; CHECK-NEXT: or.b32 %r25, %r10, %r20;
-; CHECK-NEXT: st.param.v4.b32 [func_retval0+0], {%r25, %r24, %r9, %r23};
+; CHECK-NEXT: st.param.v4.b32 [func_retval0], {%r25, %r24, %r9, %r23};
; CHECK-NEXT: ret;
%mx = and <4 x i32> %x, %mask
%notmask = xor <4 x i32> %mask,
@@ -429,7 +429,7 @@ define <2 x i64> @out_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> %mask) nounwin
; CHECK-NEXT: and.b64 %rd12, %rd8, %rd9;
; CHECK-NEXT: or.b64 %rd13, %rd6, %rd12;
; CHECK-NEXT: or.b64 %rd14, %rd5, %rd11;
-; CHECK-NEXT: st.param.v2.b64 [func_retval0+0], {%rd14, %rd13};
+; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd14, %rd13};
; CHECK-NEXT: ret;
%mx = and <2 x i64> %x, %mask
%notmask = xor <2 x i64> %mask,
@@ -458,7 +458,7 @@ define <1 x i8> @in_v1i8(<1 x i8> %x, <1 x i8> %y, <1 x i8> %mask) nounwind {
; CHECK-NEXT: ld.param.u8 %rs4, [in_v1i8_param_2];
; CHECK-NEXT: and.b16 %rs5, %rs3, %rs4;
; CHECK-NEXT: xor.b16 %rs6, %rs5, %rs2;
-; CHECK-NEXT: st.param.b8 [func_retval0+0], %rs6;
+; CHECK-NEXT: st.param.b8 [func_retval0], %rs6;
; CHECK-NEXT: ret;
%n0 = xor <1 x i8> %x, %y
%n1 = and <1 x i8> %n0, %mask
@@ -482,7 +482,7 @@ define <1 x i16> @in_v1i16(<1 x i16> %x, <1 x i16> %y, <1 x i16> %mask) nounwind
; CHECK-NEXT: ld.param.u16 %rs4, [in_v1i16_param_2];
; CHECK-NEXT: and.b16 %rs5, %rs3, %rs4;
; CHECK-NEXT: xor.b16 %rs6, %rs5, %rs2;
-; CHECK-NEXT: st.param.b16 [func_retval0+0], %rs6;
+; CHECK-NEXT: st.param.b16 [func_retval0], %rs6;
; CHECK-NEXT: ret;
%n0 = xor <1 x i16> %x, %y
%n1 = and <1 x i16> %n0, %mask
@@ -506,7 +506,7 @@ define <4 x i8> @in_v4i8(<4 x i8> %x, <4 x i8> %y, <4 x i8> %mask) nounwind {
; CHECK-NEXT: ld.param.u32 %r4, [in_v4i8_param_2];
; CHECK-NEXT: and.b32 %r5, %r3, %r4;
; CHECK-NEXT: xor.b32 %r6, %r5, %r2;
-; CHECK-NEXT: st.param.b32 [func_retval0+0], %r6;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r6;
; CHECK-NEXT: ret;
%n0 = xor <4 x i8> %x, %y
%n1 = and <4 x i8> %n0, %mask
@@ -526,7 +526,7 @@ define <2 x i16> @in_v2i16(<2 x i16> %x, <2 x i16> %y, <2 x i16> %mask) nounwind
; CHECK-NEXT: ld.param.u32 %r4, [in_v2i16_param_2];
; CHECK-NEXT: and.b32 %r5, %r3, %r4;
; CHECK-NEXT: xor.b32 %r6, %r5, %r2;
-; CHECK-NEXT: st.param.b32 [func_retval0+0], %r6;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r6;
; CHECK-NEXT: ret;
%n0 = xor <2 x i16> %x, %y
%n1 = and <2 x i16> %n0, %mask
@@ -546,7 +546,7 @@ define <1 x i32> @in_v1i32(<1 x i32> %x, <1 x i32> %y, <1 x i32> %mask) nounwind
; CHECK-NEXT: ld.param.u32 %r4, [in_v1i32_param_2];
; CHECK-NEXT: and.b32 %r5, %r3, %r4;
; CHECK-NEXT: xor.b32 %r6, %r5, %r2;
-; CHECK-NEXT: st.param.b32 [func_retval0+0], %r6;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r6;
; CHECK-NEXT: ret;
%n0 = xor <1 x i32> %x, %y
%n1 = and <1 x i32> %n0, %mask
@@ -573,7 +573,7 @@ define <8 x i8> @in_v8i8(<8 x i8> %x, <8 x i8> %y, <8 x i8> %mask) nounwind {
; CHECK-NEXT: xor.b32 %r11, %r1, %r3;
; CHECK-NEXT: and.b32 %r12, %r11, %r5;
; CHECK-NEXT: xor.b32 %r13, %r12, %r3;
-; CHECK-NEXT: st.param.v2.b32 [func_retval0+0], {%r13, %r9};
+; CHECK-NEXT: st.param.v2.b32 [func_retval0], {%r13, %r9};
; CHECK-NEXT: ret;
%n0 = xor <8 x i8> %x, %y
%n1 = and <8 x i8> %n0, %mask
@@ -596,7 +596,7 @@ define <4 x i16> @in_v4i16(<4 x i16> %x, <4 x i16> %y, <4 x i16> %mask) nounwind
; CHECK-NEXT: xor.b32 %r11, %r1, %r3;
; CHECK-NEXT: and.b32 %r12, %r11, %r5;
; CHECK-NEXT: xor.b32 %r13, %r12, %r3;
-; CHECK-NEXT: st.param.v2.b32 [func_retval0+0], {%r13, %r9};
+; CHECK-NEXT: st.param.v2.b32 [func_retval0], {%r13, %r9};
; CHECK-NEXT: ret;
%n0 = xor <4 x i16> %x, %y
%n1 = and <4 x i16> %n0, %mask
@@ -619,7 +619,7 @@ define <2 x i32> @in_v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> %mask) nounwind
; CHECK-NEXT: and.b32 %r10, %r5, %r8;
; CHECK-NEXT: xor.b32 %r11, %r10, %r4;
; CHECK-NEXT: xor.b32 %r12, %r9, %r3;
-; CHECK-NEXT: st.param.v2.b32 [func_retval0+0], {%r12, %r11};
+; CHECK-NEXT: st.param.v2.b32 [func_retval0], {%r12, %r11};
; CHECK-NEXT: ret;
%n0 = xor <2 x i32> %x, %y
%n1 = and <2 x i32> %n0, %mask
@@ -639,7 +639,7 @@ define <1 x i64> @in_v1i64(<1 x i64> %x, <1 x i64> %y, <1 x i64> %mask) nounwind
; CHECK-NEXT: ld.param.u64 %rd4, [in_v1i64_param_2];
; CHECK-NEXT: and.b64 %rd5, %rd3, %rd4;
; CHECK-NEXT: xor.b64 %rd6, %rd5, %rd2;
-; CHECK-NEXT: st.param.b64 [func_retval0+0], %rd6;
+; CHECK-NEXT: st.param.b64 [func_retval0], %rd6;
; CHECK-NEXT: ret;
%n0 = xor <1 x i64> %x, %y
%n1 = and <1 x i64> %n0, %mask
@@ -672,7 +672,7 @@ define <16 x i8> @in_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> %mask) nounwind
; CHECK-NEXT: xor.b32 %r23, %r19, %r7;
; CHECK-NEXT: xor.b32 %r25, %r18, %r6;
; CHECK-NEXT: xor.b32 %r27, %r17, %r5;
-; CHECK-NEXT: st.param.v4.b32 [func_retval0+0], {%r27, %r25, %r23, %r21};
+; CHECK-NEXT: st.param.v4.b32 [func_retval0], {%r27, %r25, %r23, %r21};
; CHECK-NEXT: ret;
%n0 = xor <16 x i8> %x, %y
%n1 = and <16 x i8> %n0, %mask
@@ -701,7 +701,7 @@ define <8 x i16> @in_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> %mask) nounwind
; CHECK-NEXT: xor.b32 %r23, %r19, %r7;
; CHECK-NEXT: xor.b32 %r25, %r18, %r6;
; CHECK-NEXT: xor.b32 %r27, %r17, %r5;
-; CHECK-NEXT: st.param.v4.b32 [func_retval0+0], {%r27, %r25, %r23, %r21};
+; CHECK-NEXT: st.param.v4.b32 [func_retval0], {%r27, %r25, %r23, %r21};
; CHECK-NEXT: ret;
%n0 = xor <8 x i16> %x, %y
%n1 = and <8 x i16> %n0, %mask
@@ -730,7 +730,7 @@ define <4 x i32> @in_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> %mask) nounwind
; CHECK-NEXT: xor.b32 %r22, %r19, %r7;
; CHECK-NEXT: xor.b32 %r23, %r18, %r6;
; CHECK-NEXT: xor.b32 %r24, %r17, %r5;
-; CHECK-NEXT: st.param.v4.b32 [func_retval0+0], {%r24, %r23, %r22, %r21};
+; CHECK-NEXT: st.param.v4.b32 [func_retval0], {%r24, %r23, %r22, %r21};
; CHECK-NEXT: ret;
%n0 = xor <4 x i32> %x, %y
%n1 = and <4 x i32> %n0, %mask
@@ -753,7 +753,7 @@ define <2 x i64> @in_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> %mask) nounwind
; CHECK-NEXT: and.b64 %rd10, %rd5, %rd8;
; CHECK-NEXT: xor.b64 %rd11, %rd10, %rd4;
; CHECK-NEXT: xor.b64 %rd12, %rd9, %rd3;
-; CHECK-NEXT: st.param.v2.b64 [func_retval0+0], {%rd12, %rd11};
+; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd12, %rd11};
; CHECK-NEXT: ret;
%n0 = xor <2 x i64> %x, %y
%n1 = and <2 x i64> %n0, %mask
diff --git a/llvm/test/CodeGen/NVPTX/vaargs.ll b/llvm/test/CodeGen/NVPTX/vaargs.ll
index b8c213de04f8db..8ecdff9d65ac17 100644
--- a/llvm/test/CodeGen/NVPTX/vaargs.ll
+++ b/llvm/test/CodeGen/NVPTX/vaargs.ll
@@ -17,55 +17,55 @@ entry:
; Test va_start
; CHECK: .param .align 8 .b8 foo_vararg[]
; CHECK: mov.u[[BITS]] [[VA_PTR:%(r|rd)[0-9]+]], foo_vararg;
-; CHECK-NEXT: st.u[[BITS]] [%SP+0], [[VA_PTR]];
+; CHECK-NEXT: st.u[[BITS]] [%SP], [[VA_PTR]];
call void @llvm.va_start(ptr %al)
; Test va_copy()
-; CHECK-NEXT: ld.u[[BITS]] [[VA_PTR:%(r|rd)[0-9]+]], [%SP+0];
+; CHECK-NEXT: ld.u[[BITS]] [[VA_PTR:%(r|rd)[0-9]+]], [%SP];
; CHECK-NEXT: st.u[[BITS]] [%SP+{{[0-9]+}}], [[VA_PTR]];
call void @llvm.va_copy(ptr %al2, ptr %al)
; Test va_arg(ap, int32_t)
-; CHECK-NEXT: ld.u[[BITS]] [[VA_PTR:%(r|rd)[0-9]+]], [%SP+0];
+; CHECK-NEXT: ld.u[[BITS]] [[VA_PTR:%(r|rd)[0-9]+]], [%SP];
; CHECK-NEXT: add.s[[BITS]] [[VA_PTR_TMP:%(r|rd)[0-9]+]], [[VA_PTR]], 3;
; CHECK-NEXT: and.b[[BITS]] [[VA_PTR_ALIGN:%(r|rd)[0-9]+]], [[VA_PTR_TMP]], -4;
; CHECK-NEXT: add.s[[BITS]] [[VA_PTR_NEXT:%(r|rd)[0-9]+]], [[VA_PTR_ALIGN]], 4;
-; CHECK-NEXT: st.u[[BITS]] [%SP+0], [[VA_PTR_NEXT]];
+; CHECK-NEXT: st.u[[BITS]] [%SP], [[VA_PTR_NEXT]];
; CHECK-NEXT: ld.local.u32 %r{{[0-9]+}}, [[[VA_PTR_ALIGN]]];
%0 = va_arg ptr %al, i32
; Test va_arg(ap, int64_t)
-; CHECK-NEXT: ld.u[[BITS]] [[VA_PTR:%(r|rd)[0-9]+]], [%SP+0];
+; CHECK-NEXT: ld.u[[BITS]] [[VA_PTR:%(r|rd)[0-9]+]], [%SP];
; CHECK-NEXT: add.s[[BITS]] [[VA_PTR_TMP:%(r|rd)[0-9]+]], [[VA_PTR]], 7;
; CHECK-NEXT: and.b[[BITS]] [[VA_PTR_ALIGN:%(r|rd)[0-9]+]], [[VA_PTR_TMP]], -8;
; CHECK-NEXT: add.s[[BITS]] [[VA_PTR_NEXT:%(r|rd)[0-9]+]], [[VA_PTR_ALIGN]], 8;
-; CHECK-NEXT: st.u[[BITS]] [%SP+0], [[VA_PTR_NEXT]];
+; CHECK-NEXT: st.u[[BITS]] [%SP], [[VA_PTR_NEXT]];
; CHECK-NEXT: ld.local.u64 %rd{{[0-9]+}}, [[[VA_PTR_ALIGN]]];
%1 = va_arg ptr %al, i64
; Test va_arg(ap, double)
-; CHECK-NEXT: ld.u[[BITS]] [[VA_PTR:%(r|rd)[0-9]+]], [%SP+0];
+; CHECK-NEXT: ld.u[[BITS]] [[VA_PTR:%(r|rd)[0-9]+]], [%SP];
; CHECK-NEXT: add.s[[BITS]] [[VA_PTR_TMP:%(r|rd)[0-9]+]], [[VA_PTR]], 7;
; CHECK-NEXT: and.b[[BITS]] [[VA_PTR_ALIGN:%(r|rd)[0-9]+]], [[VA_PTR_TMP]], -8;
; CHECK-NEXT: add.s[[BITS]] [[VA_PTR_NEXT:%(r|rd)[0-9]+]], [[VA_PTR_ALIGN]], 8;
-; CHECK-NEXT: st.u[[BITS]] [%SP+0], [[VA_PTR_NEXT]];
+; CHECK-NEXT: st.u[[BITS]] [%SP], [[VA_PTR_NEXT]];
; CHECK-NEXT: ld.local.f64 %fd{{[0-9]+}}, [[[VA_PTR_ALIGN]]];
%2 = va_arg ptr %al, double
; Test va_arg(ap, ptr)
-; CHECK-NEXT: ld.u[[BITS]] [[VA_PTR:%(r|rd)[0-9]+]], [%SP+0];
+; CHECK-NEXT: ld.u[[BITS]] [[VA_PTR:%(r|rd)[0-9]+]], [%SP];
; CHECK32-NEXT: add.s32 [[VA_PTR_TMP:%r[0-9]+]], [[VA_PTR]], 3;
; CHECK64-NEXT: add.s64 [[VA_PTR_TMP:%rd[0-9]+]], [[VA_PTR]], 7;
; CHECK32-NEXT: and.b32 [[VA_PTR_ALIGN:%r[0-9]+]], [[VA_PTR_TMP]], -4;
; CHECK64-NEXT: and.b64 [[VA_PTR_ALIGN:%rd[0-9]+]], [[VA_PTR_TMP]], -8;
; CHECK32-NEXT: add.s32 [[VA_PTR_NEXT:%r[0-9]+]], [[VA_PTR_ALIGN]], 4;
; CHECK64-NEXT: add.s64 [[VA_PTR_NEXT:%rd[0-9]+]], [[VA_PTR_ALIGN]], 8;
-; CHECK-NEXT: st.u[[BITS]] [%SP+0], [[VA_PTR_NEXT]];
+; CHECK-NEXT: st.u[[BITS]] [%SP], [[VA_PTR_NEXT]];
; CHECK-NEXT: ld.local.u[[BITS]] %{{(r|rd)[0-9]+}}, [[[VA_PTR_ALIGN]]];
%3 = va_arg ptr %al, ptr
@@ -91,7 +91,7 @@ define i32 @test_foo(i32 %i, i64 %l, double %d, ptr %p) {
; Store arguments to an array
; CHECK32: .param .align 8 .b8 param1[24];
; CHECK64: .param .align 8 .b8 param1[28];
-; CHECK-NEXT: st.param.b32 [param1+0], [[ARG_I32]];
+; CHECK-NEXT: st.param.b32 [param1], [[ARG_I32]];
; CHECK-NEXT: st.param.b64 [param1+4], [[ARG_I64]];
; CHECK-NEXT: st.param.f64 [param1+12], [[ARG_DOUBLE]];
; CHECK-NEXT: st.param.b[[BITS]] [param1+20], [[ARG_VOID_PTR]];
diff --git a/llvm/test/CodeGen/NVPTX/variadics-backend.ll b/llvm/test/CodeGen/NVPTX/variadics-backend.ll
index 0e0c89d3e0214f..6d14986b7ff319 100644
--- a/llvm/test/CodeGen/NVPTX/variadics-backend.ll
+++ b/llvm/test/CodeGen/NVPTX/variadics-backend.ll
@@ -42,7 +42,7 @@ define dso_local i32 @variadics1(i32 noundef %first, ...) {
; CHECK-PTX-NEXT: cvt.rn.f64.s32 %fd5, %r9;
; CHECK-PTX-NEXT: add.rn.f64 %fd6, %fd5, %fd4;
; CHECK-PTX-NEXT: cvt.rzi.s32.f64 %r10, %fd6;
-; CHECK-PTX-NEXT: st.param.b32 [func_retval0+0], %r10;
+; CHECK-PTX-NEXT: st.param.b32 [func_retval0], %r10;
; CHECK-PTX-NEXT: ret;
entry:
%vlist = alloca ptr, align 8
@@ -112,7 +112,7 @@ define dso_local i32 @foo() {
; CHECK-PTX-NEXT: mov.u64 %SPL, __local_depot1;
; CHECK-PTX-NEXT: cvta.local.u64 %SP, %SPL;
; CHECK-PTX-NEXT: mov.u64 %rd1, 4294967297;
-; CHECK-PTX-NEXT: st.u64 [%SP+0], %rd1;
+; CHECK-PTX-NEXT: st.u64 [%SP], %rd1;
; CHECK-PTX-NEXT: mov.b32 %r1, 1;
; CHECK-PTX-NEXT: st.u32 [%SP+8], %r1;
; CHECK-PTX-NEXT: mov.u64 %rd2, 1;
@@ -123,9 +123,9 @@ define dso_local i32 @foo() {
; CHECK-PTX-NEXT: add.u64 %rd4, %SP, 0;
; CHECK-PTX-NEXT: { // callseq 0, 0
; CHECK-PTX-NEXT: .param .b32 param0;
-; CHECK-PTX-NEXT: st.param.b32 [param0+0], 1;
+; CHECK-PTX-NEXT: st.param.b32 [param0], 1;
; CHECK-PTX-NEXT: .param .b64 param1;
-; CHECK-PTX-NEXT: st.param.b64 [param1+0], %rd4;
+; CHECK-PTX-NEXT: st.param.b64 [param1], %rd4;
; CHECK-PTX-NEXT: .param .b32 retval0;
; CHECK-PTX-NEXT: call.uni (retval0),
; CHECK-PTX-NEXT: variadics1,
@@ -133,9 +133,9 @@ define dso_local i32 @foo() {
; CHECK-PTX-NEXT: param0,
; CHECK-PTX-NEXT: param1
; CHECK-PTX-NEXT: );
-; CHECK-PTX-NEXT: ld.param.b32 %r2, [retval0+0];
+; CHECK-PTX-NEXT: ld.param.b32 %r2, [retval0];
; CHECK-PTX-NEXT: } // callseq 0
-; CHECK-PTX-NEXT: st.param.b32 [func_retval0+0], %r2;
+; CHECK-PTX-NEXT: st.param.b32 [func_retval0], %r2;
; CHECK-PTX-NEXT: ret;
entry:
%conv = sext i8 1 to i32
@@ -174,14 +174,14 @@ define dso_local i32 @variadics2(i32 noundef %first, ...)
{ ; CHECK-PTX-NEXT: ld.u8 %rs3, [%rd7]; ; CHECK-PTX-NEXT: shl.b16 %rs4, %rs3, 8; ; CHECK-PTX-NEXT: or.b16 %rs5, %rs4, %rs2; -; CHECK-PTX-NEXT: st.u16 [%SP+0], %rs5; +; CHECK-PTX-NEXT: st.u16 [%SP], %rs5; ; CHECK-PTX-NEXT: ld.u64 %rd8, [%rd3+8]; ; CHECK-PTX-NEXT: add.s32 %r4, %r1, %r2; ; CHECK-PTX-NEXT: add.s32 %r5, %r4, %r3; ; CHECK-PTX-NEXT: cvt.u64.u32 %rd9, %r5; ; CHECK-PTX-NEXT: add.s64 %rd10, %rd9, %rd8; ; CHECK-PTX-NEXT: cvt.u32.u64 %r6, %rd10; -; CHECK-PTX-NEXT: st.param.b32 [func_retval0+0], %r6; +; CHECK-PTX-NEXT: st.param.b32 [func_retval0], %r6; ; CHECK-PTX-NEXT: ret; entry: %vlist = alloca ptr, align 8 @@ -237,7 +237,7 @@ define dso_local i32 @bar() { ; CHECK-PTX-NEXT: cvt.u16.u8 %rs6, %rs5; ; CHECK-PTX-NEXT: shl.b16 %rs7, %rs6, 8; ; CHECK-PTX-NEXT: or.b16 %rs8, %rs7, %rs4; -; CHECK-PTX-NEXT: st.u16 [%SP+0], %rs8; +; CHECK-PTX-NEXT: st.u16 [%SP], %rs8; ; CHECK-PTX-NEXT: mov.b32 %r1, 1; ; CHECK-PTX-NEXT: st.u32 [%SP+8], %r1; ; CHECK-PTX-NEXT: add.u64 %rd5, %SP, 8; @@ -248,9 +248,9 @@ define dso_local i32 @bar() { ; CHECK-PTX-NEXT: st.u64 [%SP+16], %rd7; ; CHECK-PTX-NEXT: { // callseq 1, 0 ; CHECK-PTX-NEXT: .param .b32 param0; -; CHECK-PTX-NEXT: st.param.b32 [param0+0], 1; +; CHECK-PTX-NEXT: st.param.b32 [param0], 1; ; CHECK-PTX-NEXT: .param .b64 param1; -; CHECK-PTX-NEXT: st.param.b64 [param1+0], %rd5; +; CHECK-PTX-NEXT: st.param.b64 [param1], %rd5; ; CHECK-PTX-NEXT: .param .b32 retval0; ; CHECK-PTX-NEXT: call.uni (retval0), ; CHECK-PTX-NEXT: variadics2, @@ -258,9 +258,9 @@ define dso_local i32 @bar() { ; CHECK-PTX-NEXT: param0, ; CHECK-PTX-NEXT: param1 ; CHECK-PTX-NEXT: ); -; CHECK-PTX-NEXT: ld.param.b32 %r2, [retval0+0]; +; CHECK-PTX-NEXT: ld.param.b32 %r2, [retval0]; ; CHECK-PTX-NEXT: } // callseq 1 -; CHECK-PTX-NEXT: st.param.b32 [func_retval0+0], %r2; +; CHECK-PTX-NEXT: st.param.b32 [func_retval0], %r2; ; CHECK-PTX-NEXT: ret; entry: %s1.sroa.3 = alloca [3 x i8], align 1 @@ -286,7 +286,7 @@ define dso_local i32 @variadics3(i32 noundef %first, ...) { ; CHECK-PTX-NEXT: add.s32 %r5, %r1, %r2; ; CHECK-PTX-NEXT: add.s32 %r6, %r5, %r3; ; CHECK-PTX-NEXT: add.s32 %r7, %r6, %r4; -; CHECK-PTX-NEXT: st.param.b32 [func_retval0+0], %r7; +; CHECK-PTX-NEXT: st.param.b32 [func_retval0], %r7; ; CHECK-PTX-NEXT: ret; entry: %vlist = alloca ptr, align 8 @@ -321,13 +321,13 @@ define dso_local i32 @baz() { ; CHECK-PTX-NEXT: mov.u64 %SPL, __local_depot5; ; CHECK-PTX-NEXT: cvta.local.u64 %SP, %SPL; ; CHECK-PTX-NEXT: mov.b32 %r1, 1; -; CHECK-PTX-NEXT: st.v4.u32 [%SP+0], {%r1, %r1, %r1, %r1}; +; CHECK-PTX-NEXT: st.v4.u32 [%SP], {%r1, %r1, %r1, %r1}; ; CHECK-PTX-NEXT: add.u64 %rd1, %SP, 0; ; CHECK-PTX-NEXT: { // callseq 2, 0 ; CHECK-PTX-NEXT: .param .b32 param0; -; CHECK-PTX-NEXT: st.param.b32 [param0+0], 1; +; CHECK-PTX-NEXT: st.param.b32 [param0], 1; ; CHECK-PTX-NEXT: .param .b64 param1; -; CHECK-PTX-NEXT: st.param.b64 [param1+0], %rd1; +; CHECK-PTX-NEXT: st.param.b64 [param1], %rd1; ; CHECK-PTX-NEXT: .param .b32 retval0; ; CHECK-PTX-NEXT: call.uni (retval0), ; CHECK-PTX-NEXT: variadics3, @@ -335,9 +335,9 @@ define dso_local i32 @baz() { ; CHECK-PTX-NEXT: param0, ; CHECK-PTX-NEXT: param1 ; CHECK-PTX-NEXT: ); -; CHECK-PTX-NEXT: ld.param.b32 %r2, [retval0+0]; +; CHECK-PTX-NEXT: ld.param.b32 %r2, [retval0]; ; CHECK-PTX-NEXT: } // callseq 2 -; CHECK-PTX-NEXT: st.param.b32 [func_retval0+0], %r2; +; CHECK-PTX-NEXT: st.param.b32 [func_retval0], %r2; ; CHECK-PTX-NEXT: ret; entry: %call = call i32 (i32, ...) 
@variadics3(i32 noundef 1, <4 x i32> noundef ) @@ -360,7 +360,7 @@ define dso_local i32 @variadics4(ptr noundef byval(%struct.S2) align 8 %first, . ; CHECK-PTX-NEXT: add.s64 %rd7, %rd5, %rd6; ; CHECK-PTX-NEXT: add.s64 %rd8, %rd7, %rd4; ; CHECK-PTX-NEXT: cvt.u32.u64 %r1, %rd8; -; CHECK-PTX-NEXT: st.param.b32 [func_retval0+0], %r1; +; CHECK-PTX-NEXT: st.param.b32 [func_retval0], %r1; ; CHECK-PTX-NEXT: ret; entry: %vlist = alloca ptr, align 8 @@ -395,7 +395,7 @@ define dso_local void @qux() { ; CHECK-PTX-NEXT: mov.u64 %SPL, __local_depot7; ; CHECK-PTX-NEXT: cvta.local.u64 %SP, %SPL; ; CHECK-PTX-NEXT: ld.global.nc.u64 %rd1, [__const_$_qux_$_s]; -; CHECK-PTX-NEXT: st.u64 [%SP+0], %rd1; +; CHECK-PTX-NEXT: st.u64 [%SP], %rd1; ; CHECK-PTX-NEXT: mov.u64 %rd2, __const_$_qux_$_s; ; CHECK-PTX-NEXT: add.s64 %rd3, %rd2, 8; ; CHECK-PTX-NEXT: ld.global.nc.u64 %rd4, [%rd3]; @@ -405,10 +405,10 @@ define dso_local void @qux() { ; CHECK-PTX-NEXT: add.u64 %rd6, %SP, 16; ; CHECK-PTX-NEXT: { // callseq 3, 0 ; CHECK-PTX-NEXT: .param .align 8 .b8 param0[16]; -; CHECK-PTX-NEXT: st.param.b64 [param0+0], %rd1; +; CHECK-PTX-NEXT: st.param.b64 [param0], %rd1; ; CHECK-PTX-NEXT: st.param.b64 [param0+8], %rd4; ; CHECK-PTX-NEXT: .param .b64 param1; -; CHECK-PTX-NEXT: st.param.b64 [param1+0], %rd6; +; CHECK-PTX-NEXT: st.param.b64 [param1], %rd6; ; CHECK-PTX-NEXT: .param .b32 retval0; ; CHECK-PTX-NEXT: call.uni (retval0), ; CHECK-PTX-NEXT: variadics4, @@ -416,7 +416,7 @@ define dso_local void @qux() { ; CHECK-PTX-NEXT: param0, ; CHECK-PTX-NEXT: param1 ; CHECK-PTX-NEXT: ); -; CHECK-PTX-NEXT: ld.param.b32 %r1, [retval0+0]; +; CHECK-PTX-NEXT: ld.param.b32 %r1, [retval0]; ; CHECK-PTX-NEXT: } // callseq 3 ; CHECK-PTX-NEXT: ret; entry: diff --git a/llvm/test/CodeGen/NVPTX/vec-param-load.ll b/llvm/test/CodeGen/NVPTX/vec-param-load.ll index f4f5c26be3474b..9a190a0892e576 100644 --- a/llvm/test/CodeGen/NVPTX/vec-param-load.ll +++ b/llvm/test/CodeGen/NVPTX/vec-param-load.ll @@ -9,7 +9,7 @@ define <16 x float> @test_v16f32(<16 x float> %a) { ; CHECK-DAG: ld.param.v4.f32 {[[V_8_11:(%f[0-9]+[, ]*){4}]]}, [test_v16f32_param_0+32]; ; CHECK-DAG: ld.param.v4.f32 {[[V_4_7:(%f[0-9]+[, ]*){4}]]}, [test_v16f32_param_0+16]; ; CHECK-DAG: ld.param.v4.f32 {[[V_0_3:(%f[0-9]+[, ]*){4}]]}, [test_v16f32_param_0]; -; CHECK-DAG: st.param.v4.f32 [func_retval0+0], {[[V_0_3]]} +; CHECK-DAG: st.param.v4.f32 [func_retval0], {[[V_0_3]]} ; CHECK-DAG: st.param.v4.f32 [func_retval0+16], {[[V_4_7]]} ; CHECK-DAG: st.param.v4.f32 [func_retval0+32], {[[V_8_11]]} ; CHECK-DAG: st.param.v4.f32 [func_retval0+48], {[[V_12_15]]} @@ -21,7 +21,7 @@ define <8 x float> @test_v8f32(<8 x float> %a) { ; CHECK-LABEL: test_v8f32( ; CHECK-DAG: ld.param.v4.f32 {[[V_4_7:(%f[0-9]+[, ]*){4}]]}, [test_v8f32_param_0+16]; ; CHECK-DAG: ld.param.v4.f32 {[[V_0_3:(%f[0-9]+[, ]*){4}]]}, [test_v8f32_param_0]; -; CHECK-DAG: st.param.v4.f32 [func_retval0+0], {[[V_0_3]]} +; CHECK-DAG: st.param.v4.f32 [func_retval0], {[[V_0_3]]} ; CHECK-DAG: st.param.v4.f32 [func_retval0+16], {[[V_4_7]]} ; CHECK: ret; ret <8 x float> %a @@ -30,7 +30,7 @@ define <8 x float> @test_v8f32(<8 x float> %a) { define <4 x float> @test_v4f32(<4 x float> %a) { ; CHECK-LABEL: test_v4f32( ; CHECK-DAG: ld.param.v4.f32 {[[V_0_3:(%f[0-9]+[, ]*){4}]]}, [test_v4f32_param_0]; -; CHECK-DAG: st.param.v4.f32 [func_retval0+0], {[[V_0_3]]} +; CHECK-DAG: st.param.v4.f32 [func_retval0], {[[V_0_3]]} ; CHECK: ret; ret <4 x float> %a } @@ -38,7 +38,7 @@ define <4 x float> @test_v4f32(<4 x float> %a) { define <2 x float> @test_v2f32(<2 x float> 
%a) { ; CHECK-LABEL: test_v2f32( ; CHECK-DAG: ld.param.v2.f32 {[[V_0_3:(%f[0-9]+[, ]*){2}]]}, [test_v2f32_param_0]; -; CHECK-DAG: st.param.v2.f32 [func_retval0+0], {[[V_0_3]]} +; CHECK-DAG: st.param.v2.f32 [func_retval0], {[[V_0_3]]} ; CHECK: ret; ret <2 x float> %a } @@ -48,7 +48,7 @@ define <3 x float> @test_v3f32(<3 x float> %a) { ; CHECK-LABEL: test_v3f32( ; CHECK-DAG: ld.param.f32 [[V_2:%f[0-9]+]], [test_v3f32_param_0+8]; ; CHECK-DAG: ld.param.v2.f32 {[[V_0_1:(%f[0-9]+[, ]*){2}]]}, [test_v3f32_param_0]; -; CHECK-DAG: st.param.v2.f32 [func_retval0+0], {[[V_0_1]]} +; CHECK-DAG: st.param.v2.f32 [func_retval0], {[[V_0_1]]} ; CHECK-DAG: st.param.f32 [func_retval0+8], [[V_2]] ; CHECK: ret; ret <3 x float> %a @@ -60,7 +60,7 @@ define <8 x i64> @test_v8i64(<8 x i64> %a) { ; CHECK-DAG: ld.param.v2.u64 {[[V_4_5:(%rd[0-9]+[, ]*){2}]]}, [test_v8i64_param_0+32]; ; CHECK-DAG: ld.param.v2.u64 {[[V_2_3:(%rd[0-9]+[, ]*){2}]]}, [test_v8i64_param_0+16]; ; CHECK-DAG: ld.param.v2.u64 {[[V_0_1:(%rd[0-9]+[, ]*){2}]]}, [test_v8i64_param_0]; -; CHECK-DAG: st.param.v2.b64 [func_retval0+0], {[[V_0_1]]} +; CHECK-DAG: st.param.v2.b64 [func_retval0], {[[V_0_1]]} ; CHECK-DAG: st.param.v2.b64 [func_retval0+16], {[[V_2_3]]} ; CHECK-DAG: st.param.v2.b64 [func_retval0+32], {[[V_4_5]]} ; CHECK-DAG: st.param.v2.b64 [func_retval0+48], {[[V_6_7]]} @@ -72,7 +72,7 @@ define <16 x i16> @test_v16i16(<16 x i16> %a) { ; CHECK-LABEL: test_v16i16( ; CHECK-DAG: ld.param.v4.u32 {[[V_8_15:(%r[0-9]+[, ]*){4}]]}, [test_v16i16_param_0+16]; ; CHECK-DAG: ld.param.v4.u32 {[[V_0_7:(%r[0-9]+[, ]*){4}]]}, [test_v16i16_param_0]; -; CHECK-DAG: st.param.v4.b32 [func_retval0+0], {[[V_0_7]]} +; CHECK-DAG: st.param.v4.b32 [func_retval0], {[[V_0_7]]} ; CHECK-DAG: st.param.v4.b32 [func_retval0+16], {[[V_8_15]]} ; CHECK: ret; ret <16 x i16> %a diff --git a/llvm/test/CodeGen/NVPTX/vector-args.ll b/llvm/test/CodeGen/NVPTX/vector-args.ll index 162061ff34ba1e..2a45c8271e9b8f 100644 --- a/llvm/test/CodeGen/NVPTX/vector-args.ll +++ b/llvm/test/CodeGen/NVPTX/vector-args.ll @@ -29,7 +29,7 @@ define <4 x float> @baz(<4 x float> %a) { ; CHECK: .func (.param .align 16 .b8 func_retval0[16]) baz ; CHECK: .param .align 16 .b8 baz_param_0[16] ; CHECK: ld.param.v4.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}} -; CHECK: st.param.v4.f32 [func_retval0+0], {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}} +; CHECK: st.param.v4.f32 [func_retval0], {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}} %t1 = fmul <4 x float> %a, %a ret <4 x float> %t1 } diff --git a/llvm/test/CodeGen/NVPTX/vector-call.ll b/llvm/test/CodeGen/NVPTX/vector-call.ll index 15e4697333cb4e..e91d4e20a44ac8 100644 --- a/llvm/test/CodeGen/NVPTX/vector-call.ll +++ b/llvm/test/CodeGen/NVPTX/vector-call.ll @@ -8,7 +8,7 @@ declare void @bar(<4 x i32>) ; CHECK-LABEL: .func foo( ; CHECK-DAG: ld.param.v4.u32 {[[E0:%r[0-9]+]], [[E1:%r[0-9]+]], [[E2:%r[0-9]+]], [[E3:%r[0-9]+]]}, [foo_param_0]; ; CHECK: .param .align 16 .b8 param0[16]; -; CHECK-DAG: st.param.v4.b32 [param0+0], {[[E0]], [[E1]], [[E2]], [[E3]]}; +; CHECK-DAG: st.param.v4.b32 [param0], {[[E0]], [[E1]], [[E2]], [[E3]]}; ; CHECK: call.uni ; CHECK: ret; define void @foo(<4 x i32> %a) { @@ -20,7 +20,7 @@ define void @foo(<4 x i32> %a) { ; CHECK-DAG: ld.param.v2.u32 {[[E0:%r[0-9]+]], [[E1:%r[0-9]+]]}, [foo3_param_0]; ; CHECK-DAG: ld.param.u32 [[E2:%r[0-9]+]], [foo3_param_0+8]; ; CHECK: .param .align 16 .b8 param0[16]; -; CHECK-DAG: st.param.v2.b32 [param0+0], {[[E0]], [[E1]]}; +; CHECK-DAG: st.param.v2.b32 [param0], {[[E0]], 
[[E1]]}; ; CHECK-DAG: st.param.b32 [param0+8], [[E2]]; ; CHECK: call.uni ; CHECK: ret; diff --git a/llvm/test/CodeGen/NVPTX/vector-returns.ll b/llvm/test/CodeGen/NVPTX/vector-returns.ll index 956f74392ae130..520736c4cec507 100644 --- a/llvm/test/CodeGen/NVPTX/vector-returns.ll +++ b/llvm/test/CodeGen/NVPTX/vector-returns.ll @@ -10,7 +10,7 @@ define <3 x i64> @long3() { ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: mov.u64 %rd1, 0; -; CHECK-NEXT: st.param.v2.b64 [func_retval0+0], {%rd1, %rd1}; +; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd1, %rd1}; ; CHECK-NEXT: st.param.b64 [func_retval0+16], %rd1; ; CHECK-NEXT: ret; ret <3 x i64> zeroinitializer @@ -23,7 +23,7 @@ define <2 x i64> @long2() { ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: mov.u64 %rd1, 0; -; CHECK-NEXT: st.param.v2.b64 [func_retval0+0], {%rd1, %rd1}; +; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd1, %rd1}; ; CHECK-NEXT: ret; ret <2 x i64> zeroinitializer } @@ -35,7 +35,7 @@ define <1 x i64> @long1() { ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: mov.u64 %rd1, 0; -; CHECK-NEXT: st.param.b64 [func_retval0+0], %rd1; +; CHECK-NEXT: st.param.b64 [func_retval0], %rd1; ; CHECK-NEXT: ret; ret <1 x i64> zeroinitializer } @@ -47,7 +47,7 @@ define <5 x i32> @int5() { ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: mov.b32 %r1, 0; -; CHECK-NEXT: st.param.v4.b32 [func_retval0+0], {%r1, %r1, %r1, %r1}; +; CHECK-NEXT: st.param.v4.b32 [func_retval0], {%r1, %r1, %r1, %r1}; ; CHECK-NEXT: st.param.b32 [func_retval0+16], %r1; ; CHECK-NEXT: ret; ret <5 x i32> zeroinitializer @@ -60,7 +60,7 @@ define <4 x i32> @int4() { ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: mov.b32 %r1, 0; -; CHECK-NEXT: st.param.v4.b32 [func_retval0+0], {%r1, %r1, %r1, %r1}; +; CHECK-NEXT: st.param.v4.b32 [func_retval0], {%r1, %r1, %r1, %r1}; ; CHECK-NEXT: ret; ret <4 x i32> zeroinitializer } @@ -72,7 +72,7 @@ define <3 x i32> @int3() { ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: mov.b32 %r1, 0; -; CHECK-NEXT: st.param.v2.b32 [func_retval0+0], {%r1, %r1}; +; CHECK-NEXT: st.param.v2.b32 [func_retval0], {%r1, %r1}; ; CHECK-NEXT: st.param.b32 [func_retval0+8], %r1; ; CHECK-NEXT: ret; ret <3 x i32> zeroinitializer @@ -85,7 +85,7 @@ define <2 x i32> @int2() { ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: mov.b32 %r1, 0; -; CHECK-NEXT: st.param.v2.b32 [func_retval0+0], {%r1, %r1}; +; CHECK-NEXT: st.param.v2.b32 [func_retval0], {%r1, %r1}; ; CHECK-NEXT: ret; ret <2 x i32> zeroinitializer } @@ -97,7 +97,7 @@ define <1 x i32> @int1() { ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: mov.b32 %r1, 0; -; CHECK-NEXT: st.param.b32 [func_retval0+0], %r1; +; CHECK-NEXT: st.param.b32 [func_retval0], %r1; ; CHECK-NEXT: ret; ret <1 x i32> zeroinitializer } @@ -109,7 +109,7 @@ define <9 x i16> @short9() { ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: mov.u16 %rs1, 0; -; CHECK-NEXT: st.param.v4.b16 [func_retval0+0], {%rs1, %rs1, %rs1, %rs1}; +; CHECK-NEXT: st.param.v4.b16 [func_retval0], {%rs1, %rs1, %rs1, %rs1}; ; CHECK-NEXT: st.param.v4.b16 [func_retval0+8], {%rs1, %rs1, %rs1, %rs1}; ; CHECK-NEXT: st.param.b16 [func_retval0+16], %rs1; ; CHECK-NEXT: ret; @@ -123,7 +123,7 @@ define <8 x i16> @short8() { ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: mov.b32 %r1, 0; -; CHECK-NEXT: st.param.v4.b32 [func_retval0+0], {%r1, %r1, %r1, %r1}; +; CHECK-NEXT: st.param.v4.b32 [func_retval0], {%r1, %r1, %r1, %r1}; ; CHECK-NEXT: ret; ret <8 x i16> zeroinitializer } @@ -135,7 +135,7 @@ define <7 x i16> @short7() { ; CHECK-EMPTY: 
; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: mov.u16 %rs1, 0; -; CHECK-NEXT: st.param.v4.b16 [func_retval0+0], {%rs1, %rs1, %rs1, %rs1}; +; CHECK-NEXT: st.param.v4.b16 [func_retval0], {%rs1, %rs1, %rs1, %rs1}; ; CHECK-NEXT: st.param.v2.b16 [func_retval0+8], {%rs1, %rs1}; ; CHECK-NEXT: st.param.b16 [func_retval0+12], %rs1; ; CHECK-NEXT: ret; @@ -149,7 +149,7 @@ define <5 x i16> @short5() { ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: mov.u16 %rs1, 0; -; CHECK-NEXT: st.param.v4.b16 [func_retval0+0], {%rs1, %rs1, %rs1, %rs1}; +; CHECK-NEXT: st.param.v4.b16 [func_retval0], {%rs1, %rs1, %rs1, %rs1}; ; CHECK-NEXT: st.param.b16 [func_retval0+8], %rs1; ; CHECK-NEXT: ret; ret <5 x i16> zeroinitializer @@ -162,7 +162,7 @@ define <4 x i16> @short4() { ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: mov.b32 %r1, 0; -; CHECK-NEXT: st.param.v2.b32 [func_retval0+0], {%r1, %r1}; +; CHECK-NEXT: st.param.v2.b32 [func_retval0], {%r1, %r1}; ; CHECK-NEXT: ret; ret <4 x i16> zeroinitializer } @@ -174,7 +174,7 @@ define <3 x i16> @short3() { ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: mov.u16 %rs1, 0; -; CHECK-NEXT: st.param.v2.b16 [func_retval0+0], {%rs1, %rs1}; +; CHECK-NEXT: st.param.v2.b16 [func_retval0], {%rs1, %rs1}; ; CHECK-NEXT: st.param.b16 [func_retval0+4], %rs1; ; CHECK-NEXT: ret; ret <3 x i16> zeroinitializer @@ -187,7 +187,7 @@ define <2 x i16> @short2() { ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: mov.b32 %r1, 0; -; CHECK-NEXT: st.param.b32 [func_retval0+0], %r1; +; CHECK-NEXT: st.param.b32 [func_retval0], %r1; ; CHECK-NEXT: ret; ret <2 x i16> zeroinitializer } @@ -199,7 +199,7 @@ define <1 x i16> @short1() { ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: mov.u16 %rs1, 0; -; CHECK-NEXT: st.param.b16 [func_retval0+0], %rs1; +; CHECK-NEXT: st.param.b16 [func_retval0], %rs1; ; CHECK-NEXT: ret; ret <1 x i16> zeroinitializer } @@ -211,7 +211,7 @@ define <17 x i8> @byte17() { ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: mov.u16 %rs1, 0; -; CHECK-NEXT: st.param.v4.b8 [func_retval0+0], {%rs1, %rs1, %rs1, %rs1}; +; CHECK-NEXT: st.param.v4.b8 [func_retval0], {%rs1, %rs1, %rs1, %rs1}; ; CHECK-NEXT: st.param.v4.b8 [func_retval0+4], {%rs1, %rs1, %rs1, %rs1}; ; CHECK-NEXT: st.param.v4.b8 [func_retval0+8], {%rs1, %rs1, %rs1, %rs1}; ; CHECK-NEXT: st.param.v4.b8 [func_retval0+12], {%rs1, %rs1, %rs1, %rs1}; @@ -227,7 +227,7 @@ define <16 x i8> @byte16() { ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: mov.b32 %r1, 0; -; CHECK-NEXT: st.param.v4.b32 [func_retval0+0], {%r1, %r1, %r1, %r1}; +; CHECK-NEXT: st.param.v4.b32 [func_retval0], {%r1, %r1, %r1, %r1}; ; CHECK-NEXT: ret; ret <16 x i8> zeroinitializer } @@ -239,7 +239,7 @@ define <15 x i8> @byte15() { ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: mov.u16 %rs1, 0; -; CHECK-NEXT: st.param.v4.b8 [func_retval0+0], {%rs1, %rs1, %rs1, %rs1}; +; CHECK-NEXT: st.param.v4.b8 [func_retval0], {%rs1, %rs1, %rs1, %rs1}; ; CHECK-NEXT: st.param.v4.b8 [func_retval0+4], {%rs1, %rs1, %rs1, %rs1}; ; CHECK-NEXT: st.param.v4.b8 [func_retval0+8], {%rs1, %rs1, %rs1, %rs1}; ; CHECK-NEXT: st.param.v2.b8 [func_retval0+12], {%rs1, %rs1}; @@ -255,7 +255,7 @@ define <9 x i8> @byte9() { ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: mov.u16 %rs1, 0; -; CHECK-NEXT: st.param.v4.b8 [func_retval0+0], {%rs1, %rs1, %rs1, %rs1}; +; CHECK-NEXT: st.param.v4.b8 [func_retval0], {%rs1, %rs1, %rs1, %rs1}; ; CHECK-NEXT: st.param.v4.b8 [func_retval0+4], {%rs1, %rs1, %rs1, %rs1}; ; CHECK-NEXT: st.param.b8 [func_retval0+8], %rs1; ; CHECK-NEXT: ret; @@ 
-269,7 +269,7 @@ define <8 x i8> @byte8() { ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: mov.b32 %r1, 0; -; CHECK-NEXT: st.param.v2.b32 [func_retval0+0], {%r1, %r1}; +; CHECK-NEXT: st.param.v2.b32 [func_retval0], {%r1, %r1}; ; CHECK-NEXT: ret; ret <8 x i8> zeroinitializer } @@ -281,7 +281,7 @@ define <7 x i8> @byte7() { ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: mov.u16 %rs1, 0; -; CHECK-NEXT: st.param.v4.b8 [func_retval0+0], {%rs1, %rs1, %rs1, %rs1}; +; CHECK-NEXT: st.param.v4.b8 [func_retval0], {%rs1, %rs1, %rs1, %rs1}; ; CHECK-NEXT: st.param.v2.b8 [func_retval0+4], {%rs1, %rs1}; ; CHECK-NEXT: st.param.b8 [func_retval0+6], %rs1; ; CHECK-NEXT: ret; @@ -295,7 +295,7 @@ define <5 x i8> @byte5() { ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: mov.u16 %rs1, 0; -; CHECK-NEXT: st.param.v4.b8 [func_retval0+0], {%rs1, %rs1, %rs1, %rs1}; +; CHECK-NEXT: st.param.v4.b8 [func_retval0], {%rs1, %rs1, %rs1, %rs1}; ; CHECK-NEXT: st.param.b8 [func_retval0+4], %rs1; ; CHECK-NEXT: ret; ret <5 x i8> zeroinitializer @@ -308,7 +308,7 @@ define <4 x i8> @byte4() { ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: mov.b32 %r1, 0; -; CHECK-NEXT: st.param.b32 [func_retval0+0], %r1; +; CHECK-NEXT: st.param.b32 [func_retval0], %r1; ; CHECK-NEXT: ret; ret <4 x i8> zeroinitializer } @@ -320,7 +320,7 @@ define <3 x i8> @byte3() { ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: mov.b32 %r1, 0; -; CHECK-NEXT: st.param.b32 [func_retval0+0], %r1; +; CHECK-NEXT: st.param.b32 [func_retval0], %r1; ; CHECK-NEXT: ret; ret <3 x i8> zeroinitializer } @@ -332,7 +332,7 @@ define <2 x i8> @byte2() { ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: mov.b32 %r1, 0; -; CHECK-NEXT: st.param.b32 [func_retval0+0], %r1; +; CHECK-NEXT: st.param.b32 [func_retval0], %r1; ; CHECK-NEXT: ret; ret <2 x i8> zeroinitializer } @@ -344,7 +344,7 @@ define <1 x i8> @byte1() { ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: mov.u16 %rs1, 0; -; CHECK-NEXT: st.param.b8 [func_retval0+0], %rs1; +; CHECK-NEXT: st.param.b8 [func_retval0], %rs1; ; CHECK-NEXT: ret; ret <1 x i8> zeroinitializer } @@ -356,7 +356,7 @@ define <17 x i1> @bit17() { ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: mov.u16 %rs1, 0; -; CHECK-NEXT: st.param.v4.b8 [func_retval0+0], {%rs1, %rs1, %rs1, %rs1}; +; CHECK-NEXT: st.param.v4.b8 [func_retval0], {%rs1, %rs1, %rs1, %rs1}; ; CHECK-NEXT: st.param.v4.b8 [func_retval0+4], {%rs1, %rs1, %rs1, %rs1}; ; CHECK-NEXT: st.param.v4.b8 [func_retval0+8], {%rs1, %rs1, %rs1, %rs1}; ; CHECK-NEXT: st.param.v4.b8 [func_retval0+12], {%rs1, %rs1, %rs1, %rs1}; @@ -372,7 +372,7 @@ define <16 x i1> @bit16() { ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: mov.u16 %rs1, 0; -; CHECK-NEXT: st.param.v2.b8 [func_retval0+0], {%rs1, %rs1}; +; CHECK-NEXT: st.param.v2.b8 [func_retval0], {%rs1, %rs1}; ; CHECK-NEXT: st.param.v2.b8 [func_retval0+2], {%rs1, %rs1}; ; CHECK-NEXT: st.param.v2.b8 [func_retval0+4], {%rs1, %rs1}; ; CHECK-NEXT: st.param.v2.b8 [func_retval0+6], {%rs1, %rs1}; @@ -391,7 +391,7 @@ define <15 x i1> @bit15() { ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: mov.u16 %rs1, 0; -; CHECK-NEXT: st.param.v2.b8 [func_retval0+0], {%rs1, %rs1}; +; CHECK-NEXT: st.param.v2.b8 [func_retval0], {%rs1, %rs1}; ; CHECK-NEXT: st.param.v2.b8 [func_retval0+2], {%rs1, %rs1}; ; CHECK-NEXT: st.param.v2.b8 [func_retval0+4], {%rs1, %rs1}; ; CHECK-NEXT: st.param.v2.b8 [func_retval0+6], {%rs1, %rs1}; @@ -410,7 +410,7 @@ define <9 x i1> @bit9() { ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: mov.u16 %rs1, 0; -; 
CHECK-NEXT: st.param.v2.b8 [func_retval0+0], {%rs1, %rs1}; +; CHECK-NEXT: st.param.v2.b8 [func_retval0], {%rs1, %rs1}; ; CHECK-NEXT: st.param.v2.b8 [func_retval0+2], {%rs1, %rs1}; ; CHECK-NEXT: st.param.v2.b8 [func_retval0+4], {%rs1, %rs1}; ; CHECK-NEXT: st.param.v2.b8 [func_retval0+6], {%rs1, %rs1}; @@ -426,7 +426,7 @@ define <8 x i1> @bit8() { ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: mov.u16 %rs1, 0; -; CHECK-NEXT: st.param.b8 [func_retval0+0], %rs1; +; CHECK-NEXT: st.param.b8 [func_retval0], %rs1; ; CHECK-NEXT: st.param.b8 [func_retval0+1], %rs1; ; CHECK-NEXT: st.param.b8 [func_retval0+2], %rs1; ; CHECK-NEXT: st.param.b8 [func_retval0+3], %rs1; @@ -445,7 +445,7 @@ define <7 x i1> @bit7() { ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: mov.u16 %rs1, 0; -; CHECK-NEXT: st.param.b8 [func_retval0+0], %rs1; +; CHECK-NEXT: st.param.b8 [func_retval0], %rs1; ; CHECK-NEXT: st.param.b8 [func_retval0+1], %rs1; ; CHECK-NEXT: st.param.b8 [func_retval0+2], %rs1; ; CHECK-NEXT: st.param.b8 [func_retval0+3], %rs1; @@ -463,7 +463,7 @@ define <5 x i1> @bit5() { ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: mov.u16 %rs1, 0; -; CHECK-NEXT: st.param.b8 [func_retval0+0], %rs1; +; CHECK-NEXT: st.param.b8 [func_retval0], %rs1; ; CHECK-NEXT: st.param.b8 [func_retval0+1], %rs1; ; CHECK-NEXT: st.param.b8 [func_retval0+2], %rs1; ; CHECK-NEXT: st.param.b8 [func_retval0+3], %rs1; @@ -479,7 +479,7 @@ define <4 x i1> @bit4() { ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: mov.u16 %rs1, 0; -; CHECK-NEXT: st.param.b8 [func_retval0+0], %rs1; +; CHECK-NEXT: st.param.b8 [func_retval0], %rs1; ; CHECK-NEXT: st.param.b8 [func_retval0+1], %rs1; ; CHECK-NEXT: st.param.b8 [func_retval0+2], %rs1; ; CHECK-NEXT: st.param.b8 [func_retval0+3], %rs1; @@ -494,7 +494,7 @@ define <3 x i1> @bit3() { ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: mov.u16 %rs1, 0; -; CHECK-NEXT: st.param.b8 [func_retval0+0], %rs1; +; CHECK-NEXT: st.param.b8 [func_retval0], %rs1; ; CHECK-NEXT: st.param.b8 [func_retval0+1], %rs1; ; CHECK-NEXT: st.param.b8 [func_retval0+2], %rs1; ; CHECK-NEXT: ret; @@ -508,7 +508,7 @@ define <2 x i1> @bit2() { ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: mov.u16 %rs1, 0; -; CHECK-NEXT: st.param.b8 [func_retval0+0], %rs1; +; CHECK-NEXT: st.param.b8 [func_retval0], %rs1; ; CHECK-NEXT: st.param.b8 [func_retval0+1], %rs1; ; CHECK-NEXT: ret; ret <2 x i1> zeroinitializer @@ -521,7 +521,7 @@ define <1 x i1> @bit1() { ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: mov.u16 %rs1, 0; -; CHECK-NEXT: st.param.b8 [func_retval0+0], %rs1; +; CHECK-NEXT: st.param.b8 [func_retval0], %rs1; ; CHECK-NEXT: ret; ret <1 x i1> zeroinitializer } diff --git a/llvm/test/CodeGen/SPARC/fmuladd-soft-float.ll b/llvm/test/CodeGen/SPARC/fmuladd-soft-float.ll new file mode 100644 index 00000000000000..a9e666e3c9b4db --- /dev/null +++ b/llvm/test/CodeGen/SPARC/fmuladd-soft-float.ll @@ -0,0 +1,385 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=sparc < %s | FileCheck %s -check-prefix=SOFT-FLOAT-32 +; RUN: llc -mtriple=sparc64 < %s | FileCheck %s -check-prefix=SOFT-FLOAT-64 + +define float @fmuladd_intrinsic_f32(float %a, float %b, float %c) #0 { +; SOFT-FLOAT-32-LABEL: fmuladd_intrinsic_f32: +; SOFT-FLOAT-32: .cfi_startproc +; SOFT-FLOAT-32-NEXT: ! 
%bb.0: +; SOFT-FLOAT-32-NEXT: save %sp, -96, %sp +; SOFT-FLOAT-32-NEXT: .cfi_def_cfa_register %fp +; SOFT-FLOAT-32-NEXT: .cfi_window_save +; SOFT-FLOAT-32-NEXT: .cfi_register %o7, %i7 +; SOFT-FLOAT-32-NEXT: mov %i0, %o0 +; SOFT-FLOAT-32-NEXT: call __mulsf3 +; SOFT-FLOAT-32-NEXT: mov %i1, %o1 +; SOFT-FLOAT-32-NEXT: call __addsf3 +; SOFT-FLOAT-32-NEXT: mov %i2, %o1 +; SOFT-FLOAT-32-NEXT: ret +; SOFT-FLOAT-32-NEXT: restore %g0, %o0, %o0 +; +; SOFT-FLOAT-64-LABEL: fmuladd_intrinsic_f32: +; SOFT-FLOAT-64: .cfi_startproc +; SOFT-FLOAT-64-NEXT: ! %bb.0: +; SOFT-FLOAT-64-NEXT: save %sp, -176, %sp +; SOFT-FLOAT-64-NEXT: .cfi_def_cfa_register %fp +; SOFT-FLOAT-64-NEXT: .cfi_window_save +; SOFT-FLOAT-64-NEXT: .cfi_register %o7, %i7 +; SOFT-FLOAT-64-NEXT: srl %i0, 0, %o0 +; SOFT-FLOAT-64-NEXT: call __mulsf3 +; SOFT-FLOAT-64-NEXT: srl %i1, 0, %o1 +; SOFT-FLOAT-64-NEXT: call __addsf3 +; SOFT-FLOAT-64-NEXT: srl %i2, 0, %o1 +; SOFT-FLOAT-64-NEXT: ret +; SOFT-FLOAT-64-NEXT: restore %g0, %o0, %o0 + %result = call float @llvm.fmuladd.f32(float %a, float %b, float %c) + ret float %result +} + +define double @fmuladd_intrinsic_f64(double %a, double %b, double %c) #0 { +; SOFT-FLOAT-32-LABEL: fmuladd_intrinsic_f64: +; SOFT-FLOAT-32: .cfi_startproc +; SOFT-FLOAT-32-NEXT: ! %bb.0: +; SOFT-FLOAT-32-NEXT: save %sp, -96, %sp +; SOFT-FLOAT-32-NEXT: .cfi_def_cfa_register %fp +; SOFT-FLOAT-32-NEXT: .cfi_window_save +; SOFT-FLOAT-32-NEXT: .cfi_register %o7, %i7 +; SOFT-FLOAT-32-NEXT: mov %i0, %o0 +; SOFT-FLOAT-32-NEXT: mov %i1, %o1 +; SOFT-FLOAT-32-NEXT: mov %i2, %o2 +; SOFT-FLOAT-32-NEXT: call __muldf3 +; SOFT-FLOAT-32-NEXT: mov %i3, %o3 +; SOFT-FLOAT-32-NEXT: mov %i4, %o2 +; SOFT-FLOAT-32-NEXT: call __adddf3 +; SOFT-FLOAT-32-NEXT: mov %i5, %o3 +; SOFT-FLOAT-32-NEXT: mov %o0, %i0 +; SOFT-FLOAT-32-NEXT: ret +; SOFT-FLOAT-32-NEXT: restore %g0, %o1, %o1 +; +; SOFT-FLOAT-64-LABEL: fmuladd_intrinsic_f64: +; SOFT-FLOAT-64: .cfi_startproc +; SOFT-FLOAT-64-NEXT: ! %bb.0: +; SOFT-FLOAT-64-NEXT: save %sp, -176, %sp +; SOFT-FLOAT-64-NEXT: .cfi_def_cfa_register %fp +; SOFT-FLOAT-64-NEXT: .cfi_window_save +; SOFT-FLOAT-64-NEXT: .cfi_register %o7, %i7 +; SOFT-FLOAT-64-NEXT: mov %i0, %o0 +; SOFT-FLOAT-64-NEXT: call __muldf3 +; SOFT-FLOAT-64-NEXT: mov %i1, %o1 +; SOFT-FLOAT-64-NEXT: call __adddf3 +; SOFT-FLOAT-64-NEXT: mov %i2, %o1 +; SOFT-FLOAT-64-NEXT: ret +; SOFT-FLOAT-64-NEXT: restore %g0, %o0, %o0 + %result = call double @llvm.fmuladd.f64(double %a, double %b, double %c) + ret double %result +} + +define float @fmuladd_contract_f32(float %a, float %b, float %c) #0 { +; SOFT-FLOAT-32-LABEL: fmuladd_contract_f32: +; SOFT-FLOAT-32: .cfi_startproc +; SOFT-FLOAT-32-NEXT: ! %bb.0: +; SOFT-FLOAT-32-NEXT: save %sp, -96, %sp +; SOFT-FLOAT-32-NEXT: .cfi_def_cfa_register %fp +; SOFT-FLOAT-32-NEXT: .cfi_window_save +; SOFT-FLOAT-32-NEXT: .cfi_register %o7, %i7 +; SOFT-FLOAT-32-NEXT: mov %i0, %o0 +; SOFT-FLOAT-32-NEXT: call __mulsf3 +; SOFT-FLOAT-32-NEXT: mov %i1, %o1 +; SOFT-FLOAT-32-NEXT: call __addsf3 +; SOFT-FLOAT-32-NEXT: mov %i2, %o1 +; SOFT-FLOAT-32-NEXT: ret +; SOFT-FLOAT-32-NEXT: restore %g0, %o0, %o0 +; +; SOFT-FLOAT-64-LABEL: fmuladd_contract_f32: +; SOFT-FLOAT-64: .cfi_startproc +; SOFT-FLOAT-64-NEXT: ! 
%bb.0: +; SOFT-FLOAT-64-NEXT: save %sp, -176, %sp +; SOFT-FLOAT-64-NEXT: .cfi_def_cfa_register %fp +; SOFT-FLOAT-64-NEXT: .cfi_window_save +; SOFT-FLOAT-64-NEXT: .cfi_register %o7, %i7 +; SOFT-FLOAT-64-NEXT: srl %i0, 0, %o0 +; SOFT-FLOAT-64-NEXT: call __mulsf3 +; SOFT-FLOAT-64-NEXT: srl %i1, 0, %o1 +; SOFT-FLOAT-64-NEXT: call __addsf3 +; SOFT-FLOAT-64-NEXT: srl %i2, 0, %o1 +; SOFT-FLOAT-64-NEXT: ret +; SOFT-FLOAT-64-NEXT: restore %g0, %o0, %o0 + %product = fmul contract float %a, %b + %result = fadd contract float %product, %c + ret float %result +} + +define double @fmuladd_contract_f64(double %a, double %b, double %c) #0 { +; SOFT-FLOAT-32-LABEL: fmuladd_contract_f64: +; SOFT-FLOAT-32: .cfi_startproc +; SOFT-FLOAT-32-NEXT: ! %bb.0: +; SOFT-FLOAT-32-NEXT: save %sp, -96, %sp +; SOFT-FLOAT-32-NEXT: .cfi_def_cfa_register %fp +; SOFT-FLOAT-32-NEXT: .cfi_window_save +; SOFT-FLOAT-32-NEXT: .cfi_register %o7, %i7 +; SOFT-FLOAT-32-NEXT: mov %i0, %o0 +; SOFT-FLOAT-32-NEXT: mov %i1, %o1 +; SOFT-FLOAT-32-NEXT: mov %i2, %o2 +; SOFT-FLOAT-32-NEXT: call __muldf3 +; SOFT-FLOAT-32-NEXT: mov %i3, %o3 +; SOFT-FLOAT-32-NEXT: mov %i4, %o2 +; SOFT-FLOAT-32-NEXT: call __adddf3 +; SOFT-FLOAT-32-NEXT: mov %i5, %o3 +; SOFT-FLOAT-32-NEXT: mov %o0, %i0 +; SOFT-FLOAT-32-NEXT: ret +; SOFT-FLOAT-32-NEXT: restore %g0, %o1, %o1 +; +; SOFT-FLOAT-64-LABEL: fmuladd_contract_f64: +; SOFT-FLOAT-64: .cfi_startproc +; SOFT-FLOAT-64-NEXT: ! %bb.0: +; SOFT-FLOAT-64-NEXT: save %sp, -176, %sp +; SOFT-FLOAT-64-NEXT: .cfi_def_cfa_register %fp +; SOFT-FLOAT-64-NEXT: .cfi_window_save +; SOFT-FLOAT-64-NEXT: .cfi_register %o7, %i7 +; SOFT-FLOAT-64-NEXT: mov %i0, %o0 +; SOFT-FLOAT-64-NEXT: call __muldf3 +; SOFT-FLOAT-64-NEXT: mov %i1, %o1 +; SOFT-FLOAT-64-NEXT: call __adddf3 +; SOFT-FLOAT-64-NEXT: mov %i2, %o1 +; SOFT-FLOAT-64-NEXT: ret +; SOFT-FLOAT-64-NEXT: restore %g0, %o0, %o0 + %product = fmul contract double %a, %b + %result = fadd contract double %product, %c + ret double %result +} + +define <4 x float> @fmuladd_contract_v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c) #0 { +; SOFT-FLOAT-32-LABEL: fmuladd_contract_v4f32: +; SOFT-FLOAT-32: .cfi_startproc +; SOFT-FLOAT-32-NEXT: ! 
%bb.0: +; SOFT-FLOAT-32-NEXT: save %sp, -96, %sp +; SOFT-FLOAT-32-NEXT: .cfi_def_cfa_register %fp +; SOFT-FLOAT-32-NEXT: .cfi_window_save +; SOFT-FLOAT-32-NEXT: .cfi_register %o7, %i7 +; SOFT-FLOAT-32-NEXT: ld [%fp+100], %l0 +; SOFT-FLOAT-32-NEXT: ld [%fp+104], %l1 +; SOFT-FLOAT-32-NEXT: ld [%fp+108], %l2 +; SOFT-FLOAT-32-NEXT: ld [%fp+112], %l3 +; SOFT-FLOAT-32-NEXT: ld [%fp+96], %l4 +; SOFT-FLOAT-32-NEXT: ld [%fp+92], %l5 +; SOFT-FLOAT-32-NEXT: mov %i0, %o0 +; SOFT-FLOAT-32-NEXT: call __mulsf3 +; SOFT-FLOAT-32-NEXT: mov %i4, %o1 +; SOFT-FLOAT-32-NEXT: mov %o0, %l6 +; SOFT-FLOAT-32-NEXT: mov %i1, %o0 +; SOFT-FLOAT-32-NEXT: call __mulsf3 +; SOFT-FLOAT-32-NEXT: mov %i5, %o1 +; SOFT-FLOAT-32-NEXT: mov %o0, %i1 +; SOFT-FLOAT-32-NEXT: mov %i2, %o0 +; SOFT-FLOAT-32-NEXT: call __mulsf3 +; SOFT-FLOAT-32-NEXT: mov %l5, %o1 +; SOFT-FLOAT-32-NEXT: mov %o0, %i4 +; SOFT-FLOAT-32-NEXT: mov %i3, %o0 +; SOFT-FLOAT-32-NEXT: call __mulsf3 +; SOFT-FLOAT-32-NEXT: mov %l4, %o1 +; SOFT-FLOAT-32-NEXT: call __addsf3 +; SOFT-FLOAT-32-NEXT: mov %l3, %o1 +; SOFT-FLOAT-32-NEXT: mov %o0, %i3 +; SOFT-FLOAT-32-NEXT: mov %i4, %o0 +; SOFT-FLOAT-32-NEXT: call __addsf3 +; SOFT-FLOAT-32-NEXT: mov %l2, %o1 +; SOFT-FLOAT-32-NEXT: mov %o0, %i2 +; SOFT-FLOAT-32-NEXT: mov %i1, %o0 +; SOFT-FLOAT-32-NEXT: call __addsf3 +; SOFT-FLOAT-32-NEXT: mov %l1, %o1 +; SOFT-FLOAT-32-NEXT: mov %o0, %i1 +; SOFT-FLOAT-32-NEXT: mov %l6, %o0 +; SOFT-FLOAT-32-NEXT: call __addsf3 +; SOFT-FLOAT-32-NEXT: mov %l0, %o1 +; SOFT-FLOAT-32-NEXT: ret +; SOFT-FLOAT-32-NEXT: restore %g0, %o0, %o0 +; +; SOFT-FLOAT-64-LABEL: fmuladd_contract_v4f32: +; SOFT-FLOAT-64: .cfi_startproc +; SOFT-FLOAT-64-NEXT: ! %bb.0: +; SOFT-FLOAT-64-NEXT: save %sp, -176, %sp +; SOFT-FLOAT-64-NEXT: .cfi_def_cfa_register %fp +; SOFT-FLOAT-64-NEXT: .cfi_window_save +; SOFT-FLOAT-64-NEXT: .cfi_register %o7, %i7 +; SOFT-FLOAT-64-NEXT: ld [%fp+2267], %l0 +; SOFT-FLOAT-64-NEXT: ld [%fp+2259], %l1 +; SOFT-FLOAT-64-NEXT: ld [%fp+2251], %l2 +; SOFT-FLOAT-64-NEXT: ld [%fp+2243], %l3 +; SOFT-FLOAT-64-NEXT: ld [%fp+2227], %l4 +; SOFT-FLOAT-64-NEXT: ld [%fp+2235], %o1 +; SOFT-FLOAT-64-NEXT: call __mulsf3 +; SOFT-FLOAT-64-NEXT: srl %i3, 0, %o0 +; SOFT-FLOAT-64-NEXT: mov %o0, %i3 +; SOFT-FLOAT-64-NEXT: srl %i2, 0, %o0 +; SOFT-FLOAT-64-NEXT: call __mulsf3 +; SOFT-FLOAT-64-NEXT: mov %l4, %o1 +; SOFT-FLOAT-64-NEXT: mov %o0, %i2 +; SOFT-FLOAT-64-NEXT: srl %i1, 0, %o0 +; SOFT-FLOAT-64-NEXT: call __mulsf3 +; SOFT-FLOAT-64-NEXT: srl %i5, 0, %o1 +; SOFT-FLOAT-64-NEXT: mov %o0, %i1 +; SOFT-FLOAT-64-NEXT: srl %i0, 0, %o0 +; SOFT-FLOAT-64-NEXT: call __mulsf3 +; SOFT-FLOAT-64-NEXT: srl %i4, 0, %o1 +; SOFT-FLOAT-64-NEXT: call __addsf3 +; SOFT-FLOAT-64-NEXT: mov %l3, %o1 +; SOFT-FLOAT-64-NEXT: mov %o0, %i0 +; SOFT-FLOAT-64-NEXT: mov %i1, %o0 +; SOFT-FLOAT-64-NEXT: call __addsf3 +; SOFT-FLOAT-64-NEXT: mov %l2, %o1 +; SOFT-FLOAT-64-NEXT: mov %o0, %i1 +; SOFT-FLOAT-64-NEXT: mov %i2, %o0 +; SOFT-FLOAT-64-NEXT: call __addsf3 +; SOFT-FLOAT-64-NEXT: mov %l1, %o1 +; SOFT-FLOAT-64-NEXT: mov %o0, %i2 +; SOFT-FLOAT-64-NEXT: mov %i3, %o0 +; SOFT-FLOAT-64-NEXT: call __addsf3 +; SOFT-FLOAT-64-NEXT: mov %l0, %o1 +; SOFT-FLOAT-64-NEXT: ret +; SOFT-FLOAT-64-NEXT: restore %g0, %o0, %o3 + %product = fmul contract <4 x float> %a, %b + %result = fadd contract <4 x float> %product, %c + ret <4 x float> %result +} + +define <4 x double> @fmuladd_contract_v4f64(<4 x double> %a, <4 x double> %b, <4 x double> %c) #0 { +; SOFT-FLOAT-32-LABEL: fmuladd_contract_v4f64: +; SOFT-FLOAT-32: .cfi_startproc +; SOFT-FLOAT-32-NEXT: ! 
%bb.0: +; SOFT-FLOAT-32-NEXT: save %sp, -128, %sp +; SOFT-FLOAT-32-NEXT: .cfi_def_cfa_register %fp +; SOFT-FLOAT-32-NEXT: .cfi_window_save +; SOFT-FLOAT-32-NEXT: .cfi_register %o7, %i7 +; SOFT-FLOAT-32-NEXT: ld [%fp+64], %l6 +; SOFT-FLOAT-32-NEXT: ld [%fp+156], %g2 +; SOFT-FLOAT-32-NEXT: st %g2, [%fp+-4] ! 4-byte Folded Spill +; SOFT-FLOAT-32-NEXT: ld [%fp+160], %g2 +; SOFT-FLOAT-32-NEXT: st %g2, [%fp+-8] ! 4-byte Folded Spill +; SOFT-FLOAT-32-NEXT: ld [%fp+148], %g2 +; SOFT-FLOAT-32-NEXT: st %g2, [%fp+-12] ! 4-byte Folded Spill +; SOFT-FLOAT-32-NEXT: ld [%fp+152], %g2 +; SOFT-FLOAT-32-NEXT: st %g2, [%fp+-16] ! 4-byte Folded Spill +; SOFT-FLOAT-32-NEXT: ld [%fp+140], %g2 +; SOFT-FLOAT-32-NEXT: st %g2, [%fp+-20] ! 4-byte Folded Spill +; SOFT-FLOAT-32-NEXT: ld [%fp+144], %g2 +; SOFT-FLOAT-32-NEXT: st %g2, [%fp+-24] ! 4-byte Folded Spill +; SOFT-FLOAT-32-NEXT: ld [%fp+132], %g2 +; SOFT-FLOAT-32-NEXT: st %g2, [%fp+-28] ! 4-byte Folded Spill +; SOFT-FLOAT-32-NEXT: ld [%fp+136], %l7 +; SOFT-FLOAT-32-NEXT: ld [%fp+100], %l0 +; SOFT-FLOAT-32-NEXT: ld [%fp+104], %l1 +; SOFT-FLOAT-32-NEXT: ld [%fp+108], %l2 +; SOFT-FLOAT-32-NEXT: ld [%fp+112], %l3 +; SOFT-FLOAT-32-NEXT: ld [%fp+116], %l4 +; SOFT-FLOAT-32-NEXT: ld [%fp+120], %l5 +; SOFT-FLOAT-32-NEXT: ld [%fp+92], %o0 +; SOFT-FLOAT-32-NEXT: ld [%fp+96], %o1 +; SOFT-FLOAT-32-NEXT: ld [%fp+124], %o2 +; SOFT-FLOAT-32-NEXT: call __muldf3 +; SOFT-FLOAT-32-NEXT: ld [%fp+128], %o3 +; SOFT-FLOAT-32-NEXT: st %o0, [%fp+-32] ! 4-byte Folded Spill +; SOFT-FLOAT-32-NEXT: st %o1, [%fp+-36] ! 4-byte Folded Spill +; SOFT-FLOAT-32-NEXT: mov %i4, %o0 +; SOFT-FLOAT-32-NEXT: mov %i5, %o1 +; SOFT-FLOAT-32-NEXT: mov %l4, %o2 +; SOFT-FLOAT-32-NEXT: call __muldf3 +; SOFT-FLOAT-32-NEXT: mov %l5, %o3 +; SOFT-FLOAT-32-NEXT: mov %o0, %l4 +; SOFT-FLOAT-32-NEXT: mov %o1, %l5 +; SOFT-FLOAT-32-NEXT: mov %i2, %o0 +; SOFT-FLOAT-32-NEXT: mov %i3, %o1 +; SOFT-FLOAT-32-NEXT: mov %l2, %o2 +; SOFT-FLOAT-32-NEXT: call __muldf3 +; SOFT-FLOAT-32-NEXT: mov %l3, %o3 +; SOFT-FLOAT-32-NEXT: mov %o0, %i4 +; SOFT-FLOAT-32-NEXT: mov %o1, %i5 +; SOFT-FLOAT-32-NEXT: mov %i0, %o0 +; SOFT-FLOAT-32-NEXT: mov %i1, %o1 +; SOFT-FLOAT-32-NEXT: mov %l0, %o2 +; SOFT-FLOAT-32-NEXT: call __muldf3 +; SOFT-FLOAT-32-NEXT: mov %l1, %o3 +; SOFT-FLOAT-32-NEXT: ld [%fp+-28], %o2 ! 4-byte Folded Reload +; SOFT-FLOAT-32-NEXT: call __adddf3 +; SOFT-FLOAT-32-NEXT: mov %l7, %o3 +; SOFT-FLOAT-32-NEXT: mov %o0, %i2 +; SOFT-FLOAT-32-NEXT: mov %o1, %i3 +; SOFT-FLOAT-32-NEXT: mov %i4, %o0 +; SOFT-FLOAT-32-NEXT: mov %i5, %o1 +; SOFT-FLOAT-32-NEXT: ld [%fp+-20], %o2 ! 4-byte Folded Reload +; SOFT-FLOAT-32-NEXT: call __adddf3 +; SOFT-FLOAT-32-NEXT: ld [%fp+-24], %o3 +; SOFT-FLOAT-32-NEXT: mov %o0, %i4 +; SOFT-FLOAT-32-NEXT: mov %o1, %i5 +; SOFT-FLOAT-32-NEXT: mov %l4, %o0 +; SOFT-FLOAT-32-NEXT: mov %l5, %o1 +; SOFT-FLOAT-32-NEXT: ld [%fp+-12], %o2 ! 4-byte Folded Reload +; SOFT-FLOAT-32-NEXT: call __adddf3 +; SOFT-FLOAT-32-NEXT: ld [%fp+-16], %o3 +; SOFT-FLOAT-32-NEXT: mov %o0, %i0 +; SOFT-FLOAT-32-NEXT: mov %o1, %i1 +; SOFT-FLOAT-32-NEXT: ld [%fp+-32], %o0 ! 4-byte Folded Reload +; SOFT-FLOAT-32-NEXT: ld [%fp+-36], %o1 ! 4-byte Folded Reload +; SOFT-FLOAT-32-NEXT: ld [%fp+-4], %o2 ! 4-byte Folded Reload +; SOFT-FLOAT-32-NEXT: call __adddf3 +; SOFT-FLOAT-32-NEXT: ld [%fp+-8], %o3 +; SOFT-FLOAT-32-NEXT: ! kill: def $o0 killed $o0 killed $o0_o1 def $o0_o1 +; SOFT-FLOAT-32-NEXT: ! 
kill: def $o1 killed $o1 killed $o0_o1 def $o0_o1 +; SOFT-FLOAT-32-NEXT: std %o0, [%l6+24] +; SOFT-FLOAT-32-NEXT: std %i0, [%l6+16] +; SOFT-FLOAT-32-NEXT: std %i4, [%l6+8] +; SOFT-FLOAT-32-NEXT: std %i2, [%l6] +; SOFT-FLOAT-32-NEXT: ret +; SOFT-FLOAT-32-NEXT: restore +; +; SOFT-FLOAT-64-LABEL: fmuladd_contract_v4f64: +; SOFT-FLOAT-64: .cfi_startproc +; SOFT-FLOAT-64-NEXT: ! %bb.0: +; SOFT-FLOAT-64-NEXT: save %sp, -176, %sp +; SOFT-FLOAT-64-NEXT: .cfi_def_cfa_register %fp +; SOFT-FLOAT-64-NEXT: .cfi_window_save +; SOFT-FLOAT-64-NEXT: .cfi_register %o7, %i7 +; SOFT-FLOAT-64-NEXT: ldx [%fp+2263], %l0 +; SOFT-FLOAT-64-NEXT: ldx [%fp+2255], %l1 +; SOFT-FLOAT-64-NEXT: ldx [%fp+2247], %l2 +; SOFT-FLOAT-64-NEXT: ldx [%fp+2239], %l3 +; SOFT-FLOAT-64-NEXT: ldx [%fp+2223], %l4 +; SOFT-FLOAT-64-NEXT: ldx [%fp+2231], %o1 +; SOFT-FLOAT-64-NEXT: call __muldf3 +; SOFT-FLOAT-64-NEXT: mov %i3, %o0 +; SOFT-FLOAT-64-NEXT: mov %o0, %i3 +; SOFT-FLOAT-64-NEXT: mov %i2, %o0 +; SOFT-FLOAT-64-NEXT: call __muldf3 +; SOFT-FLOAT-64-NEXT: mov %l4, %o1 +; SOFT-FLOAT-64-NEXT: mov %o0, %i2 +; SOFT-FLOAT-64-NEXT: mov %i1, %o0 +; SOFT-FLOAT-64-NEXT: call __muldf3 +; SOFT-FLOAT-64-NEXT: mov %i5, %o1 +; SOFT-FLOAT-64-NEXT: mov %o0, %i1 +; SOFT-FLOAT-64-NEXT: mov %i0, %o0 +; SOFT-FLOAT-64-NEXT: call __muldf3 +; SOFT-FLOAT-64-NEXT: mov %i4, %o1 +; SOFT-FLOAT-64-NEXT: call __adddf3 +; SOFT-FLOAT-64-NEXT: mov %l3, %o1 +; SOFT-FLOAT-64-NEXT: mov %o0, %i0 +; SOFT-FLOAT-64-NEXT: mov %i1, %o0 +; SOFT-FLOAT-64-NEXT: call __adddf3 +; SOFT-FLOAT-64-NEXT: mov %l2, %o1 +; SOFT-FLOAT-64-NEXT: mov %o0, %i1 +; SOFT-FLOAT-64-NEXT: mov %i2, %o0 +; SOFT-FLOAT-64-NEXT: call __adddf3 +; SOFT-FLOAT-64-NEXT: mov %l1, %o1 +; SOFT-FLOAT-64-NEXT: mov %o0, %i2 +; SOFT-FLOAT-64-NEXT: mov %i3, %o0 +; SOFT-FLOAT-64-NEXT: call __adddf3 +; SOFT-FLOAT-64-NEXT: mov %l0, %o1 +; SOFT-FLOAT-64-NEXT: ret +; SOFT-FLOAT-64-NEXT: restore %g0, %o0, %o3 + %product = fmul contract <4 x double> %a, %b + %result = fadd contract <4 x double> %product, %c + ret <4 x double> %result +} + +attributes #0 = { "use-soft-float"="true" } + +declare float @llvm.fmuladd.f32(float %a, float %b, float %c) +declare double @llvm.fmuladd.f64(double %a, double %b, double %c) diff --git a/llvm/test/CodeGen/SystemZ/fmuladd-soft-float.ll b/llvm/test/CodeGen/SystemZ/fmuladd-soft-float.ll new file mode 100644 index 00000000000000..b01c348b631b88 --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/fmuladd-soft-float.ll @@ -0,0 +1,230 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=s390x < %s | FileCheck %s -check-prefix=SOFT-FLOAT + +define float @fmuladd_intrinsic_f32(float %a, float %b, float %c) #0 { +; SOFT-FLOAT-LABEL: fmuladd_intrinsic_f32: +; SOFT-FLOAT: # %bb.0: +; SOFT-FLOAT-NEXT: stmg %r13, %r15, 104(%r15) +; SOFT-FLOAT-NEXT: .cfi_offset %r13, -56 +; SOFT-FLOAT-NEXT: .cfi_offset %r14, -48 +; SOFT-FLOAT-NEXT: .cfi_offset %r15, -40 +; SOFT-FLOAT-NEXT: aghi %r15, -160 +; SOFT-FLOAT-NEXT: .cfi_def_cfa_offset 320 +; SOFT-FLOAT-NEXT: llgfr %r2, %r2 +; SOFT-FLOAT-NEXT: llgfr %r3, %r3 +; SOFT-FLOAT-NEXT: lr %r13, %r4 +; SOFT-FLOAT-NEXT: brasl %r14, __mulsf3@PLT +; SOFT-FLOAT-NEXT: llgfr %r3, %r13 +; SOFT-FLOAT-NEXT: brasl %r14, __addsf3@PLT +; SOFT-FLOAT-NEXT: # kill: def $r2l killed $r2l killed $r2d +; SOFT-FLOAT-NEXT: lmg %r13, %r15, 264(%r15) +; SOFT-FLOAT-NEXT: br %r14 + %result = call float @llvm.fmuladd.f32(float %a, float %b, float %c) + ret float %result +} + +define double @fmuladd_intrinsic_f64(double %a, double 
%b, double %c) #0 { +; SOFT-FLOAT-LABEL: fmuladd_intrinsic_f64: +; SOFT-FLOAT: # %bb.0: +; SOFT-FLOAT-NEXT: stmg %r13, %r15, 104(%r15) +; SOFT-FLOAT-NEXT: .cfi_offset %r13, -56 +; SOFT-FLOAT-NEXT: .cfi_offset %r14, -48 +; SOFT-FLOAT-NEXT: .cfi_offset %r15, -40 +; SOFT-FLOAT-NEXT: aghi %r15, -160 +; SOFT-FLOAT-NEXT: .cfi_def_cfa_offset 320 +; SOFT-FLOAT-NEXT: lgr %r13, %r4 +; SOFT-FLOAT-NEXT: brasl %r14, __muldf3@PLT +; SOFT-FLOAT-NEXT: lgr %r3, %r13 +; SOFT-FLOAT-NEXT: brasl %r14, __adddf3@PLT +; SOFT-FLOAT-NEXT: lmg %r13, %r15, 264(%r15) +; SOFT-FLOAT-NEXT: br %r14 + %result = call double @llvm.fmuladd.f64(double %a, double %b, double %c) + ret double %result +} + +define float @fmuladd_contract_f32(float %a, float %b, float %c) #0 { +; SOFT-FLOAT-LABEL: fmuladd_contract_f32: +; SOFT-FLOAT: # %bb.0: +; SOFT-FLOAT-NEXT: stmg %r13, %r15, 104(%r15) +; SOFT-FLOAT-NEXT: .cfi_offset %r13, -56 +; SOFT-FLOAT-NEXT: .cfi_offset %r14, -48 +; SOFT-FLOAT-NEXT: .cfi_offset %r15, -40 +; SOFT-FLOAT-NEXT: aghi %r15, -160 +; SOFT-FLOAT-NEXT: .cfi_def_cfa_offset 320 +; SOFT-FLOAT-NEXT: llgfr %r2, %r2 +; SOFT-FLOAT-NEXT: llgfr %r3, %r3 +; SOFT-FLOAT-NEXT: lr %r13, %r4 +; SOFT-FLOAT-NEXT: brasl %r14, __mulsf3@PLT +; SOFT-FLOAT-NEXT: llgfr %r3, %r13 +; SOFT-FLOAT-NEXT: brasl %r14, __addsf3@PLT +; SOFT-FLOAT-NEXT: # kill: def $r2l killed $r2l killed $r2d +; SOFT-FLOAT-NEXT: lmg %r13, %r15, 264(%r15) +; SOFT-FLOAT-NEXT: br %r14 + %product = fmul contract float %a, %b + %result = fadd contract float %product, %c + ret float %result +} + +define double @fmuladd_contract_f64(double %a, double %b, double %c) #0 { +; SOFT-FLOAT-LABEL: fmuladd_contract_f64: +; SOFT-FLOAT: # %bb.0: +; SOFT-FLOAT-NEXT: stmg %r13, %r15, 104(%r15) +; SOFT-FLOAT-NEXT: .cfi_offset %r13, -56 +; SOFT-FLOAT-NEXT: .cfi_offset %r14, -48 +; SOFT-FLOAT-NEXT: .cfi_offset %r15, -40 +; SOFT-FLOAT-NEXT: aghi %r15, -160 +; SOFT-FLOAT-NEXT: .cfi_def_cfa_offset 320 +; SOFT-FLOAT-NEXT: lgr %r13, %r4 +; SOFT-FLOAT-NEXT: brasl %r14, __muldf3@PLT +; SOFT-FLOAT-NEXT: lgr %r3, %r13 +; SOFT-FLOAT-NEXT: brasl %r14, __adddf3@PLT +; SOFT-FLOAT-NEXT: lmg %r13, %r15, 264(%r15) +; SOFT-FLOAT-NEXT: br %r14 + %product = fmul contract double %a, %b + %result = fadd contract double %product, %c + ret double %result +} + +define <4 x float> @fmuladd_contract_v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c) #0 { +; SOFT-FLOAT-LABEL: fmuladd_contract_v4f32: +; SOFT-FLOAT: # %bb.0: +; SOFT-FLOAT-NEXT: stmg %r7, %r15, 56(%r15) +; SOFT-FLOAT-NEXT: .cfi_offset %r7, -104 +; SOFT-FLOAT-NEXT: .cfi_offset %r8, -96 +; SOFT-FLOAT-NEXT: .cfi_offset %r9, -88 +; SOFT-FLOAT-NEXT: .cfi_offset %r10, -80 +; SOFT-FLOAT-NEXT: .cfi_offset %r11, -72 +; SOFT-FLOAT-NEXT: .cfi_offset %r12, -64 +; SOFT-FLOAT-NEXT: .cfi_offset %r13, -56 +; SOFT-FLOAT-NEXT: .cfi_offset %r14, -48 +; SOFT-FLOAT-NEXT: .cfi_offset %r15, -40 +; SOFT-FLOAT-NEXT: aghi %r15, -176 +; SOFT-FLOAT-NEXT: .cfi_def_cfa_offset 336 +; SOFT-FLOAT-NEXT: llgf %r0, 388(%r15) +; SOFT-FLOAT-NEXT: stg %r0, 168(%r15) # 8-byte Folded Spill +; SOFT-FLOAT-NEXT: llgf %r0, 380(%r15) +; SOFT-FLOAT-NEXT: stg %r0, 160(%r15) # 8-byte Folded Spill +; SOFT-FLOAT-NEXT: llgf %r11, 372(%r15) +; SOFT-FLOAT-NEXT: llgf %r10, 364(%r15) +; SOFT-FLOAT-NEXT: llgf %r8, 340(%r15) +; SOFT-FLOAT-NEXT: llgf %r0, 356(%r15) +; SOFT-FLOAT-NEXT: llgf %r7, 348(%r15) +; SOFT-FLOAT-NEXT: llgfr %r1, %r5 +; SOFT-FLOAT-NEXT: lr %r9, %r4 +; SOFT-FLOAT-NEXT: lr %r13, %r3 +; SOFT-FLOAT-NEXT: lr %r12, %r2 +; SOFT-FLOAT-NEXT: lgr %r2, %r1 +; SOFT-FLOAT-NEXT: lgr %r3, %r0 +; 
SOFT-FLOAT-NEXT: brasl %r14, __mulsf3@PLT +; SOFT-FLOAT-NEXT: llgfr %r0, %r9 +; SOFT-FLOAT-NEXT: lgr %r9, %r2 +; SOFT-FLOAT-NEXT: lgr %r2, %r0 +; SOFT-FLOAT-NEXT: lgr %r3, %r7 +; SOFT-FLOAT-NEXT: brasl %r14, __mulsf3@PLT +; SOFT-FLOAT-NEXT: llgfr %r0, %r13 +; SOFT-FLOAT-NEXT: lgr %r13, %r2 +; SOFT-FLOAT-NEXT: lgr %r2, %r0 +; SOFT-FLOAT-NEXT: lgr %r3, %r8 +; SOFT-FLOAT-NEXT: brasl %r14, __mulsf3@PLT +; SOFT-FLOAT-NEXT: llgfr %r0, %r12 +; SOFT-FLOAT-NEXT: llgfr %r3, %r6 +; SOFT-FLOAT-NEXT: lgr %r12, %r2 +; SOFT-FLOAT-NEXT: lgr %r2, %r0 +; SOFT-FLOAT-NEXT: brasl %r14, __mulsf3@PLT +; SOFT-FLOAT-NEXT: lgr %r3, %r10 +; SOFT-FLOAT-NEXT: brasl %r14, __addsf3@PLT +; SOFT-FLOAT-NEXT: lgr %r10, %r2 +; SOFT-FLOAT-NEXT: lgr %r2, %r12 +; SOFT-FLOAT-NEXT: lgr %r3, %r11 +; SOFT-FLOAT-NEXT: brasl %r14, __addsf3@PLT +; SOFT-FLOAT-NEXT: lgr %r12, %r2 +; SOFT-FLOAT-NEXT: lgr %r2, %r13 +; SOFT-FLOAT-NEXT: lg %r3, 160(%r15) # 8-byte Folded Reload +; SOFT-FLOAT-NEXT: brasl %r14, __addsf3@PLT +; SOFT-FLOAT-NEXT: lgr %r13, %r2 +; SOFT-FLOAT-NEXT: lgr %r2, %r9 +; SOFT-FLOAT-NEXT: lg %r3, 168(%r15) # 8-byte Folded Reload +; SOFT-FLOAT-NEXT: brasl %r14, __addsf3@PLT +; SOFT-FLOAT-NEXT: lgr %r5, %r2 +; SOFT-FLOAT-NEXT: lr %r2, %r10 +; SOFT-FLOAT-NEXT: lr %r3, %r12 +; SOFT-FLOAT-NEXT: lr %r4, %r13 +; SOFT-FLOAT-NEXT: # kill: def $r5l killed $r5l killed $r5d +; SOFT-FLOAT-NEXT: lmg %r7, %r15, 232(%r15) +; SOFT-FLOAT-NEXT: br %r14 + %product = fmul contract <4 x float> %a, %b + %result = fadd contract <4 x float> %product, %c + ret <4 x float> %result +} + +define <4 x double> @fmuladd_contract_v4f64(<4 x double> %a, <4 x double> %b, <4 x double> %c) #0 { +; SOFT-FLOAT-LABEL: fmuladd_contract_v4f64: +; SOFT-FLOAT: # %bb.0: +; SOFT-FLOAT-NEXT: stmg %r6, %r15, 48(%r15) +; SOFT-FLOAT-NEXT: .cfi_offset %r6, -112 +; SOFT-FLOAT-NEXT: .cfi_offset %r7, -104 +; SOFT-FLOAT-NEXT: .cfi_offset %r8, -96 +; SOFT-FLOAT-NEXT: .cfi_offset %r9, -88 +; SOFT-FLOAT-NEXT: .cfi_offset %r10, -80 +; SOFT-FLOAT-NEXT: .cfi_offset %r11, -72 +; SOFT-FLOAT-NEXT: .cfi_offset %r12, -64 +; SOFT-FLOAT-NEXT: .cfi_offset %r13, -56 +; SOFT-FLOAT-NEXT: .cfi_offset %r14, -48 +; SOFT-FLOAT-NEXT: .cfi_offset %r15, -40 +; SOFT-FLOAT-NEXT: aghi %r15, -184 +; SOFT-FLOAT-NEXT: .cfi_def_cfa_offset 344 +; SOFT-FLOAT-NEXT: mvc 176(8,%r15), 24(%r4) # 8-byte Folded Spill +; SOFT-FLOAT-NEXT: mvc 168(8,%r15), 16(%r4) # 8-byte Folded Spill +; SOFT-FLOAT-NEXT: mvc 160(8,%r15), 8(%r4) # 8-byte Folded Spill +; SOFT-FLOAT-NEXT: lg %r10, 0(%r4) +; SOFT-FLOAT-NEXT: lg %r9, 0(%r2) +; SOFT-FLOAT-NEXT: lg %r8, 0(%r3) +; SOFT-FLOAT-NEXT: lg %r7, 8(%r2) +; SOFT-FLOAT-NEXT: lg %r6, 8(%r3) +; SOFT-FLOAT-NEXT: lg %r13, 16(%r2) +; SOFT-FLOAT-NEXT: lg %r2, 24(%r2) +; SOFT-FLOAT-NEXT: lg %r0, 24(%r3) +; SOFT-FLOAT-NEXT: lg %r12, 16(%r3) +; SOFT-FLOAT-NEXT: lgr %r3, %r0 +; SOFT-FLOAT-NEXT: brasl %r14, __muldf3@PLT +; SOFT-FLOAT-NEXT: lgr %r11, %r2 +; SOFT-FLOAT-NEXT: lgr %r2, %r13 +; SOFT-FLOAT-NEXT: lgr %r3, %r12 +; SOFT-FLOAT-NEXT: brasl %r14, __muldf3@PLT +; SOFT-FLOAT-NEXT: lgr %r13, %r2 +; SOFT-FLOAT-NEXT: lgr %r2, %r7 +; SOFT-FLOAT-NEXT: lgr %r3, %r6 +; SOFT-FLOAT-NEXT: brasl %r14, __muldf3@PLT +; SOFT-FLOAT-NEXT: lgr %r12, %r2 +; SOFT-FLOAT-NEXT: lgr %r2, %r9 +; SOFT-FLOAT-NEXT: lgr %r3, %r8 +; SOFT-FLOAT-NEXT: brasl %r14, __muldf3@PLT +; SOFT-FLOAT-NEXT: lgr %r3, %r10 +; SOFT-FLOAT-NEXT: brasl %r14, __adddf3@PLT +; SOFT-FLOAT-NEXT: lgr %r10, %r2 +; SOFT-FLOAT-NEXT: lgr %r2, %r12 +; SOFT-FLOAT-NEXT: lg %r3, 160(%r15) # 8-byte Folded Reload +; SOFT-FLOAT-NEXT: brasl %r14, __adddf3@PLT 
+; SOFT-FLOAT-NEXT: lgr %r12, %r2 +; SOFT-FLOAT-NEXT: lgr %r2, %r13 +; SOFT-FLOAT-NEXT: lg %r3, 168(%r15) # 8-byte Folded Reload +; SOFT-FLOAT-NEXT: brasl %r14, __adddf3@PLT +; SOFT-FLOAT-NEXT: lgr %r13, %r2 +; SOFT-FLOAT-NEXT: lgr %r2, %r11 +; SOFT-FLOAT-NEXT: lg %r3, 176(%r15) # 8-byte Folded Reload +; SOFT-FLOAT-NEXT: brasl %r14, __adddf3@PLT +; SOFT-FLOAT-NEXT: lgr %r5, %r2 +; SOFT-FLOAT-NEXT: lgr %r2, %r10 +; SOFT-FLOAT-NEXT: lgr %r3, %r12 +; SOFT-FLOAT-NEXT: lgr %r4, %r13 +; SOFT-FLOAT-NEXT: lmg %r6, %r15, 232(%r15) +; SOFT-FLOAT-NEXT: br %r14 + %product = fmul contract <4 x double> %a, %b + %result = fadd contract <4 x double> %product, %c + ret <4 x double> %result +} + +attributes #0 = { "use-soft-float"="true" } + +declare float @llvm.fmuladd.f32(float %a, float %b, float %c) +declare double @llvm.fmuladd.f64(double %a, double %b, double %c) diff --git a/llvm/test/CodeGen/X86/fmuladd-soft-float.ll b/llvm/test/CodeGen/X86/fmuladd-soft-float.ll new file mode 100644 index 00000000000000..ccb2f37590b0ad --- /dev/null +++ b/llvm/test/CodeGen/X86/fmuladd-soft-float.ll @@ -0,0 +1,1777 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=i386 < %s | FileCheck %s -check-prefix=SOFT-FLOAT-32 +; RUN: llc -mtriple=i386 -mattr +fma < %s | FileCheck %s -check-prefix=SOFT-FLOAT-32-FMA +; RUN: llc -mtriple=i386 -mattr +fma4 < %s | FileCheck %s -check-prefix=SOFT-FLOAT-32-FMA4 +; RUN: llc -mtriple=x86_64 < %s | FileCheck %s -check-prefix=SOFT-FLOAT-64 +; RUN: llc -mtriple=x86_64 -mattr +fma < %s | FileCheck %s -check-prefix=SOFT-FLOAT-64-FMA +; RUN: llc -mtriple=x86_64 -mattr +fma4 < %s | FileCheck %s -check-prefix=SOFT-FLOAT-64-FMA4 + +define float @fmuladd_intrinsic_f32(float %a, float %b, float %c) #0 { +; SOFT-FLOAT-32-LABEL: fmuladd_intrinsic_f32: +; SOFT-FLOAT-32: # %bb.0: +; SOFT-FLOAT-32-NEXT: pushl %esi +; SOFT-FLOAT-32-NEXT: .cfi_def_cfa_offset 8 +; SOFT-FLOAT-32-NEXT: .cfi_offset %esi, -8 +; SOFT-FLOAT-32-NEXT: movl {{[0-9]+}}(%esp), %esi +; SOFT-FLOAT-32-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-NEXT: calll __mulsf3 +; SOFT-FLOAT-32-NEXT: addl $8, %esp +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset -8 +; SOFT-FLOAT-32-NEXT: pushl %esi +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-NEXT: pushl %eax +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-NEXT: calll __addsf3 +; SOFT-FLOAT-32-NEXT: addl $8, %esp +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset -8 +; SOFT-FLOAT-32-NEXT: popl %esi +; SOFT-FLOAT-32-NEXT: .cfi_def_cfa_offset 4 +; SOFT-FLOAT-32-NEXT: retl +; +; SOFT-FLOAT-32-FMA-LABEL: fmuladd_intrinsic_f32: +; SOFT-FLOAT-32-FMA: # %bb.0: +; SOFT-FLOAT-32-FMA-NEXT: pushl %esi +; SOFT-FLOAT-32-FMA-NEXT: .cfi_def_cfa_offset 8 +; SOFT-FLOAT-32-FMA-NEXT: .cfi_offset %esi, -8 +; SOFT-FLOAT-32-FMA-NEXT: movl {{[0-9]+}}(%esp), %esi +; SOFT-FLOAT-32-FMA-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA-NEXT: calll __mulsf3 +; SOFT-FLOAT-32-FMA-NEXT: addl $8, %esp +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset -8 +; SOFT-FLOAT-32-FMA-NEXT: pushl %esi +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA-NEXT: pushl %eax +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset 4 +; 
SOFT-FLOAT-32-FMA-NEXT: calll __addsf3 +; SOFT-FLOAT-32-FMA-NEXT: addl $8, %esp +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset -8 +; SOFT-FLOAT-32-FMA-NEXT: popl %esi +; SOFT-FLOAT-32-FMA-NEXT: .cfi_def_cfa_offset 4 +; SOFT-FLOAT-32-FMA-NEXT: retl +; +; SOFT-FLOAT-32-FMA4-LABEL: fmuladd_intrinsic_f32: +; SOFT-FLOAT-32-FMA4: # %bb.0: +; SOFT-FLOAT-32-FMA4-NEXT: pushl %esi +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_def_cfa_offset 8 +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_offset %esi, -8 +; SOFT-FLOAT-32-FMA4-NEXT: movl {{[0-9]+}}(%esp), %esi +; SOFT-FLOAT-32-FMA4-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA4-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA4-NEXT: calll __mulsf3 +; SOFT-FLOAT-32-FMA4-NEXT: addl $8, %esp +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset -8 +; SOFT-FLOAT-32-FMA4-NEXT: pushl %esi +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA4-NEXT: pushl %eax +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA4-NEXT: calll __addsf3 +; SOFT-FLOAT-32-FMA4-NEXT: addl $8, %esp +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset -8 +; SOFT-FLOAT-32-FMA4-NEXT: popl %esi +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_def_cfa_offset 4 +; SOFT-FLOAT-32-FMA4-NEXT: retl +; +; SOFT-FLOAT-64-LABEL: fmuladd_intrinsic_f32: +; SOFT-FLOAT-64: # %bb.0: +; SOFT-FLOAT-64-NEXT: pushq %rbx +; SOFT-FLOAT-64-NEXT: .cfi_def_cfa_offset 16 +; SOFT-FLOAT-64-NEXT: .cfi_offset %rbx, -16 +; SOFT-FLOAT-64-NEXT: movl %edx, %ebx +; SOFT-FLOAT-64-NEXT: callq __mulsf3@PLT +; SOFT-FLOAT-64-NEXT: movl %eax, %edi +; SOFT-FLOAT-64-NEXT: movl %ebx, %esi +; SOFT-FLOAT-64-NEXT: callq __addsf3@PLT +; SOFT-FLOAT-64-NEXT: popq %rbx +; SOFT-FLOAT-64-NEXT: .cfi_def_cfa_offset 8 +; SOFT-FLOAT-64-NEXT: retq +; +; SOFT-FLOAT-64-FMA-LABEL: fmuladd_intrinsic_f32: +; SOFT-FLOAT-64-FMA: # %bb.0: +; SOFT-FLOAT-64-FMA-NEXT: pushq %rbx +; SOFT-FLOAT-64-FMA-NEXT: .cfi_def_cfa_offset 16 +; SOFT-FLOAT-64-FMA-NEXT: .cfi_offset %rbx, -16 +; SOFT-FLOAT-64-FMA-NEXT: movl %edx, %ebx +; SOFT-FLOAT-64-FMA-NEXT: callq __mulsf3@PLT +; SOFT-FLOAT-64-FMA-NEXT: movl %eax, %edi +; SOFT-FLOAT-64-FMA-NEXT: movl %ebx, %esi +; SOFT-FLOAT-64-FMA-NEXT: callq __addsf3@PLT +; SOFT-FLOAT-64-FMA-NEXT: popq %rbx +; SOFT-FLOAT-64-FMA-NEXT: .cfi_def_cfa_offset 8 +; SOFT-FLOAT-64-FMA-NEXT: retq +; +; SOFT-FLOAT-64-FMA4-LABEL: fmuladd_intrinsic_f32: +; SOFT-FLOAT-64-FMA4: # %bb.0: +; SOFT-FLOAT-64-FMA4-NEXT: pushq %rbx +; SOFT-FLOAT-64-FMA4-NEXT: .cfi_def_cfa_offset 16 +; SOFT-FLOAT-64-FMA4-NEXT: .cfi_offset %rbx, -16 +; SOFT-FLOAT-64-FMA4-NEXT: movl %edx, %ebx +; SOFT-FLOAT-64-FMA4-NEXT: callq __mulsf3@PLT +; SOFT-FLOAT-64-FMA4-NEXT: movl %eax, %edi +; SOFT-FLOAT-64-FMA4-NEXT: movl %ebx, %esi +; SOFT-FLOAT-64-FMA4-NEXT: callq __addsf3@PLT +; SOFT-FLOAT-64-FMA4-NEXT: popq %rbx +; SOFT-FLOAT-64-FMA4-NEXT: .cfi_def_cfa_offset 8 +; SOFT-FLOAT-64-FMA4-NEXT: retq + %result = call float @llvm.fmuladd.f32(float %a, float %b, float %c) + ret float %result +} + +define double @fmuladd_intrinsic_f64(double %a, double %b, double %c) #0 { +; SOFT-FLOAT-32-LABEL: fmuladd_intrinsic_f64: +; SOFT-FLOAT-32: # %bb.0: +; SOFT-FLOAT-32-NEXT: pushl %edi +; SOFT-FLOAT-32-NEXT: .cfi_def_cfa_offset 8 +; SOFT-FLOAT-32-NEXT: pushl %esi +; SOFT-FLOAT-32-NEXT: .cfi_def_cfa_offset 12 +; SOFT-FLOAT-32-NEXT: .cfi_offset %esi, -12 +; SOFT-FLOAT-32-NEXT: .cfi_offset %edi, -8 +; SOFT-FLOAT-32-NEXT: movl {{[0-9]+}}(%esp), %esi +; SOFT-FLOAT-32-NEXT: movl 
{{[0-9]+}}(%esp), %edi +; SOFT-FLOAT-32-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-NEXT: calll __muldf3 +; SOFT-FLOAT-32-NEXT: addl $16, %esp +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset -16 +; SOFT-FLOAT-32-NEXT: pushl %edi +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-NEXT: pushl %esi +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-NEXT: pushl %edx +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-NEXT: pushl %eax +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-NEXT: calll __adddf3 +; SOFT-FLOAT-32-NEXT: addl $16, %esp +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset -16 +; SOFT-FLOAT-32-NEXT: popl %esi +; SOFT-FLOAT-32-NEXT: .cfi_def_cfa_offset 8 +; SOFT-FLOAT-32-NEXT: popl %edi +; SOFT-FLOAT-32-NEXT: .cfi_def_cfa_offset 4 +; SOFT-FLOAT-32-NEXT: retl +; +; SOFT-FLOAT-32-FMA-LABEL: fmuladd_intrinsic_f64: +; SOFT-FLOAT-32-FMA: # %bb.0: +; SOFT-FLOAT-32-FMA-NEXT: pushl %edi +; SOFT-FLOAT-32-FMA-NEXT: .cfi_def_cfa_offset 8 +; SOFT-FLOAT-32-FMA-NEXT: pushl %esi +; SOFT-FLOAT-32-FMA-NEXT: .cfi_def_cfa_offset 12 +; SOFT-FLOAT-32-FMA-NEXT: .cfi_offset %esi, -12 +; SOFT-FLOAT-32-FMA-NEXT: .cfi_offset %edi, -8 +; SOFT-FLOAT-32-FMA-NEXT: movl {{[0-9]+}}(%esp), %esi +; SOFT-FLOAT-32-FMA-NEXT: movl {{[0-9]+}}(%esp), %edi +; SOFT-FLOAT-32-FMA-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA-NEXT: calll __muldf3 +; SOFT-FLOAT-32-FMA-NEXT: addl $16, %esp +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset -16 +; SOFT-FLOAT-32-FMA-NEXT: pushl %edi +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA-NEXT: pushl %esi +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA-NEXT: pushl %edx +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA-NEXT: pushl %eax +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA-NEXT: calll __adddf3 +; SOFT-FLOAT-32-FMA-NEXT: addl $16, %esp +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset -16 +; SOFT-FLOAT-32-FMA-NEXT: popl %esi +; SOFT-FLOAT-32-FMA-NEXT: .cfi_def_cfa_offset 8 +; SOFT-FLOAT-32-FMA-NEXT: popl %edi +; SOFT-FLOAT-32-FMA-NEXT: .cfi_def_cfa_offset 4 +; SOFT-FLOAT-32-FMA-NEXT: retl +; +; SOFT-FLOAT-32-FMA4-LABEL: fmuladd_intrinsic_f64: +; SOFT-FLOAT-32-FMA4: # %bb.0: +; SOFT-FLOAT-32-FMA4-NEXT: pushl %edi +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_def_cfa_offset 8 +; SOFT-FLOAT-32-FMA4-NEXT: pushl %esi +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_def_cfa_offset 12 +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_offset %esi, -12 +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_offset %edi, -8 +; SOFT-FLOAT-32-FMA4-NEXT: movl {{[0-9]+}}(%esp), %esi +; SOFT-FLOAT-32-FMA4-NEXT: movl {{[0-9]+}}(%esp), %edi +; SOFT-FLOAT-32-FMA4-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA4-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA4-NEXT: 
.cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA4-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA4-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA4-NEXT: calll __muldf3 +; SOFT-FLOAT-32-FMA4-NEXT: addl $16, %esp +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset -16 +; SOFT-FLOAT-32-FMA4-NEXT: pushl %edi +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA4-NEXT: pushl %esi +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA4-NEXT: pushl %edx +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA4-NEXT: pushl %eax +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA4-NEXT: calll __adddf3 +; SOFT-FLOAT-32-FMA4-NEXT: addl $16, %esp +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset -16 +; SOFT-FLOAT-32-FMA4-NEXT: popl %esi +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_def_cfa_offset 8 +; SOFT-FLOAT-32-FMA4-NEXT: popl %edi +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_def_cfa_offset 4 +; SOFT-FLOAT-32-FMA4-NEXT: retl +; +; SOFT-FLOAT-64-LABEL: fmuladd_intrinsic_f64: +; SOFT-FLOAT-64: # %bb.0: +; SOFT-FLOAT-64-NEXT: pushq %rbx +; SOFT-FLOAT-64-NEXT: .cfi_def_cfa_offset 16 +; SOFT-FLOAT-64-NEXT: .cfi_offset %rbx, -16 +; SOFT-FLOAT-64-NEXT: movq %rdx, %rbx +; SOFT-FLOAT-64-NEXT: callq __muldf3@PLT +; SOFT-FLOAT-64-NEXT: movq %rax, %rdi +; SOFT-FLOAT-64-NEXT: movq %rbx, %rsi +; SOFT-FLOAT-64-NEXT: callq __adddf3@PLT +; SOFT-FLOAT-64-NEXT: popq %rbx +; SOFT-FLOAT-64-NEXT: .cfi_def_cfa_offset 8 +; SOFT-FLOAT-64-NEXT: retq +; +; SOFT-FLOAT-64-FMA-LABEL: fmuladd_intrinsic_f64: +; SOFT-FLOAT-64-FMA: # %bb.0: +; SOFT-FLOAT-64-FMA-NEXT: pushq %rbx +; SOFT-FLOAT-64-FMA-NEXT: .cfi_def_cfa_offset 16 +; SOFT-FLOAT-64-FMA-NEXT: .cfi_offset %rbx, -16 +; SOFT-FLOAT-64-FMA-NEXT: movq %rdx, %rbx +; SOFT-FLOAT-64-FMA-NEXT: callq __muldf3@PLT +; SOFT-FLOAT-64-FMA-NEXT: movq %rax, %rdi +; SOFT-FLOAT-64-FMA-NEXT: movq %rbx, %rsi +; SOFT-FLOAT-64-FMA-NEXT: callq __adddf3@PLT +; SOFT-FLOAT-64-FMA-NEXT: popq %rbx +; SOFT-FLOAT-64-FMA-NEXT: .cfi_def_cfa_offset 8 +; SOFT-FLOAT-64-FMA-NEXT: retq +; +; SOFT-FLOAT-64-FMA4-LABEL: fmuladd_intrinsic_f64: +; SOFT-FLOAT-64-FMA4: # %bb.0: +; SOFT-FLOAT-64-FMA4-NEXT: pushq %rbx +; SOFT-FLOAT-64-FMA4-NEXT: .cfi_def_cfa_offset 16 +; SOFT-FLOAT-64-FMA4-NEXT: .cfi_offset %rbx, -16 +; SOFT-FLOAT-64-FMA4-NEXT: movq %rdx, %rbx +; SOFT-FLOAT-64-FMA4-NEXT: callq __muldf3@PLT +; SOFT-FLOAT-64-FMA4-NEXT: movq %rax, %rdi +; SOFT-FLOAT-64-FMA4-NEXT: movq %rbx, %rsi +; SOFT-FLOAT-64-FMA4-NEXT: callq __adddf3@PLT +; SOFT-FLOAT-64-FMA4-NEXT: popq %rbx +; SOFT-FLOAT-64-FMA4-NEXT: .cfi_def_cfa_offset 8 +; SOFT-FLOAT-64-FMA4-NEXT: retq + %result = call double @llvm.fmuladd.f64(double %a, double %b, double %c) + ret double %result +} + +define float @fmuladd_contract_f32(float %a, float %b, float %c) #0 { +; SOFT-FLOAT-32-LABEL: fmuladd_contract_f32: +; SOFT-FLOAT-32: # %bb.0: +; SOFT-FLOAT-32-NEXT: pushl %esi +; SOFT-FLOAT-32-NEXT: .cfi_def_cfa_offset 8 +; SOFT-FLOAT-32-NEXT: .cfi_offset %esi, -8 +; SOFT-FLOAT-32-NEXT: movl {{[0-9]+}}(%esp), %esi +; SOFT-FLOAT-32-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-NEXT: calll __mulsf3 +; SOFT-FLOAT-32-NEXT: addl $8, %esp +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset -8 +; SOFT-FLOAT-32-NEXT: pushl %esi +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset 4 +; 
SOFT-FLOAT-32-NEXT: pushl %eax +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-NEXT: calll __addsf3 +; SOFT-FLOAT-32-NEXT: addl $8, %esp +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset -8 +; SOFT-FLOAT-32-NEXT: popl %esi +; SOFT-FLOAT-32-NEXT: .cfi_def_cfa_offset 4 +; SOFT-FLOAT-32-NEXT: retl +; +; SOFT-FLOAT-32-FMA-LABEL: fmuladd_contract_f32: +; SOFT-FLOAT-32-FMA: # %bb.0: +; SOFT-FLOAT-32-FMA-NEXT: pushl %esi +; SOFT-FLOAT-32-FMA-NEXT: .cfi_def_cfa_offset 8 +; SOFT-FLOAT-32-FMA-NEXT: .cfi_offset %esi, -8 +; SOFT-FLOAT-32-FMA-NEXT: movl {{[0-9]+}}(%esp), %esi +; SOFT-FLOAT-32-FMA-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA-NEXT: calll __mulsf3 +; SOFT-FLOAT-32-FMA-NEXT: addl $8, %esp +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset -8 +; SOFT-FLOAT-32-FMA-NEXT: pushl %esi +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA-NEXT: pushl %eax +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA-NEXT: calll __addsf3 +; SOFT-FLOAT-32-FMA-NEXT: addl $8, %esp +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset -8 +; SOFT-FLOAT-32-FMA-NEXT: popl %esi +; SOFT-FLOAT-32-FMA-NEXT: .cfi_def_cfa_offset 4 +; SOFT-FLOAT-32-FMA-NEXT: retl +; +; SOFT-FLOAT-32-FMA4-LABEL: fmuladd_contract_f32: +; SOFT-FLOAT-32-FMA4: # %bb.0: +; SOFT-FLOAT-32-FMA4-NEXT: pushl %esi +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_def_cfa_offset 8 +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_offset %esi, -8 +; SOFT-FLOAT-32-FMA4-NEXT: movl {{[0-9]+}}(%esp), %esi +; SOFT-FLOAT-32-FMA4-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA4-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA4-NEXT: calll __mulsf3 +; SOFT-FLOAT-32-FMA4-NEXT: addl $8, %esp +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset -8 +; SOFT-FLOAT-32-FMA4-NEXT: pushl %esi +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA4-NEXT: pushl %eax +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA4-NEXT: calll __addsf3 +; SOFT-FLOAT-32-FMA4-NEXT: addl $8, %esp +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset -8 +; SOFT-FLOAT-32-FMA4-NEXT: popl %esi +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_def_cfa_offset 4 +; SOFT-FLOAT-32-FMA4-NEXT: retl +; +; SOFT-FLOAT-64-LABEL: fmuladd_contract_f32: +; SOFT-FLOAT-64: # %bb.0: +; SOFT-FLOAT-64-NEXT: pushq %rbx +; SOFT-FLOAT-64-NEXT: .cfi_def_cfa_offset 16 +; SOFT-FLOAT-64-NEXT: .cfi_offset %rbx, -16 +; SOFT-FLOAT-64-NEXT: movl %edx, %ebx +; SOFT-FLOAT-64-NEXT: callq __mulsf3@PLT +; SOFT-FLOAT-64-NEXT: movl %eax, %edi +; SOFT-FLOAT-64-NEXT: movl %ebx, %esi +; SOFT-FLOAT-64-NEXT: callq __addsf3@PLT +; SOFT-FLOAT-64-NEXT: popq %rbx +; SOFT-FLOAT-64-NEXT: .cfi_def_cfa_offset 8 +; SOFT-FLOAT-64-NEXT: retq +; +; SOFT-FLOAT-64-FMA-LABEL: fmuladd_contract_f32: +; SOFT-FLOAT-64-FMA: # %bb.0: +; SOFT-FLOAT-64-FMA-NEXT: pushq %rbx +; SOFT-FLOAT-64-FMA-NEXT: .cfi_def_cfa_offset 16 +; SOFT-FLOAT-64-FMA-NEXT: .cfi_offset %rbx, -16 +; SOFT-FLOAT-64-FMA-NEXT: movl %edx, %ebx +; SOFT-FLOAT-64-FMA-NEXT: callq __mulsf3@PLT +; SOFT-FLOAT-64-FMA-NEXT: movl %eax, %edi +; SOFT-FLOAT-64-FMA-NEXT: movl %ebx, %esi +; SOFT-FLOAT-64-FMA-NEXT: callq __addsf3@PLT +; SOFT-FLOAT-64-FMA-NEXT: popq %rbx +; SOFT-FLOAT-64-FMA-NEXT: .cfi_def_cfa_offset 8 +; SOFT-FLOAT-64-FMA-NEXT: retq +; +; SOFT-FLOAT-64-FMA4-LABEL: fmuladd_contract_f32: +; 
SOFT-FLOAT-64-FMA4: # %bb.0: +; SOFT-FLOAT-64-FMA4-NEXT: pushq %rbx +; SOFT-FLOAT-64-FMA4-NEXT: .cfi_def_cfa_offset 16 +; SOFT-FLOAT-64-FMA4-NEXT: .cfi_offset %rbx, -16 +; SOFT-FLOAT-64-FMA4-NEXT: movl %edx, %ebx +; SOFT-FLOAT-64-FMA4-NEXT: callq __mulsf3@PLT +; SOFT-FLOAT-64-FMA4-NEXT: movl %eax, %edi +; SOFT-FLOAT-64-FMA4-NEXT: movl %ebx, %esi +; SOFT-FLOAT-64-FMA4-NEXT: callq __addsf3@PLT +; SOFT-FLOAT-64-FMA4-NEXT: popq %rbx +; SOFT-FLOAT-64-FMA4-NEXT: .cfi_def_cfa_offset 8 +; SOFT-FLOAT-64-FMA4-NEXT: retq + %product = fmul contract float %a, %b + %result = fadd contract float %product, %c + ret float %result +} + +define double @fmuladd_contract_f64(double %a, double %b, double %c) #0 { +; SOFT-FLOAT-32-LABEL: fmuladd_contract_f64: +; SOFT-FLOAT-32: # %bb.0: +; SOFT-FLOAT-32-NEXT: pushl %edi +; SOFT-FLOAT-32-NEXT: .cfi_def_cfa_offset 8 +; SOFT-FLOAT-32-NEXT: pushl %esi +; SOFT-FLOAT-32-NEXT: .cfi_def_cfa_offset 12 +; SOFT-FLOAT-32-NEXT: .cfi_offset %esi, -12 +; SOFT-FLOAT-32-NEXT: .cfi_offset %edi, -8 +; SOFT-FLOAT-32-NEXT: movl {{[0-9]+}}(%esp), %esi +; SOFT-FLOAT-32-NEXT: movl {{[0-9]+}}(%esp), %edi +; SOFT-FLOAT-32-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-NEXT: calll __muldf3 +; SOFT-FLOAT-32-NEXT: addl $16, %esp +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset -16 +; SOFT-FLOAT-32-NEXT: pushl %edi +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-NEXT: pushl %esi +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-NEXT: pushl %edx +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-NEXT: pushl %eax +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-NEXT: calll __adddf3 +; SOFT-FLOAT-32-NEXT: addl $16, %esp +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset -16 +; SOFT-FLOAT-32-NEXT: popl %esi +; SOFT-FLOAT-32-NEXT: .cfi_def_cfa_offset 8 +; SOFT-FLOAT-32-NEXT: popl %edi +; SOFT-FLOAT-32-NEXT: .cfi_def_cfa_offset 4 +; SOFT-FLOAT-32-NEXT: retl +; +; SOFT-FLOAT-32-FMA-LABEL: fmuladd_contract_f64: +; SOFT-FLOAT-32-FMA: # %bb.0: +; SOFT-FLOAT-32-FMA-NEXT: pushl %edi +; SOFT-FLOAT-32-FMA-NEXT: .cfi_def_cfa_offset 8 +; SOFT-FLOAT-32-FMA-NEXT: pushl %esi +; SOFT-FLOAT-32-FMA-NEXT: .cfi_def_cfa_offset 12 +; SOFT-FLOAT-32-FMA-NEXT: .cfi_offset %esi, -12 +; SOFT-FLOAT-32-FMA-NEXT: .cfi_offset %edi, -8 +; SOFT-FLOAT-32-FMA-NEXT: movl {{[0-9]+}}(%esp), %esi +; SOFT-FLOAT-32-FMA-NEXT: movl {{[0-9]+}}(%esp), %edi +; SOFT-FLOAT-32-FMA-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA-NEXT: calll __muldf3 +; SOFT-FLOAT-32-FMA-NEXT: addl $16, %esp +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset -16 +; SOFT-FLOAT-32-FMA-NEXT: pushl %edi +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA-NEXT: pushl %esi +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA-NEXT: pushl %edx +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset 4 +; 
SOFT-FLOAT-32-FMA-NEXT: pushl %eax +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA-NEXT: calll __adddf3 +; SOFT-FLOAT-32-FMA-NEXT: addl $16, %esp +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset -16 +; SOFT-FLOAT-32-FMA-NEXT: popl %esi +; SOFT-FLOAT-32-FMA-NEXT: .cfi_def_cfa_offset 8 +; SOFT-FLOAT-32-FMA-NEXT: popl %edi +; SOFT-FLOAT-32-FMA-NEXT: .cfi_def_cfa_offset 4 +; SOFT-FLOAT-32-FMA-NEXT: retl +; +; SOFT-FLOAT-32-FMA4-LABEL: fmuladd_contract_f64: +; SOFT-FLOAT-32-FMA4: # %bb.0: +; SOFT-FLOAT-32-FMA4-NEXT: pushl %edi +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_def_cfa_offset 8 +; SOFT-FLOAT-32-FMA4-NEXT: pushl %esi +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_def_cfa_offset 12 +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_offset %esi, -12 +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_offset %edi, -8 +; SOFT-FLOAT-32-FMA4-NEXT: movl {{[0-9]+}}(%esp), %esi +; SOFT-FLOAT-32-FMA4-NEXT: movl {{[0-9]+}}(%esp), %edi +; SOFT-FLOAT-32-FMA4-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA4-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA4-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA4-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA4-NEXT: calll __muldf3 +; SOFT-FLOAT-32-FMA4-NEXT: addl $16, %esp +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset -16 +; SOFT-FLOAT-32-FMA4-NEXT: pushl %edi +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA4-NEXT: pushl %esi +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA4-NEXT: pushl %edx +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA4-NEXT: pushl %eax +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA4-NEXT: calll __adddf3 +; SOFT-FLOAT-32-FMA4-NEXT: addl $16, %esp +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset -16 +; SOFT-FLOAT-32-FMA4-NEXT: popl %esi +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_def_cfa_offset 8 +; SOFT-FLOAT-32-FMA4-NEXT: popl %edi +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_def_cfa_offset 4 +; SOFT-FLOAT-32-FMA4-NEXT: retl +; +; SOFT-FLOAT-64-LABEL: fmuladd_contract_f64: +; SOFT-FLOAT-64: # %bb.0: +; SOFT-FLOAT-64-NEXT: pushq %rbx +; SOFT-FLOAT-64-NEXT: .cfi_def_cfa_offset 16 +; SOFT-FLOAT-64-NEXT: .cfi_offset %rbx, -16 +; SOFT-FLOAT-64-NEXT: movq %rdx, %rbx +; SOFT-FLOAT-64-NEXT: callq __muldf3@PLT +; SOFT-FLOAT-64-NEXT: movq %rax, %rdi +; SOFT-FLOAT-64-NEXT: movq %rbx, %rsi +; SOFT-FLOAT-64-NEXT: callq __adddf3@PLT +; SOFT-FLOAT-64-NEXT: popq %rbx +; SOFT-FLOAT-64-NEXT: .cfi_def_cfa_offset 8 +; SOFT-FLOAT-64-NEXT: retq +; +; SOFT-FLOAT-64-FMA-LABEL: fmuladd_contract_f64: +; SOFT-FLOAT-64-FMA: # %bb.0: +; SOFT-FLOAT-64-FMA-NEXT: pushq %rbx +; SOFT-FLOAT-64-FMA-NEXT: .cfi_def_cfa_offset 16 +; SOFT-FLOAT-64-FMA-NEXT: .cfi_offset %rbx, -16 +; SOFT-FLOAT-64-FMA-NEXT: movq %rdx, %rbx +; SOFT-FLOAT-64-FMA-NEXT: callq __muldf3@PLT +; SOFT-FLOAT-64-FMA-NEXT: movq %rax, %rdi +; SOFT-FLOAT-64-FMA-NEXT: movq %rbx, %rsi +; SOFT-FLOAT-64-FMA-NEXT: callq __adddf3@PLT +; SOFT-FLOAT-64-FMA-NEXT: popq %rbx +; SOFT-FLOAT-64-FMA-NEXT: .cfi_def_cfa_offset 8 +; SOFT-FLOAT-64-FMA-NEXT: retq +; +; SOFT-FLOAT-64-FMA4-LABEL: fmuladd_contract_f64: +; SOFT-FLOAT-64-FMA4: # %bb.0: +; SOFT-FLOAT-64-FMA4-NEXT: pushq %rbx +; SOFT-FLOAT-64-FMA4-NEXT: .cfi_def_cfa_offset 16 +; SOFT-FLOAT-64-FMA4-NEXT: .cfi_offset %rbx, -16 +; SOFT-FLOAT-64-FMA4-NEXT: movq %rdx, %rbx +; SOFT-FLOAT-64-FMA4-NEXT: callq 
__muldf3@PLT +; SOFT-FLOAT-64-FMA4-NEXT: movq %rax, %rdi +; SOFT-FLOAT-64-FMA4-NEXT: movq %rbx, %rsi +; SOFT-FLOAT-64-FMA4-NEXT: callq __adddf3@PLT +; SOFT-FLOAT-64-FMA4-NEXT: popq %rbx +; SOFT-FLOAT-64-FMA4-NEXT: .cfi_def_cfa_offset 8 +; SOFT-FLOAT-64-FMA4-NEXT: retq + %product = fmul contract double %a, %b + %result = fadd contract double %product, %c + ret double %result +} + +define <4 x float> @fmuladd_contract_v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c) #0 { +; SOFT-FLOAT-32-LABEL: fmuladd_contract_v4f32: +; SOFT-FLOAT-32: # %bb.0: +; SOFT-FLOAT-32-NEXT: pushl %ebp +; SOFT-FLOAT-32-NEXT: .cfi_def_cfa_offset 8 +; SOFT-FLOAT-32-NEXT: pushl %ebx +; SOFT-FLOAT-32-NEXT: .cfi_def_cfa_offset 12 +; SOFT-FLOAT-32-NEXT: pushl %edi +; SOFT-FLOAT-32-NEXT: .cfi_def_cfa_offset 16 +; SOFT-FLOAT-32-NEXT: pushl %esi +; SOFT-FLOAT-32-NEXT: .cfi_def_cfa_offset 20 +; SOFT-FLOAT-32-NEXT: pushl %eax +; SOFT-FLOAT-32-NEXT: .cfi_def_cfa_offset 24 +; SOFT-FLOAT-32-NEXT: .cfi_offset %esi, -20 +; SOFT-FLOAT-32-NEXT: .cfi_offset %edi, -16 +; SOFT-FLOAT-32-NEXT: .cfi_offset %ebx, -12 +; SOFT-FLOAT-32-NEXT: .cfi_offset %ebp, -8 +; SOFT-FLOAT-32-NEXT: movl {{[0-9]+}}(%esp), %esi +; SOFT-FLOAT-32-NEXT: movl {{[0-9]+}}(%esp), %ebp +; SOFT-FLOAT-32-NEXT: movl {{[0-9]+}}(%esp), %ebx +; SOFT-FLOAT-32-NEXT: movl {{[0-9]+}}(%esp), %edi +; SOFT-FLOAT-32-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-NEXT: calll __mulsf3 +; SOFT-FLOAT-32-NEXT: addl $8, %esp +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset -8 +; SOFT-FLOAT-32-NEXT: movl %eax, (%esp) # 4-byte Spill +; SOFT-FLOAT-32-NEXT: pushl %edi +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-NEXT: pushl %ebx +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-NEXT: calll __mulsf3 +; SOFT-FLOAT-32-NEXT: addl $8, %esp +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset -8 +; SOFT-FLOAT-32-NEXT: movl %eax, %ebx +; SOFT-FLOAT-32-NEXT: pushl %ebp +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-NEXT: calll __mulsf3 +; SOFT-FLOAT-32-NEXT: addl $8, %esp +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset -8 +; SOFT-FLOAT-32-NEXT: movl %eax, %edi +; SOFT-FLOAT-32-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-NEXT: calll __mulsf3 +; SOFT-FLOAT-32-NEXT: addl $8, %esp +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset -8 +; SOFT-FLOAT-32-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-NEXT: pushl %eax +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-NEXT: calll __addsf3 +; SOFT-FLOAT-32-NEXT: addl $8, %esp +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset -8 +; SOFT-FLOAT-32-NEXT: movl %eax, %ebp +; SOFT-FLOAT-32-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-NEXT: pushl %edi +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-NEXT: calll __addsf3 +; SOFT-FLOAT-32-NEXT: addl $8, %esp +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset -8 +; SOFT-FLOAT-32-NEXT: movl %eax, %edi +; SOFT-FLOAT-32-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-NEXT: pushl %ebx +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset 4 +; 
SOFT-FLOAT-32-NEXT: calll __addsf3 +; SOFT-FLOAT-32-NEXT: addl $8, %esp +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset -8 +; SOFT-FLOAT-32-NEXT: movl %eax, %ebx +; SOFT-FLOAT-32-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-NEXT: pushl {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-NEXT: calll __addsf3 +; SOFT-FLOAT-32-NEXT: addl $8, %esp +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset -8 +; SOFT-FLOAT-32-NEXT: movl %eax, 12(%esi) +; SOFT-FLOAT-32-NEXT: movl %ebx, 8(%esi) +; SOFT-FLOAT-32-NEXT: movl %edi, 4(%esi) +; SOFT-FLOAT-32-NEXT: movl %ebp, (%esi) +; SOFT-FLOAT-32-NEXT: movl %esi, %eax +; SOFT-FLOAT-32-NEXT: addl $4, %esp +; SOFT-FLOAT-32-NEXT: .cfi_def_cfa_offset 20 +; SOFT-FLOAT-32-NEXT: popl %esi +; SOFT-FLOAT-32-NEXT: .cfi_def_cfa_offset 16 +; SOFT-FLOAT-32-NEXT: popl %edi +; SOFT-FLOAT-32-NEXT: .cfi_def_cfa_offset 12 +; SOFT-FLOAT-32-NEXT: popl %ebx +; SOFT-FLOAT-32-NEXT: .cfi_def_cfa_offset 8 +; SOFT-FLOAT-32-NEXT: popl %ebp +; SOFT-FLOAT-32-NEXT: .cfi_def_cfa_offset 4 +; SOFT-FLOAT-32-NEXT: retl $4 +; +; SOFT-FLOAT-32-FMA-LABEL: fmuladd_contract_v4f32: +; SOFT-FLOAT-32-FMA: # %bb.0: +; SOFT-FLOAT-32-FMA-NEXT: pushl %ebp +; SOFT-FLOAT-32-FMA-NEXT: .cfi_def_cfa_offset 8 +; SOFT-FLOAT-32-FMA-NEXT: pushl %ebx +; SOFT-FLOAT-32-FMA-NEXT: .cfi_def_cfa_offset 12 +; SOFT-FLOAT-32-FMA-NEXT: pushl %edi +; SOFT-FLOAT-32-FMA-NEXT: .cfi_def_cfa_offset 16 +; SOFT-FLOAT-32-FMA-NEXT: pushl %esi +; SOFT-FLOAT-32-FMA-NEXT: .cfi_def_cfa_offset 20 +; SOFT-FLOAT-32-FMA-NEXT: pushl %eax +; SOFT-FLOAT-32-FMA-NEXT: .cfi_def_cfa_offset 24 +; SOFT-FLOAT-32-FMA-NEXT: .cfi_offset %esi, -20 +; SOFT-FLOAT-32-FMA-NEXT: .cfi_offset %edi, -16 +; SOFT-FLOAT-32-FMA-NEXT: .cfi_offset %ebx, -12 +; SOFT-FLOAT-32-FMA-NEXT: .cfi_offset %ebp, -8 +; SOFT-FLOAT-32-FMA-NEXT: movl {{[0-9]+}}(%esp), %esi +; SOFT-FLOAT-32-FMA-NEXT: movl {{[0-9]+}}(%esp), %ebp +; SOFT-FLOAT-32-FMA-NEXT: movl {{[0-9]+}}(%esp), %ebx +; SOFT-FLOAT-32-FMA-NEXT: movl {{[0-9]+}}(%esp), %edi +; SOFT-FLOAT-32-FMA-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA-NEXT: calll __mulsf3 +; SOFT-FLOAT-32-FMA-NEXT: addl $8, %esp +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset -8 +; SOFT-FLOAT-32-FMA-NEXT: movl %eax, (%esp) # 4-byte Spill +; SOFT-FLOAT-32-FMA-NEXT: pushl %edi +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA-NEXT: pushl %ebx +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA-NEXT: calll __mulsf3 +; SOFT-FLOAT-32-FMA-NEXT: addl $8, %esp +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset -8 +; SOFT-FLOAT-32-FMA-NEXT: movl %eax, %ebx +; SOFT-FLOAT-32-FMA-NEXT: pushl %ebp +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA-NEXT: calll __mulsf3 +; SOFT-FLOAT-32-FMA-NEXT: addl $8, %esp +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset -8 +; SOFT-FLOAT-32-FMA-NEXT: movl %eax, %edi +; SOFT-FLOAT-32-FMA-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA-NEXT: calll __mulsf3 +; SOFT-FLOAT-32-FMA-NEXT: addl $8, %esp +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset -8 +; 
SOFT-FLOAT-32-FMA-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA-NEXT: pushl %eax +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA-NEXT: calll __addsf3 +; SOFT-FLOAT-32-FMA-NEXT: addl $8, %esp +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset -8 +; SOFT-FLOAT-32-FMA-NEXT: movl %eax, %ebp +; SOFT-FLOAT-32-FMA-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA-NEXT: pushl %edi +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA-NEXT: calll __addsf3 +; SOFT-FLOAT-32-FMA-NEXT: addl $8, %esp +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset -8 +; SOFT-FLOAT-32-FMA-NEXT: movl %eax, %edi +; SOFT-FLOAT-32-FMA-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA-NEXT: pushl %ebx +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA-NEXT: calll __addsf3 +; SOFT-FLOAT-32-FMA-NEXT: addl $8, %esp +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset -8 +; SOFT-FLOAT-32-FMA-NEXT: movl %eax, %ebx +; SOFT-FLOAT-32-FMA-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA-NEXT: pushl {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA-NEXT: calll __addsf3 +; SOFT-FLOAT-32-FMA-NEXT: addl $8, %esp +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset -8 +; SOFT-FLOAT-32-FMA-NEXT: movl %eax, 12(%esi) +; SOFT-FLOAT-32-FMA-NEXT: movl %ebx, 8(%esi) +; SOFT-FLOAT-32-FMA-NEXT: movl %edi, 4(%esi) +; SOFT-FLOAT-32-FMA-NEXT: movl %ebp, (%esi) +; SOFT-FLOAT-32-FMA-NEXT: movl %esi, %eax +; SOFT-FLOAT-32-FMA-NEXT: addl $4, %esp +; SOFT-FLOAT-32-FMA-NEXT: .cfi_def_cfa_offset 20 +; SOFT-FLOAT-32-FMA-NEXT: popl %esi +; SOFT-FLOAT-32-FMA-NEXT: .cfi_def_cfa_offset 16 +; SOFT-FLOAT-32-FMA-NEXT: popl %edi +; SOFT-FLOAT-32-FMA-NEXT: .cfi_def_cfa_offset 12 +; SOFT-FLOAT-32-FMA-NEXT: popl %ebx +; SOFT-FLOAT-32-FMA-NEXT: .cfi_def_cfa_offset 8 +; SOFT-FLOAT-32-FMA-NEXT: popl %ebp +; SOFT-FLOAT-32-FMA-NEXT: .cfi_def_cfa_offset 4 +; SOFT-FLOAT-32-FMA-NEXT: retl $4 +; +; SOFT-FLOAT-32-FMA4-LABEL: fmuladd_contract_v4f32: +; SOFT-FLOAT-32-FMA4: # %bb.0: +; SOFT-FLOAT-32-FMA4-NEXT: pushl %ebp +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_def_cfa_offset 8 +; SOFT-FLOAT-32-FMA4-NEXT: pushl %ebx +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_def_cfa_offset 12 +; SOFT-FLOAT-32-FMA4-NEXT: pushl %edi +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_def_cfa_offset 16 +; SOFT-FLOAT-32-FMA4-NEXT: pushl %esi +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_def_cfa_offset 20 +; SOFT-FLOAT-32-FMA4-NEXT: pushl %eax +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_def_cfa_offset 24 +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_offset %esi, -20 +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_offset %edi, -16 +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_offset %ebx, -12 +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_offset %ebp, -8 +; SOFT-FLOAT-32-FMA4-NEXT: movl {{[0-9]+}}(%esp), %esi +; SOFT-FLOAT-32-FMA4-NEXT: movl {{[0-9]+}}(%esp), %ebp +; SOFT-FLOAT-32-FMA4-NEXT: movl {{[0-9]+}}(%esp), %ebx +; SOFT-FLOAT-32-FMA4-NEXT: movl {{[0-9]+}}(%esp), %edi +; SOFT-FLOAT-32-FMA4-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA4-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA4-NEXT: calll __mulsf3 +; SOFT-FLOAT-32-FMA4-NEXT: addl $8, %esp +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset -8 +; SOFT-FLOAT-32-FMA4-NEXT: movl %eax, (%esp) # 4-byte Spill +; 
SOFT-FLOAT-32-FMA4-NEXT: pushl %edi +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA4-NEXT: pushl %ebx +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA4-NEXT: calll __mulsf3 +; SOFT-FLOAT-32-FMA4-NEXT: addl $8, %esp +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset -8 +; SOFT-FLOAT-32-FMA4-NEXT: movl %eax, %ebx +; SOFT-FLOAT-32-FMA4-NEXT: pushl %ebp +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA4-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA4-NEXT: calll __mulsf3 +; SOFT-FLOAT-32-FMA4-NEXT: addl $8, %esp +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset -8 +; SOFT-FLOAT-32-FMA4-NEXT: movl %eax, %edi +; SOFT-FLOAT-32-FMA4-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA4-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA4-NEXT: calll __mulsf3 +; SOFT-FLOAT-32-FMA4-NEXT: addl $8, %esp +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset -8 +; SOFT-FLOAT-32-FMA4-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA4-NEXT: pushl %eax +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA4-NEXT: calll __addsf3 +; SOFT-FLOAT-32-FMA4-NEXT: addl $8, %esp +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset -8 +; SOFT-FLOAT-32-FMA4-NEXT: movl %eax, %ebp +; SOFT-FLOAT-32-FMA4-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA4-NEXT: pushl %edi +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA4-NEXT: calll __addsf3 +; SOFT-FLOAT-32-FMA4-NEXT: addl $8, %esp +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset -8 +; SOFT-FLOAT-32-FMA4-NEXT: movl %eax, %edi +; SOFT-FLOAT-32-FMA4-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA4-NEXT: pushl %ebx +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA4-NEXT: calll __addsf3 +; SOFT-FLOAT-32-FMA4-NEXT: addl $8, %esp +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset -8 +; SOFT-FLOAT-32-FMA4-NEXT: movl %eax, %ebx +; SOFT-FLOAT-32-FMA4-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA4-NEXT: pushl {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA4-NEXT: calll __addsf3 +; SOFT-FLOAT-32-FMA4-NEXT: addl $8, %esp +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset -8 +; SOFT-FLOAT-32-FMA4-NEXT: movl %eax, 12(%esi) +; SOFT-FLOAT-32-FMA4-NEXT: movl %ebx, 8(%esi) +; SOFT-FLOAT-32-FMA4-NEXT: movl %edi, 4(%esi) +; SOFT-FLOAT-32-FMA4-NEXT: movl %ebp, (%esi) +; SOFT-FLOAT-32-FMA4-NEXT: movl %esi, %eax +; SOFT-FLOAT-32-FMA4-NEXT: addl $4, %esp +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_def_cfa_offset 20 +; SOFT-FLOAT-32-FMA4-NEXT: popl %esi +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_def_cfa_offset 16 +; SOFT-FLOAT-32-FMA4-NEXT: popl %edi +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_def_cfa_offset 12 +; SOFT-FLOAT-32-FMA4-NEXT: popl %ebx +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_def_cfa_offset 8 +; SOFT-FLOAT-32-FMA4-NEXT: popl %ebp +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_def_cfa_offset 4 +; SOFT-FLOAT-32-FMA4-NEXT: retl $4 +; +; SOFT-FLOAT-64-LABEL: fmuladd_contract_v4f32: +; SOFT-FLOAT-64: # %bb.0: +; SOFT-FLOAT-64-NEXT: pushq %rbp +; SOFT-FLOAT-64-NEXT: .cfi_def_cfa_offset 16 +; SOFT-FLOAT-64-NEXT: pushq %r15 +; SOFT-FLOAT-64-NEXT: .cfi_def_cfa_offset 
24 +; SOFT-FLOAT-64-NEXT: pushq %r14 +; SOFT-FLOAT-64-NEXT: .cfi_def_cfa_offset 32 +; SOFT-FLOAT-64-NEXT: pushq %r13 +; SOFT-FLOAT-64-NEXT: .cfi_def_cfa_offset 40 +; SOFT-FLOAT-64-NEXT: pushq %r12 +; SOFT-FLOAT-64-NEXT: .cfi_def_cfa_offset 48 +; SOFT-FLOAT-64-NEXT: pushq %rbx +; SOFT-FLOAT-64-NEXT: .cfi_def_cfa_offset 56 +; SOFT-FLOAT-64-NEXT: pushq %rax +; SOFT-FLOAT-64-NEXT: .cfi_def_cfa_offset 64 +; SOFT-FLOAT-64-NEXT: .cfi_offset %rbx, -56 +; SOFT-FLOAT-64-NEXT: .cfi_offset %r12, -48 +; SOFT-FLOAT-64-NEXT: .cfi_offset %r13, -40 +; SOFT-FLOAT-64-NEXT: .cfi_offset %r14, -32 +; SOFT-FLOAT-64-NEXT: .cfi_offset %r15, -24 +; SOFT-FLOAT-64-NEXT: .cfi_offset %rbp, -16 +; SOFT-FLOAT-64-NEXT: movl %r9d, %r13d +; SOFT-FLOAT-64-NEXT: movl %ecx, %ebp +; SOFT-FLOAT-64-NEXT: movl %edx, %r14d +; SOFT-FLOAT-64-NEXT: movl %esi, %r12d +; SOFT-FLOAT-64-NEXT: movq %rdi, %rbx +; SOFT-FLOAT-64-NEXT: movl {{[0-9]+}}(%rsp), %esi +; SOFT-FLOAT-64-NEXT: movl %r8d, %edi +; SOFT-FLOAT-64-NEXT: callq __mulsf3@PLT +; SOFT-FLOAT-64-NEXT: movl %eax, %r15d +; SOFT-FLOAT-64-NEXT: movl %ebp, %edi +; SOFT-FLOAT-64-NEXT: movl {{[0-9]+}}(%rsp), %esi +; SOFT-FLOAT-64-NEXT: callq __mulsf3@PLT +; SOFT-FLOAT-64-NEXT: movl %eax, %ebp +; SOFT-FLOAT-64-NEXT: movl %r14d, %edi +; SOFT-FLOAT-64-NEXT: movl {{[0-9]+}}(%rsp), %esi +; SOFT-FLOAT-64-NEXT: callq __mulsf3@PLT +; SOFT-FLOAT-64-NEXT: movl %eax, %r14d +; SOFT-FLOAT-64-NEXT: movl %r12d, %edi +; SOFT-FLOAT-64-NEXT: movl %r13d, %esi +; SOFT-FLOAT-64-NEXT: callq __mulsf3@PLT +; SOFT-FLOAT-64-NEXT: movl %eax, %edi +; SOFT-FLOAT-64-NEXT: movl {{[0-9]+}}(%rsp), %esi +; SOFT-FLOAT-64-NEXT: callq __addsf3@PLT +; SOFT-FLOAT-64-NEXT: movl %eax, %r12d +; SOFT-FLOAT-64-NEXT: movl %r14d, %edi +; SOFT-FLOAT-64-NEXT: movl {{[0-9]+}}(%rsp), %esi +; SOFT-FLOAT-64-NEXT: callq __addsf3@PLT +; SOFT-FLOAT-64-NEXT: movl %eax, %r14d +; SOFT-FLOAT-64-NEXT: movl %ebp, %edi +; SOFT-FLOAT-64-NEXT: movl {{[0-9]+}}(%rsp), %esi +; SOFT-FLOAT-64-NEXT: callq __addsf3@PLT +; SOFT-FLOAT-64-NEXT: movl %eax, %ebp +; SOFT-FLOAT-64-NEXT: movl %r15d, %edi +; SOFT-FLOAT-64-NEXT: movl {{[0-9]+}}(%rsp), %esi +; SOFT-FLOAT-64-NEXT: callq __addsf3@PLT +; SOFT-FLOAT-64-NEXT: movl %eax, 12(%rbx) +; SOFT-FLOAT-64-NEXT: movl %ebp, 8(%rbx) +; SOFT-FLOAT-64-NEXT: movl %r14d, 4(%rbx) +; SOFT-FLOAT-64-NEXT: movl %r12d, (%rbx) +; SOFT-FLOAT-64-NEXT: movq %rbx, %rax +; SOFT-FLOAT-64-NEXT: addq $8, %rsp +; SOFT-FLOAT-64-NEXT: .cfi_def_cfa_offset 56 +; SOFT-FLOAT-64-NEXT: popq %rbx +; SOFT-FLOAT-64-NEXT: .cfi_def_cfa_offset 48 +; SOFT-FLOAT-64-NEXT: popq %r12 +; SOFT-FLOAT-64-NEXT: .cfi_def_cfa_offset 40 +; SOFT-FLOAT-64-NEXT: popq %r13 +; SOFT-FLOAT-64-NEXT: .cfi_def_cfa_offset 32 +; SOFT-FLOAT-64-NEXT: popq %r14 +; SOFT-FLOAT-64-NEXT: .cfi_def_cfa_offset 24 +; SOFT-FLOAT-64-NEXT: popq %r15 +; SOFT-FLOAT-64-NEXT: .cfi_def_cfa_offset 16 +; SOFT-FLOAT-64-NEXT: popq %rbp +; SOFT-FLOAT-64-NEXT: .cfi_def_cfa_offset 8 +; SOFT-FLOAT-64-NEXT: retq +; +; SOFT-FLOAT-64-FMA-LABEL: fmuladd_contract_v4f32: +; SOFT-FLOAT-64-FMA: # %bb.0: +; SOFT-FLOAT-64-FMA-NEXT: pushq %rbp +; SOFT-FLOAT-64-FMA-NEXT: .cfi_def_cfa_offset 16 +; SOFT-FLOAT-64-FMA-NEXT: pushq %r15 +; SOFT-FLOAT-64-FMA-NEXT: .cfi_def_cfa_offset 24 +; SOFT-FLOAT-64-FMA-NEXT: pushq %r14 +; SOFT-FLOAT-64-FMA-NEXT: .cfi_def_cfa_offset 32 +; SOFT-FLOAT-64-FMA-NEXT: pushq %r13 +; SOFT-FLOAT-64-FMA-NEXT: .cfi_def_cfa_offset 40 +; SOFT-FLOAT-64-FMA-NEXT: pushq %r12 +; SOFT-FLOAT-64-FMA-NEXT: .cfi_def_cfa_offset 48 +; SOFT-FLOAT-64-FMA-NEXT: pushq %rbx +; SOFT-FLOAT-64-FMA-NEXT: 
.cfi_def_cfa_offset 56 +; SOFT-FLOAT-64-FMA-NEXT: pushq %rax +; SOFT-FLOAT-64-FMA-NEXT: .cfi_def_cfa_offset 64 +; SOFT-FLOAT-64-FMA-NEXT: .cfi_offset %rbx, -56 +; SOFT-FLOAT-64-FMA-NEXT: .cfi_offset %r12, -48 +; SOFT-FLOAT-64-FMA-NEXT: .cfi_offset %r13, -40 +; SOFT-FLOAT-64-FMA-NEXT: .cfi_offset %r14, -32 +; SOFT-FLOAT-64-FMA-NEXT: .cfi_offset %r15, -24 +; SOFT-FLOAT-64-FMA-NEXT: .cfi_offset %rbp, -16 +; SOFT-FLOAT-64-FMA-NEXT: movl %r9d, %r13d +; SOFT-FLOAT-64-FMA-NEXT: movl %ecx, %ebp +; SOFT-FLOAT-64-FMA-NEXT: movl %edx, %r14d +; SOFT-FLOAT-64-FMA-NEXT: movl %esi, %r12d +; SOFT-FLOAT-64-FMA-NEXT: movq %rdi, %rbx +; SOFT-FLOAT-64-FMA-NEXT: movl {{[0-9]+}}(%rsp), %esi +; SOFT-FLOAT-64-FMA-NEXT: movl %r8d, %edi +; SOFT-FLOAT-64-FMA-NEXT: callq __mulsf3@PLT +; SOFT-FLOAT-64-FMA-NEXT: movl %eax, %r15d +; SOFT-FLOAT-64-FMA-NEXT: movl %ebp, %edi +; SOFT-FLOAT-64-FMA-NEXT: movl {{[0-9]+}}(%rsp), %esi +; SOFT-FLOAT-64-FMA-NEXT: callq __mulsf3@PLT +; SOFT-FLOAT-64-FMA-NEXT: movl %eax, %ebp +; SOFT-FLOAT-64-FMA-NEXT: movl %r14d, %edi +; SOFT-FLOAT-64-FMA-NEXT: movl {{[0-9]+}}(%rsp), %esi +; SOFT-FLOAT-64-FMA-NEXT: callq __mulsf3@PLT +; SOFT-FLOAT-64-FMA-NEXT: movl %eax, %r14d +; SOFT-FLOAT-64-FMA-NEXT: movl %r12d, %edi +; SOFT-FLOAT-64-FMA-NEXT: movl %r13d, %esi +; SOFT-FLOAT-64-FMA-NEXT: callq __mulsf3@PLT +; SOFT-FLOAT-64-FMA-NEXT: movl %eax, %edi +; SOFT-FLOAT-64-FMA-NEXT: movl {{[0-9]+}}(%rsp), %esi +; SOFT-FLOAT-64-FMA-NEXT: callq __addsf3@PLT +; SOFT-FLOAT-64-FMA-NEXT: movl %eax, %r12d +; SOFT-FLOAT-64-FMA-NEXT: movl %r14d, %edi +; SOFT-FLOAT-64-FMA-NEXT: movl {{[0-9]+}}(%rsp), %esi +; SOFT-FLOAT-64-FMA-NEXT: callq __addsf3@PLT +; SOFT-FLOAT-64-FMA-NEXT: movl %eax, %r14d +; SOFT-FLOAT-64-FMA-NEXT: movl %ebp, %edi +; SOFT-FLOAT-64-FMA-NEXT: movl {{[0-9]+}}(%rsp), %esi +; SOFT-FLOAT-64-FMA-NEXT: callq __addsf3@PLT +; SOFT-FLOAT-64-FMA-NEXT: movl %eax, %ebp +; SOFT-FLOAT-64-FMA-NEXT: movl %r15d, %edi +; SOFT-FLOAT-64-FMA-NEXT: movl {{[0-9]+}}(%rsp), %esi +; SOFT-FLOAT-64-FMA-NEXT: callq __addsf3@PLT +; SOFT-FLOAT-64-FMA-NEXT: movl %eax, 12(%rbx) +; SOFT-FLOAT-64-FMA-NEXT: movl %ebp, 8(%rbx) +; SOFT-FLOAT-64-FMA-NEXT: movl %r14d, 4(%rbx) +; SOFT-FLOAT-64-FMA-NEXT: movl %r12d, (%rbx) +; SOFT-FLOAT-64-FMA-NEXT: movq %rbx, %rax +; SOFT-FLOAT-64-FMA-NEXT: addq $8, %rsp +; SOFT-FLOAT-64-FMA-NEXT: .cfi_def_cfa_offset 56 +; SOFT-FLOAT-64-FMA-NEXT: popq %rbx +; SOFT-FLOAT-64-FMA-NEXT: .cfi_def_cfa_offset 48 +; SOFT-FLOAT-64-FMA-NEXT: popq %r12 +; SOFT-FLOAT-64-FMA-NEXT: .cfi_def_cfa_offset 40 +; SOFT-FLOAT-64-FMA-NEXT: popq %r13 +; SOFT-FLOAT-64-FMA-NEXT: .cfi_def_cfa_offset 32 +; SOFT-FLOAT-64-FMA-NEXT: popq %r14 +; SOFT-FLOAT-64-FMA-NEXT: .cfi_def_cfa_offset 24 +; SOFT-FLOAT-64-FMA-NEXT: popq %r15 +; SOFT-FLOAT-64-FMA-NEXT: .cfi_def_cfa_offset 16 +; SOFT-FLOAT-64-FMA-NEXT: popq %rbp +; SOFT-FLOAT-64-FMA-NEXT: .cfi_def_cfa_offset 8 +; SOFT-FLOAT-64-FMA-NEXT: retq +; +; SOFT-FLOAT-64-FMA4-LABEL: fmuladd_contract_v4f32: +; SOFT-FLOAT-64-FMA4: # %bb.0: +; SOFT-FLOAT-64-FMA4-NEXT: pushq %rbp +; SOFT-FLOAT-64-FMA4-NEXT: .cfi_def_cfa_offset 16 +; SOFT-FLOAT-64-FMA4-NEXT: pushq %r15 +; SOFT-FLOAT-64-FMA4-NEXT: .cfi_def_cfa_offset 24 +; SOFT-FLOAT-64-FMA4-NEXT: pushq %r14 +; SOFT-FLOAT-64-FMA4-NEXT: .cfi_def_cfa_offset 32 +; SOFT-FLOAT-64-FMA4-NEXT: pushq %r13 +; SOFT-FLOAT-64-FMA4-NEXT: .cfi_def_cfa_offset 40 +; SOFT-FLOAT-64-FMA4-NEXT: pushq %r12 +; SOFT-FLOAT-64-FMA4-NEXT: .cfi_def_cfa_offset 48 +; SOFT-FLOAT-64-FMA4-NEXT: pushq %rbx +; SOFT-FLOAT-64-FMA4-NEXT: .cfi_def_cfa_offset 56 +; 
SOFT-FLOAT-64-FMA4-NEXT: pushq %rax +; SOFT-FLOAT-64-FMA4-NEXT: .cfi_def_cfa_offset 64 +; SOFT-FLOAT-64-FMA4-NEXT: .cfi_offset %rbx, -56 +; SOFT-FLOAT-64-FMA4-NEXT: .cfi_offset %r12, -48 +; SOFT-FLOAT-64-FMA4-NEXT: .cfi_offset %r13, -40 +; SOFT-FLOAT-64-FMA4-NEXT: .cfi_offset %r14, -32 +; SOFT-FLOAT-64-FMA4-NEXT: .cfi_offset %r15, -24 +; SOFT-FLOAT-64-FMA4-NEXT: .cfi_offset %rbp, -16 +; SOFT-FLOAT-64-FMA4-NEXT: movl %r9d, %r13d +; SOFT-FLOAT-64-FMA4-NEXT: movl %ecx, %ebp +; SOFT-FLOAT-64-FMA4-NEXT: movl %edx, %r14d +; SOFT-FLOAT-64-FMA4-NEXT: movl %esi, %r12d +; SOFT-FLOAT-64-FMA4-NEXT: movq %rdi, %rbx +; SOFT-FLOAT-64-FMA4-NEXT: movl {{[0-9]+}}(%rsp), %esi +; SOFT-FLOAT-64-FMA4-NEXT: movl %r8d, %edi +; SOFT-FLOAT-64-FMA4-NEXT: callq __mulsf3@PLT +; SOFT-FLOAT-64-FMA4-NEXT: movl %eax, %r15d +; SOFT-FLOAT-64-FMA4-NEXT: movl %ebp, %edi +; SOFT-FLOAT-64-FMA4-NEXT: movl {{[0-9]+}}(%rsp), %esi +; SOFT-FLOAT-64-FMA4-NEXT: callq __mulsf3@PLT +; SOFT-FLOAT-64-FMA4-NEXT: movl %eax, %ebp +; SOFT-FLOAT-64-FMA4-NEXT: movl %r14d, %edi +; SOFT-FLOAT-64-FMA4-NEXT: movl {{[0-9]+}}(%rsp), %esi +; SOFT-FLOAT-64-FMA4-NEXT: callq __mulsf3@PLT +; SOFT-FLOAT-64-FMA4-NEXT: movl %eax, %r14d +; SOFT-FLOAT-64-FMA4-NEXT: movl %r12d, %edi +; SOFT-FLOAT-64-FMA4-NEXT: movl %r13d, %esi +; SOFT-FLOAT-64-FMA4-NEXT: callq __mulsf3@PLT +; SOFT-FLOAT-64-FMA4-NEXT: movl %eax, %edi +; SOFT-FLOAT-64-FMA4-NEXT: movl {{[0-9]+}}(%rsp), %esi +; SOFT-FLOAT-64-FMA4-NEXT: callq __addsf3@PLT +; SOFT-FLOAT-64-FMA4-NEXT: movl %eax, %r12d +; SOFT-FLOAT-64-FMA4-NEXT: movl %r14d, %edi +; SOFT-FLOAT-64-FMA4-NEXT: movl {{[0-9]+}}(%rsp), %esi +; SOFT-FLOAT-64-FMA4-NEXT: callq __addsf3@PLT +; SOFT-FLOAT-64-FMA4-NEXT: movl %eax, %r14d +; SOFT-FLOAT-64-FMA4-NEXT: movl %ebp, %edi +; SOFT-FLOAT-64-FMA4-NEXT: movl {{[0-9]+}}(%rsp), %esi +; SOFT-FLOAT-64-FMA4-NEXT: callq __addsf3@PLT +; SOFT-FLOAT-64-FMA4-NEXT: movl %eax, %ebp +; SOFT-FLOAT-64-FMA4-NEXT: movl %r15d, %edi +; SOFT-FLOAT-64-FMA4-NEXT: movl {{[0-9]+}}(%rsp), %esi +; SOFT-FLOAT-64-FMA4-NEXT: callq __addsf3@PLT +; SOFT-FLOAT-64-FMA4-NEXT: movl %eax, 12(%rbx) +; SOFT-FLOAT-64-FMA4-NEXT: movl %ebp, 8(%rbx) +; SOFT-FLOAT-64-FMA4-NEXT: movl %r14d, 4(%rbx) +; SOFT-FLOAT-64-FMA4-NEXT: movl %r12d, (%rbx) +; SOFT-FLOAT-64-FMA4-NEXT: movq %rbx, %rax +; SOFT-FLOAT-64-FMA4-NEXT: addq $8, %rsp +; SOFT-FLOAT-64-FMA4-NEXT: .cfi_def_cfa_offset 56 +; SOFT-FLOAT-64-FMA4-NEXT: popq %rbx +; SOFT-FLOAT-64-FMA4-NEXT: .cfi_def_cfa_offset 48 +; SOFT-FLOAT-64-FMA4-NEXT: popq %r12 +; SOFT-FLOAT-64-FMA4-NEXT: .cfi_def_cfa_offset 40 +; SOFT-FLOAT-64-FMA4-NEXT: popq %r13 +; SOFT-FLOAT-64-FMA4-NEXT: .cfi_def_cfa_offset 32 +; SOFT-FLOAT-64-FMA4-NEXT: popq %r14 +; SOFT-FLOAT-64-FMA4-NEXT: .cfi_def_cfa_offset 24 +; SOFT-FLOAT-64-FMA4-NEXT: popq %r15 +; SOFT-FLOAT-64-FMA4-NEXT: .cfi_def_cfa_offset 16 +; SOFT-FLOAT-64-FMA4-NEXT: popq %rbp +; SOFT-FLOAT-64-FMA4-NEXT: .cfi_def_cfa_offset 8 +; SOFT-FLOAT-64-FMA4-NEXT: retq + %product = fmul contract <4 x float> %a, %b + %result = fadd contract <4 x float> %product, %c + ret <4 x float> %result +} + +define <4 x double> @fmuladd_contract_v4f64(<4 x double> %a, <4 x double> %b, <4 x double> %c) #0 { +; SOFT-FLOAT-32-LABEL: fmuladd_contract_v4f64: +; SOFT-FLOAT-32: # %bb.0: +; SOFT-FLOAT-32-NEXT: pushl %ebp +; SOFT-FLOAT-32-NEXT: .cfi_def_cfa_offset 8 +; SOFT-FLOAT-32-NEXT: pushl %ebx +; SOFT-FLOAT-32-NEXT: .cfi_def_cfa_offset 12 +; SOFT-FLOAT-32-NEXT: pushl %edi +; SOFT-FLOAT-32-NEXT: .cfi_def_cfa_offset 16 +; SOFT-FLOAT-32-NEXT: pushl %esi +; SOFT-FLOAT-32-NEXT: 
.cfi_def_cfa_offset 20 +; SOFT-FLOAT-32-NEXT: subl $16, %esp +; SOFT-FLOAT-32-NEXT: .cfi_def_cfa_offset 36 +; SOFT-FLOAT-32-NEXT: .cfi_offset %esi, -20 +; SOFT-FLOAT-32-NEXT: .cfi_offset %edi, -16 +; SOFT-FLOAT-32-NEXT: .cfi_offset %ebx, -12 +; SOFT-FLOAT-32-NEXT: .cfi_offset %ebp, -8 +; SOFT-FLOAT-32-NEXT: movl {{[0-9]+}}(%esp), %edi +; SOFT-FLOAT-32-NEXT: movl {{[0-9]+}}(%esp), %ebx +; SOFT-FLOAT-32-NEXT: movl {{[0-9]+}}(%esp), %esi +; SOFT-FLOAT-32-NEXT: movl {{[0-9]+}}(%esp), %ebp +; SOFT-FLOAT-32-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-NEXT: calll __muldf3 +; SOFT-FLOAT-32-NEXT: addl $16, %esp +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset -16 +; SOFT-FLOAT-32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; SOFT-FLOAT-32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; SOFT-FLOAT-32-NEXT: pushl %ebp +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-NEXT: pushl %esi +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-NEXT: pushl %ebx +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-NEXT: pushl %edi +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-NEXT: calll __muldf3 +; SOFT-FLOAT-32-NEXT: addl $16, %esp +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset -16 +; SOFT-FLOAT-32-NEXT: movl %eax, %esi +; SOFT-FLOAT-32-NEXT: movl %edx, %ebp +; SOFT-FLOAT-32-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-NEXT: calll __muldf3 +; SOFT-FLOAT-32-NEXT: addl $16, %esp +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset -16 +; SOFT-FLOAT-32-NEXT: movl %eax, %edi +; SOFT-FLOAT-32-NEXT: movl %edx, %ebx +; SOFT-FLOAT-32-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-NEXT: calll __muldf3 +; SOFT-FLOAT-32-NEXT: addl $16, %esp +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset -16 +; SOFT-FLOAT-32-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-NEXT: pushl %edx +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-NEXT: pushl %eax +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-NEXT: calll __adddf3 +; SOFT-FLOAT-32-NEXT: addl $16, %esp +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset -16 +; SOFT-FLOAT-32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; SOFT-FLOAT-32-NEXT: movl %edx, (%esp) # 4-byte Spill +; SOFT-FLOAT-32-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset 4 +; 
SOFT-FLOAT-32-NEXT: pushl %ebx +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-NEXT: pushl %edi +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-NEXT: calll __adddf3 +; SOFT-FLOAT-32-NEXT: addl $16, %esp +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset -16 +; SOFT-FLOAT-32-NEXT: movl %eax, %edi +; SOFT-FLOAT-32-NEXT: movl %edx, %ebx +; SOFT-FLOAT-32-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-NEXT: pushl %ebp +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-NEXT: pushl %esi +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-NEXT: calll __adddf3 +; SOFT-FLOAT-32-NEXT: addl $16, %esp +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset -16 +; SOFT-FLOAT-32-NEXT: movl %eax, %ebp +; SOFT-FLOAT-32-NEXT: movl %edx, %esi +; SOFT-FLOAT-32-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-NEXT: pushl {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-NEXT: pushl {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-NEXT: calll __adddf3 +; SOFT-FLOAT-32-NEXT: addl $16, %esp +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset -16 +; SOFT-FLOAT-32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; SOFT-FLOAT-32-NEXT: movl %edx, 28(%ecx) +; SOFT-FLOAT-32-NEXT: movl %eax, 24(%ecx) +; SOFT-FLOAT-32-NEXT: movl %esi, 20(%ecx) +; SOFT-FLOAT-32-NEXT: movl %ebp, 16(%ecx) +; SOFT-FLOAT-32-NEXT: movl %ebx, 12(%ecx) +; SOFT-FLOAT-32-NEXT: movl %edi, 8(%ecx) +; SOFT-FLOAT-32-NEXT: movl (%esp), %eax # 4-byte Reload +; SOFT-FLOAT-32-NEXT: movl %eax, 4(%ecx) +; SOFT-FLOAT-32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; SOFT-FLOAT-32-NEXT: movl %eax, (%ecx) +; SOFT-FLOAT-32-NEXT: movl %ecx, %eax +; SOFT-FLOAT-32-NEXT: addl $16, %esp +; SOFT-FLOAT-32-NEXT: .cfi_def_cfa_offset 20 +; SOFT-FLOAT-32-NEXT: popl %esi +; SOFT-FLOAT-32-NEXT: .cfi_def_cfa_offset 16 +; SOFT-FLOAT-32-NEXT: popl %edi +; SOFT-FLOAT-32-NEXT: .cfi_def_cfa_offset 12 +; SOFT-FLOAT-32-NEXT: popl %ebx +; SOFT-FLOAT-32-NEXT: .cfi_def_cfa_offset 8 +; SOFT-FLOAT-32-NEXT: popl %ebp +; SOFT-FLOAT-32-NEXT: .cfi_def_cfa_offset 4 +; SOFT-FLOAT-32-NEXT: retl $4 +; +; SOFT-FLOAT-32-FMA-LABEL: fmuladd_contract_v4f64: +; SOFT-FLOAT-32-FMA: # %bb.0: +; SOFT-FLOAT-32-FMA-NEXT: pushl %ebp +; SOFT-FLOAT-32-FMA-NEXT: .cfi_def_cfa_offset 8 +; SOFT-FLOAT-32-FMA-NEXT: pushl %ebx +; SOFT-FLOAT-32-FMA-NEXT: .cfi_def_cfa_offset 12 +; SOFT-FLOAT-32-FMA-NEXT: pushl %edi +; SOFT-FLOAT-32-FMA-NEXT: .cfi_def_cfa_offset 16 +; SOFT-FLOAT-32-FMA-NEXT: pushl %esi +; SOFT-FLOAT-32-FMA-NEXT: .cfi_def_cfa_offset 20 +; SOFT-FLOAT-32-FMA-NEXT: subl $16, %esp +; SOFT-FLOAT-32-FMA-NEXT: .cfi_def_cfa_offset 36 +; SOFT-FLOAT-32-FMA-NEXT: .cfi_offset %esi, -20 +; SOFT-FLOAT-32-FMA-NEXT: .cfi_offset %edi, -16 +; SOFT-FLOAT-32-FMA-NEXT: .cfi_offset %ebx, -12 +; SOFT-FLOAT-32-FMA-NEXT: .cfi_offset %ebp, -8 +; SOFT-FLOAT-32-FMA-NEXT: movl {{[0-9]+}}(%esp), %edi +; SOFT-FLOAT-32-FMA-NEXT: movl {{[0-9]+}}(%esp), %ebx +; SOFT-FLOAT-32-FMA-NEXT: movl {{[0-9]+}}(%esp), %esi +; SOFT-FLOAT-32-FMA-NEXT: movl {{[0-9]+}}(%esp), %ebp +; SOFT-FLOAT-32-FMA-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA-NEXT: pushl 
{{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA-NEXT: calll __muldf3 +; SOFT-FLOAT-32-FMA-NEXT: addl $16, %esp +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset -16 +; SOFT-FLOAT-32-FMA-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; SOFT-FLOAT-32-FMA-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; SOFT-FLOAT-32-FMA-NEXT: pushl %ebp +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA-NEXT: pushl %esi +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA-NEXT: pushl %ebx +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA-NEXT: pushl %edi +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA-NEXT: calll __muldf3 +; SOFT-FLOAT-32-FMA-NEXT: addl $16, %esp +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset -16 +; SOFT-FLOAT-32-FMA-NEXT: movl %eax, %esi +; SOFT-FLOAT-32-FMA-NEXT: movl %edx, %ebp +; SOFT-FLOAT-32-FMA-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA-NEXT: calll __muldf3 +; SOFT-FLOAT-32-FMA-NEXT: addl $16, %esp +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset -16 +; SOFT-FLOAT-32-FMA-NEXT: movl %eax, %edi +; SOFT-FLOAT-32-FMA-NEXT: movl %edx, %ebx +; SOFT-FLOAT-32-FMA-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA-NEXT: calll __muldf3 +; SOFT-FLOAT-32-FMA-NEXT: addl $16, %esp +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset -16 +; SOFT-FLOAT-32-FMA-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA-NEXT: pushl %edx +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA-NEXT: pushl %eax +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA-NEXT: calll __adddf3 +; SOFT-FLOAT-32-FMA-NEXT: addl $16, %esp +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset -16 +; SOFT-FLOAT-32-FMA-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; SOFT-FLOAT-32-FMA-NEXT: movl %edx, (%esp) # 4-byte Spill +; SOFT-FLOAT-32-FMA-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA-NEXT: pushl %ebx +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA-NEXT: pushl %edi +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA-NEXT: calll __adddf3 +; SOFT-FLOAT-32-FMA-NEXT: addl $16, %esp +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset -16 +; SOFT-FLOAT-32-FMA-NEXT: movl %eax, %edi 
+; SOFT-FLOAT-32-FMA-NEXT: movl %edx, %ebx +; SOFT-FLOAT-32-FMA-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA-NEXT: pushl %ebp +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA-NEXT: pushl %esi +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA-NEXT: calll __adddf3 +; SOFT-FLOAT-32-FMA-NEXT: addl $16, %esp +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset -16 +; SOFT-FLOAT-32-FMA-NEXT: movl %eax, %ebp +; SOFT-FLOAT-32-FMA-NEXT: movl %edx, %esi +; SOFT-FLOAT-32-FMA-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA-NEXT: pushl {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA-NEXT: pushl {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA-NEXT: calll __adddf3 +; SOFT-FLOAT-32-FMA-NEXT: addl $16, %esp +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset -16 +; SOFT-FLOAT-32-FMA-NEXT: movl {{[0-9]+}}(%esp), %ecx +; SOFT-FLOAT-32-FMA-NEXT: movl %edx, 28(%ecx) +; SOFT-FLOAT-32-FMA-NEXT: movl %eax, 24(%ecx) +; SOFT-FLOAT-32-FMA-NEXT: movl %esi, 20(%ecx) +; SOFT-FLOAT-32-FMA-NEXT: movl %ebp, 16(%ecx) +; SOFT-FLOAT-32-FMA-NEXT: movl %ebx, 12(%ecx) +; SOFT-FLOAT-32-FMA-NEXT: movl %edi, 8(%ecx) +; SOFT-FLOAT-32-FMA-NEXT: movl (%esp), %eax # 4-byte Reload +; SOFT-FLOAT-32-FMA-NEXT: movl %eax, 4(%ecx) +; SOFT-FLOAT-32-FMA-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; SOFT-FLOAT-32-FMA-NEXT: movl %eax, (%ecx) +; SOFT-FLOAT-32-FMA-NEXT: movl %ecx, %eax +; SOFT-FLOAT-32-FMA-NEXT: addl $16, %esp +; SOFT-FLOAT-32-FMA-NEXT: .cfi_def_cfa_offset 20 +; SOFT-FLOAT-32-FMA-NEXT: popl %esi +; SOFT-FLOAT-32-FMA-NEXT: .cfi_def_cfa_offset 16 +; SOFT-FLOAT-32-FMA-NEXT: popl %edi +; SOFT-FLOAT-32-FMA-NEXT: .cfi_def_cfa_offset 12 +; SOFT-FLOAT-32-FMA-NEXT: popl %ebx +; SOFT-FLOAT-32-FMA-NEXT: .cfi_def_cfa_offset 8 +; SOFT-FLOAT-32-FMA-NEXT: popl %ebp +; SOFT-FLOAT-32-FMA-NEXT: .cfi_def_cfa_offset 4 +; SOFT-FLOAT-32-FMA-NEXT: retl $4 +; +; SOFT-FLOAT-32-FMA4-LABEL: fmuladd_contract_v4f64: +; SOFT-FLOAT-32-FMA4: # %bb.0: +; SOFT-FLOAT-32-FMA4-NEXT: pushl %ebp +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_def_cfa_offset 8 +; SOFT-FLOAT-32-FMA4-NEXT: pushl %ebx +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_def_cfa_offset 12 +; SOFT-FLOAT-32-FMA4-NEXT: pushl %edi +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_def_cfa_offset 16 +; SOFT-FLOAT-32-FMA4-NEXT: pushl %esi +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_def_cfa_offset 20 +; SOFT-FLOAT-32-FMA4-NEXT: subl $16, %esp +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_def_cfa_offset 36 +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_offset %esi, -20 +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_offset %edi, -16 +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_offset %ebx, -12 +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_offset %ebp, -8 +; SOFT-FLOAT-32-FMA4-NEXT: movl {{[0-9]+}}(%esp), %edi +; SOFT-FLOAT-32-FMA4-NEXT: movl {{[0-9]+}}(%esp), %ebx +; SOFT-FLOAT-32-FMA4-NEXT: movl {{[0-9]+}}(%esp), %esi +; SOFT-FLOAT-32-FMA4-NEXT: movl {{[0-9]+}}(%esp), %ebp +; SOFT-FLOAT-32-FMA4-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA4-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA4-NEXT: pushl 
{{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA4-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA4-NEXT: calll __muldf3 +; SOFT-FLOAT-32-FMA4-NEXT: addl $16, %esp +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset -16 +; SOFT-FLOAT-32-FMA4-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; SOFT-FLOAT-32-FMA4-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; SOFT-FLOAT-32-FMA4-NEXT: pushl %ebp +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA4-NEXT: pushl %esi +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA4-NEXT: pushl %ebx +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA4-NEXT: pushl %edi +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA4-NEXT: calll __muldf3 +; SOFT-FLOAT-32-FMA4-NEXT: addl $16, %esp +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset -16 +; SOFT-FLOAT-32-FMA4-NEXT: movl %eax, %esi +; SOFT-FLOAT-32-FMA4-NEXT: movl %edx, %ebp +; SOFT-FLOAT-32-FMA4-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA4-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA4-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA4-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA4-NEXT: calll __muldf3 +; SOFT-FLOAT-32-FMA4-NEXT: addl $16, %esp +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset -16 +; SOFT-FLOAT-32-FMA4-NEXT: movl %eax, %edi +; SOFT-FLOAT-32-FMA4-NEXT: movl %edx, %ebx +; SOFT-FLOAT-32-FMA4-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA4-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA4-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA4-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA4-NEXT: calll __muldf3 +; SOFT-FLOAT-32-FMA4-NEXT: addl $16, %esp +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset -16 +; SOFT-FLOAT-32-FMA4-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA4-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA4-NEXT: pushl %edx +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA4-NEXT: pushl %eax +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA4-NEXT: calll __adddf3 +; SOFT-FLOAT-32-FMA4-NEXT: addl $16, %esp +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset -16 +; SOFT-FLOAT-32-FMA4-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; SOFT-FLOAT-32-FMA4-NEXT: movl %edx, (%esp) # 4-byte Spill +; SOFT-FLOAT-32-FMA4-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA4-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA4-NEXT: pushl %ebx +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA4-NEXT: pushl %edi +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA4-NEXT: calll __adddf3 +; SOFT-FLOAT-32-FMA4-NEXT: addl $16, %esp +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset -16 +; SOFT-FLOAT-32-FMA4-NEXT: movl %eax, %edi +; SOFT-FLOAT-32-FMA4-NEXT: movl 
%edx, %ebx +; SOFT-FLOAT-32-FMA4-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA4-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA4-NEXT: pushl %ebp +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA4-NEXT: pushl %esi +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA4-NEXT: calll __adddf3 +; SOFT-FLOAT-32-FMA4-NEXT: addl $16, %esp +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset -16 +; SOFT-FLOAT-32-FMA4-NEXT: movl %eax, %ebp +; SOFT-FLOAT-32-FMA4-NEXT: movl %edx, %esi +; SOFT-FLOAT-32-FMA4-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA4-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA4-NEXT: pushl {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA4-NEXT: pushl {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA4-NEXT: calll __adddf3 +; SOFT-FLOAT-32-FMA4-NEXT: addl $16, %esp +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset -16 +; SOFT-FLOAT-32-FMA4-NEXT: movl {{[0-9]+}}(%esp), %ecx +; SOFT-FLOAT-32-FMA4-NEXT: movl %edx, 28(%ecx) +; SOFT-FLOAT-32-FMA4-NEXT: movl %eax, 24(%ecx) +; SOFT-FLOAT-32-FMA4-NEXT: movl %esi, 20(%ecx) +; SOFT-FLOAT-32-FMA4-NEXT: movl %ebp, 16(%ecx) +; SOFT-FLOAT-32-FMA4-NEXT: movl %ebx, 12(%ecx) +; SOFT-FLOAT-32-FMA4-NEXT: movl %edi, 8(%ecx) +; SOFT-FLOAT-32-FMA4-NEXT: movl (%esp), %eax # 4-byte Reload +; SOFT-FLOAT-32-FMA4-NEXT: movl %eax, 4(%ecx) +; SOFT-FLOAT-32-FMA4-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; SOFT-FLOAT-32-FMA4-NEXT: movl %eax, (%ecx) +; SOFT-FLOAT-32-FMA4-NEXT: movl %ecx, %eax +; SOFT-FLOAT-32-FMA4-NEXT: addl $16, %esp +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_def_cfa_offset 20 +; SOFT-FLOAT-32-FMA4-NEXT: popl %esi +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_def_cfa_offset 16 +; SOFT-FLOAT-32-FMA4-NEXT: popl %edi +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_def_cfa_offset 12 +; SOFT-FLOAT-32-FMA4-NEXT: popl %ebx +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_def_cfa_offset 8 +; SOFT-FLOAT-32-FMA4-NEXT: popl %ebp +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_def_cfa_offset 4 +; SOFT-FLOAT-32-FMA4-NEXT: retl $4 +; +; SOFT-FLOAT-64-LABEL: fmuladd_contract_v4f64: +; SOFT-FLOAT-64: # %bb.0: +; SOFT-FLOAT-64-NEXT: pushq %rbp +; SOFT-FLOAT-64-NEXT: .cfi_def_cfa_offset 16 +; SOFT-FLOAT-64-NEXT: pushq %r15 +; SOFT-FLOAT-64-NEXT: .cfi_def_cfa_offset 24 +; SOFT-FLOAT-64-NEXT: pushq %r14 +; SOFT-FLOAT-64-NEXT: .cfi_def_cfa_offset 32 +; SOFT-FLOAT-64-NEXT: pushq %r13 +; SOFT-FLOAT-64-NEXT: .cfi_def_cfa_offset 40 +; SOFT-FLOAT-64-NEXT: pushq %r12 +; SOFT-FLOAT-64-NEXT: .cfi_def_cfa_offset 48 +; SOFT-FLOAT-64-NEXT: pushq %rbx +; SOFT-FLOAT-64-NEXT: .cfi_def_cfa_offset 56 +; SOFT-FLOAT-64-NEXT: pushq %rax +; SOFT-FLOAT-64-NEXT: .cfi_def_cfa_offset 64 +; SOFT-FLOAT-64-NEXT: .cfi_offset %rbx, -56 +; SOFT-FLOAT-64-NEXT: .cfi_offset %r12, -48 +; SOFT-FLOAT-64-NEXT: .cfi_offset %r13, -40 +; SOFT-FLOAT-64-NEXT: .cfi_offset %r14, -32 +; SOFT-FLOAT-64-NEXT: .cfi_offset %r15, -24 +; SOFT-FLOAT-64-NEXT: .cfi_offset %rbp, -16 +; SOFT-FLOAT-64-NEXT: movq %rcx, %r14 +; SOFT-FLOAT-64-NEXT: movq %rdx, %r15 +; SOFT-FLOAT-64-NEXT: movq %rsi, %r12 +; SOFT-FLOAT-64-NEXT: movq %rdi, %rbx +; SOFT-FLOAT-64-NEXT: movq {{[0-9]+}}(%rsp), %rbp +; SOFT-FLOAT-64-NEXT: movq {{[0-9]+}}(%rsp), %rsi +; SOFT-FLOAT-64-NEXT: movq 
%r8, %rdi +; SOFT-FLOAT-64-NEXT: callq __muldf3@PLT +; SOFT-FLOAT-64-NEXT: movq %rax, %r13 +; SOFT-FLOAT-64-NEXT: movq %r14, %rdi +; SOFT-FLOAT-64-NEXT: movq %rbp, %rsi +; SOFT-FLOAT-64-NEXT: callq __muldf3@PLT +; SOFT-FLOAT-64-NEXT: movq %rax, %r14 +; SOFT-FLOAT-64-NEXT: movq %r15, %rdi +; SOFT-FLOAT-64-NEXT: movq {{[0-9]+}}(%rsp), %rsi +; SOFT-FLOAT-64-NEXT: callq __muldf3@PLT +; SOFT-FLOAT-64-NEXT: movq %rax, %r15 +; SOFT-FLOAT-64-NEXT: movq %r12, %rdi +; SOFT-FLOAT-64-NEXT: movq {{[0-9]+}}(%rsp), %rsi +; SOFT-FLOAT-64-NEXT: callq __muldf3@PLT +; SOFT-FLOAT-64-NEXT: movq %rax, %rdi +; SOFT-FLOAT-64-NEXT: movq {{[0-9]+}}(%rsp), %rsi +; SOFT-FLOAT-64-NEXT: callq __adddf3@PLT +; SOFT-FLOAT-64-NEXT: movq %rax, %r12 +; SOFT-FLOAT-64-NEXT: movq %r15, %rdi +; SOFT-FLOAT-64-NEXT: movq {{[0-9]+}}(%rsp), %rsi +; SOFT-FLOAT-64-NEXT: callq __adddf3@PLT +; SOFT-FLOAT-64-NEXT: movq %rax, %r15 +; SOFT-FLOAT-64-NEXT: movq %r14, %rdi +; SOFT-FLOAT-64-NEXT: movq {{[0-9]+}}(%rsp), %rsi +; SOFT-FLOAT-64-NEXT: callq __adddf3@PLT +; SOFT-FLOAT-64-NEXT: movq %rax, %r14 +; SOFT-FLOAT-64-NEXT: movq %r13, %rdi +; SOFT-FLOAT-64-NEXT: movq {{[0-9]+}}(%rsp), %rsi +; SOFT-FLOAT-64-NEXT: callq __adddf3@PLT +; SOFT-FLOAT-64-NEXT: movq %rax, 24(%rbx) +; SOFT-FLOAT-64-NEXT: movq %r14, 16(%rbx) +; SOFT-FLOAT-64-NEXT: movq %r15, 8(%rbx) +; SOFT-FLOAT-64-NEXT: movq %r12, (%rbx) +; SOFT-FLOAT-64-NEXT: movq %rbx, %rax +; SOFT-FLOAT-64-NEXT: addq $8, %rsp +; SOFT-FLOAT-64-NEXT: .cfi_def_cfa_offset 56 +; SOFT-FLOAT-64-NEXT: popq %rbx +; SOFT-FLOAT-64-NEXT: .cfi_def_cfa_offset 48 +; SOFT-FLOAT-64-NEXT: popq %r12 +; SOFT-FLOAT-64-NEXT: .cfi_def_cfa_offset 40 +; SOFT-FLOAT-64-NEXT: popq %r13 +; SOFT-FLOAT-64-NEXT: .cfi_def_cfa_offset 32 +; SOFT-FLOAT-64-NEXT: popq %r14 +; SOFT-FLOAT-64-NEXT: .cfi_def_cfa_offset 24 +; SOFT-FLOAT-64-NEXT: popq %r15 +; SOFT-FLOAT-64-NEXT: .cfi_def_cfa_offset 16 +; SOFT-FLOAT-64-NEXT: popq %rbp +; SOFT-FLOAT-64-NEXT: .cfi_def_cfa_offset 8 +; SOFT-FLOAT-64-NEXT: retq +; +; SOFT-FLOAT-64-FMA-LABEL: fmuladd_contract_v4f64: +; SOFT-FLOAT-64-FMA: # %bb.0: +; SOFT-FLOAT-64-FMA-NEXT: pushq %rbp +; SOFT-FLOAT-64-FMA-NEXT: .cfi_def_cfa_offset 16 +; SOFT-FLOAT-64-FMA-NEXT: pushq %r15 +; SOFT-FLOAT-64-FMA-NEXT: .cfi_def_cfa_offset 24 +; SOFT-FLOAT-64-FMA-NEXT: pushq %r14 +; SOFT-FLOAT-64-FMA-NEXT: .cfi_def_cfa_offset 32 +; SOFT-FLOAT-64-FMA-NEXT: pushq %r13 +; SOFT-FLOAT-64-FMA-NEXT: .cfi_def_cfa_offset 40 +; SOFT-FLOAT-64-FMA-NEXT: pushq %r12 +; SOFT-FLOAT-64-FMA-NEXT: .cfi_def_cfa_offset 48 +; SOFT-FLOAT-64-FMA-NEXT: pushq %rbx +; SOFT-FLOAT-64-FMA-NEXT: .cfi_def_cfa_offset 56 +; SOFT-FLOAT-64-FMA-NEXT: pushq %rax +; SOFT-FLOAT-64-FMA-NEXT: .cfi_def_cfa_offset 64 +; SOFT-FLOAT-64-FMA-NEXT: .cfi_offset %rbx, -56 +; SOFT-FLOAT-64-FMA-NEXT: .cfi_offset %r12, -48 +; SOFT-FLOAT-64-FMA-NEXT: .cfi_offset %r13, -40 +; SOFT-FLOAT-64-FMA-NEXT: .cfi_offset %r14, -32 +; SOFT-FLOAT-64-FMA-NEXT: .cfi_offset %r15, -24 +; SOFT-FLOAT-64-FMA-NEXT: .cfi_offset %rbp, -16 +; SOFT-FLOAT-64-FMA-NEXT: movq %rcx, %r14 +; SOFT-FLOAT-64-FMA-NEXT: movq %rdx, %r15 +; SOFT-FLOAT-64-FMA-NEXT: movq %rsi, %r12 +; SOFT-FLOAT-64-FMA-NEXT: movq %rdi, %rbx +; SOFT-FLOAT-64-FMA-NEXT: movq {{[0-9]+}}(%rsp), %rbp +; SOFT-FLOAT-64-FMA-NEXT: movq {{[0-9]+}}(%rsp), %rsi +; SOFT-FLOAT-64-FMA-NEXT: movq %r8, %rdi +; SOFT-FLOAT-64-FMA-NEXT: callq __muldf3@PLT +; SOFT-FLOAT-64-FMA-NEXT: movq %rax, %r13 +; SOFT-FLOAT-64-FMA-NEXT: movq %r14, %rdi +; SOFT-FLOAT-64-FMA-NEXT: movq %rbp, %rsi +; SOFT-FLOAT-64-FMA-NEXT: callq __muldf3@PLT +; 
SOFT-FLOAT-64-FMA-NEXT: movq %rax, %r14 +; SOFT-FLOAT-64-FMA-NEXT: movq %r15, %rdi +; SOFT-FLOAT-64-FMA-NEXT: movq {{[0-9]+}}(%rsp), %rsi +; SOFT-FLOAT-64-FMA-NEXT: callq __muldf3@PLT +; SOFT-FLOAT-64-FMA-NEXT: movq %rax, %r15 +; SOFT-FLOAT-64-FMA-NEXT: movq %r12, %rdi +; SOFT-FLOAT-64-FMA-NEXT: movq {{[0-9]+}}(%rsp), %rsi +; SOFT-FLOAT-64-FMA-NEXT: callq __muldf3@PLT +; SOFT-FLOAT-64-FMA-NEXT: movq %rax, %rdi +; SOFT-FLOAT-64-FMA-NEXT: movq {{[0-9]+}}(%rsp), %rsi +; SOFT-FLOAT-64-FMA-NEXT: callq __adddf3@PLT +; SOFT-FLOAT-64-FMA-NEXT: movq %rax, %r12 +; SOFT-FLOAT-64-FMA-NEXT: movq %r15, %rdi +; SOFT-FLOAT-64-FMA-NEXT: movq {{[0-9]+}}(%rsp), %rsi +; SOFT-FLOAT-64-FMA-NEXT: callq __adddf3@PLT +; SOFT-FLOAT-64-FMA-NEXT: movq %rax, %r15 +; SOFT-FLOAT-64-FMA-NEXT: movq %r14, %rdi +; SOFT-FLOAT-64-FMA-NEXT: movq {{[0-9]+}}(%rsp), %rsi +; SOFT-FLOAT-64-FMA-NEXT: callq __adddf3@PLT +; SOFT-FLOAT-64-FMA-NEXT: movq %rax, %r14 +; SOFT-FLOAT-64-FMA-NEXT: movq %r13, %rdi +; SOFT-FLOAT-64-FMA-NEXT: movq {{[0-9]+}}(%rsp), %rsi +; SOFT-FLOAT-64-FMA-NEXT: callq __adddf3@PLT +; SOFT-FLOAT-64-FMA-NEXT: movq %rax, 24(%rbx) +; SOFT-FLOAT-64-FMA-NEXT: movq %r14, 16(%rbx) +; SOFT-FLOAT-64-FMA-NEXT: movq %r15, 8(%rbx) +; SOFT-FLOAT-64-FMA-NEXT: movq %r12, (%rbx) +; SOFT-FLOAT-64-FMA-NEXT: movq %rbx, %rax +; SOFT-FLOAT-64-FMA-NEXT: addq $8, %rsp +; SOFT-FLOAT-64-FMA-NEXT: .cfi_def_cfa_offset 56 +; SOFT-FLOAT-64-FMA-NEXT: popq %rbx +; SOFT-FLOAT-64-FMA-NEXT: .cfi_def_cfa_offset 48 +; SOFT-FLOAT-64-FMA-NEXT: popq %r12 +; SOFT-FLOAT-64-FMA-NEXT: .cfi_def_cfa_offset 40 +; SOFT-FLOAT-64-FMA-NEXT: popq %r13 +; SOFT-FLOAT-64-FMA-NEXT: .cfi_def_cfa_offset 32 +; SOFT-FLOAT-64-FMA-NEXT: popq %r14 +; SOFT-FLOAT-64-FMA-NEXT: .cfi_def_cfa_offset 24 +; SOFT-FLOAT-64-FMA-NEXT: popq %r15 +; SOFT-FLOAT-64-FMA-NEXT: .cfi_def_cfa_offset 16 +; SOFT-FLOAT-64-FMA-NEXT: popq %rbp +; SOFT-FLOAT-64-FMA-NEXT: .cfi_def_cfa_offset 8 +; SOFT-FLOAT-64-FMA-NEXT: retq +; +; SOFT-FLOAT-64-FMA4-LABEL: fmuladd_contract_v4f64: +; SOFT-FLOAT-64-FMA4: # %bb.0: +; SOFT-FLOAT-64-FMA4-NEXT: pushq %rbp +; SOFT-FLOAT-64-FMA4-NEXT: .cfi_def_cfa_offset 16 +; SOFT-FLOAT-64-FMA4-NEXT: pushq %r15 +; SOFT-FLOAT-64-FMA4-NEXT: .cfi_def_cfa_offset 24 +; SOFT-FLOAT-64-FMA4-NEXT: pushq %r14 +; SOFT-FLOAT-64-FMA4-NEXT: .cfi_def_cfa_offset 32 +; SOFT-FLOAT-64-FMA4-NEXT: pushq %r13 +; SOFT-FLOAT-64-FMA4-NEXT: .cfi_def_cfa_offset 40 +; SOFT-FLOAT-64-FMA4-NEXT: pushq %r12 +; SOFT-FLOAT-64-FMA4-NEXT: .cfi_def_cfa_offset 48 +; SOFT-FLOAT-64-FMA4-NEXT: pushq %rbx +; SOFT-FLOAT-64-FMA4-NEXT: .cfi_def_cfa_offset 56 +; SOFT-FLOAT-64-FMA4-NEXT: pushq %rax +; SOFT-FLOAT-64-FMA4-NEXT: .cfi_def_cfa_offset 64 +; SOFT-FLOAT-64-FMA4-NEXT: .cfi_offset %rbx, -56 +; SOFT-FLOAT-64-FMA4-NEXT: .cfi_offset %r12, -48 +; SOFT-FLOAT-64-FMA4-NEXT: .cfi_offset %r13, -40 +; SOFT-FLOAT-64-FMA4-NEXT: .cfi_offset %r14, -32 +; SOFT-FLOAT-64-FMA4-NEXT: .cfi_offset %r15, -24 +; SOFT-FLOAT-64-FMA4-NEXT: .cfi_offset %rbp, -16 +; SOFT-FLOAT-64-FMA4-NEXT: movq %rcx, %r14 +; SOFT-FLOAT-64-FMA4-NEXT: movq %rdx, %r15 +; SOFT-FLOAT-64-FMA4-NEXT: movq %rsi, %r12 +; SOFT-FLOAT-64-FMA4-NEXT: movq %rdi, %rbx +; SOFT-FLOAT-64-FMA4-NEXT: movq {{[0-9]+}}(%rsp), %rbp +; SOFT-FLOAT-64-FMA4-NEXT: movq {{[0-9]+}}(%rsp), %rsi +; SOFT-FLOAT-64-FMA4-NEXT: movq %r8, %rdi +; SOFT-FLOAT-64-FMA4-NEXT: callq __muldf3@PLT +; SOFT-FLOAT-64-FMA4-NEXT: movq %rax, %r13 +; SOFT-FLOAT-64-FMA4-NEXT: movq %r14, %rdi +; SOFT-FLOAT-64-FMA4-NEXT: movq %rbp, %rsi +; SOFT-FLOAT-64-FMA4-NEXT: callq __muldf3@PLT +; SOFT-FLOAT-64-FMA4-NEXT: 
movq %rax, %r14 +; SOFT-FLOAT-64-FMA4-NEXT: movq %r15, %rdi +; SOFT-FLOAT-64-FMA4-NEXT: movq {{[0-9]+}}(%rsp), %rsi +; SOFT-FLOAT-64-FMA4-NEXT: callq __muldf3@PLT +; SOFT-FLOAT-64-FMA4-NEXT: movq %rax, %r15 +; SOFT-FLOAT-64-FMA4-NEXT: movq %r12, %rdi +; SOFT-FLOAT-64-FMA4-NEXT: movq {{[0-9]+}}(%rsp), %rsi +; SOFT-FLOAT-64-FMA4-NEXT: callq __muldf3@PLT +; SOFT-FLOAT-64-FMA4-NEXT: movq %rax, %rdi +; SOFT-FLOAT-64-FMA4-NEXT: movq {{[0-9]+}}(%rsp), %rsi +; SOFT-FLOAT-64-FMA4-NEXT: callq __adddf3@PLT +; SOFT-FLOAT-64-FMA4-NEXT: movq %rax, %r12 +; SOFT-FLOAT-64-FMA4-NEXT: movq %r15, %rdi +; SOFT-FLOAT-64-FMA4-NEXT: movq {{[0-9]+}}(%rsp), %rsi +; SOFT-FLOAT-64-FMA4-NEXT: callq __adddf3@PLT +; SOFT-FLOAT-64-FMA4-NEXT: movq %rax, %r15 +; SOFT-FLOAT-64-FMA4-NEXT: movq %r14, %rdi +; SOFT-FLOAT-64-FMA4-NEXT: movq {{[0-9]+}}(%rsp), %rsi +; SOFT-FLOAT-64-FMA4-NEXT: callq __adddf3@PLT +; SOFT-FLOAT-64-FMA4-NEXT: movq %rax, %r14 +; SOFT-FLOAT-64-FMA4-NEXT: movq %r13, %rdi +; SOFT-FLOAT-64-FMA4-NEXT: movq {{[0-9]+}}(%rsp), %rsi +; SOFT-FLOAT-64-FMA4-NEXT: callq __adddf3@PLT +; SOFT-FLOAT-64-FMA4-NEXT: movq %rax, 24(%rbx) +; SOFT-FLOAT-64-FMA4-NEXT: movq %r14, 16(%rbx) +; SOFT-FLOAT-64-FMA4-NEXT: movq %r15, 8(%rbx) +; SOFT-FLOAT-64-FMA4-NEXT: movq %r12, (%rbx) +; SOFT-FLOAT-64-FMA4-NEXT: movq %rbx, %rax +; SOFT-FLOAT-64-FMA4-NEXT: addq $8, %rsp +; SOFT-FLOAT-64-FMA4-NEXT: .cfi_def_cfa_offset 56 +; SOFT-FLOAT-64-FMA4-NEXT: popq %rbx +; SOFT-FLOAT-64-FMA4-NEXT: .cfi_def_cfa_offset 48 +; SOFT-FLOAT-64-FMA4-NEXT: popq %r12 +; SOFT-FLOAT-64-FMA4-NEXT: .cfi_def_cfa_offset 40 +; SOFT-FLOAT-64-FMA4-NEXT: popq %r13 +; SOFT-FLOAT-64-FMA4-NEXT: .cfi_def_cfa_offset 32 +; SOFT-FLOAT-64-FMA4-NEXT: popq %r14 +; SOFT-FLOAT-64-FMA4-NEXT: .cfi_def_cfa_offset 24 +; SOFT-FLOAT-64-FMA4-NEXT: popq %r15 +; SOFT-FLOAT-64-FMA4-NEXT: .cfi_def_cfa_offset 16 +; SOFT-FLOAT-64-FMA4-NEXT: popq %rbp +; SOFT-FLOAT-64-FMA4-NEXT: .cfi_def_cfa_offset 8 +; SOFT-FLOAT-64-FMA4-NEXT: retq + %product = fmul contract <4 x double> %a, %b + %result = fadd contract <4 x double> %product, %c + ret <4 x double> %result +} + +attributes #0 = { "use-soft-float"="true" } + +declare float @llvm.fmuladd.f32(float %a, float %b, float %c) +declare double @llvm.fmuladd.f64(double %a, double %b, double %c) diff --git a/llvm/test/DebugInfo/NVPTX/dbg-declare-alloca.ll b/llvm/test/DebugInfo/NVPTX/dbg-declare-alloca.ll index 116ab7e3978cf7..31517939a4b75c 100644 --- a/llvm/test/DebugInfo/NVPTX/dbg-declare-alloca.ll +++ b/llvm/test/DebugInfo/NVPTX/dbg-declare-alloca.ll @@ -10,7 +10,7 @@ ; CHECK: .loc 1 5 3 // t.c:5:3 ; CHECK: { // callseq 0, 0 ; CHECK: .param .b64 param0; -; CHECK: st.param.b64 [param0+0], %rd1; +; CHECK: st.param.b64 [param0], %rd1; ; CHECK: call.uni ; CHECK: escape_foo, ; CHECK: ( diff --git a/llvm/test/Transforms/NaryReassociate/NVPTX/nary-slsr.ll b/llvm/test/Transforms/NaryReassociate/NVPTX/nary-slsr.ll index c9c1406a0fa8ad..face96f85975ac 100644 --- a/llvm/test/Transforms/NaryReassociate/NVPTX/nary-slsr.ll +++ b/llvm/test/Transforms/NaryReassociate/NVPTX/nary-slsr.ll @@ -22,7 +22,7 @@ define void @nary_reassociate_after_slsr(i32 %a, i32 %b, i32 %c) { %abc = add i32 %ab, %c call void @foo(i32 %abc) ; CHECK: call void @foo(i32 %abc) -; PTX: st.param.b32 [param0+0], [[abc:%r[0-9]+]]; +; PTX: st.param.b32 [param0], [[abc:%r[0-9]+]]; %b2 = shl i32 %b, 1 %ab2 = add i32 %a, %b2 @@ -31,7 +31,7 @@ define void @nary_reassociate_after_slsr(i32 %a, i32 %b, i32 %c) { ; PTX: add.s32 [[ab2c:%r[0-9]+]], [[abc]], [[b]] call void @foo(i32 %ab2c) ; CHECK-NEXT: 
call void @foo(i32 %ab2c) -; PTX: st.param.b32 [param0+0], [[ab2c]]; +; PTX: st.param.b32 [param0], [[ab2c]]; %b3 = mul i32 %b, 3 %ab3 = add i32 %a, %b3 @@ -40,7 +40,7 @@ define void @nary_reassociate_after_slsr(i32 %a, i32 %b, i32 %c) { ; PTX: add.s32 [[ab3c:%r[0-9]+]], [[ab2c]], [[b]] call void @foo(i32 %ab3c) ; CHECK-NEXT: call void @foo(i32 %ab3c) -; PTX: st.param.b32 [param0+0], [[ab3c]]; +; PTX: st.param.b32 [param0], [[ab3c]]; ret void } diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/nvptx-basic.ll.expected b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/nvptx-basic.ll.expected index 5c9af3bb44da2a..a64364019de15e 100644 --- a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/nvptx-basic.ll.expected +++ b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/nvptx-basic.ll.expected @@ -23,10 +23,10 @@ define dso_local void @caller_St8x4(ptr nocapture noundef readonly byval(%struct ; CHECK-NEXT: ld.param.u64 %rd3, [caller_St8x4_param_0+8]; ; CHECK-NEXT: st.u64 [%SP+8], %rd3; ; CHECK-NEXT: ld.param.u64 %rd4, [caller_St8x4_param_0]; -; CHECK-NEXT: st.u64 [%SP+0], %rd4; +; CHECK-NEXT: st.u64 [%SP], %rd4; ; CHECK-NEXT: { // callseq 0, 0 ; CHECK-NEXT: .param .align 16 .b8 param0[32]; -; CHECK-NEXT: st.param.v2.b64 [param0+0], {%rd4, %rd3}; +; CHECK-NEXT: st.param.v2.b64 [param0], {%rd4, %rd3}; ; CHECK-NEXT: st.param.v2.b64 [param0+16], {%rd2, %rd1}; ; CHECK-NEXT: .param .align 16 .b8 retval0[32]; ; CHECK-NEXT: call.uni (retval0), @@ -34,7 +34,7 @@ define dso_local void @caller_St8x4(ptr nocapture noundef readonly byval(%struct ; CHECK-NEXT: ( ; CHECK-NEXT: param0 ; CHECK-NEXT: ); -; CHECK-NEXT: ld.param.v2.b64 {%rd5, %rd6}, [retval0+0]; +; CHECK-NEXT: ld.param.v2.b64 {%rd5, %rd6}, [retval0]; ; CHECK-NEXT: ld.param.v2.b64 {%rd7, %rd8}, [retval0+16]; ; CHECK-NEXT: } // callseq 0 ; CHECK-NEXT: st.u64 [%r1], %rd5; @@ -66,7 +66,7 @@ define internal fastcc [4 x i64] @callee_St8x4(ptr nocapture noundef readonly by ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.v2.u64 {%rd1, %rd2}, [callee_St8x4_param_0]; ; CHECK-NEXT: ld.param.v2.u64 {%rd3, %rd4}, [callee_St8x4_param_0+16]; -; CHECK-NEXT: st.param.v2.b64 [func_retval0+0], {%rd1, %rd2}; +; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd1, %rd2}; ; CHECK-NEXT: st.param.v2.b64 [func_retval0+16], {%rd3, %rd4}; ; CHECK-NEXT: ret; %1 = load i64, ptr %in, align 8 diff --git a/llvm/utils/gn/secondary/compiler-rt/lib/hwasan/BUILD.gn b/llvm/utils/gn/secondary/compiler-rt/lib/hwasan/BUILD.gn index f453dde0ea93eb..e39d8114d1f473 100644 --- a/llvm/utils/gn/secondary/compiler-rt/lib/hwasan/BUILD.gn +++ b/llvm/utils/gn/secondary/compiler-rt/lib/hwasan/BUILD.gn @@ -64,10 +64,16 @@ source_set("sources") { "hwasan_type_test.cpp", ] if (current_cpu == "arm64") { - sources += [ "hwasan_setjmp_aarch64.S" ] + sources += [ + "hwasan_setjmp_aarch64.S", + "hwasan_tag_mismatch_aarch64.S", + ] } if (current_cpu == "riscv64") { - sources += [ "hwasan_setjmp_riscv64.S" ] + sources += [ + "hwasan_setjmp_riscv64.S", + "hwasan_tag_mismatch_riscv64.S", + ] } if (current_cpu == "x64") { sources += [ "hwasan_setjmp_x86_64.S" ] diff --git a/mlir/docs/Dialects/Linalg/_index.md b/mlir/docs/Dialects/Linalg/_index.md index fbd1a451dc094e..976f0fd3c7e911 100644 --- a/mlir/docs/Dialects/Linalg/_index.md +++ b/mlir/docs/Dialects/Linalg/_index.md @@ -667,7 +667,7 @@ directly. This facility is currently in flight and is intended to subsume the above when ready. 
See the C++ class to YAML mapping traits in -`mlir-mlinalg-ods-yaml-gen.cpp` as the source of truth for the schema. +`mlir-linalg-ods-yaml-gen.cpp` as the source of truth for the schema. Most of the above documentation roughly applies to this path and will be ported as migration continues. diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgNamedStructuredOps.yaml b/mlir/include/mlir/Dialect/Linalg/IR/LinalgNamedStructuredOps.yaml index 8cb698096ef5b7..bf2f26de26e9ed 100644 --- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgNamedStructuredOps.yaml +++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgNamedStructuredOps.yaml @@ -3114,6 +3114,143 @@ structured_op: !LinalgStructuredOpConfig - !ScalarExpression scalar_arg: KZp --- !LinalgOpConfig +metadata: !LinalgOpMetadata + name: conv_2d_nchw_fchw_q + cpp_class_name: Conv2DNchwFchwQOp + doc: |- + Performs 2-D convolution with zero point offsets. + + Layout: + * Input: NCHW. + * Kernel: FCHW. + + Numeric casting is performed on the operands to the inner multiply, promoting + them to the same data type as the accumulator/output. This includes the zero + point offsets common to quantized operations. + implements: + - LinalgConvolutionOpInterface +structured_op: !LinalgStructuredOpConfig + args: + - !LinalgOperandDefConfig + name: I + kind: input_tensor + type_var: T1 + shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10] -> (s0, + s1, s2 * s3 + s4 * s5, s6 * s7 + s8 * s9)> + - !LinalgOperandDefConfig + name: K + kind: input_tensor + type_var: T2 + shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10] -> (s10, + s1, s4, s8)> + - !LinalgOperandDefConfig + name: IZp + kind: scalar + type_var: I32 + - !LinalgOperandDefConfig + name: KZp + kind: scalar + type_var: I32 + - !LinalgOperandDefConfig + name: O + kind: output_tensor + type_var: U + shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10] -> (s0, + s10, s2, s6)> + - !LinalgOperandDefConfig + name: strides + kind: index_attr + index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10] -> + (s3, s7)> + default_indices: + - 1 + - 1 + - !LinalgOperandDefConfig + name: dilations + kind: index_attr + index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10] -> + (s5, s9)> + default_indices: + - 1 + - 1 + indexing_maps: !LinalgIndexingMapsConfig + static_indexing_maps: + - affine_map<(d0, d1, d2, d3, d4, d5, d6)[s0, s1, s2, s3, s4, s5, s6, s7, s8, + s9, s10] -> (d0, d4, d2 * s3 + d5 * s5, d3 * s7 + d6 * s9)> + - affine_map<(d0, d1, d2, d3, d4, d5, d6)[s0, s1, s2, s3, s4, s5, s6, s7, s8, + s9, s10] -> (d1, d4, d5, d6)> + - affine_map<(d0, d1, d2, d3, d4, d5, d6)[s0, s1, s2, s3, s4, s5, s6, s7, s8, + s9, s10] -> ()> + - affine_map<(d0, d1, d2, d3, d4, d5, d6)[s0, s1, s2, s3, s4, s5, s6, s7, s8, + s9, s10] -> ()> + - affine_map<(d0, d1, d2, d3, d4, d5, d6)[s0, s1, s2, s3, s4, s5, s6, s7, s8, + s9, s10] -> (d0, d1, d2, d3)> + iterator_types: + - parallel + - parallel + - parallel + - parallel + - reduction + - reduction + - reduction + assignments: + - !ScalarAssign + arg: O + value: !ScalarExpression + scalar_fn: + kind: binary + fn_name: add + operands: + - !ScalarExpression + scalar_arg: O + - !ScalarExpression + scalar_fn: + kind: binary + fn_name: mul + operands: + - !ScalarExpression + scalar_fn: + kind: binary + fn_name: sub + operands: + - !ScalarExpression + scalar_fn: + kind: type + fn_name: cast_signed + type_var: U + operands: + - !ScalarExpression + scalar_arg: I + - !ScalarExpression + scalar_fn: + kind: type + 
fn_name: cast_signed + type_var: U + operands: + - !ScalarExpression + scalar_arg: IZp + - !ScalarExpression + scalar_fn: + kind: binary + fn_name: sub + operands: + - !ScalarExpression + scalar_fn: + kind: type + fn_name: cast_signed + type_var: U + operands: + - !ScalarExpression + scalar_arg: K + - !ScalarExpression + scalar_fn: + kind: type + fn_name: cast_signed + type_var: U + operands: + - !ScalarExpression + scalar_arg: KZp +--- !LinalgOpConfig metadata: !LinalgOpMetadata name: conv_2d_nchw_fchw cpp_class_name: Conv2DNchwFchwOp
diff --git a/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVBarrierOps.td b/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVBarrierOps.td index 1ebea94fced0a3..14593305490661 100644 --- a/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVBarrierOps.td +++ b/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVBarrierOps.td @@ -54,7 +54,7 @@ def SPIRV_ControlBarrierOp : SPIRV_Op<"ControlBarrier", []> { #### Example: ```mlir - spirv.ControlBarrier "Workgroup", "Device", "Acquire|UniformMemory" + spirv.ControlBarrier <Workgroup>, <Device>, <Acquire|UniformMemory> ``` }];
diff --git a/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVMiscOps.td b/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVMiscOps.td index 71ecabfb444bd0..022cbbbb6720fb 100644 --- a/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVMiscOps.td +++ b/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVMiscOps.td @@ -1,4 +1,4 @@ -//===-- SPIRVBarrierOps.td - MLIR SPIR-V Barrier Ops -------*- tablegen -*-===// +//===-- SPIRVMiscOps.td - MLIR SPIR-V Misc Ops -------------*- tablegen -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information.
diff --git a/mlir/lib/Conversion/SPIRVToLLVM/SPIRVToLLVM.cpp b/mlir/lib/Conversion/SPIRVToLLVM/SPIRVToLLVM.cpp index 74c169c9a7e76a..f28473a108e1b5 100644 --- a/mlir/lib/Conversion/SPIRVToLLVM/SPIRVToLLVM.cpp +++ b/mlir/lib/Conversion/SPIRVToLLVM/SPIRVToLLVM.cpp @@ -1024,6 +1024,71 @@ class ReturnValuePattern : public SPIRVToLLVMConversion<spirv::ReturnValueOp> { } }; +static LLVM::LLVMFuncOp lookupOrCreateSPIRVFn(Operation *symbolTable, + StringRef name, + ArrayRef<Type> paramTypes, + Type resultType) { + auto func = dyn_cast_or_null<LLVM::LLVMFuncOp>( + SymbolTable::lookupSymbolIn(symbolTable, name)); + if (func) + return func; + + OpBuilder b(symbolTable->getRegion(0)); + func = b.create<LLVM::LLVMFuncOp>( + symbolTable->getLoc(), name, + LLVM::LLVMFunctionType::get(resultType, paramTypes)); + func.setCConv(LLVM::cconv::CConv::SPIR_FUNC); + func.setConvergent(true); + func.setNoUnwind(true); + func.setWillReturn(true); + return func; +} + +static LLVM::CallOp createSPIRVBuiltinCall(Location loc, OpBuilder &builder, + LLVM::LLVMFuncOp func, + ValueRange args) { + auto call = builder.create<LLVM::CallOp>(loc, func, args); + call.setCConv(func.getCConv()); + call.setConvergentAttr(func.getConvergentAttr()); + call.setNoUnwindAttr(func.getNoUnwindAttr()); + call.setWillReturnAttr(func.getWillReturnAttr()); + return call; +} + +class ControlBarrierPattern + : public SPIRVToLLVMConversion<spirv::ControlBarrierOp> { +public: + using SPIRVToLLVMConversion<spirv::ControlBarrierOp>::SPIRVToLLVMConversion; + + LogicalResult + matchAndRewrite(spirv::ControlBarrierOp controlBarrierOp, OpAdaptor adaptor, + ConversionPatternRewriter &rewriter) const override { + constexpr StringLiteral funcName = "_Z22__spirv_ControlBarrieriii"; + Operation *symbolTable = + controlBarrierOp->getParentWithTrait<OpTrait::SymbolTable>(); + + Type i32 = rewriter.getI32Type(); + + Type voidTy = rewriter.getType<LLVM::LLVMVoidType>(); + LLVM::LLVMFuncOp func = + lookupOrCreateSPIRVFn(symbolTable, funcName, {i32, i32, i32}, voidTy); + + Location loc = controlBarrierOp->getLoc(); + Value execution = rewriter.create<LLVM::ConstantOp>( + loc, i32, static_cast<int32_t>(adaptor.getExecutionScope())); + Value memory = rewriter.create<LLVM::ConstantOp>( + loc, i32, static_cast<int32_t>(adaptor.getMemoryScope())); + Value semantics = rewriter.create<LLVM::ConstantOp>( + loc, i32, static_cast<int32_t>(adaptor.getMemorySemantics())); + + auto call = createSPIRVBuiltinCall(loc, rewriter, func, + {execution, memory, semantics}); + + rewriter.replaceOp(controlBarrierOp, call); + return success(); + } +}; + /// Converts `spirv.mlir.loop` to LLVM dialect. All blocks within selection /// should be reachable for conversion to succeed. The structure of the loop in /// LLVM dialect will be the following: @@ -1648,7 +1713,10 @@ void mlir::populateSPIRVToLLVMConversionPatterns( ShiftPattern, // Return ops - ReturnPattern, ReturnValuePattern>(patterns.getContext(), typeConverter); + ReturnPattern, ReturnValuePattern, + + // Barrier ops + ControlBarrierPattern>(patterns.getContext(), typeConverter); patterns.add<GlobalVariablePattern>(clientAPI, patterns.getContext(), typeConverter);
+ """ + implements(ConvolutionOpInterface) + domain(D.n, D.f, D.oh, D.ow, D.c, D.kh, D.kw) + O[D.n, D.f, D.oh, D.ow] += ( + TypeFn.cast_signed( + U, I[D.n, D.c, D.oh * S.SH + D.kh * S.DH, D.ow * S.SW + D.kw * S.DW] + ) + - TypeFn.cast_signed(U, IZp) + ) * (TypeFn.cast_signed(U, K[D.f, D.c, D.kh, D.kw]) - TypeFn.cast_signed(U, KZp)) + @linalg_structured_op def conv_2d_nchw_fchw( I=TensorDef(T1, S.N, S.C, S.OH * S.SH + S.KH * S.DH, S.OW * S.SW + S.KW * S.DW), diff --git a/mlir/test/Conversion/SPIRVToLLVM/barrier-ops-to-llvm.mlir b/mlir/test/Conversion/SPIRVToLLVM/barrier-ops-to-llvm.mlir new file mode 100644 index 00000000000000..d53afeeea15d10 --- /dev/null +++ b/mlir/test/Conversion/SPIRVToLLVM/barrier-ops-to-llvm.mlir @@ -0,0 +1,23 @@ +// RUN: mlir-opt -convert-spirv-to-llvm %s | FileCheck %s + +//===----------------------------------------------------------------------===// +// spirv.ControlBarrierOp +//===----------------------------------------------------------------------===// + +// CHECK: llvm.func spir_funccc @_Z22__spirv_ControlBarrieriii(i32, i32, i32) attributes {convergent, no_unwind, will_return} + +// CHECK-LABEL: @control_barrier +spirv.func @control_barrier() "None" { + // CHECK: [[EXECUTION:%.*]] = llvm.mlir.constant(2 : i32) : i32 + // CHECK: [[MEMORY:%.*]] = llvm.mlir.constant(2 : i32) : i32 + // CHECK: [[SEMANTICS:%.*]] = llvm.mlir.constant(768 : i32) : i32 + // CHECK: llvm.call spir_funccc @_Z22__spirv_ControlBarrieriii([[EXECUTION]], [[MEMORY]], [[SEMANTICS]]) {convergent, no_unwind, will_return} : (i32, i32, i32) -> () + spirv.ControlBarrier , , + + // CHECK: [[EXECUTION:%.*]] = llvm.mlir.constant(2 : i32) : i32 + // CHECK: [[MEMORY:%.*]] = llvm.mlir.constant(2 : i32) : i32 + // CHECK: [[SEMANTICS:%.*]] = llvm.mlir.constant(256 : i32) : i32 + // CHECK: llvm.call spir_funccc @_Z22__spirv_ControlBarrieriii([[EXECUTION]], [[MEMORY]], [[SEMANTICS]]) {convergent, no_unwind, will_return} : (i32, i32, i32) -> () + spirv.ControlBarrier , , + spirv.Return +} diff --git a/mlir/test/Dialect/Linalg/roundtrip.mlir b/mlir/test/Dialect/Linalg/roundtrip.mlir index 146e9780b8ebbe..1b8969bd115595 100644 --- a/mlir/test/Dialect/Linalg/roundtrip.mlir +++ b/mlir/test/Dialect/Linalg/roundtrip.mlir @@ -664,3 +664,33 @@ func.func @winograd_output_dyn(%arg0: tensor<6x6x?x?x?x?xf32>, %arg1: tensor) outs(%arg1 : tensor) -> tensor + +// ----- + +func.func @conv2d_channel_first_q(%img: tensor<100x3x224x224xi32>, %filt: tensor<64x3x5x5xi32>, %a: i32, %b: i32) -> tensor<100x64x220x220xi32> { + %init = arith.constant dense<0> : tensor<100x64x220x220xi32> + %1 = linalg.conv_2d_nchw_fchw_q {dilations = dense<1> : tensor<2xi64>, + strides = dense<1> : tensor<2xi64>} + ins(%img, %filt, %a, %b : tensor<100x3x224x224xi32>, tensor<64x3x5x5xi32>, i32, i32) + outs(%init : tensor<100x64x220x220xi32>) -> tensor<100x64x220x220xi32> + return %1 : tensor<100x64x220x220xi32> +} + +// CHECK-LABEL: func @conv2d_channel_first_q( +// CHECK: %[[arg0:[a-zA-z0-9]*]]: tensor<100x3x224x224xi32>, %[[arg1:[a-zA-z0-9]*]]: tensor<64x3x5x5xi32>, %[[arg2:[a-zA-z0-9]*]]: i32, %[[arg3:[a-zA-z0-9]*]]: i32) +// CHECK: linalg.conv_2d_nchw_fchw_q {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%[[arg0]], %[[arg1]], %[[arg2]], %[[arg3]] : tensor<100x3x224x224xi32>, tensor<64x3x5x5xi32>, i32, i32) outs(%{{.*}} : tensor<100x64x220x220xi32>) -> tensor<100x64x220x220xi32> + +// ----- + +func.func @conv2d_channel_first_q_promote(%img: tensor<100x3x224x224xi8>, %filt: tensor<64x3x5x5xi8>, %a: i8, %b: i8) -> 
tensor<100x64x220x220xi32> { + %init = arith.constant dense<0> : tensor<100x64x220x220xi32> + %1 = linalg.conv_2d_nchw_fchw_q {dilations = dense<1> : tensor<2xi64>, + strides = dense<1> : tensor<2xi64>} + ins(%img, %filt, %a, %b : tensor<100x3x224x224xi8>, tensor<64x3x5x5xi8>, i8, i8) + outs(%init : tensor<100x64x220x220xi32>) -> tensor<100x64x220x220xi32> + return %1 : tensor<100x64x220x220xi32> +} + +// CHECK-LABEL: func @conv2d_channel_first_q_promote( +// CHECK: %[[arg0:[a-zA-z0-9]*]]: tensor<100x3x224x224xi8>, %[[arg1:[a-zA-z0-9]*]]: tensor<64x3x5x5xi8>, %[[arg2:[a-zA-z0-9]*]]: i8, %[[arg3:[a-zA-z0-9]*]]: i8) +// CHECK: linalg.conv_2d_nchw_fchw_q {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%[[arg0]], %[[arg1]], %[[arg2]], %[[arg3]] : tensor<100x3x224x224xi8>, tensor<64x3x5x5xi8>, i8, i8) outs(%{{.*}} : tensor<100x64x220x220xi32>) -> tensor<100x64x220x220xi32>