diff --git a/clang/lib/StaticAnalyzer/Checkers/StreamChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/StreamChecker.cpp index d079951221d082..c94cae04523980 100644 --- a/clang/lib/StaticAnalyzer/Checkers/StreamChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/StreamChecker.cpp @@ -108,10 +108,10 @@ struct StreamState { return StreamState{L, Opened, ES}; } static StreamState getClosed(const FnDescription *L) { - return StreamState{L, Closed}; + return StreamState{L, Closed, {}}; } static StreamState getOpenFailed(const FnDescription *L) { - return StreamState{L, OpenFailed}; + return StreamState{L, OpenFailed, {}}; } void Profile(llvm::FoldingSetNodeID &ID) const { diff --git a/lldb/test/API/functionalities/breakpoint/step_over_breakpoint/TestStepOverBreakpoint.py b/lldb/test/API/functionalities/breakpoint/step_over_breakpoint/TestStepOverBreakpoint.py index b20490f3cefdcc..931326b322911b 100644 --- a/lldb/test/API/functionalities/breakpoint/step_over_breakpoint/TestStepOverBreakpoint.py +++ b/lldb/test/API/functionalities/breakpoint/step_over_breakpoint/TestStepOverBreakpoint.py @@ -51,6 +51,7 @@ def setUp(self): self.thread = lldbutil.get_one_thread_stopped_at_breakpoint(self.process, self.breakpoint1) self.assertIsNotNone(self.thread, "Didn't stop at breakpoint 1.") + @skipIfReproducer def test_step_instruction(self): # Count instructions between breakpoint_1 and breakpoint_4 contextList = self.target.FindFunctions('main', lldb.eFunctionNameTypeAuto) diff --git a/lldb/test/API/functionalities/deleted-executable/TestDeletedExecutable.py b/lldb/test/API/functionalities/deleted-executable/TestDeletedExecutable.py index ed17d9b36b6b0f..78f3feae6ff637 100644 --- a/lldb/test/API/functionalities/deleted-executable/TestDeletedExecutable.py +++ b/lldb/test/API/functionalities/deleted-executable/TestDeletedExecutable.py @@ -20,6 +20,7 @@ class TestDeletedExecutable(TestBase): triple=no_match('aarch64-.*-android')) # determining the architecture of the process fails @expectedFailureNetBSD + @skipIfReproducer # File synchronization is not supported during replay. def test(self): self.build() exe = self.getBuildArtifact("a.out") diff --git a/lldb/test/API/functionalities/load_unload/TestLoadUnload.py b/lldb/test/API/functionalities/load_unload/TestLoadUnload.py index 7188fa32a154e9..e0013ccd93fa60 100644 --- a/lldb/test/API/functionalities/load_unload/TestLoadUnload.py +++ b/lldb/test/API/functionalities/load_unload/TestLoadUnload.py @@ -95,6 +95,7 @@ def setSvr4Support(self, enabled): @not_remote_testsuite_ready @skipIfWindows # Windows doesn't have dlopen and friends, dynamic libraries work differently @expectedFailureNetBSD + @skipIfReproducer # VFS is a snapshot. def test_modules_search_paths(self): """Test target modules list after loading a different copy of the library libd.dylib, and verifies that it works with 'target modules search-paths add'.""" if self.platformIsDarwin(): diff --git a/lldb/test/API/functionalities/postmortem/elf-core/TestLinuxCore.py b/lldb/test/API/functionalities/postmortem/elf-core/TestLinuxCore.py index 63bb02e5eb60f3..e0046f7108898e 100644 --- a/lldb/test/API/functionalities/postmortem/elf-core/TestLinuxCore.py +++ b/lldb/test/API/functionalities/postmortem/elf-core/TestLinuxCore.py @@ -38,29 +38,34 @@ class LinuxCoreTestCase(TestBase): @skipIf(triple='^mips') @skipIfLLVMTargetMissing("AArch64") + @skipIfReproducer # lldb::FileSP used in typemap cannot be instrumented. 
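The StreamChecker hunk at the top of this patch only adds an explicit `{}` for the trailing error-state member. A standalone analogue (not the checker's real type) of what that spelling buys: omitted trailing members of an aggregate are value-initialized anyway, so behavior is unchanged, but the explicit `{}` documents the intent and keeps missing-field-initializer warnings quiet on compilers that emit them.

struct State {
  const void *LastOp;   // stands in for the FnDescription pointer
  int Kind;             // stands in for the Opened/Closed/OpenFailed kind
  unsigned ErrorBits;   // stands in for the error-state member
};

State closedImplicit(const void *Op) { return State{Op, 1}; }     // ErrorBits == 0, but may warn
State closedExplicit(const void *Op) { return State{Op, 1, {}}; } // ErrorBits == 0, no warning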
def test_aarch64(self): """Test that lldb can read the process information from an aarch64 linux core file.""" self.do_test("linux-aarch64", self._aarch64_pid, self._aarch64_regions, "a.out") @skipIf(triple='^mips') @skipIfLLVMTargetMissing("X86") + @skipIfReproducer # lldb::FileSP used in typemap cannot be instrumented. def test_i386(self): """Test that lldb can read the process information from an i386 linux core file.""" self.do_test("linux-i386", self._i386_pid, self._i386_regions, "a.out") @skipIfLLVMTargetMissing("Mips") + @skipIfReproducer # lldb::FileSP used in typemap cannot be instrumented. def test_mips_o32(self): """Test that lldb can read the process information from an MIPS O32 linux core file.""" self.do_test("linux-mipsel-gnuabio32", self._mips_o32_pid, self._mips_regions, "linux-mipsel-gn") @skipIfLLVMTargetMissing("Mips") + @skipIfReproducer # lldb::FileSP used in typemap cannot be instrumented. def test_mips_n32(self): """Test that lldb can read the process information from an MIPS N32 linux core file """ self.do_test("linux-mips64el-gnuabin32", self._mips64_n32_pid, self._mips_regions, "linux-mips64el-") @skipIfLLVMTargetMissing("Mips") + @skipIfReproducer # lldb::FileSP used in typemap cannot be instrumented. def test_mips_n64(self): """Test that lldb can read the process information from an MIPS N64 linux core file """ self.do_test("linux-mips64el-gnuabi64", self._mips64_n64_pid, @@ -68,6 +73,7 @@ def test_mips_n64(self): @skipIf(triple='^mips') @skipIfLLVMTargetMissing("PowerPC") + @skipIfReproducer # lldb::FileSP used in typemap cannot be instrumented. def test_ppc64le(self): """Test that lldb can read the process information from an ppc64le linux core file.""" self.do_test("linux-ppc64le", self._ppc64le_pid, self._ppc64le_regions, @@ -75,6 +81,7 @@ def test_ppc64le(self): @skipIf(triple='^mips') @skipIfLLVMTargetMissing("X86") + @skipIfReproducer # lldb::FileSP used in typemap cannot be instrumented. def test_x86_64(self): """Test that lldb can read the process information from an x86_64 linux core file.""" self.do_test("linux-x86_64", self._x86_64_pid, self._x86_64_regions, @@ -82,6 +89,7 @@ def test_x86_64(self): @skipIf(triple='^mips') @skipIfLLVMTargetMissing("SystemZ") + @skipIfReproducer # lldb::FileSP used in typemap cannot be instrumented. def test_s390x(self): """Test that lldb can read the process information from an s390x linux core file.""" self.do_test("linux-s390x", self._s390x_pid, self._s390x_regions, @@ -89,6 +97,7 @@ def test_s390x(self): @skipIf(triple='^mips') @skipIfLLVMTargetMissing("X86") + @skipIfReproducer # lldb::FileSP used in typemap cannot be instrumented. def test_same_pid_running(self): """Test that we read the information from the core correctly even if we have a running process with the same PID around""" @@ -117,6 +126,7 @@ def test_same_pid_running(self): @skipIf(triple='^mips') @skipIfLLVMTargetMissing("X86") + @skipIfReproducer # lldb::FileSP used in typemap cannot be instrumented. def test_two_cores_same_pid(self): """Test that we handle the situation if we have two core files with the same PID around""" @@ -197,6 +207,7 @@ def test_FPR_SSE(self): @skipIf(triple='^mips') @skipIfLLVMTargetMissing("X86") + @skipIfReproducer # lldb::FileSP used in typemap cannot be instrumented. 
def test_i386_sysroot(self): """Test that lldb can find the exe for an i386 linux core file using the sysroot.""" @@ -221,6 +232,7 @@ def test_i386_sysroot(self): @skipIf(triple='^mips') @skipIfLLVMTargetMissing("X86") @skipIfWindows + @skipIfReproducer # lldb::FileSP used in typemap cannot be instrumented. def test_x86_64_sysroot(self): """Test that sysroot has more priority then local filesystem.""" diff --git a/lldb/test/API/functionalities/postmortem/netbsd-core/TestNetBSDCore.py b/lldb/test/API/functionalities/postmortem/netbsd-core/TestNetBSDCore.py index f967a57e4ea719..6ecd2673534474 100644 --- a/lldb/test/API/functionalities/postmortem/netbsd-core/TestNetBSDCore.py +++ b/lldb/test/API/functionalities/postmortem/netbsd-core/TestNetBSDCore.py @@ -159,11 +159,13 @@ def check_stack(self, process, pid, filename): self.check_backtrace(thread, filename, backtrace) @skipIfLLVMTargetMissing("AArch64") + @skipIfReproducer # lldb::FileSP used in typemap cannot be instrumented. def test_aarch64(self): """Test single-threaded aarch64 core dump.""" self.do_test("1lwp_SIGSEGV.aarch64", pid=8339, region_count=32) @skipIfLLVMTargetMissing("X86") + @skipIfReproducer # lldb::FileSP used in typemap cannot be instrumented. def test_amd64(self): """Test single-threaded amd64 core dump.""" self.do_test("1lwp_SIGSEGV.amd64", pid=693, region_count=21) @@ -189,11 +191,13 @@ def check_stack(self, process, pid, filename): self.assertEqual(thread.GetStopReasonDataAtIndex(0), 0) @skipIfLLVMTargetMissing("AArch64") + @skipIfReproducer # lldb::FileSP used in typemap cannot be instrumented. def test_aarch64(self): """Test double-threaded aarch64 core dump where thread 2 is signalled.""" self.do_test("2lwp_t2_SIGSEGV.aarch64", pid=14142, region_count=31) @skipIfLLVMTargetMissing("X86") + @skipIfReproducer # lldb::FileSP used in typemap cannot be instrumented. def test_amd64(self): """Test double-threaded amd64 core dump where thread 2 is signalled.""" self.do_test("2lwp_t2_SIGSEGV.amd64", pid=622, region_count=24) @@ -219,11 +223,13 @@ def check_stack(self, process, pid, filename): self.assertEqual(thread.GetStopReasonDataAtIndex(0), signal.SIGSEGV) @skipIfLLVMTargetMissing("AArch64") + @skipIfReproducer # lldb::FileSP used in typemap cannot be instrumented. def test_aarch64(self): """Test double-threaded aarch64 core dump where process is signalled.""" self.do_test("2lwp_process_SIGSEGV.aarch64", pid=1403, region_count=30) @skipIfLLVMTargetMissing("X86") + @skipIfReproducer # lldb::FileSP used in typemap cannot be instrumented. def test_amd64(self): """Test double-threaded amd64 core dump where process is signalled.""" self.do_test("2lwp_process_SIGSEGV.amd64", pid=665, region_count=24) diff --git a/llvm/include/llvm/Analysis/InlineAdvisor.h b/llvm/include/llvm/Analysis/InlineAdvisor.h index 115bca1d32192c..118fd236bee486 100644 --- a/llvm/include/llvm/Analysis/InlineAdvisor.h +++ b/llvm/include/llvm/Analysis/InlineAdvisor.h @@ -123,12 +123,12 @@ class InlineAdvisor { /// This must be called when the Inliner pass is entered, to allow the /// InlineAdvisor update internal state, as result of function passes run /// between Inliner pass runs (for the same module). - virtual void OnPassEntry() {} + virtual void onPassEntry() {} /// This must be called when the Inliner pass is exited, as function passes /// may be run subsequently. This allows an implementation of InlineAdvisor /// to prepare for a partial update. 
- virtual void OnPassExit() {} + virtual void onPassExit() {} protected: InlineAdvisor() = default; @@ -163,7 +163,7 @@ class DefaultInlineAdvisor : public InlineAdvisor { std::unique_ptr getAdvice(CallBase &CB, FunctionAnalysisManager &FAM) override; - void OnPassExit() override { freeDeletedFunctions(); } + void onPassExit() override { freeDeletedFunctions(); } InlineParams Params; }; diff --git a/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp b/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp index 64a8ff31624ce8..a0ed5eea065152 100644 --- a/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp +++ b/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp @@ -673,9 +673,9 @@ struct VectorInfo { ElementInfo *EI; /// Vector Type - VectorType *const VTy; + FixedVectorType *const VTy; - VectorInfo(VectorType *VTy) + VectorInfo(FixedVectorType *VTy) : BB(nullptr), PV(nullptr), LIs(), Is(), SVI(nullptr), VTy(VTy) { EI = new ElementInfo[VTy->getNumElements()]; } @@ -735,7 +735,7 @@ struct VectorInfo { if (!Op) return false; - VectorType *VTy = dyn_cast(Op->getType()); + FixedVectorType *VTy = dyn_cast(Op->getType()); if (!VTy) return false; @@ -785,8 +785,8 @@ struct VectorInfo { /// \returns false if no sensible information can be gathered. static bool computeFromSVI(ShuffleVectorInst *SVI, VectorInfo &Result, const DataLayout &DL) { - VectorType *ArgTy = dyn_cast(SVI->getOperand(0)->getType()); - assert(ArgTy && "ShuffleVector Operand is not a VectorType"); + FixedVectorType *ArgTy = + cast(SVI->getOperand(0)->getType()); // Compute the left hand vector information. VectorInfo LHS(ArgTy); @@ -1201,7 +1201,7 @@ bool InterleavedLoadCombineImpl::combine(std::list &InterleavedLoad, Type *ETy = InterleavedLoad.front().SVI->getType()->getElementType(); unsigned ElementsPerSVI = InterleavedLoad.front().SVI->getType()->getNumElements(); - VectorType *ILTy = VectorType::get(ETy, Factor * ElementsPerSVI); + FixedVectorType *ILTy = FixedVectorType::get(ETy, Factor * ElementsPerSVI); SmallVector Indices; for (unsigned i = 0; i < Factor; i++) @@ -1265,8 +1265,11 @@ bool InterleavedLoadCombineImpl::run() { for (BasicBlock &BB : F) { for (Instruction &I : BB) { if (auto SVI = dyn_cast(&I)) { + // We don't support scalable vectors in this pass. 
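The InlineAdvisor hunks above only lower-case the hook names, but the surrounding comments describe a real contract: onPassEntry() lets an advisor refresh state invalidated by function passes run between inliner invocations, and onPassExit() is the point to release anything deferred during the run, as DefaultInlineAdvisor does with freeDeletedFunctions(). A hypothetical subclass, sketched against the signatures visible in this header; MyInlineAdvisor is not part of the patch.

#include "llvm/Analysis/InlineAdvisor.h"
using namespace llvm;

class MyInlineAdvisor : public InlineAdvisor {
public:
  // Defined elsewhere in a real advisor; declared here only for the sketch.
  std::unique_ptr<InlineAdvice> getAdvice(CallBase &CB,
                                          FunctionAnalysisManager &FAM) override;

  void onPassEntry() override {
    // Re-sync any cached per-module or per-SCC state here.
  }
  void onPassExit() override {
    // Function passes may run next; drop deferred bookkeeping now.
    freeDeletedFunctions();
  }
};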
+ if (isa(SVI->getType())) + continue; - Candidates.emplace_back(SVI->getType()); + Candidates.emplace_back(cast(SVI->getType())); if (!VectorInfo::computeFromSVI(SVI, Candidates.back(), DL)) { Candidates.pop_back(); diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp index f0e81c56bc1aee..7229f1793a9dbe 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp @@ -1135,15 +1135,20 @@ bool AMDGPURegisterBankInfo::applyMappingWideLoad(MachineInstr &MI, MachineIRBuilder B(MI); - unsigned SplitElts = - MaxNonSmrdLoadSize / LoadTy.getScalarType().getSizeInBits(); - const LLT LoadSplitTy = LLT::vector(SplitElts, LoadTy.getScalarType()); + unsigned NumSplitParts = LoadTy.getSizeInBits() / MaxNonSmrdLoadSize; + const LLT LoadSplitTy = LoadTy.divide(NumSplitParts); ApplyRegBankMapping O(*this, MRI, &AMDGPU::VGPRRegBank); GISelObserverWrapper Observer(&O); B.setChangeObserver(Observer); LegalizerHelper Helper(B.getMF(), Observer, B); - if (Helper.fewerElementsVector(MI, 0, LoadSplitTy) != LegalizerHelper::Legalized) - return false; + + if (LoadTy.isVector()) { + if (Helper.fewerElementsVector(MI, 0, LoadSplitTy) != LegalizerHelper::Legalized) + return false; + } else { + if (Helper.narrowScalar(MI, 0, LoadSplitTy) != LegalizerHelper::Legalized) + return false; + } MRI.setRegBank(DstReg, AMDGPU::VGPRRegBank); return true; diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 5c0a67ef61a6f7..e99dec44c65b89 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -4712,13 +4712,6 @@ SDValue SITargetLowering::LowerBRCOND(SDValue BRCOND, Target = BR->getOperand(1); } - // FIXME: This changes the types of the intrinsics instead of introducing new - // nodes with the correct types. - // e.g. llvm.amdgcn.loop - - // eg: i1,ch = llvm.amdgcn.loop t0, TargetConstant:i32<6271>, t3 - // => t9: ch = llvm.amdgcn.loop t0, TargetConstant:i32<6271>, t3, BasicBlock:ch - unsigned CFNode = isCFIntrinsic(Intr); if (CFNode == 0) { // This is a uniform branch so we don't need to legalize. diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp index 0ce6f317722438..7a8b5249255f9c 100644 --- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -4474,7 +4474,8 @@ static SDValue getGeneralPermuteNode(SelectionDAG &DAG, const SDLoc &DL, else IndexNodes[I] = DAG.getUNDEF(MVT::i32); SDValue Op2 = DAG.getBuildVector(MVT::v16i8, DL, IndexNodes); - return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Ops[0], Ops[1], Op2); + return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Ops[0], + (!Ops[1].isUndef() ? Ops[1] : Ops[0]), Op2); } namespace { diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyDebugFixup.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyDebugFixup.cpp index 8f1f77e23b8e31..655e30a29eff45 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyDebugFixup.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyDebugFixup.cpp @@ -86,9 +86,9 @@ bool WebAssemblyDebugFixup::runOnMachineFunction(MachineFunction &MF) { // Search for register rather than assume it is on top (which it // typically is if it appears right after the def), since // DBG_VALUE's may shift under some circumstances. 
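For the applyMappingWideLoad change above, the split type is now derived by dividing the whole load type instead of rebuilding a vector from its scalar element, which is what makes the scalar (narrowScalar) path possible at all. A worked example, assuming the 128-bit non-SMRD load limit used by the surrounding code (the constant is restated here only for illustration):

#include "llvm/Support/LowLevelTypeImpl.h"
using namespace llvm;

void wideLoadSplitExamples() {
  const unsigned MaxNonSmrdLoadSize = 128; // assumption for this example

  // Vector case: <8 x s32> is 256 bits, so two parts of <4 x s32>,
  // handled by fewerElementsVector.
  LLT V8S32 = LLT::vector(8, 32);
  LLT VecPart = V8S32.divide(V8S32.getSizeInBits() / MaxNonSmrdLoadSize);

  // Scalar case: an s256 load also splits into two parts, but of type s128,
  // which only narrowScalar can produce (see the new i256 regbankselect test
  // added later in this patch).
  LLT S256 = LLT::scalar(256);
  LLT ScalarPart = S256.divide(S256.getSizeInBits() / MaxNonSmrdLoadSize);

  (void)VecPart;    // <4 x s32>
  (void)ScalarPart; // s128
}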
- size_t Depth = 0; - for (auto &Elem : Stack) { + for (auto &Elem : reverse(Stack)) { if (MO.getReg() == Elem.Reg) { + auto Depth = static_cast(&Elem - &Stack[0]); LLVM_DEBUG(dbgs() << "Debug Value VReg " << MO.getReg() << " -> Stack Relative " << Depth << "\n"); MO.ChangeToTargetIndex(WebAssembly::TI_OPERAND_STACK, Depth); @@ -98,7 +98,6 @@ bool WebAssemblyDebugFixup::runOnMachineFunction(MachineFunction &MF) { Elem.DebugValue = &MI; break; } - Depth++; } // If the Reg was not found, we have a DBG_VALUE outside of its // def-use range, and we leave it unmodified as reg, which means diff --git a/llvm/lib/Transforms/IPO/Inliner.cpp b/llvm/lib/Transforms/IPO/Inliner.cpp index 770ca2ea913094..862385d044815b 100644 --- a/llvm/lib/Transforms/IPO/Inliner.cpp +++ b/llvm/lib/Transforms/IPO/Inliner.cpp @@ -696,9 +696,9 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC, ProfileSummaryInfo *PSI = MAMProxy.getCachedResult(M); InlineAdvisor &Advisor = getAdvisor(MAMProxy, M); - Advisor.OnPassEntry(); + Advisor.onPassEntry(); - auto AdvisorOnExit = make_scope_exit([&] { Advisor.OnPassExit(); }); + auto AdvisorOnExit = make_scope_exit([&] { Advisor.onPassExit(); }); if (!ImportedFunctionsStats && InlinerFunctionImportStats != InlinerFunctionImportStatsOpts::No) { @@ -808,7 +808,7 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC, return FAM.getResult(F); }; - // Now process as many calls as we have within this caller in the sequnece. + // Now process as many calls as we have within this caller in the sequence. // We bail out as soon as the caller has to change so we can update the // call graph and prepare the context of that new caller. bool DidInline = false; diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp index ae4ef97b2fd0f6..545413c1fe035c 100644 --- a/llvm/lib/Transforms/Utils/Local.cpp +++ b/llvm/lib/Transforms/Utils/Local.cpp @@ -1698,13 +1698,14 @@ DIExpression *llvm::salvageDebugInfoImpl(Instruction &I, }; if (auto *CI = dyn_cast(&I)) { - // No-op casts and zexts are irrelevant for debug info. - if (CI->isNoopCast(DL) || isa(&I)) + // No-op casts are irrelevant for debug info. + if (CI->isNoopCast(DL)) return SrcDIExpr; Type *Type = CI->getType(); - // Casts other than Trunc or SExt to scalar types cannot be salvaged. - if (Type->isVectorTy() || (!isa(&I) && !isa(&I))) + // Casts other than Trunc, SExt, or ZExt to scalar types cannot be salvaged. 
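The WebAssemblyDebugFixup hunk above switches to scanning the value stack from the top while still reporting the depth relative to the bottom, recovering the index from the element's address rather than keeping a running counter. A small self-contained illustration of that idiom; StackElem and findDepthFromBottom are invented for the sketch, not names from the pass.

#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"

struct StackElem { unsigned Reg; };

// Returns the matching element's distance from the bottom of the stack,
// searching top-down so the most recently pushed match wins.
static unsigned findDepthFromBottom(llvm::SmallVectorImpl<StackElem> &Stack,
                                    unsigned Reg) {
  for (auto &Elem : llvm::reverse(Stack))
    if (Elem.Reg == Reg)
      return static_cast<unsigned>(&Elem - &Stack[0]);
  return ~0u; // not found
}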
+ if (Type->isVectorTy() || + !(isa(&I) || isa(&I) || isa(&I))) return nullptr; Value *FromValue = CI->getOperand(0); diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-load.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-load.mir index 998094d622dac0..53302f9554e392 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-load.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-load.mir @@ -1,3 +1,4 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -mtriple=amdgcn-amd-amdhsa -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck %s # RUN: llc -mtriple=amdgcn-amd-amdhsa -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-greedy | FileCheck %s @@ -8,6 +9,7 @@ %tmp2 = load <8 x i32>, <8 x i32> addrspace(1)* %global.not.uniform.v8i32 ret void } + define amdgpu_kernel void @load_global_v4i64_non_uniform(<4 x i64> addrspace(1)* %in) { %tmp0 = call i32 @llvm.amdgcn.workitem.id.x() #0 %global.not.uniform.v4i64 = getelementptr <4 x i64>, <4 x i64> addrspace(1)* %in, i32 %tmp0 @@ -36,6 +38,21 @@ %tmp2 = load <8 x i32>, <8 x i32> addrspace(4)* %constant.not.uniform.v8i32 ret void } + + define amdgpu_kernel void @load_constant_i256_non_uniform(i256 addrspace(4)* %in) { + %tmp0 = call i32 @llvm.amdgcn.workitem.id.x() #0 + %constant.not.uniform = getelementptr i256, i256 addrspace(4)* %in, i32 %tmp0 + %tmp2 = load i256, i256 addrspace(4)* %constant.not.uniform + ret void + } + + define amdgpu_kernel void @load_constant_v16i16_non_uniform(<16 x i16> addrspace(4)* %in) { + %tmp0 = call i32 @llvm.amdgcn.workitem.id.x() #0 + %constant.not.uniform = getelementptr <16 x i16>, <16 x i16> addrspace(4)* %in, i32 %tmp0 + %tmp2 = load <16 x i16>, <16 x i16> addrspace(4)* %constant.not.uniform + ret void + } + define amdgpu_kernel void @load_constant_v4i64_non_uniform(<4 x i64> addrspace(4)* %in) { %tmp0 = call i32 @llvm.amdgcn.workitem.id.x() #0 %constant.not.uniform.v4i64 = getelementptr <4 x i64>, <4 x i64> addrspace(4)* %in, i32 %tmp0 @@ -56,6 +73,7 @@ } define amdgpu_kernel void @load_constant_v8i32_uniform() {ret void} + define amdgpu_kernel void @load_constant_v16i16_uniform() {ret void} define amdgpu_kernel void @load_constant_v4i64_uniform() {ret void} define amdgpu_kernel void @load_constant_v16i32_uniform() {ret void} define amdgpu_kernel void @load_constant_v8i64_uniform() {ret void} @@ -84,12 +102,13 @@ body: | bb.0: liveins: $sgpr0_sgpr1 ; CHECK-LABEL: name: load_global_v8i32_non_uniform - ; CHECK: [[PTR:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; CHECK: [[LOAD0:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[PTR]](p1) :: (load 16 from %ir.global.not.uniform.v8i32, align 32, addrspace 1) - ; CHECK: [[OFFSET16:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16 - ; CHECK: [[GEP16:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[PTR]], [[OFFSET16]](s64) - ; CHECK: [[LOAD16:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[GEP16]](p1) :: (load 16 from %ir.global.not.uniform.v8i32 + 16, align 32, addrspace 1) - ; CHECK: %1:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[LOAD0]](<4 x s32>), [[LOAD16]](<4 x s32>) + ; CHECK: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) + ; CHECK: [[LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16 from %ir.global.not.uniform.v8i32, align 32, addrspace 1) + ; CHECK: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16 + ; CHECK: [[PTR_ADD:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[COPY]], [[C]](s64) + ; CHECK: [[LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD 
[[PTR_ADD]](p1) :: (load 16 from %ir.global.not.uniform.v8i32 + 16, align 32, addrspace 1) + ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>) %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(<8 x s32>) = G_LOAD %0 :: (load 32 from %ir.global.not.uniform.v8i32) ... @@ -101,13 +120,15 @@ legalized: true body: | bb.0: liveins: $sgpr0_sgpr1 - ; CHECK: [[PTR:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; CHECK: [[LOAD0:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[PTR]](p1) :: (load 16 from %ir.global.not.uniform.v4i64, align 32, addrspace 1) - ; CHECK: [[OFFSET16:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16 - ; CHECK: [[GEP16:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[PTR]], [[OFFSET16]](s64) - ; CHECK: [[LOAD16:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[GEP16]](p1) :: (load 16 from %ir.global.not.uniform.v4i64 + 16, align 32, addrspace 1) - ; CHECK: %1:vgpr(<4 x s64>) = G_CONCAT_VECTORS [[LOAD0]](<2 x s64>), [[LOAD16]](<2 x s64>) + ; CHECK-LABEL: name: load_global_v4i64_non_uniform + ; CHECK: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) + ; CHECK: [[LOAD:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load 16 from %ir.global.not.uniform.v4i64, align 32, addrspace 1) + ; CHECK: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16 + ; CHECK: [[PTR_ADD:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[COPY]], [[C]](s64) + ; CHECK: [[LOAD1:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[PTR_ADD]](p1) :: (load 16 from %ir.global.not.uniform.v4i64 + 16, align 32, addrspace 1) + ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x s64>) = G_CONCAT_VECTORS [[LOAD]](<2 x s64>), [[LOAD1]](<2 x s64>) %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(<4 x s64>) = G_LOAD %0 :: (load 32 from %ir.global.not.uniform.v4i64) ... @@ -120,18 +141,19 @@ body: | bb.0: liveins: $sgpr0_sgpr1 ; CHECK-LABEL: name: load_global_v16i32_non_uniform - ; CHECK: [[PTR:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; CHECK: [[LOAD0:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[PTR]](p1) :: (load 16 from %ir.global.not.uniform.v16i32, align 64, addrspace 1) - ; CHECK: [[OFFSET16:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16 - ; CHECK: [[GEP16:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[PTR]], [[OFFSET16]](s64) - ; CHECK: [[LOAD16:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[GEP16]](p1) :: (load 16 from %ir.global.not.uniform.v16i32 + 16, align 64, addrspace 1) - ; CHECK: [[OFFSET32:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 32 - ; CHECK: [[GEP32:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[PTR]], [[OFFSET32]](s64) - ; CHECK: [[LOAD32:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[GEP32]](p1) :: (load 16 from %ir.global.not.uniform.v16i32 + 32, align 64, addrspace 1) - ; CHECK: [[OFFSET48:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 48 - ; CHECK: [[GEP48:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[PTR]], [[OFFSET48]](s64) - ; CHECK: [[LOAD48:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[GEP48]](p1) :: (load 16 from %ir.global.not.uniform.v16i32 + 48, align 64, addrspace 1) - ; CHECK: %1:vgpr(<16 x s32>) = G_CONCAT_VECTORS [[LOAD0]](<4 x s32>), [[LOAD16]](<4 x s32>), [[LOAD32]](<4 x s32>), [[LOAD48]](<4 x s32>) + ; CHECK: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) + ; CHECK: [[LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16 from %ir.global.not.uniform.v16i32, align 64, addrspace 1) + ; CHECK: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16 + ; CHECK: [[PTR_ADD:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[COPY]], [[C]](s64) + ; CHECK: [[LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load 16 from %ir.global.not.uniform.v16i32 + 16, 
align 64, addrspace 1) + ; CHECK: [[C1:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 32 + ; CHECK: [[PTR_ADD1:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; CHECK: [[LOAD2:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[PTR_ADD1]](p1) :: (load 16 from %ir.global.not.uniform.v16i32 + 32, align 64, addrspace 1) + ; CHECK: [[C2:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 48 + ; CHECK: [[PTR_ADD2:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; CHECK: [[LOAD3:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[PTR_ADD2]](p1) :: (load 16 from %ir.global.not.uniform.v16i32 + 48, align 64, addrspace 1) + ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<16 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>), [[LOAD2]](<4 x s32>), [[LOAD3]](<4 x s32>) %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(<16 x s32>) = G_LOAD %0 :: (load 64 from %ir.global.not.uniform.v16i32) ... @@ -167,7 +189,8 @@ body: | bb.0: liveins: $sgpr0_sgpr1 ; CHECK-LABEL: name: load_global_v8i32_uniform - ; CHECK: (<8 x s32>) = G_LOAD %0(p1) :: (invariant load 32, addrspace 1) + ; CHECK: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 + ; CHECK: [[LOAD:%[0-9]+]]:sgpr(<8 x s32>) = G_LOAD [[COPY]](p1) :: (invariant load 32, addrspace 1) %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(<8 x s32>) = G_LOAD %0 :: (invariant load 32, addrspace 1) ... @@ -180,7 +203,8 @@ body: | bb.0: liveins: $sgpr0_sgpr1 ; CHECK-LABEL: name: load_global_v4i64_uniform - ; CHECK: (<4 x s64>) = G_LOAD %0(p1) :: (invariant load 32, addrspace 1) + ; CHECK: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 + ; CHECK: [[LOAD:%[0-9]+]]:sgpr(<4 x s64>) = G_LOAD [[COPY]](p1) :: (invariant load 32, addrspace 1) %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(<4 x s64>) = G_LOAD %0 :: (invariant load 32, addrspace 1) ... @@ -193,7 +217,8 @@ body: | bb.0: liveins: $sgpr0_sgpr1 ; CHECK-LABEL: name: load_global_v16i32_uniform - ; CHECK: (<16 x s32>) = G_LOAD %0(p1) :: (invariant load 64, addrspace 1) + ; CHECK: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 + ; CHECK: [[LOAD:%[0-9]+]]:sgpr(<16 x s32>) = G_LOAD [[COPY]](p1) :: (invariant load 64, addrspace 1) %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(<16 x s32>) = G_LOAD %0 :: (invariant load 64, addrspace 1) ... @@ -206,7 +231,8 @@ body: | bb.0: liveins: $sgpr0_sgpr1 ; CHECK-LABEL: name: load_global_v8i64_uniform - ; CHECK: (<8 x s64>) = G_LOAD %0(p1) :: (invariant load 64, addrspace 1) + ; CHECK: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 + ; CHECK: [[LOAD:%[0-9]+]]:sgpr(<8 x s64>) = G_LOAD [[COPY]](p1) :: (invariant load 64, addrspace 1) %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(<8 x s64>) = G_LOAD %0 :: (invariant load 64, addrspace 1) ... 
@@ -219,16 +245,56 @@ body: | bb.0: liveins: $sgpr0_sgpr1 ; CHECK-LABEL: name: load_constant_v8i32_non_uniform - ; CHECK: [[PTR:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; CHECK: [[LOAD0:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[PTR]](p4) :: (load 16 from %ir.constant.not.uniform.v8i32, align 32, addrspace 4) - ; CHECK: [[OFFSET16:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16 - ; CHECK: [[GEP16:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[PTR]], [[OFFSET16]](s64) - ; CHECK: [[LOAD16:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[GEP16]](p4) :: (load 16 from %ir.constant.not.uniform.v8i32 + 16, align 32, addrspace 4) - ; CHECK: %1:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[LOAD0]](<4 x s32>), [[LOAD16]](<4 x s32>) + ; CHECK: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4) + ; CHECK: [[LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load 16 from %ir.constant.not.uniform.v8i32, align 32, addrspace 4) + ; CHECK: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16 + ; CHECK: [[PTR_ADD:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; CHECK: [[LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[PTR_ADD]](p4) :: (load 16 from %ir.constant.not.uniform.v8i32 + 16, align 32, addrspace 4) + ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>) %0:_(p4) = COPY $sgpr0_sgpr1 %1:_(<8 x s32>) = G_LOAD %0 :: (load 32 from %ir.constant.not.uniform.v8i32) ... +--- +name: load_constant_i256_non_uniform +legalized: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1 + ; CHECK-LABEL: name: load_constant_i256_non_uniform + ; CHECK: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4) + ; CHECK: [[LOAD:%[0-9]+]]:vgpr(s128) = G_LOAD [[COPY]](p4) :: (load 16 from %ir.constant.not.uniform, align 32, addrspace 4) + ; CHECK: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16 + ; CHECK: [[PTR_ADD:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; CHECK: [[LOAD1:%[0-9]+]]:vgpr(s128) = G_LOAD [[PTR_ADD]](p4) :: (load 16 from %ir.constant.not.uniform + 16, align 32, addrspace 4) + ; CHECK: [[MV:%[0-9]+]]:vgpr(s256) = G_MERGE_VALUES [[LOAD]](s128), [[LOAD1]](s128) + %0:_(p4) = COPY $sgpr0_sgpr1 + %1:_(s256) = G_LOAD %0 :: (load 32 from %ir.constant.not.uniform) +... + +--- +name: load_constant_v16i16_non_uniform +legalized: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1 + + ; CHECK-LABEL: name: load_constant_v16i16_non_uniform + ; CHECK: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4) + ; CHECK: [[LOAD:%[0-9]+]]:vgpr(<8 x s16>) = G_LOAD [[COPY]](p4) :: (load 16 from %ir.constant.not.uniform, align 32, addrspace 4) + ; CHECK: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16 + ; CHECK: [[PTR_ADD:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; CHECK: [[LOAD1:%[0-9]+]]:vgpr(<8 x s16>) = G_LOAD [[PTR_ADD]](p4) :: (load 16 from %ir.constant.not.uniform + 16, align 32, addrspace 4) + ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<16 x s16>) = G_CONCAT_VECTORS [[LOAD]](<8 x s16>), [[LOAD1]](<8 x s16>) + %0:_(p4) = COPY $sgpr0_sgpr1 + %1:_(<16 x s16>) = G_LOAD %0 :: (load 32 from %ir.constant.not.uniform) +... 
+ --- name: load_constant_v4i64_non_uniform legalized: true @@ -237,12 +303,13 @@ body: | bb.0: liveins: $sgpr0_sgpr1 ; CHECK-LABEL: name: load_constant_v4i64_non_uniform - ; CHECK: [[PTR:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; CHECK: [[LOAD0:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[PTR]](p4) :: (load 16 from %ir.constant.not.uniform.v4i64, align 32, addrspace 4) - ; CHECK: [[OFFSET16:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16 - ; CHECK: [[GEP16:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[PTR]], [[OFFSET16]](s64) - ; CHECK: [[LOAD16:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[GEP16]](p4) :: (load 16 from %ir.constant.not.uniform.v4i64 + 16, align 32, addrspace 4) - ; CHECK: %1:vgpr(<4 x s64>) = G_CONCAT_VECTORS [[LOAD0]](<2 x s64>), [[LOAD16]](<2 x s64>) + ; CHECK: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4) + ; CHECK: [[LOAD:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[COPY]](p4) :: (load 16 from %ir.constant.not.uniform.v4i64, align 32, addrspace 4) + ; CHECK: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16 + ; CHECK: [[PTR_ADD:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; CHECK: [[LOAD1:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[PTR_ADD]](p4) :: (load 16 from %ir.constant.not.uniform.v4i64 + 16, align 32, addrspace 4) + ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x s64>) = G_CONCAT_VECTORS [[LOAD]](<2 x s64>), [[LOAD1]](<2 x s64>) %0:_(p4) = COPY $sgpr0_sgpr1 %1:_(<4 x s64>) = G_LOAD %0 :: (load 32 from %ir.constant.not.uniform.v4i64) ... @@ -255,18 +322,19 @@ body: | bb.0: liveins: $sgpr0_sgpr1 ; CHECK-LABEL: name: load_constant_v16i32_non_uniform - ; CHECK: [[PTR:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; CHECK: [[LOAD0:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[PTR]](p4) :: (load 16 from %ir.constant.not.uniform.v16i32, align 64, addrspace 4) - ; CHECK: [[OFFSET16:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16 - ; CHECK: [[GEP16:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[PTR]], [[OFFSET16]](s64) - ; CHECK: [[LOAD16:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[GEP16]](p4) :: (load 16 from %ir.constant.not.uniform.v16i32 + 16, align 64, addrspace 4) - ; CHECK: [[OFFSET32:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 32 - ; CHECK: [[GEP32:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[PTR]], [[OFFSET32]](s64) - ; CHECK: [[LOAD32:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[GEP32]](p4) :: (load 16 from %ir.constant.not.uniform.v16i32 + 32, align 64, addrspace 4) - ; CHECK: [[OFFSET48:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 48 - ; CHECK: [[GEP48:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[PTR]], [[OFFSET48]](s64) - ; CHECK: [[LOAD48:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[GEP48]](p4) :: (load 16 from %ir.constant.not.uniform.v16i32 + 48, align 64, addrspace 4) - ; CHECK: %1:vgpr(<16 x s32>) = G_CONCAT_VECTORS [[LOAD0]](<4 x s32>), [[LOAD16]](<4 x s32>), [[LOAD32]](<4 x s32>), [[LOAD48]](<4 x s32>) + ; CHECK: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4) + ; CHECK: [[LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load 16 from %ir.constant.not.uniform.v16i32, align 64, addrspace 4) + ; CHECK: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16 + ; CHECK: [[PTR_ADD:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; CHECK: [[LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[PTR_ADD]](p4) :: (load 16 from %ir.constant.not.uniform.v16i32 + 16, align 64, addrspace 4) + ; CHECK: [[C1:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 32 + ; CHECK: [[PTR_ADD1:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; CHECK: [[LOAD2:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (load 16 from 
%ir.constant.not.uniform.v16i32 + 32, align 64, addrspace 4) + ; CHECK: [[C2:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 48 + ; CHECK: [[PTR_ADD2:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; CHECK: [[LOAD3:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[PTR_ADD2]](p4) :: (load 16 from %ir.constant.not.uniform.v16i32 + 48, align 64, addrspace 4) + ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<16 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>), [[LOAD2]](<4 x s32>), [[LOAD3]](<4 x s32>) %0:_(p4) = COPY $sgpr0_sgpr1 %1:_(<16 x s32>) = G_LOAD %0 :: (load 64 from %ir.constant.not.uniform.v16i32) ... @@ -279,18 +347,19 @@ body: | bb.0: liveins: $sgpr0_sgpr1 ; CHECK-LABEL: name: load_constant_v8i64_non_uniform - ; CHECK: [[PTR:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; CHECK: [[LOAD0:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[PTR]](p4) :: (load 16 from %ir.constant.not.uniform.v8i64, align 64, addrspace 4) - ; CHECK: [[OFFSET16:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16 - ; CHECK: [[GEP16:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[PTR]], [[OFFSET16]](s64) - ; CHECK: [[LOAD16:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[GEP16]](p4) :: (load 16 from %ir.constant.not.uniform.v8i64 + 16, align 64, addrspace 4) - ; CHECK: [[OFFSET32:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 32 - ; CHECK: [[GEP32:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[PTR]], [[OFFSET32]](s64) - ; CHECK: [[LOAD32:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[GEP32]](p4) :: (load 16 from %ir.constant.not.uniform.v8i64 + 32, align 64, addrspace 4) - ; CHECK: [[OFFSET48:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 48 - ; CHECK: [[GEP48:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[PTR]], [[OFFSET48]](s64) - ; CHECK: [[LOAD48:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[GEP48]](p4) :: (load 16 from %ir.constant.not.uniform.v8i64 + 48, align 64, addrspace 4) - ; CHECK: %1:vgpr(<8 x s64>) = G_CONCAT_VECTORS [[LOAD0]](<2 x s64>), [[LOAD16]](<2 x s64>), [[LOAD32]](<2 x s64>), [[LOAD48]](<2 x s64>) + ; CHECK: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4) + ; CHECK: [[LOAD:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[COPY]](p4) :: (load 16 from %ir.constant.not.uniform.v8i64, align 64, addrspace 4) + ; CHECK: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16 + ; CHECK: [[PTR_ADD:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; CHECK: [[LOAD1:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[PTR_ADD]](p4) :: (load 16 from %ir.constant.not.uniform.v8i64 + 16, align 64, addrspace 4) + ; CHECK: [[C1:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 32 + ; CHECK: [[PTR_ADD1:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; CHECK: [[LOAD2:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[PTR_ADD1]](p4) :: (load 16 from %ir.constant.not.uniform.v8i64 + 32, align 64, addrspace 4) + ; CHECK: [[C2:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 48 + ; CHECK: [[PTR_ADD2:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; CHECK: [[LOAD3:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[PTR_ADD2]](p4) :: (load 16 from %ir.constant.not.uniform.v8i64 + 48, align 64, addrspace 4) + ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s64>) = G_CONCAT_VECTORS [[LOAD]](<2 x s64>), [[LOAD1]](<2 x s64>), [[LOAD2]](<2 x s64>), [[LOAD3]](<2 x s64>) %0:_(p4) = COPY $sgpr0_sgpr1 %1:_(<8 x s64>) = G_LOAD %0 :: (load 64 from %ir.constant.not.uniform.v8i64) ... 
@@ -303,11 +372,26 @@ body: | bb.0: liveins: $sgpr0_sgpr1 ; CHECK-LABEL: name: load_constant_v8i32_uniform - ; CHECK: (<8 x s32>) = G_LOAD %0(p4) :: (load 32, addrspace 4) + ; CHECK: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 + ; CHECK: [[LOAD:%[0-9]+]]:sgpr(<8 x s32>) = G_LOAD [[COPY]](p4) :: (load 32, addrspace 4) %0:_(p4) = COPY $sgpr0_sgpr1 %1:_(<8 x s32>) = G_LOAD %0 :: (load 32, addrspace 4) ... +--- +name: load_constant_v16i16_uniform +legalized: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1 + ; CHECK-LABEL: name: load_constant_v16i16_uniform + ; CHECK: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 + ; CHECK: [[LOAD:%[0-9]+]]:sgpr(<16 x s16>) = G_LOAD [[COPY]](p4) :: (load 32, addrspace 4) + %0:_(p4) = COPY $sgpr0_sgpr1 + %1:_(<16 x s16>) = G_LOAD %0 :: (load 32, addrspace 4) +... + --- name: load_constant_v4i64_uniform legalized: true @@ -316,7 +400,8 @@ body: | bb.0: liveins: $sgpr0_sgpr1 ; CHECK-LABEL: name: load_constant_v4i64_uniform - ; CHECK: (<4 x s64>) = G_LOAD %0(p4) :: (load 32, addrspace 4) + ; CHECK: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 + ; CHECK: [[LOAD:%[0-9]+]]:sgpr(<4 x s64>) = G_LOAD [[COPY]](p4) :: (load 32, addrspace 4) %0:_(p4) = COPY $sgpr0_sgpr1 %1:_(<4 x s64>) = G_LOAD %0 :: (load 32, addrspace 4) ... @@ -329,7 +414,8 @@ body: | bb.0: liveins: $sgpr0_sgpr1 ; CHECK-LABEL: name: load_constant_v16i32_uniform - ; CHECK: (<16 x s32>) = G_LOAD %0(p4) :: (load 64, addrspace 4) + ; CHECK: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 + ; CHECK: [[LOAD:%[0-9]+]]:sgpr(<16 x s32>) = G_LOAD [[COPY]](p4) :: (load 64, addrspace 4) %0:_(p4) = COPY $sgpr0_sgpr1 %1:_(<16 x s32>) = G_LOAD %0 :: (load 64, addrspace 4) ... @@ -342,7 +428,8 @@ body: | bb.0: liveins: $sgpr0_sgpr1 ; CHECK-LABEL: name: load_constant_v8i64_uniform - ; CHECK: (<8 x s64>) = G_LOAD %0(p4) :: (load 64, addrspace 4) + ; CHECK: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 + ; CHECK: [[LOAD:%[0-9]+]]:sgpr(<8 x s64>) = G_LOAD [[COPY]](p4) :: (load 64, addrspace 4) %0:_(p4) = COPY $sgpr0_sgpr1 %1:_(<8 x s64>) = G_LOAD %0 :: (load 64, addrspace 4) ... 
@@ -353,11 +440,11 @@ legalized: true body: | bb.0: liveins: $sgpr0 - ; CHECK-LABEL: load_local_uniform - ; CHECK: %0:sgpr(p3) = COPY $sgpr0 - ; CHECK: %2:vgpr(p3) = COPY %0(p3) - ; CHECK: %1:vgpr(s32) = G_LOAD %2(p3) :: (load 4, addrspace 3) + ; CHECK-LABEL: name: load_local_uniform + ; CHECK: [[COPY:%[0-9]+]]:sgpr(p3) = COPY $sgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3) + ; CHECK: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p3) :: (load 4, addrspace 3) %0:_(p3) = COPY $sgpr0 %1:_(s32) = G_LOAD %0 :: (load 4, addrspace 3) @@ -368,11 +455,11 @@ legalized: true body: | bb.0: liveins: $sgpr0 - ; CHECK-LABEL: load_region_uniform - ; CHECK: %0:sgpr(p3) = COPY $sgpr0 - ; CHECK: %2:vgpr(p3) = COPY %0(p3) - ; CHECK: %1:vgpr(s32) = G_LOAD %2(p3) :: (load 4, addrspace 5) + ; CHECK-LABEL: name: load_region_uniform + ; CHECK: [[COPY:%[0-9]+]]:sgpr(p3) = COPY $sgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3) + ; CHECK: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p3) :: (load 4, addrspace 5) %0:_(p3) = COPY $sgpr0 %1:_(s32) = G_LOAD %0 :: (load 4, addrspace 5) @@ -386,9 +473,9 @@ body: | bb.0: liveins: $sgpr0_sgpr1 ; CHECK-LABEL: name: extload_constant_i8_to_i32_uniform - ; CHECK: %0:sgpr(p4) = COPY $sgpr0_sgpr1 - ; CHECK: %2:vgpr(p4) = COPY %0(p4) - ; CHECK: %1:vgpr(s32) = G_LOAD %2(p4) :: (load 1, addrspace 4) + ; CHECK: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4) + ; CHECK: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p4) :: (load 1, addrspace 4) %0:_(p4) = COPY $sgpr0_sgpr1 %1:_(s32) = G_LOAD %0 :: (load 1, addrspace 4, align 1) ... @@ -401,10 +488,10 @@ body: | bb.0: liveins: $sgpr0_sgpr1 - ; CHECK-LABEL: name: extload_global_i8_to_i32_uniform{{$}} - ; CHECK: %0:sgpr(p4) = COPY $sgpr0_sgpr1 - ; CHECK: %2:vgpr(p4) = COPY %0(p4) - ; CHECK: %1:vgpr(s32) = G_LOAD %2(p4) :: (load 1, addrspace 1) + ; CHECK-LABEL: name: extload_global_i8_to_i32_uniform + ; CHECK: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4) + ; CHECK: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p4) :: (load 1, addrspace 1) %0:_(p4) = COPY $sgpr0_sgpr1 %1:_(s32) = G_LOAD %0 :: (load 1, addrspace 1, align 1) ... @@ -416,11 +503,11 @@ legalized: true body: | bb.0: liveins: $sgpr0_sgpr1 - ; CHECK-LABEL: name: extload_constant_i16_to_i32_uniform - ; CHECK: %0:sgpr(p4) = COPY $sgpr0_sgpr1 - ; CHECK: %2:vgpr(p4) = COPY %0(p4) - ; CHECK: %1:vgpr(s32) = G_LOAD %2(p4) :: (load 2, addrspace 4) + ; CHECK-LABEL: name: extload_constant_i16_to_i32_uniform + ; CHECK: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4) + ; CHECK: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p4) :: (load 2, addrspace 4) %0:_(p4) = COPY $sgpr0_sgpr1 %1:_(s32) = G_LOAD %0 :: (load 2, addrspace 4, align 2) ... @@ -432,11 +519,11 @@ legalized: true body: | bb.0: liveins: $sgpr0_sgpr1 - ; CHECK-LABEL: name: extload_global_i16_to_i32_uniform - ; CHECK: %0:sgpr(p4) = COPY $sgpr0_sgpr1 - ; CHECK: %2:vgpr(p4) = COPY %0(p4) - ; CHECK: %1:vgpr(s32) = G_LOAD %2(p4) :: (load 2, addrspace 1) + ; CHECK-LABEL: name: extload_global_i16_to_i32_uniform + ; CHECK: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4) + ; CHECK: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p4) :: (load 2, addrspace 1) %0:_(p4) = COPY $sgpr0_sgpr1 %1:_(s32) = G_LOAD %0 :: (load 2, addrspace 1, align 2) ... 
@@ -449,8 +536,8 @@ body: | bb.0: liveins: $sgpr0_sgpr1 ; CHECK-LABEL: name: load_constant_i32_uniform_align4 - ; CHECK: %0:sgpr(p4) = COPY $sgpr0_sgpr1 - ; CHECK: %1:sgpr(s32) = G_LOAD %0(p4) :: (load 4, addrspace 4) + ; CHECK: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 + ; CHECK: [[LOAD:%[0-9]+]]:sgpr(s32) = G_LOAD [[COPY]](p4) :: (load 4, addrspace 4) %0:_(p4) = COPY $sgpr0_sgpr1 %1:_(s32) = G_LOAD %0 :: (load 4, addrspace 4, align 4) ... @@ -462,11 +549,11 @@ legalized: true body: | bb.0: liveins: $sgpr0_sgpr1 - ; CHECK-LABEL: name: load_constant_i32_uniform_align2 - ; CHECK: %0:sgpr(p4) = COPY $sgpr0_sgpr1 - ; CHECK: %2:vgpr(p4) = COPY %0(p4) - ; CHECK: %1:vgpr(s32) = G_LOAD %2(p4) :: (load 4, align 2, addrspace 4) + ; CHECK-LABEL: name: load_constant_i32_uniform_align2 + ; CHECK: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4) + ; CHECK: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p4) :: (load 4, align 2, addrspace 4) %0:_(p4) = COPY $sgpr0_sgpr1 %1:_(s32) = G_LOAD %0 :: (load 4, addrspace 4, align 2) ... @@ -480,9 +567,9 @@ body: | liveins: $sgpr0_sgpr1 ; CHECK-LABEL: name: load_constant_i32_uniform_align1 - ; CHECK: %0:sgpr(p4) = COPY $sgpr0_sgpr1 - ; CHECK: %2:vgpr(p4) = COPY %0(p4) - ; CHECK: %1:vgpr(s32) = G_LOAD %2(p4) :: (load 4, align 1, addrspace 4) + ; CHECK: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4) + ; CHECK: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p4) :: (load 4, align 1, addrspace 4) %0:_(p4) = COPY $sgpr0_sgpr1 %1:_(s32) = G_LOAD %0 :: (load 4, addrspace 4, align 1) ... @@ -513,10 +600,13 @@ body: | liveins: $vgpr0_vgpr1 ; CHECK-LABEL: name: load_constant_v8i32_vgpr_crash - ; CHECK: %0:vgpr(p4) = COPY $vgpr0_vgpr1 - ; CHECK: vgpr(<4 x s32>) = G_LOAD %0(p4) - ; CHECK: vgpr(<4 x s32>) = G_LOAD - ; CHECK: G_CONCAT_VECTORS + ; CHECK: liveins: $vgpr0_vgpr1 + ; CHECK: [[COPY:%[0-9]+]]:vgpr(p4) = COPY $vgpr0_vgpr1 + ; CHECK: [[LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load 16, align 32, addrspace 4) + ; CHECK: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16 + ; CHECK: [[PTR_ADD:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; CHECK: [[LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[PTR_ADD]](p4) :: (load 16 + 16, addrspace 4) + ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(<8 x s32>) = G_LOAD %0 :: (load 32, addrspace 4) ... 
@@ -527,14 +617,26 @@ legalized: true tracksRegLiveness: true body: | + ; CHECK-LABEL: name: load_constant_v8i32_vgpr_crash_loop_phi + ; CHECK: bb.0: + ; CHECK: successors: %bb.1(0x80000000) + ; CHECK: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 + ; CHECK: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr(p4) = COPY $sgpr2_sgpr3 + ; CHECK: G_BR %bb.1 + ; CHECK: bb.1: + ; CHECK: successors: %bb.1(0x80000000) + ; CHECK: [[PHI:%[0-9]+]]:vgpr(p4) = G_PHI [[COPY]](p4), %bb.0, %3(p4), %bb.1 + ; CHECK: [[LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[PHI]](p4) :: (load 16, align 32, addrspace 4) + ; CHECK: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16 + ; CHECK: [[PTR_ADD:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[PHI]], [[C]](s64) + ; CHECK: [[LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[PTR_ADD]](p4) :: (load 16 + 16, addrspace 4) + ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>) + ; CHECK: [[COPY2:%[0-9]+]]:sgpr(p4) = COPY [[COPY1]](p4) + ; CHECK: G_BR %bb.1 bb.0: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 - ; CHECK-LABEL: name: load_constant_v8i32_vgpr_crash_loop_phi - ; CHECK: G_PHI - ; CHECK: vgpr(<4 x s32>) = G_LOAD - ; CHECK: vgpr(<4 x s32>) = G_LOAD - ; CHECK: G_CONCAT_VECTORS %0:_(p4) = COPY $sgpr0_sgpr1 %1:_(p4) = COPY $sgpr2_sgpr3 diff --git a/llvm/test/CodeGen/SystemZ/vec-perm-14.ll b/llvm/test/CodeGen/SystemZ/vec-perm-14.ll new file mode 100644 index 00000000000000..0cf3c6ef7a064c --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/vec-perm-14.ll @@ -0,0 +1,27 @@ +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s +; +; Test that only one vperm of the vector compare is needed for both extracts. + +define void @fun() { +; CHECK-LABEL: fun +; CHECK: vperm +; CHECK-NOT: vperm +bb: + %tmp = load <4 x i8>, <4 x i8>* undef + %tmp1 = icmp eq <4 x i8> zeroinitializer, %tmp + %tmp2 = extractelement <4 x i1> %tmp1, i32 0 + br i1 %tmp2, label %bb1, label %bb2 + +bb1: + unreachable + +bb2: + %tmp3 = extractelement <4 x i1> %tmp1, i32 1 + br i1 %tmp3, label %bb3, label %bb4 + +bb3: + unreachable + +bb4: + unreachable +} diff --git a/llvm/test/CodeGen/SystemZ/vector-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/SystemZ/vector-constrained-fp-intrinsics.ll index 7cea2ff8eb9c06..b7cbac89db31e9 100644 --- a/llvm/test/CodeGen/SystemZ/vector-constrained-fp-intrinsics.ll +++ b/llvm/test/CodeGen/SystemZ/vector-constrained-fp-intrinsics.ll @@ -5377,12 +5377,12 @@ define void @constrained_vector_fptrunc_v3f64(<3 x double>* %src, <3 x float>* % ; SZ13-LABEL: constrained_vector_fptrunc_v3f64: ; SZ13: # %bb.0: # %entry ; SZ13-NEXT: vl %v1, 0(%r2), 4 +; SZ13-NEXT: ld %f0, 16(%r2) ; SZ13-NEXT: vledb %v1, %v1, 0, 0 ; SZ13-NEXT: larl %r1, .LCPI97_0 -; SZ13-NEXT: ld %f0, 16(%r2) -; SZ13-NEXT: vl %v2, 0(%r1), 3 -; SZ13-NEXT: vperm %v1, %v1, %v0, %v2 ; SZ13-NEXT: ledbra %f0, 0, %f0, 0 +; SZ13-NEXT: vl %v2, 0(%r1), 3 +; SZ13-NEXT: vperm %v1, %v1, %v1, %v2 ; SZ13-NEXT: ste %f0, 8(%r3) ; SZ13-NEXT: vsteg %v1, 0(%r3), 0 ; SZ13-NEXT: br %r14 diff --git a/llvm/test/Transforms/InstCombine/cast-mul-select.ll b/llvm/test/Transforms/InstCombine/cast-mul-select.ll index f82d2fd285fe8f..e68f3830b5a9d5 100644 --- a/llvm/test/Transforms/InstCombine/cast-mul-select.ll +++ b/llvm/test/Transforms/InstCombine/cast-mul-select.ll @@ -13,8 +13,8 @@ define i32 @mul(i32 %x, i32 %y) { ; we preserve the debug information in the resulting ; instruction. 
; DBGINFO-LABEL: @mul( -; DBGINFO-NEXT: call void @llvm.dbg.value(metadata i32 %x -; DBGINFO-NEXT: call void @llvm.dbg.value(metadata i32 %y +; DBGINFO-NEXT: call void @llvm.dbg.value(metadata i32 %x, {{.*}} !DIExpression(DW_OP_LLVM_convert, 32, DW_ATE_unsigned, DW_OP_LLVM_convert, 8, DW_ATE_unsigned, DW_OP_stack_value)) +; DBGINFO-NEXT: call void @llvm.dbg.value(metadata i32 %y, {{.*}} !DIExpression(DW_OP_LLVM_convert, 32, DW_ATE_unsigned, DW_OP_LLVM_convert, 8, DW_ATE_unsigned, DW_OP_stack_value)) ; DBGINFO-NEXT: [[C:%.*]] = mul i32 {{.*}} ; DBGINFO-NEXT: [[D:%.*]] = and i32 {{.*}} ; DBGINFO-NEXT: call void @llvm.dbg.value(metadata i32 [[C]] @@ -175,7 +175,7 @@ exit: ; Check that we don't drop debug info when a zext is removed. define i1 @foo(i1 zeroext %b) { ; DBGINFO-LABEL: @foo( -; DBGINFO-NEXT: call void @llvm.dbg.value(metadata i1 %b +; DBGINFO-NEXT: call void @llvm.dbg.value(metadata i1 %b, {{.*}} !DIExpression(DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 8, DW_ATE_unsigned, DW_OP_stack_value)) ; DBGINFO-NEXT: ret i1 %b %frombool = zext i1 %b to i8 diff --git a/mlir/CMakeLists.txt b/mlir/CMakeLists.txt index 3671a97395d4ec..7c2c5978c44e62 100644 --- a/mlir/CMakeLists.txt +++ b/mlir/CMakeLists.txt @@ -34,6 +34,10 @@ add_definitions(-DMLIR_CUDA_CONVERSIONS_ENABLED=${MLIR_CUDA_CONVERSIONS_ENABLED} set(MLIR_CUDA_RUNNER_ENABLED 0 CACHE BOOL "Enable building the mlir CUDA runner") set(MLIR_VULKAN_RUNNER_ENABLED 0 CACHE BOOL "Enable building the mlir Vulkan runner") +option(MLIR_INCLUDE_TESTS + "Generate build targets for the MLIR unit tests." + ${LLVM_INCLUDE_TESTS}) + include_directories( "include") include_directories( ${MLIR_INCLUDE_DIR}) @@ -44,8 +48,11 @@ add_subdirectory(tools/mlir-tblgen) add_subdirectory(include/mlir) add_subdirectory(lib) -add_subdirectory(unittests) -add_subdirectory(test) +if (MLIR_INCLUDE_TESTS) + add_definitions(-DMLIR_INCLUDE_TESTS) + add_subdirectory(unittests) + add_subdirectory(test) +endif() # Tools needs to come late to ensure that MLIR_ALL_LIBS is populated. # Generally things after this point may depend on MLIR_ALL_LIBS or libMLIR.so. add_subdirectory(tools) diff --git a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h index 70c3f00f52161b..e93977185fb356 100644 --- a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h +++ b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h @@ -11,6 +11,7 @@ #include "mlir/Dialect/Linalg/Utils/Utils.h" #include "mlir/IR/PatternMatch.h" +#include "llvm/ADT/SmallBitVector.h" namespace mlir { namespace linalg { @@ -97,6 +98,28 @@ struct LinalgPromotionOptions { operandsToPromote->insert(operands.begin(), operands.end()); return *this; } + /// If ith element of `useFullTiles` is true the full view should be used for + /// the promoted buffer of the ith operand in `operandsToPromote`. Otherwise + /// the partial view will be used. + /// The decision is defaulted to `useFullTileBuffersDefault` when + /// `useFullTileBuffers` is None and for operands missing from + /// `useFullTileBuffers`. + Optional useFullTileBuffers = None; + LinalgPromotionOptions &setUseFullTileBuffers(ArrayRef useFullTiles) { + unsigned size = useFullTiles.size(); + llvm::SmallBitVector tmp(size, false); + for (unsigned i = 0; i < size; ++i) + tmp[i] = useFullTiles[i]; + useFullTileBuffers = tmp; + return *this; + } + /// If true all operands unspecified by `useFullTileBuffers` will use the full + /// view, otherwise the partial view. 
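The LinalgPromotionOptions additions above compose with the existing fluent setters. A usage sketch, not taken from the patch, assuming the setOperandsToPromote setter whose tail is visible in the context above: promote operands 0 and 1, give only operand 0 a full-tile buffer, and leave operand 1 on the partial view.

#include "mlir/Dialect/Linalg/Transforms/Transforms.h"
using namespace mlir::linalg;

LinalgPromotionOptions makePromotionOptions() {
  return LinalgPromotionOptions()
      .setOperandsToPromote({0, 1})
      .setUseFullTileBuffers({true, false});
}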
+ bool useFullTileBuffersDefault = false; + LinalgPromotionOptions &useFullTileBuffersByDefault() { + useFullTileBuffersDefault = true; + return *this; + } /// Allow the use of dynamicaly-sized buffers. bool dynamicBuffers = false; LinalgPromotionOptions &setDynamicBuffers(unsigned dynamic) { diff --git a/mlir/include/mlir/Dialect/Vector/EDSC/Intrinsics.h b/mlir/include/mlir/Dialect/Vector/EDSC/Intrinsics.h index 7fa9099a6a90ff..6b5c4be7b2f409 100644 --- a/mlir/include/mlir/Dialect/Vector/EDSC/Intrinsics.h +++ b/mlir/include/mlir/Dialect/Vector/EDSC/Intrinsics.h @@ -16,6 +16,9 @@ namespace intrinsics { using vector_broadcast = ValueBuilder; using vector_contract = ValueBuilder; +using vector_insert = ValueBuilder; +using vector_fma = ValueBuilder; +using vector_extract = ValueBuilder; using vector_matmul = ValueBuilder; using vector_print = OperationBuilder; using vector_transfer_read = ValueBuilder; diff --git a/mlir/include/mlir/Dialect/Vector/VectorOps.h b/mlir/include/mlir/Dialect/Vector/VectorOps.h index a3376d53fc9590..6394fae2137507 100644 --- a/mlir/include/mlir/Dialect/Vector/VectorOps.h +++ b/mlir/include/mlir/Dialect/Vector/VectorOps.h @@ -13,6 +13,7 @@ #ifndef MLIR_DIALECT_VECTOR_VECTOROPS_H #define MLIR_DIALECT_VECTOR_VECTOROPS_H +#include "mlir/IR/AffineMap.h" #include "mlir/IR/Attributes.h" #include "mlir/IR/Dialect.h" #include "mlir/IR/OpDefinition.h" @@ -71,6 +72,14 @@ IntegerType getVectorSubscriptType(Builder &builder); /// the integer type required for subscripts in the vector dialect. ArrayAttr getVectorSubscriptAttr(Builder &b, ArrayRef values); +namespace impl { +/// Build the default minor identity map suitable for a vector transfer. This +/// also handles the case memref<... x vector<...>> -> vector<...> in which the +/// rank of the identity map must take the vector element type into account. 
+AffineMap getTransferMinorIdentityMap(MemRefType memRefType, + VectorType vectorType); +} // namespace impl + #define GET_OP_CLASSES #include "mlir/Dialect/Vector/VectorOps.h.inc" diff --git a/mlir/include/mlir/Dialect/Vector/VectorOps.td b/mlir/include/mlir/Dialect/Vector/VectorOps.td index 4c71a168dae737..29e72857b291e4 100644 --- a/mlir/include/mlir/Dialect/Vector/VectorOps.td +++ b/mlir/include/mlir/Dialect/Vector/VectorOps.td @@ -863,17 +863,35 @@ def Vector_ExtractStridedSliceOp : let assemblyFormat = "$vector attr-dict `:` type($vector) `to` type(results)"; } +def Vector_TransferOpUtils { + code extraTransferDeclaration = [{ + static StringRef getMaskedAttrName() { return "masked"; } + static StringRef getPermutationMapAttrName() { return "permutation_map"; } + bool isMaskedDim(unsigned dim) { + return !masked() || + masked()->cast()[dim].cast().getValue(); + } + MemRefType getMemRefType() { + return memref().getType().cast(); + } + VectorType getVectorType() { + return vector().getType().cast(); + } + }]; +} + def Vector_TransferReadOp : Vector_Op<"transfer_read">, Arguments<(ins AnyMemRef:$memref, Variadic:$indices, - AffineMapAttr:$permutation_map, AnyType:$padding)>, + AffineMapAttr:$permutation_map, AnyType:$padding, + OptionalAttr:$masked)>, Results<(outs AnyVector:$vector)> { let summary = "Reads a supervector from memory into an SSA vector value."; let description = [{ - The `vector.transfer_read` op performs a blocking read from a slice within - a [MemRef](../LangRef.md#memref-type) supplied as its first operand + The `vector.transfer_read` op performs a read from a slice within a + [MemRef](../LangRef.md#memref-type) supplied as its first operand into a [vector](../LangRef.md#vector-type) of the same base elemental type. A memref operand with vector element type, must have its vector element @@ -881,18 +899,31 @@ def Vector_TransferReadOp : memref<3x2x6x4x3xf32>, vector<1x1x4x3xf32>). The slice is further defined by a full-rank index within the MemRef, - supplied as the operands `2 .. 1 + rank(memref)`. The permutation_map - [attribute](../LangRef.md#attributes) is an + supplied as the operands `2 .. 1 + rank(memref)`. + + The permutation_map [attribute](../LangRef.md#attributes) is an [affine-map](Affine.md#affine-maps) which specifies the transposition on the - slice to match the vector shape. The size of the slice is specified by the - size of the vector, given as the return type. An `ssa-value` of the same - elemental type as the MemRef is provided as the last operand to specify - padding in the case of out-of-bounds accesses. This operation is called - 'read' by opposition to 'load' because the super-vector granularity is - generally not representable with a single hardware register. - A `vector.transfer_read` is thus a mid-level - abstraction that supports super-vectorization with non-effecting padding for - full-tile-only code. + slice to match the vector shape. The permutation map may be implicit and + ommitted from parsing and printing if it is the canonical minor identity map + (i.e. if it does not permute or broadcast any dimension). + + The size of the slice is specified by the size of the vector, given as the + return type. + + An `ssa-value` of the same elemental type as the MemRef is provided as the + last operand to specify padding in the case of out-of-bounds accesses. + + An optional boolean array attribute is provided to specify which dimensions + of the transfer need masking. 
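The transfer_read description above says the permutation map may be omitted when it is the canonical minor identity map. A worked example of what that default looks like, built with the generic AffineMap helper; the shapes are made up, and this does not cover the memref-of-vector case that getTransferMinorIdentityMap handles.

#include "mlir/IR/AffineMap.h"
#include "mlir/IR/MLIRContext.h"
using namespace mlir;

// For a transfer of vector<16x32xf32> out of memref<8x16x32xf32>, the minor
// identity map keeps the two fastest-varying memref dimensions:
//   (d0, d1, d2) -> (d1, d2)
AffineMap minorIdentityExample(MLIRContext *ctx) {
  return AffineMap::getMinorIdentityMap(/*dims=*/3, /*results=*/2, ctx);
}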
When a dimension is specified as not requiring + masking, the `vector.transfer_read` may be lowered to simple loads. The + absence of this `masked` attribute signifies that all dimensions of the + transfer need to be masked. + + This operation is called 'read' by opposition to 'load' because the + super-vector granularity is generally not representable with a single + hardware register. A `vector.transfer_read` is thus a mid-level abstraction + that supports super-vectorization with non-effecting padding for full-tile + only operations. More precisely, let's dive deeper into the permutation_map for the following MLIR: @@ -995,19 +1026,27 @@ def Vector_TransferReadOp : }]; let builders = [ - // Builder that sets permutation map and padding to 'getMinorIdentityMap' - // and zero, respectively, by default. + // Builder that sets padding to zero. + OpBuilder<"OpBuilder &builder, OperationState &result, VectorType vector, " + "Value memref, ValueRange indices, AffineMap permutationMap, " + "ArrayRef maybeMasked = {}">, + // Builder that sets permutation map (resp. padding) to + // 'getMinorIdentityMap' (resp. zero). OpBuilder<"OpBuilder &builder, OperationState &result, VectorType vector, " - "Value memref, ValueRange indices"> + "Value memref, ValueRange indices, " + "ArrayRef maybeMasked = {}"> ]; - let extraClassDeclaration = [{ - MemRefType getMemRefType() { - return memref().getType().cast(); - } - VectorType getVectorType() { - return vector().getType().cast(); - } + let extraClassDeclaration = Vector_TransferOpUtils.extraTransferDeclaration # + [{ + /// Build the default minor identity map suitable for a vector transfer. + /// This also handles the case memref<... x vector<...>> -> vector<...> in + /// which the rank of the identity map must take the vector element type + /// into account. + static AffineMap getTransferMinorIdentityMap( + MemRefType memRefType, VectorType vectorType) { + return impl::getTransferMinorIdentityMap(memRefType, vectorType); + } }]; } @@ -1015,12 +1054,13 @@ def Vector_TransferWriteOp : Vector_Op<"transfer_write">, Arguments<(ins AnyVector:$vector, AnyMemRef:$memref, Variadic:$indices, - AffineMapAttr:$permutation_map)> { + AffineMapAttr:$permutation_map, + OptionalAttr:$masked)> { let summary = "The vector.transfer_write op writes a supervector to memory."; let description = [{ - The `vector.transfer_write` performs a blocking write from a + The `vector.transfer_write` op performs a write from a [vector](../LangRef.md#vector-type), supplied as its first operand, into a slice within a [MemRef](../LangRef.md#memref-type) of the same base elemental type, supplied as its second operand. @@ -1031,12 +1071,24 @@ def Vector_TransferWriteOp : The slice is further defined by a full-rank index within the MemRef, supplied as the operands `3 .. 2 + rank(memref)`. + The permutation_map [attribute](../LangRef.md#attributes) is an [affine-map](Affine.md#affine-maps) which specifies the transposition on the - slice to match the vector shape. The size of the slice is specified by the - size of the vector. This operation is called 'write' by opposition to - 'store' because the super-vector granularity is generally not representable - with a single hardware register. A `vector.transfer_write` is thus a + slice to match the vector shape. The permutation map may be implicit and + omitted from parsing and printing if it is the canonical minor identity map + (i.e. if it does not permute or broadcast any dimension). + + The size of the slice is specified by the size of the vector.
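As a quick illustration of the implicit permutation map and the optional `masked` attribute described here, consider the following minimal sketch; it is an editorial example, not taken from the patch or its tests, and the function name, shapes, and SSA values are made up. The inferred minor identity map affine_map<(d0, d1) -> (d1)> is elided from the printed form, and masking is disabled on the single transferred dimension.

func @unmasked_minor_identity_example(%A : memref<?x?xf32>, %i : index) {
  %f0 = constant 0.0 : f32
  // The permutation map defaults to the minor identity (d0, d1) -> (d1) and
  // is therefore not printed; `masked = [false]` marks the one transferred
  // dimension as not needing masking, so it may lower to plain loads/stores.
  %v = vector.transfer_read %A[%i, %i], %f0 {masked = [false]}
      : memref<?x?xf32>, vector<8xf32>
  vector.transfer_write %v, %A[%i, %i] {masked = [false]}
      : vector<8xf32>, memref<?x?xf32>
  return
}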
+ + An optional boolean array attribute is provided to specify which dimensions + of the transfer need masking. When a dimension is specified as not requiring + masking, the `vector.transfer_write` may be lowered to simple stores. The + absence of this `masked` attribute signifies that all dimensions of the + transfer need to be masked. + + This operation is called 'write' by opposition to 'store' because the + super-vector granularity is generally not representable with a single + hardware register. A `vector.transfer_write` is thus a mid-level abstraction that supports super-vectorization with non-effecting padding for full-tile-only code. It is the responsibility of `vector.transfer_write`'s implementation to ensure the memory writes are @@ -1066,23 +1118,24 @@ def Vector_TransferWriteOp : }]; let builders = [ - // Builder that sets permutation map and padding to 'getMinorIdentityMap' - // by default. + // Builder that sets permutation map to 'getMinorIdentityMap'. + OpBuilder<"OpBuilder &builder, OperationState &result, Value vector, " + "Value memref, ValueRange indices, " + "ArrayRef maybeMasked = {}">, OpBuilder<"OpBuilder &builder, OperationState &result, Value vector, " - "Value memref, ValueRange indices"> + "Value memref, ValueRange indices, AffineMap permutationMap">, ]; - let extraClassDeclaration = [{ - VectorType getVectorType() { - return vector().getType().cast(); - } - MemRefType getMemRefType() { - return memref().getType().cast(); - } - }]; - let assemblyFormat = [{ - $vector `,` $memref `[` $indices `]` attr-dict `:` type($vector) `,` - type($memref) + let extraClassDeclaration = Vector_TransferOpUtils.extraTransferDeclaration # + [{ + /// Build the default minor identity map suitable for a vector transfer. + /// This also handles the case memref<... x vector<...>> -> vector<...> in + /// which the rank of the identity map must take the vector element type + /// into account.
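A hedged sketch of what this default map inference means for a plain write, with an invented function name and shapes: writing a rank-1 vector into a rank-3 memref uses the minor identity map affine_map<(d0, d1, d2) -> (d2)>, which the printer then elides.

func @inferred_write_map_example(%A : memref<?x?x?xf32>, %i : index,
                                 %v : vector<16xf32>) {
  // Rank-3 memref, rank-1 vector: the builder/parser fall back to the minor
  // identity map (d0, d1, d2) -> (d2), so no permutation_map is printed.
  vector.transfer_write %v, %A[%i, %i, %i] : vector<16xf32>, memref<?x?x?xf32>
  return
}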
+ static AffineMap getTransferMinorIdentityMap( + MemRefType memRefType, VectorType vectorType) { + return impl::getTransferMinorIdentityMap(memRefType, vectorType); + } }]; } diff --git a/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp b/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp index eb25bf3abf85ed..975807ca86712f 100644 --- a/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp +++ b/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp @@ -746,12 +746,6 @@ class VectorTypeCastOpConversion : public ConvertToLLVMPattern { } }; -template -LogicalResult replaceTransferOp(ConversionPatternRewriter &rewriter, - LLVMTypeConverter &typeConverter, Location loc, - Operation *op, ArrayRef operands, - Value dataPtr, Value mask); - LogicalResult getLLVMTypeAndAlignment(LLVMTypeConverter &typeConverter, Type type, LLVM::LLVMType &llvmType, unsigned &align) { @@ -765,12 +759,25 @@ LogicalResult getLLVMTypeAndAlignment(LLVMTypeConverter &typeConverter, return success(); } -template <> -LogicalResult replaceTransferOp( - ConversionPatternRewriter &rewriter, LLVMTypeConverter &typeConverter, - Location loc, Operation *op, ArrayRef operands, Value dataPtr, - Value mask) { - auto xferOp = cast(op); +LogicalResult +replaceTransferOpWithLoadOrStore(ConversionPatternRewriter &rewriter, + LLVMTypeConverter &typeConverter, Location loc, + TransferReadOp xferOp, + ArrayRef operands, Value dataPtr) { + LLVM::LLVMType vecTy; + unsigned align; + if (failed(getLLVMTypeAndAlignment(typeConverter, xferOp.getVectorType(), + vecTy, align))) + return failure(); + rewriter.replaceOpWithNewOp(xferOp, dataPtr); + return success(); +} + +LogicalResult replaceTransferOpWithMasked(ConversionPatternRewriter &rewriter, + LLVMTypeConverter &typeConverter, + Location loc, TransferReadOp xferOp, + ArrayRef operands, + Value dataPtr, Value mask) { auto toLLVMTy = [&](Type t) { return typeConverter.convertType(t); }; VectorType fillType = xferOp.getVectorType(); Value fill = rewriter.create(loc, fillType, xferOp.padding()); @@ -783,19 +790,32 @@ LogicalResult replaceTransferOp( return failure(); rewriter.replaceOpWithNewOp( - op, vecTy, dataPtr, mask, ValueRange{fill}, + xferOp, vecTy, dataPtr, mask, ValueRange{fill}, rewriter.getI32IntegerAttr(align)); return success(); } -template <> -LogicalResult replaceTransferOp( - ConversionPatternRewriter &rewriter, LLVMTypeConverter &typeConverter, - Location loc, Operation *op, ArrayRef operands, Value dataPtr, - Value mask) { +LogicalResult +replaceTransferOpWithLoadOrStore(ConversionPatternRewriter &rewriter, + LLVMTypeConverter &typeConverter, Location loc, + TransferWriteOp xferOp, + ArrayRef operands, Value dataPtr) { auto adaptor = TransferWriteOpOperandAdaptor(operands); + LLVM::LLVMType vecTy; + unsigned align; + if (failed(getLLVMTypeAndAlignment(typeConverter, xferOp.getVectorType(), + vecTy, align))) + return failure(); + rewriter.replaceOpWithNewOp(xferOp, adaptor.vector(), dataPtr); + return success(); +} - auto xferOp = cast(op); +LogicalResult replaceTransferOpWithMasked(ConversionPatternRewriter &rewriter, + LLVMTypeConverter &typeConverter, + Location loc, TransferWriteOp xferOp, + ArrayRef operands, + Value dataPtr, Value mask) { + auto adaptor = TransferWriteOpOperandAdaptor(operands); LLVM::LLVMType vecTy; unsigned align; if (failed(getLLVMTypeAndAlignment(typeConverter, xferOp.getVectorType(), @@ -803,7 +823,8 @@ LogicalResult replaceTransferOp( return failure(); rewriter.replaceOpWithNewOp( - op, adaptor.vector(), dataPtr, mask, 
rewriter.getI32IntegerAttr(align)); + xferOp, adaptor.vector(), dataPtr, mask, + rewriter.getI32IntegerAttr(align)); return success(); } @@ -877,6 +898,10 @@ class VectorTransferConversion : public ConvertToLLVMPattern { vectorDataPtr = rewriter.create( loc, vecTy.getPointerTo(), dataPtr); + if (!xferOp.isMaskedDim(0)) + return replaceTransferOpWithLoadOrStore(rewriter, typeConverter, loc, + xferOp, operands, vectorDataPtr); + // 2. Create a vector with linear indices [ 0 .. vector_length - 1 ]. unsigned vecWidth = vecTy.getVectorNumElements(); VectorType vectorCmpType = VectorType::get(vecWidth, i64Type); @@ -910,8 +935,8 @@ class VectorTransferConversion : public ConvertToLLVMPattern { mask); // 5. Rewrite as a masked read / write. - return replaceTransferOp(rewriter, typeConverter, loc, op, - operands, vectorDataPtr, mask); + return replaceTransferOpWithMasked(rewriter, typeConverter, loc, xferOp, + operands, vectorDataPtr, mask); } }; diff --git a/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp b/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp index c9cd605afb84cd..03b78491fa1222 100644 --- a/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp +++ b/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp @@ -157,25 +157,34 @@ void NDTransferOpHelper::emitInBounds( ValueRange majorIvs, ValueRange majorOffsets, MemRefBoundsCapture &memrefBounds, LambdaThen thenBlockBuilder, LambdaElse elseBlockBuilder) { - Value inBounds = std_constant_int(/*value=*/1, /*width=*/1); + Value inBounds; SmallVector majorIvsPlusOffsets; majorIvsPlusOffsets.reserve(majorIvs.size()); + unsigned idx = 0; for (auto it : llvm::zip(majorIvs, majorOffsets, memrefBounds.getUbs())) { Value iv = std::get<0>(it), off = std::get<1>(it), ub = std::get<2>(it); using namespace mlir::edsc::op; majorIvsPlusOffsets.push_back(iv + off); - Value inBounds2 = majorIvsPlusOffsets.back() < ub; - inBounds = inBounds && inBounds2; + if (xferOp.isMaskedDim(leadingRank + idx)) { + Value inBounds2 = majorIvsPlusOffsets.back() < ub; + inBounds = (inBounds) ? (inBounds && inBounds2) : inBounds2; + } + ++idx; } - auto ifOp = ScopedContext::getBuilderRef().create( - ScopedContext::getLocation(), TypeRange{}, inBounds, - /*withElseRegion=*/std::is_same()); - BlockBuilder(&ifOp.thenRegion().front(), - Append())([&] { thenBlockBuilder(majorIvsPlusOffsets); }); - if (std::is_same()) - BlockBuilder(&ifOp.elseRegion().front(), - Append())([&] { elseBlockBuilder(majorIvsPlusOffsets); }); + if (inBounds) { + auto ifOp = ScopedContext::getBuilderRef().create( + ScopedContext::getLocation(), TypeRange{}, inBounds, + /*withElseRegion=*/std::is_same()); + BlockBuilder(&ifOp.thenRegion().front(), + Append())([&] { thenBlockBuilder(majorIvsPlusOffsets); }); + if (std::is_same()) + BlockBuilder(&ifOp.elseRegion().front(), + Append())([&] { elseBlockBuilder(majorIvsPlusOffsets); }); + } else { + // Just build the body of the then block right here. + thenBlockBuilder(majorIvsPlusOffsets); + } } template <> @@ -187,18 +196,23 @@ LogicalResult NDTransferOpHelper::doReplace() { MemRefBoundsCapture &memrefBounds) { // If in-bounds, index into memref and lower to 1-D transfer read. auto thenBlockBuilder = [&](ValueRange majorIvsPlusOffsets) { - auto map = AffineMap::getMinorIdentityMap( - xferOp.getMemRefType().getRank(), minorRank, xferOp.getContext()); - // Lower to 1-D vector_transfer_read and let recursion handle it. 
- Value memref = xferOp.memref(); SmallVector indexing; indexing.reserve(leadingRank + majorRank + minorRank); indexing.append(leadingOffsets.begin(), leadingOffsets.end()); indexing.append(majorIvsPlusOffsets.begin(), majorIvsPlusOffsets.end()); indexing.append(minorOffsets.begin(), minorOffsets.end()); - auto loaded1D = - vector_transfer_read(minorVectorType, memref, indexing, - AffineMapAttr::get(map), xferOp.padding()); + + Value memref = xferOp.memref(); + auto map = TransferReadOp::getTransferMinorIdentityMap( + xferOp.getMemRefType(), minorVectorType); + ArrayAttr masked; + if (xferOp.isMaskedDim(xferOp.getVectorType().getRank() - 1)) { + OpBuilder &b = ScopedContext::getBuilderRef(); + masked = b.getBoolArrayAttr({true}); + } + auto loaded1D = vector_transfer_read(minorVectorType, memref, indexing, + AffineMapAttr::get(map), + xferOp.padding(), masked); // Store the 1-D vector. std_store(loaded1D, alloc, majorIvs); }; @@ -229,17 +243,22 @@ LogicalResult NDTransferOpHelper::doReplace() { ValueRange majorOffsets, ValueRange minorOffsets, MemRefBoundsCapture &memrefBounds) { auto thenBlockBuilder = [&](ValueRange majorIvsPlusOffsets) { - // Lower to 1-D vector_transfer_write and let recursion handle it. - Value loaded1D = std_load(alloc, majorIvs); - auto map = AffineMap::getMinorIdentityMap( - xferOp.getMemRefType().getRank(), minorRank, xferOp.getContext()); SmallVector indexing; indexing.reserve(leadingRank + majorRank + minorRank); indexing.append(leadingOffsets.begin(), leadingOffsets.end()); indexing.append(majorIvsPlusOffsets.begin(), majorIvsPlusOffsets.end()); indexing.append(minorOffsets.begin(), minorOffsets.end()); + // Lower to 1-D vector_transfer_write and let recursion handle it. + Value loaded1D = std_load(alloc, majorIvs); + auto map = TransferWriteOp::getTransferMinorIdentityMap( + xferOp.getMemRefType(), minorVectorType); + ArrayAttr masked; + if (xferOp.isMaskedDim(xferOp.getVectorType().getRank() - 1)) { + OpBuilder &b = ScopedContext::getBuilderRef(); + masked = b.getBoolArrayAttr({true}); + } vector_transfer_write(loaded1D, xferOp.memref(), indexing, - AffineMapAttr::get(map)); + AffineMapAttr::get(map), masked); }; // Don't write anything when out of bounds. 
auto elseBlockBuilder = [&](ValueRange majorIvsPlusOffsets) {}; diff --git a/mlir/lib/Dialect/Affine/Transforms/SuperVectorize.cpp b/mlir/lib/Dialect/Affine/Transforms/SuperVectorize.cpp index fe669624f6cb62..f5b98f9bf0653d 100644 --- a/mlir/lib/Dialect/Affine/Transforms/SuperVectorize.cpp +++ b/mlir/lib/Dialect/Affine/Transforms/SuperVectorize.cpp @@ -793,10 +793,7 @@ static LogicalResult vectorizeRootOrTerminal(Value iv, LLVM_DEBUG(permutationMap.print(dbgs())); auto transfer = b.create( opInst->getLoc(), vectorType, memoryOp.getMemRef(), indices, - AffineMapAttr::get(permutationMap), - // TODO(b/144455320) add a proper padding value, not just 0.0 : f32 - state->folder->create(b, opInst->getLoc(), - APFloat(0.0f), b.getF32Type())); + permutationMap); state->registerReplacement(opInst, transfer.getOperation()); } else { state->registerTerminal(opInst); @@ -1020,8 +1017,7 @@ static Operation *vectorizeOneOperation(Operation *opInst, LLVM_DEBUG(dbgs() << "\n[early-vect]+++++ permutationMap: "); LLVM_DEBUG(permutationMap.print(dbgs())); auto transfer = b.create( - opInst->getLoc(), vectorValue, memRef, indices, - AffineMapAttr::get(permutationMap)); + opInst->getLoc(), vectorValue, memRef, indices, permutationMap); auto *res = transfer.getOperation(); LLVM_DEBUG(dbgs() << "\n[early-vect]+++++ vectorized store: " << *res); // "Terminals" (i.e. AffineStoreOps) are erased on the spot. diff --git a/mlir/lib/Dialect/Linalg/Transforms/Promotion.cpp b/mlir/lib/Dialect/Linalg/Transforms/Promotion.cpp index 5cbaa2f426dbcf..44de2a1021c274 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/Promotion.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Promotion.cpp @@ -56,6 +56,8 @@ struct LinalgOpInstancePromotionOptions { const LinalgPromotionOptions &options); /// SubViews to promote. SetVector subViews; + /// True if the full view should be used for the promoted buffer. + DenseMap useFullTileBuffers; /// Allow the use of dynamicaly-sized buffers. bool dynamicBuffers; /// Alignment of promoted buffer. 
@@ -65,20 +67,28 @@ struct LinalgOpInstancePromotionOptions { LinalgOpInstancePromotionOptions::LinalgOpInstancePromotionOptions( LinalgOp linalgOp, const LinalgPromotionOptions &options) - : subViews(), dynamicBuffers(options.dynamicBuffers), + : subViews(), useFullTileBuffers(), dynamicBuffers(options.dynamicBuffers), alignment(options.alignment) { + unsigned nBuffers = linalgOp.getNumInputsAndOutputBuffers(); + auto vUseFullTileBuffers = + options.useFullTileBuffers.getValueOr(llvm::SmallBitVector()); + vUseFullTileBuffers.resize(nBuffers, options.useFullTileBuffersDefault); + if (options.operandsToPromote.hasValue()) { - for (unsigned idx : options.operandsToPromote.getValue()) { - auto *op = linalgOp.getBuffer(idx).getDefiningOp(); - if (auto sv = dyn_cast_or_null(op)) + for (auto it : llvm::enumerate(options.operandsToPromote.getValue())) { + auto *op = linalgOp.getBuffer(it.value()).getDefiningOp(); + if (auto sv = dyn_cast_or_null(op)) { subViews.insert(sv); + useFullTileBuffers[sv] = vUseFullTileBuffers[it.index()]; + } } } else { - unsigned nBuffers = linalgOp.getNumInputsAndOutputBuffers(); for (unsigned idx = 0; idx < nBuffers; ++idx) { auto *op = linalgOp.getBuffer(idx).getDefiningOp(); - if (auto sv = dyn_cast_or_null(op)) + if (auto sv = dyn_cast_or_null(op)) { subViews.insert(sv); + useFullTileBuffers[sv] = vUseFullTileBuffers[idx]; + } } } } @@ -201,6 +211,9 @@ promoteSubViews(OpBuilder &b, Location loc, auto info = promotionInfoMap.find(v); if (info == promotionInfoMap.end()) continue; + // Only fill the buffer if the full local view is used + if (!options.useFullTileBuffers[v]) + continue; Value fillVal; if (auto t = subView.getType().getElementType().dyn_cast()) fillVal = folded_std_constant(folder, FloatAttr::get(t, 0.0)); @@ -244,7 +257,10 @@ static void promoteSubViews(OpBuilder &b, LinalgOp op, unsigned promotedIdx = 0; for (auto view : op.getInputsAndOutputBuffers()) { if (options.subViews.count(view) != 0) { - opViews.push_back(promotedBufferAndViews[promotedIdx].fullLocalView); + if (options.useFullTileBuffers[view]) + opViews.push_back(promotedBufferAndViews[promotedIdx].fullLocalView); + else + opViews.push_back(promotedBufferAndViews[promotedIdx].partialLocalView); writebackViews.emplace_back(std::make_pair( view, promotedBufferAndViews[promotedIdx].partialLocalView)); promotedIdx++; diff --git a/mlir/lib/Dialect/Vector/VectorOps.cpp b/mlir/lib/Dialect/Vector/VectorOps.cpp index 8385b253e9fb60..f347a564f446b8 100644 --- a/mlir/lib/Dialect/Vector/VectorOps.cpp +++ b/mlir/lib/Dialect/Vector/VectorOps.cpp @@ -1202,6 +1202,23 @@ void ExtractStridedSliceOp::getCanonicalizationPatterns( //===----------------------------------------------------------------------===// // TransferReadOp //===----------------------------------------------------------------------===// + +/// Build the default minor identity map suitable for a vector transfer. This +/// also handles the case memref<... x vector<...>> -> vector<...> in which the +/// rank of the identity map must take the vector element type into account. 
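Before the implementation that follows, a small hedged example of the vector-element-type case this comment mentions; the shapes mirror the style of the tests but the function and value names are made up. For memref<?x?xvector<4x3xf32>> the element vector contributes rank 2, so transferring a vector<1x1x4x3xf32> yields the rank-2 minor identity affine_map<(d0, d1) -> (d0, d1)>, which the printer elides.

func @vector_element_minor_identity(%m : memref<?x?xvector<4x3xf32>>,
                                    %i : index) {
  %f0 = constant 0.0 : f32
  %pad = splat %f0 : vector<4x3xf32>
  // memref rank 2, element vector rank 2, result vector rank 4:
  // 4 - 2 = 2 map results, i.e. the rank-2 minor identity (d0, d1) -> (d0, d1).
  %v = vector.transfer_read %m[%i, %i], %pad
      : memref<?x?xvector<4x3xf32>>, vector<1x1x4x3xf32>
  return
}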
+AffineMap +mlir::vector::impl::getTransferMinorIdentityMap(MemRefType memRefType, + VectorType vectorType) { + int64_t elementVectorRank = 0; + VectorType elementVectorType = + memRefType.getElementType().dyn_cast(); + if (elementVectorType) + elementVectorRank += elementVectorType.getRank(); + return AffineMap::getMinorIdentityMap( + memRefType.getRank(), vectorType.getRank() - elementVectorRank, + memRefType.getContext()); +} + template static LogicalResult verifyPermutationMap(AffineMap permutationMap, EmitFun emitOpError) { @@ -1233,7 +1250,8 @@ static LogicalResult verifyPermutationMap(AffineMap permutationMap, static LogicalResult verifyTransferOp(Operation *op, MemRefType memrefType, VectorType vectorType, - AffineMap permutationMap) { + AffineMap permutationMap, + ArrayAttr optionalMasked) { auto memrefElementType = memrefType.getElementType(); if (auto memrefVectorElementType = memrefElementType.dyn_cast()) { // Memref has vector element type. @@ -1281,28 +1299,68 @@ static LogicalResult verifyTransferOp(Operation *op, MemRefType memrefType, if (permutationMap.getNumInputs() != memrefType.getRank()) return op->emitOpError("requires a permutation_map with input dims of the " "same rank as the memref type"); + + if (optionalMasked) { + if (permutationMap.getNumResults() != + static_cast(optionalMasked.size())) + return op->emitOpError("expects the optional masked attr of same rank as " + "permutation_map results: ") + << AffineMapAttr::get(permutationMap); + } + return success(); } -/// Builder that sets permutation map and padding to 'getMinorIdentityMap' and -/// zero, respectively, by default. +/// Builder that sets padding to zero. void TransferReadOp::build(OpBuilder &builder, OperationState &result, - VectorType vector, Value memref, - ValueRange indices) { - auto permMap = AffineMap::getMinorIdentityMap( - memref.getType().cast().getRank(), vector.getRank(), - builder.getContext()); + VectorType vector, Value memref, ValueRange indices, + AffineMap permutationMap, + ArrayRef maybeMasked) { Type elemType = vector.cast().getElementType(); Value padding = builder.create(result.location, elemType, builder.getZeroAttr(elemType)); + if (maybeMasked.empty()) + return build(builder, result, vector, memref, indices, permutationMap, + padding, ArrayAttr()); + ArrayAttr maskedArrayAttr = builder.getBoolArrayAttr(maybeMasked); + build(builder, result, vector, memref, indices, permutationMap, padding, + maskedArrayAttr); +} - build(builder, result, vector, memref, indices, permMap, padding); +/// Builder that sets permutation map (resp. padding) to 'getMinorIdentityMap' +/// (resp. zero). 
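To make the masked-rank check above concrete, here is a hedged sketch (invented names and shapes) of an op that satisfies it; shrinking `masked` to a single entry on this op would trip the new "expects the optional masked attr of same rank as permutation_map results" diagnostic.

func @masked_rank_matches_map(%A : memref<?x?xf32>, %i : index,
                              %v : vector<4x8xf32>) {
  // The inferred map (d0, d1) -> (d0, d1) has two results, so `masked` needs
  // two entries; only the outer dimension still requires masking here.
  vector.transfer_write %v, %A[%i, %i] {masked = [true, false]}
      : vector<4x8xf32>, memref<?x?xf32>
  return
}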
+void TransferReadOp::build(OpBuilder &builder, OperationState &result, + VectorType vectorType, Value memref, + ValueRange indices, ArrayRef maybeMasked) { + auto permMap = getTransferMinorIdentityMap( + memref.getType().cast(), vectorType); + build(builder, result, vectorType, memref, indices, permMap, maybeMasked); +} + +template +void printTransferAttrs(OpAsmPrinter &p, TransferOp op) { + SmallVector elidedAttrs; + if (op.permutation_map() == TransferOp::getTransferMinorIdentityMap( + op.getMemRefType(), op.getVectorType())) + elidedAttrs.push_back(op.getPermutationMapAttrName()); + bool elideMasked = true; + if (auto maybeMasked = op.masked()) { + for (auto attr : *maybeMasked) { + if (!attr.template cast().getValue()) { + elideMasked = false; + break; + } + } + } + if (elideMasked) + elidedAttrs.push_back(op.getMaskedAttrName()); + p.printOptionalAttrDict(op.getAttrs(), elidedAttrs); } static void print(OpAsmPrinter &p, TransferReadOp op) { p << op.getOperationName() << " " << op.memref() << "[" << op.indices() - << "], " << op.padding() << " "; - p.printOptionalAttrDict(op.getAttrs()); + << "], " << op.padding(); + printTransferAttrs(p, op); p << " : " << op.getMemRefType() << ", " << op.getVectorType(); } @@ -1313,7 +1371,7 @@ static ParseResult parseTransferReadOp(OpAsmParser &parser, SmallVector indexInfo; OpAsmParser::OperandType paddingInfo; SmallVector types; - // Parsing with support for optional paddingValue. + // Parsing with support for paddingValue. if (parser.parseOperand(memrefInfo) || parser.parseOperandList(indexInfo, OpAsmParser::Delimiter::Square) || parser.parseComma() || parser.parseOperand(paddingInfo) || @@ -1321,12 +1379,21 @@ static ParseResult parseTransferReadOp(OpAsmParser &parser, parser.getCurrentLocation(&typesLoc) || parser.parseColonTypeList(types)) return failure(); if (types.size() != 2) - return parser.emitError(typesLoc, "two types required"); + return parser.emitError(typesLoc, "requires two types"); auto indexType = parser.getBuilder().getIndexType(); MemRefType memRefType = types[0].dyn_cast(); if (!memRefType) - return parser.emitError(typesLoc, "memref type required"), failure(); - Type vectorType = types[1]; + return parser.emitError(typesLoc, "requires memref type"); + VectorType vectorType = types[1].dyn_cast(); + if (!vectorType) + return parser.emitError(typesLoc, "requires vector type"); + auto permutationAttrName = TransferReadOp::getPermutationMapAttrName(); + auto attr = result.attributes.get(permutationAttrName); + if (!attr) { + auto permMap = + TransferReadOp::getTransferMinorIdentityMap(memRefType, vectorType); + result.attributes.set(permutationAttrName, AffineMapAttr::get(permMap)); + } return failure( parser.resolveOperand(memrefInfo, memRefType, result.operands) || parser.resolveOperands(indexInfo, indexType, result.operands) || @@ -1347,7 +1414,8 @@ static LogicalResult verify(TransferReadOp op) { return op.emitOpError("requires ") << memrefType.getRank() << " indices"; if (failed(verifyTransferOp(op.getOperation(), memrefType, vectorType, - permutationMap))) + permutationMap, + op.masked() ? *op.masked() : ArrayAttr()))) return failure(); if (auto memrefVectorElementType = memrefElementType.dyn_cast()) { @@ -1376,15 +1444,67 @@ static LogicalResult verify(TransferReadOp op) { // TransferWriteOp //===----------------------------------------------------------------------===// -/// Builder that sets permutation map and padding to 'getMinorIdentityMap' by -/// default. 
+/// Builder that sets permutation map to 'getMinorIdentityMap'. void TransferWriteOp::build(OpBuilder &builder, OperationState &result, - Value vector, Value memref, ValueRange indices) { + Value vector, Value memref, ValueRange indices, + ArrayRef maybeMasked) { auto vectorType = vector.getType().cast(); - auto permMap = AffineMap::getMinorIdentityMap( - memref.getType().cast().getRank(), vectorType.getRank(), - builder.getContext()); - build(builder, result, vector, memref, indices, permMap); + auto permMap = getTransferMinorIdentityMap( + memref.getType().cast(), vectorType); + if (maybeMasked.empty()) + return build(builder, result, vector, memref, indices, permMap, + ArrayAttr()); + ArrayAttr maskedArrayAttr = builder.getBoolArrayAttr(maybeMasked); + build(builder, result, vector, memref, indices, permMap, maskedArrayAttr); +} + +/// Builder that sets permutation map to 'getMinorIdentityMap'. +void TransferWriteOp::build(OpBuilder &builder, OperationState &result, + Value vector, Value memref, ValueRange indices, + AffineMap permutationMap) { + build(builder, result, vector, memref, indices, + /*maybeMasked=*/ArrayRef{}); +} + +static ParseResult parseTransferWriteOp(OpAsmParser &parser, + OperationState &result) { + llvm::SMLoc typesLoc; + OpAsmParser::OperandType vectorInfo, memrefInfo; + SmallVector indexInfo; + SmallVector types; + if (parser.parseOperand(vectorInfo) || parser.parseComma() || + parser.parseOperand(memrefInfo) || + parser.parseOperandList(indexInfo, OpAsmParser::Delimiter::Square) || + parser.parseOptionalAttrDict(result.attributes) || + parser.getCurrentLocation(&typesLoc) || parser.parseColonTypeList(types)) + return failure(); + if (types.size() != 2) + return parser.emitError(typesLoc, "requires two types"); + auto indexType = parser.getBuilder().getIndexType(); + VectorType vectorType = types[0].dyn_cast(); + if (!vectorType) + return parser.emitError(typesLoc, "requires vector type"); + MemRefType memRefType = types[1].dyn_cast(); + if (!memRefType) + return parser.emitError(typesLoc, "requires memref type"); + auto permutationAttrName = TransferWriteOp::getPermutationMapAttrName(); + auto attr = result.attributes.get(permutationAttrName); + if (!attr) { + auto permMap = + TransferWriteOp::getTransferMinorIdentityMap(memRefType, vectorType); + result.attributes.set(permutationAttrName, AffineMapAttr::get(permMap)); + } + return failure( + parser.resolveOperand(vectorInfo, vectorType, result.operands) || + parser.resolveOperand(memrefInfo, memRefType, result.operands) || + parser.resolveOperands(indexInfo, indexType, result.operands)); +} + +static void print(OpAsmPrinter &p, TransferWriteOp op) { + p << op.getOperationName() << " " << op.vector() << ", " << op.memref() << "[" + << op.indices() << "]"; + printTransferAttrs(p, op); + p << " : " << op.getVectorType() << ", " << op.getMemRefType(); } static LogicalResult verify(TransferWriteOp op) { @@ -1397,7 +1517,8 @@ static LogicalResult verify(TransferWriteOp op) { return op.emitOpError("requires ") << memrefType.getRank() << " indices"; if (failed(verifyTransferOp(op.getOperation(), memrefType, vectorType, - permutationMap))) + permutationMap, + op.masked() ? 
*op.masked() : ArrayAttr()))) return failure(); return verifyPermutationMap(permutationMap, diff --git a/mlir/lib/Dialect/Vector/VectorTransforms.cpp b/mlir/lib/Dialect/Vector/VectorTransforms.cpp index af7e5ad86af854..cf1bdede90271e 100644 --- a/mlir/lib/Dialect/Vector/VectorTransforms.cpp +++ b/mlir/lib/Dialect/Vector/VectorTransforms.cpp @@ -564,9 +564,12 @@ struct SplitTransferReadOp : public OpRewritePattern { // Get VectorType for slice 'i'. auto sliceVectorType = resultTupleType.getType(index); // Create split TransferReadOp for 'sliceUser'. + // `masked` attribute propagates conservatively: if the coarse op didn't + // need masking, the fine op doesn't either. vectorTupleValues[index] = rewriter.create( loc, sliceVectorType, xferReadOp.memref(), sliceIndices, - xferReadOp.permutation_map(), xferReadOp.padding()); + xferReadOp.permutation_map(), xferReadOp.padding(), + xferReadOp.masked() ? *xferReadOp.masked() : ArrayAttr()); }; generateTransferOpSlices(memrefElementType, sourceVectorType, resultTupleType, sizes, strides, indices, rewriter, @@ -620,9 +623,12 @@ struct SplitTransferWriteOp : public OpRewritePattern { xferWriteOp.indices().end()); auto createSlice = [&](unsigned index, ArrayRef sliceIndices) { // Create split TransferWriteOp for source vector 'tupleOp.operand[i]'. + // `masked` attribute propagates conservatively: if the coarse op didn't + // need masking, the fine op doesn't either. rewriter.create( loc, tupleOp.getOperand(index), xferWriteOp.memref(), sliceIndices, - xferWriteOp.permutation_map()); + xferWriteOp.permutation_map(), + xferWriteOp.masked() ? *xferWriteOp.masked() : ArrayAttr()); }; generateTransferOpSlices(memrefElementType, resultVectorType, sourceTupleType, sizes, strides, indices, rewriter, diff --git a/mlir/test/Conversion/AffineToStandard/lower-affine-to-vector.mlir b/mlir/test/Conversion/AffineToStandard/lower-affine-to-vector.mlir index f9a78aa495a5c3..7fba0996d8f560 100644 --- a/mlir/test/Conversion/AffineToStandard/lower-affine-to-vector.mlir +++ b/mlir/test/Conversion/AffineToStandard/lower-affine-to-vector.mlir @@ -1,6 +1,5 @@ // RUN: mlir-opt -lower-affine --split-input-file %s | FileCheck %s -// CHECK: #[[perm_map:.*]] = affine_map<(d0) -> (d0)> // CHECK-LABEL: func @affine_vector_load func @affine_vector_load(%arg0 : index) { %0 = alloc() : memref<100xf32> @@ -12,13 +11,12 @@ func @affine_vector_load(%arg0 : index) { // CHECK-NEXT: %[[c7:.*]] = constant 7 : index // CHECK-NEXT: %[[b:.*]] = addi %[[a]], %[[c7]] : index // CHECK-NEXT: %[[pad:.*]] = constant 0.0 -// CHECK-NEXT: vector.transfer_read %[[buf]][%[[b]]], %[[pad]] {permutation_map = #[[perm_map]]} : memref<100xf32>, vector<8xf32> +// CHECK-NEXT: vector.transfer_read %[[buf]][%[[b]]], %[[pad]] : memref<100xf32>, vector<8xf32> return } // ----- -// CHECK: #[[perm_map:.*]] = affine_map<(d0) -> (d0)> // CHECK-LABEL: func @affine_vector_store func @affine_vector_store(%arg0 : index) { %0 = alloc() : memref<100xf32> @@ -33,13 +31,12 @@ func @affine_vector_store(%arg0 : index) { // CHECK-NEXT: %[[b:.*]] = addi %{{.*}}, %[[a]] : index // CHECK-NEXT: %[[c7:.*]] = constant 7 : index // CHECK-NEXT: %[[c:.*]] = addi %[[b]], %[[c7]] : index -// CHECK-NEXT: vector.transfer_write %[[val]], %[[buf]][%[[c]]] {permutation_map = #[[perm_map]]} : vector<4xf32>, memref<100xf32> +// CHECK-NEXT: vector.transfer_write %[[val]], %[[buf]][%[[c]]] : vector<4xf32>, memref<100xf32> return } // ----- -// CHECK: #[[perm_map:.*]] = affine_map<(d0) -> (d0)> // CHECK-LABEL: func @affine_vector_load func 
@affine_vector_load(%arg0 : index) { %0 = alloc() : memref<100xf32> @@ -51,13 +48,12 @@ func @affine_vector_load(%arg0 : index) { // CHECK-NEXT: %[[c7:.*]] = constant 7 : index // CHECK-NEXT: %[[b:.*]] = addi %[[a]], %[[c7]] : index // CHECK-NEXT: %[[pad:.*]] = constant 0.0 -// CHECK-NEXT: vector.transfer_read %[[buf]][%[[b]]], %[[pad]] {permutation_map = #[[perm_map]]} : memref<100xf32>, vector<8xf32> +// CHECK-NEXT: vector.transfer_read %[[buf]][%[[b]]], %[[pad]] : memref<100xf32>, vector<8xf32> return } // ----- -// CHECK: #[[perm_map:.*]] = affine_map<(d0) -> (d0)> // CHECK-LABEL: func @affine_vector_store func @affine_vector_store(%arg0 : index) { %0 = alloc() : memref<100xf32> @@ -72,13 +68,12 @@ func @affine_vector_store(%arg0 : index) { // CHECK-NEXT: %[[b:.*]] = addi %{{.*}}, %[[a]] : index // CHECK-NEXT: %[[c7:.*]] = constant 7 : index // CHECK-NEXT: %[[c:.*]] = addi %[[b]], %[[c7]] : index -// CHECK-NEXT: vector.transfer_write %[[val]], %[[buf]][%[[c]]] {permutation_map = #[[perm_map]]} : vector<4xf32>, memref<100xf32> +// CHECK-NEXT: vector.transfer_write %[[val]], %[[buf]][%[[c]]] : vector<4xf32>, memref<100xf32> return } // ----- -// CHECK: #[[perm_map:.*]] = affine_map<(d0, d1) -> (d0, d1)> // CHECK-LABEL: func @vector_load_2d func @vector_load_2d() { %0 = alloc() : memref<100x100xf32> @@ -89,7 +84,7 @@ func @vector_load_2d() { // CHECK: scf.for %[[i0:.*]] = // CHECK: scf.for %[[i1:.*]] = // CHECK-NEXT: %[[pad:.*]] = constant 0.0 -// CHECK-NEXT: vector.transfer_read %[[buf]][%[[i0]], %[[i1]]], %[[pad]] {permutation_map = #[[perm_map]]} : memref<100x100xf32>, vector<2x8xf32> +// CHECK-NEXT: vector.transfer_read %[[buf]][%[[i0]], %[[i1]]], %[[pad]] : memref<100x100xf32>, vector<2x8xf32> } } return @@ -97,7 +92,6 @@ func @vector_load_2d() { // ----- -// CHECK: #[[perm_map:.*]] = affine_map<(d0, d1) -> (d0, d1)> // CHECK-LABEL: func @vector_store_2d func @vector_store_2d() { %0 = alloc() : memref<100x100xf32> @@ -109,7 +103,7 @@ func @vector_store_2d() { // CHECK: %[[val:.*]] = constant dense // CHECK: scf.for %[[i0:.*]] = // CHECK: scf.for %[[i1:.*]] = -// CHECK-NEXT: vector.transfer_write %[[val]], %[[buf]][%[[i0]], %[[i1]]] {permutation_map = #[[perm_map]]} : vector<2x8xf32>, memref<100x100xf32> +// CHECK-NEXT: vector.transfer_write %[[val]], %[[buf]][%[[i0]], %[[i1]]] : vector<2x8xf32>, memref<100x100xf32> } } return diff --git a/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir b/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir index 1c23072b61092a..26e3e9dbe2b1e5 100644 --- a/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir +++ b/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir @@ -918,6 +918,24 @@ func @transfer_read_1d_non_zero_addrspace(%A : memref, %base: index) - // CHECK: %[[vecPtr_b:.*]] = llvm.addrspacecast %[[gep_b]] : // CHECK-SAME: !llvm<"float addrspace(3)*"> to !llvm<"<17 x float>*"> +func @transfer_read_1d_not_masked(%A : memref, %base: index) -> vector<17xf32> { + %f7 = constant 7.0: f32 + %f = vector.transfer_read %A[%base], %f7 {masked = [false]} : + memref, vector<17xf32> + return %f: vector<17xf32> +} +// CHECK-LABEL: func @transfer_read_1d_not_masked +// CHECK-SAME: %[[BASE:[a-zA-Z0-9]*]]: !llvm.i64) -> !llvm<"<17 x float>"> +// +// 1. Bitcast to vector form. +// CHECK: %[[gep:.*]] = llvm.getelementptr {{.*}} : +// CHECK-SAME: (!llvm<"float*">, !llvm.i64) -> !llvm<"float*"> +// CHECK: %[[vecPtr:.*]] = llvm.bitcast %[[gep]] : +// CHECK-SAME: !llvm<"float*"> to !llvm<"<17 x float>*"> +// +// 2. Rewrite as a load. 
+// CHECK: %[[loaded:.*]] = llvm.load %[[vecPtr]] : !llvm<"<17 x float>*"> + func @genbool_1d() -> vector<8xi1> { %0 = vector.constant_mask [4] : vector<8xi1> return %0 : vector<8xi1> diff --git a/mlir/test/Conversion/VectorToLoops/vector-to-loops.mlir b/mlir/test/Conversion/VectorToLoops/vector-to-loops.mlir index 491196c91efb0f..c0bc5542e21d2a 100644 --- a/mlir/test/Conversion/VectorToLoops/vector-to-loops.mlir +++ b/mlir/test/Conversion/VectorToLoops/vector-to-loops.mlir @@ -220,16 +220,14 @@ func @transfer_read_progressive(%A : memref, %base: index) -> vector<17 // CHECK: %[[cst:.*]] = constant 7.000000e+00 : f32 %f7 = constant 7.0: f32 - // CHECK-DAG: %[[cond0:.*]] = constant 1 : i1 // CHECK-DAG: %[[splat:.*]] = constant dense<7.000000e+00> : vector<15xf32> // CHECK-DAG: %[[alloc:.*]] = alloc() : memref<17xvector<15xf32>> // CHECK-DAG: %[[dim:.*]] = dim %[[A]], 0 : memref // CHECK: affine.for %[[I:.*]] = 0 to 17 { // CHECK: %[[add:.*]] = affine.apply #[[MAP0]](%[[I]])[%[[base]]] - // CHECK: %[[cmp:.*]] = cmpi "slt", %[[add]], %[[dim]] : index - // CHECK: %[[cond1:.*]] = and %[[cmp]], %[[cond0]] : i1 + // CHECK: %[[cond1:.*]] = cmpi "slt", %[[add]], %[[dim]] : index // CHECK: scf.if %[[cond1]] { - // CHECK: %[[vec_1d:.*]] = vector.transfer_read %[[A]][%[[add]], %[[base]]], %[[cst]] {permutation_map = #[[MAP1]]} : memref, vector<15xf32> + // CHECK: %[[vec_1d:.*]] = vector.transfer_read %[[A]][%[[add]], %[[base]]], %[[cst]] : memref, vector<15xf32> // CHECK: store %[[vec_1d]], %[[alloc]][%[[I]]] : memref<17xvector<15xf32>> // CHECK: } else { // CHECK: store %[[splat]], %[[alloc]][%[[I]]] : memref<17xvector<15xf32>> @@ -253,7 +251,6 @@ func @transfer_read_progressive(%A : memref, %base: index) -> vector<17 // CHECK-SAME: %[[base:[a-zA-Z0-9]+]]: index, // CHECK-SAME: %[[vec:[a-zA-Z0-9]+]]: vector<17x15xf32> func @transfer_write_progressive(%A : memref, %base: index, %vec: vector<17x15xf32>) { - // CHECK: %[[cond0:.*]] = constant 1 : i1 // CHECK: %[[alloc:.*]] = alloc() : memref<17xvector<15xf32>> // CHECK: %[[vmemref:.*]] = vector.type_cast %[[alloc]] : memref<17xvector<15xf32>> to memref> // CHECK: store %[[vec]], %[[vmemref]][] : memref> @@ -261,13 +258,35 @@ func @transfer_write_progressive(%A : memref, %base: index, %vec: vecto // CHECK: affine.for %[[I:.*]] = 0 to 17 { // CHECK: %[[add:.*]] = affine.apply #[[MAP0]](%[[I]])[%[[base]]] // CHECK: %[[cmp:.*]] = cmpi "slt", %[[add]], %[[dim]] : index - // CHECK: %[[cond1:.*]] = and %[[cmp]], %[[cond0]] : i1 - // CHECK: scf.if %[[cond1]] { + // CHECK: scf.if %[[cmp]] { // CHECK: %[[vec_1d:.*]] = load %0[%[[I]]] : memref<17xvector<15xf32>> - // CHECK: vector.transfer_write %[[vec_1d]], %[[A]][%[[add]], %[[base]]] {permutation_map = #[[MAP1]]} : vector<15xf32>, memref + // CHECK: vector.transfer_write %[[vec_1d]], %[[A]][%[[add]], %[[base]]] : vector<15xf32>, memref // CHECK: } vector.transfer_write %vec, %A[%base, %base] {permutation_map = affine_map<(d0, d1) -> (d0, d1)>} : vector<17x15xf32>, memref return } + +// ----- + +// CHECK-DAG: #[[MAP0:.*]] = affine_map<(d0)[s0] -> (d0 + s0)> +// CHECK-DAG: #[[MAP1:.*]] = affine_map<(d0, d1) -> (d1)> + +// CHECK-LABEL: transfer_write_progressive_not_masked( +// CHECK-SAME: %[[A:[a-zA-Z0-9]+]]: memref, +// CHECK-SAME: %[[base:[a-zA-Z0-9]+]]: index, +// CHECK-SAME: %[[vec:[a-zA-Z0-9]+]]: vector<17x15xf32> +func @transfer_write_progressive_not_masked(%A : memref, %base: index, %vec: vector<17x15xf32>) { + // CHECK-NOT: scf.if + // CHECK-NEXT: %[[alloc:.*]] = alloc() : memref<17xvector<15xf32>> + // 
CHECK-NEXT: %[[vmemref:.*]] = vector.type_cast %[[alloc]] : memref<17xvector<15xf32>> to memref> + // CHECK-NEXT: store %[[vec]], %[[vmemref]][] : memref> + // CHECK-NEXT: affine.for %[[I:.*]] = 0 to 17 { + // CHECK-NEXT: %[[add:.*]] = affine.apply #[[MAP0]](%[[I]])[%[[base]]] + // CHECK-NEXT: %[[vec_1d:.*]] = load %0[%[[I]]] : memref<17xvector<15xf32>> + // CHECK-NEXT: vector.transfer_write %[[vec_1d]], %[[A]][%[[add]], %[[base]]] : vector<15xf32>, memref + vector.transfer_write %vec, %A[%base, %base] {masked = [false, false]} : + vector<17x15xf32>, memref + return +} diff --git a/mlir/test/Dialect/Affine/SuperVectorize/vectorize_1d.mlir b/mlir/test/Dialect/Affine/SuperVectorize/vectorize_1d.mlir index b577e229ba7639..10bf5009d5f630 100644 --- a/mlir/test/Dialect/Affine/SuperVectorize/vectorize_1d.mlir +++ b/mlir/test/Dialect/Affine/SuperVectorize/vectorize_1d.mlir @@ -2,7 +2,6 @@ // Permutation maps used in vectorization. // CHECK: #[[map_proj_d0d1_0:map[0-9]+]] = affine_map<(d0, d1) -> (0)> -// CHECK: #[[map_proj_d0d1_d1:map[0-9]+]] = affine_map<(d0, d1) -> (d1)> #map0 = affine_map<(d0) -> (d0)> #mapadd1 = affine_map<(d0) -> (d0 + 1)> @@ -13,7 +12,6 @@ // Maps introduced to vectorize fastest varying memory index. // CHECK-LABEL: func @vec1d_1 func @vec1d_1(%A : memref, %B : memref) { -// CHECK-DAG: %{{.*}} = constant 0.0{{.*}}: f32 // CHECK-DAG: %[[C0:[a-z0-9_]+]] = constant 0 : index // CHECK-DAG: [[ARG_M:%[0-9]+]] = dim %{{.*}}, 0 : memref // CHECK-DAG: [[ARG_N:%[0-9]+]] = dim %{{.*}}, 1 : memref @@ -22,10 +20,11 @@ func @vec1d_1(%A : memref, %B : memref) { %N = dim %A, 1 : memref %P = dim %B, 2 : memref %cst0 = constant 0 : index -// + // CHECK: for {{.*}} step 128 // CHECK-NEXT: %{{.*}} = affine.apply #map0(%[[C0]]) // CHECK-NEXT: %{{.*}} = affine.apply #map0(%[[C0]]) +// CHECK-NEXT: %{{.*}} = constant 0.0{{.*}}: f32 // CHECK-NEXT: {{.*}} = vector.transfer_read %{{.*}}[%{{.*}}, %{{.*}}], %{{.*}} {permutation_map = #[[map_proj_d0d1_0]]} : memref, vector<128xf32> affine.for %i0 = 0 to %M { // vectorized due to scalar -> vector %a0 = affine.load %A[%cst0, %cst0] : memref @@ -35,7 +34,6 @@ func @vec1d_1(%A : memref, %B : memref) { // CHECK-LABEL: func @vec1d_2 func @vec1d_2(%A : memref, %B : memref) { -// CHECK-DAG: %{{.*}} = constant 0.0{{.*}}: f32 // CHECK-DAG: %[[C0:[a-z0-9_]+]] = constant 0 : index // CHECK-DAG: [[ARG_M:%[0-9]+]] = dim %{{.*}}, 0 : memref // CHECK-DAG: [[ARG_N:%[0-9]+]] = dim %{{.*}}, 1 : memref @@ -46,7 +44,8 @@ func @vec1d_2(%A : memref, %B : memref) { %cst0 = constant 0 : index // // CHECK:for [[IV3:%[a-zA-Z0-9]+]] = 0 to [[ARG_M]] step 128 -// CHECK-NEXT: {{.*}} = vector.transfer_read %{{.*}}[%{{.*}}, %{{.*}}], %{{.*}} {permutation_map = #[[map_proj_d0d1_d1]]} : memref, vector<128xf32> +// CHECK-NEXT: %[[CST:.*]] = constant 0.0{{.*}}: f32 +// CHECK-NEXT: {{.*}} = vector.transfer_read %{{.*}}[%{{.*}}, %{{.*}}], %[[CST]] : memref, vector<128xf32> affine.for %i3 = 0 to %M { // vectorized %a3 = affine.load %A[%cst0, %i3] : memref } @@ -55,7 +54,6 @@ func @vec1d_2(%A : memref, %B : memref) { // CHECK-LABEL: func @vec1d_3 func @vec1d_3(%A : memref, %B : memref) { -// CHECK-DAG: %{{.*}} = constant 0.0{{.*}}: f32 // CHECK-DAG: %[[C0:[a-z0-9_]+]] = constant 0 : index // CHECK-DAG: [[ARG_M:%[0-9]+]] = dim %arg0, 0 : memref // CHECK-DAG: [[ARG_N:%[0-9]+]] = dim %arg0, 1 : memref @@ -69,7 +67,8 @@ func @vec1d_3(%A : memref, %B : memref) { // CHECK-NEXT: for [[IV9:%[arg0-9]*]] = 0 to [[ARG_N]] { // CHECK-NEXT: %[[APP9_0:[0-9]+]] = affine.apply {{.*}}([[IV9]], [[IV8]]) // 
CHECK-NEXT: %[[APP9_1:[0-9]+]] = affine.apply {{.*}}([[IV9]], [[IV8]]) -// CHECK-NEXT: {{.*}} = vector.transfer_read %{{.*}}[%[[APP9_0]], %[[APP9_1]]], %{{.*}} {permutation_map = #[[map_proj_d0d1_d1]]} : memref, vector<128xf32> +// CHECK-NEXT: %[[CST:.*]] = constant 0.0{{.*}}: f32 +// CHECK-NEXT: {{.*}} = vector.transfer_read %{{.*}}[%[[APP9_0]], %[[APP9_1]]], %[[CST]] : memref, vector<128xf32> affine.for %i8 = 0 to %M { // vectorized affine.for %i9 = 0 to %N { %a9 = affine.load %A[%i9, %i8 + %i9] : memref @@ -87,31 +86,31 @@ func @vector_add_2d(%M : index, %N : index) -> f32 { %f2 = constant 2.0 : f32 affine.for %i0 = 0 to %M { affine.for %i1 = 0 to %N { - // CHECK: [[C1:%.*]] = constant dense<1.000000e+00> : vector<128xf32> - // CHECK: vector.transfer_write [[C1]], {{.*}} {permutation_map = #[[map_proj_d0d1_d1]]} : vector<128xf32>, memref + // CHECK: %[[C1:.*]] = constant dense<1.000000e+00> : vector<128xf32> + // CHECK: vector.transfer_write %[[C1]], {{.*}} : vector<128xf32>, memref // non-scoped %f1 affine.store %f1, %A[%i0, %i1] : memref } } affine.for %i2 = 0 to %M { affine.for %i3 = 0 to %N { - // CHECK: [[C3:%.*]] = constant dense<2.000000e+00> : vector<128xf32> - // CHECK: vector.transfer_write [[C3]], {{.*}} {permutation_map = #[[map_proj_d0d1_d1]]} : vector<128xf32>, memref + // CHECK: %[[C3:.*]] = constant dense<2.000000e+00> : vector<128xf32> + // CHECK: vector.transfer_write %[[C3]], {{.*}} : vector<128xf32>, memref // non-scoped %f2 affine.store %f2, %B[%i2, %i3] : memref } } affine.for %i4 = 0 to %M { affine.for %i5 = 0 to %N { - // CHECK: [[A5:%.*]] = vector.transfer_read %{{.*}}[{{.*}}], %{{.*}} {permutation_map = #[[map_proj_d0d1_d1]]} : memref, vector<128xf32> - // CHECK: [[B5:%.*]] = vector.transfer_read %{{.*}}[{{.*}}], %{{.*}} {permutation_map = #[[map_proj_d0d1_d1]]} : memref, vector<128xf32> - // CHECK: [[S5:%.*]] = addf [[A5]], [[B5]] : vector<128xf32> - // CHECK: [[SPLAT1:%.*]] = constant dense<1.000000e+00> : vector<128xf32> - // CHECK: [[S6:%.*]] = addf [[S5]], [[SPLAT1]] : vector<128xf32> - // CHECK: [[SPLAT2:%.*]] = constant dense<2.000000e+00> : vector<128xf32> - // CHECK: [[S7:%.*]] = addf [[S5]], [[SPLAT2]] : vector<128xf32> - // CHECK: [[S8:%.*]] = addf [[S7]], [[S6]] : vector<128xf32> - // CHECK: vector.transfer_write [[S8]], {{.*}} {permutation_map = #[[map_proj_d0d1_d1]]} : vector<128xf32>, memref + // CHECK: %[[A5:.*]] = vector.transfer_read %{{.*}}[{{.*}}], %{{[a-zA-Z0-9_]*}} : memref, vector<128xf32> + // CHECK: %[[B5:.*]] = vector.transfer_read %{{.*}}[{{.*}}], %{{[a-zA-Z0-9_]*}} : memref, vector<128xf32> + // CHECK: %[[S5:.*]] = addf %[[A5]], %[[B5]] : vector<128xf32> + // CHECK: %[[SPLAT1:.*]] = constant dense<1.000000e+00> : vector<128xf32> + // CHECK: %[[S6:.*]] = addf %[[S5]], %[[SPLAT1]] : vector<128xf32> + // CHECK: %[[SPLAT2:.*]] = constant dense<2.000000e+00> : vector<128xf32> + // CHECK: %[[S7:.*]] = addf %[[S5]], %[[SPLAT2]] : vector<128xf32> + // CHECK: %[[S8:.*]] = addf %[[S7]], %[[S6]] : vector<128xf32> + // CHECK: vector.transfer_write %[[S8]], {{.*}} : vector<128xf32>, memref %a5 = affine.load %A[%i4, %i5] : memref %b5 = affine.load %B[%i4, %i5] : memref %s5 = addf %a5, %b5 : f32 @@ -168,7 +167,6 @@ func @vec_rejected_2(%A : memref, %B : memref) { // CHECK-LABEL: func @vec_rejected_3 func @vec_rejected_3(%A : memref, %B : memref) { -// CHECK-DAG: %{{.*}} = constant 0.0{{.*}}: f32 // CHECK-DAG: [[C0:%[a-z0-9_]+]] = constant 0 : index // CHECK-DAG: [[ARG_M:%[0-9]+]] = dim %{{.*}}, 0 : memref // CHECK-DAG: [[ARG_N:%[0-9]+]] = dim 
%{{.*}}, 1 : memref @@ -180,7 +178,8 @@ func @vec_rejected_3(%A : memref, %B : memref) { // // CHECK:for [[IV4:%[arg0-9]+]] = 0 to [[ARG_M]] step 128 { // CHECK-NEXT: for [[IV5:%[arg0-9]*]] = 0 to [[ARG_N]] { -// CHECK-NEXT: {{.*}} = vector.transfer_read %{{.*}}[%{{.*}}, %{{.*}}], %{{.*}} {permutation_map = #[[map_proj_d0d1_d1]]} : memref, vector<128xf32> +// CHECK-NEXT: %{{.*}} = constant 0.0{{.*}}: f32 +// CHECK-NEXT: {{.*}} = vector.transfer_read %{{.*}}[%{{.*}}, %{{.*}}], %{{[a-zA-Z0-9_]*}} : memref, vector<128xf32> affine.for %i4 = 0 to %M { // vectorized affine.for %i5 = 0 to %N { // not vectorized, would vectorize with --test-fastest-varying=1 %a5 = affine.load %A[%i5, %i4] : memref @@ -277,7 +276,6 @@ func @vec_rejected_7(%A : memref, %B : memref) { // CHECK-LABEL: func @vec_rejected_8 func @vec_rejected_8(%A : memref, %B : memref) { -// CHECK-DAG: %{{.*}} = constant 0.0{{.*}}: f32 // CHECK-DAG: %[[C0:[a-z0-9_]+]] = constant 0 : index // CHECK-DAG: [[ARG_M:%[0-9]+]] = dim %{{.*}}, 0 : memref // CHECK-DAG: [[ARG_N:%[0-9]+]] = dim %{{.*}}, 1 : memref @@ -291,6 +289,7 @@ func @vec_rejected_8(%A : memref, %B : memref) { // CHECK: for [[IV18:%[a-zA-Z0-9]+]] = 0 to [[ARG_M]] step 128 // CHECK: %{{.*}} = affine.apply #map0(%{{.*}}) // CHECK: %{{.*}} = affine.apply #map0(%{{.*}}) +// CHECK: %{{.*}} = constant 0.0{{.*}}: f32 // CHECK: {{.*}} = vector.transfer_read %{{.*}}[%{{.*}}, %{{.*}}], %{{.*}} {permutation_map = #[[map_proj_d0d1_0]]} : memref, vector<128xf32> affine.for %i17 = 0 to %M { // not vectorized, the 1-D pattern that matched %{{.*}} in DFS post-order prevents vectorizing %{{.*}} affine.for %i18 = 0 to %M { // vectorized due to scalar -> vector @@ -302,7 +301,6 @@ func @vec_rejected_8(%A : memref, %B : memref) { // CHECK-LABEL: func @vec_rejected_9 func @vec_rejected_9(%A : memref, %B : memref) { -// CHECK-DAG: %{{.*}} = constant 0.0{{.*}}: f32 // CHECK-DAG: %[[C0:[a-z0-9_]+]] = constant 0 : index // CHECK-DAG: [[ARG_M:%[0-9]+]] = dim %{{.*}}, 0 : memref // CHECK-DAG: [[ARG_N:%[0-9]+]] = dim %{{.*}}, 1 : memref @@ -316,6 +314,7 @@ func @vec_rejected_9(%A : memref, %B : memref) { // CHECK: for [[IV18:%[a-zA-Z0-9]+]] = 0 to [[ARG_M]] step 128 // CHECK: %{{.*}} = affine.apply #map0(%{{.*}}) // CHECK-NEXT: %{{.*}} = affine.apply #map0(%{{.*}}) +// CHECK-NEXT: %{{.*}} = constant 0.0{{.*}}: f32 // CHECK-NEXT: {{.*}} = vector.transfer_read %{{.*}}[%{{.*}}, %{{.*}}], %{{.*}} {permutation_map = #[[map_proj_d0d1_0]]} : memref, vector<128xf32> affine.for %i17 = 0 to %M { // not vectorized, the 1-D pattern that matched %i18 in DFS post-order prevents vectorizing %{{.*}} affine.for %i18 = 0 to %M { // vectorized due to scalar -> vector diff --git a/mlir/test/Dialect/Affine/SuperVectorize/vectorize_2d.mlir b/mlir/test/Dialect/Affine/SuperVectorize/vectorize_2d.mlir index 884907024bb115..3352644da63d8b 100644 --- a/mlir/test/Dialect/Affine/SuperVectorize/vectorize_2d.mlir +++ b/mlir/test/Dialect/Affine/SuperVectorize/vectorize_2d.mlir @@ -54,7 +54,7 @@ func @vector_add_2d(%M : index, %N : index) -> f32 { affine.for %i0 = 0 to %M { affine.for %i1 = 0 to %N { // CHECK: [[C1:%.*]] = constant dense<1.000000e+00> : vector<32x256xf32> - // CHECK: vector.transfer_write [[C1]], {{.*}} {permutation_map = #[[map_id2]]} : vector<32x256xf32>, memref + // CHECK: vector.transfer_write [[C1]], {{.*}} : vector<32x256xf32>, memref // non-scoped %f1 affine.store %f1, %A[%i0, %i1] : memref } @@ -62,22 +62,22 @@ func @vector_add_2d(%M : index, %N : index) -> f32 { affine.for %i2 = 0 to %M { affine.for %i3 = 0 to 
%N { // CHECK: [[C3:%.*]] = constant dense<2.000000e+00> : vector<32x256xf32> - // CHECK: vector.transfer_write [[C3]], {{.*}} {permutation_map = #[[map_id2]]} : vector<32x256xf32>, memref + // CHECK: vector.transfer_write [[C3]], {{.*}} : vector<32x256xf32>, memref // non-scoped %f2 affine.store %f2, %B[%i2, %i3] : memref } } affine.for %i4 = 0 to %M { affine.for %i5 = 0 to %N { - // CHECK: [[A5:%.*]] = vector.transfer_read %{{.*}}[{{.*}}], %{{.*}} {permutation_map = #[[map_id2]]} : memref, vector<32x256xf32> - // CHECK: [[B5:%.*]] = vector.transfer_read %{{.*}}[{{.*}}], %{{.*}} {permutation_map = #[[map_id2]]} : memref, vector<32x256xf32> + // CHECK: [[A5:%.*]] = vector.transfer_read %{{.*}}[{{.*}}], %{{.*}} : memref, vector<32x256xf32> + // CHECK: [[B5:%.*]] = vector.transfer_read %{{.*}}[{{.*}}], %{{.*}} : memref, vector<32x256xf32> // CHECK: [[S5:%.*]] = addf [[A5]], [[B5]] : vector<32x256xf32> // CHECK: [[SPLAT1:%.*]] = constant dense<1.000000e+00> : vector<32x256xf32> // CHECK: [[S6:%.*]] = addf [[S5]], [[SPLAT1]] : vector<32x256xf32> // CHECK: [[SPLAT2:%.*]] = constant dense<2.000000e+00> : vector<32x256xf32> // CHECK: [[S7:%.*]] = addf [[S5]], [[SPLAT2]] : vector<32x256xf32> // CHECK: [[S8:%.*]] = addf [[S7]], [[S6]] : vector<32x256xf32> - // CHECK: vector.transfer_write [[S8]], {{.*}} {permutation_map = #[[map_id2]]} : vector<32x256xf32>, memref + // CHECK: vector.transfer_write [[S8]], {{.*}} : vector<32x256xf32>, memref // %a5 = affine.load %A[%i4, %i5] : memref %b5 = affine.load %B[%i4, %i5] : memref @@ -110,7 +110,7 @@ func @vectorize_matmul(%arg0: memref, %arg1: memref, %arg2: me // VECT: {{.*}} #[[map_id1]](%[[M]]) step 4 { // VECT-NEXT: {{.*}} #[[map_id1]](%[[N]]) step 8 { // VECT: %[[VC0:.*]] = constant dense<0.000000e+00> : vector<4x8xf32> - // VECT-NEXT: vector.transfer_write %[[VC0]], %{{.*}}[%{{.*}}, %{{.*}}] {permutation_map = #[[map_id2]]} : vector<4x8xf32>, memref + // VECT-NEXT: vector.transfer_write %[[VC0]], %{{.*}}[%{{.*}}, %{{.*}}] : vector<4x8xf32>, memref affine.for %i0 = affine_map<(d0) -> (d0)>(%c0) to affine_map<(d0) -> (d0)>(%M) { affine.for %i1 = affine_map<(d0) -> (d0)>(%c0) to affine_map<(d0) -> (d0)>(%N) { %cst = constant 0.000000e+00 : f32 @@ -120,12 +120,12 @@ func @vectorize_matmul(%arg0: memref, %arg1: memref, %arg2: me // VECT: affine.for %[[I2:.*]] = #[[map_id1]](%[[C0]]) to #[[map_id1]](%[[M]]) step 4 { // VECT-NEXT: affine.for %[[I3:.*]] = #[[map_id1]](%[[C0]]) to #[[map_id1]](%[[N]]) step 8 { // VECT-NEXT: affine.for %[[I4:.*]] = #map5(%[[C0]]) to #[[map_id1]](%[[K]]) { - // VECT-NEXT: %[[A:.*]] = vector.transfer_read %{{.*}}[%[[I4]], %[[I3]]], %{{.*}} {permutation_map = #[[map_proj_d0d1_zerod1]]} : memref, vector<4x8xf32> - // VECT-NEXT: %[[B:.*]] = vector.transfer_read %{{.*}}[%[[I2]], %[[I4]]], %{{.*}} {permutation_map = #[[map_proj_d0d1_d0zero]]} : memref, vector<4x8xf32> + // VECT: %[[A:.*]] = vector.transfer_read %{{.*}}[%[[I4]], %[[I3]]], %{{.*}} {permutation_map = #[[map_proj_d0d1_zerod1]]} : memref, vector<4x8xf32> + // VECT: %[[B:.*]] = vector.transfer_read %{{.*}}[%[[I2]], %[[I4]]], %{{.*}} {permutation_map = #[[map_proj_d0d1_d0zero]]} : memref, vector<4x8xf32> // VECT-NEXT: %[[C:.*]] = mulf %[[B]], %[[A]] : vector<4x8xf32> - // VECT-NEXT: %[[D:.*]] = vector.transfer_read %{{.*}}[%[[I2]], %[[I3]]], %{{.*}} {permutation_map = #[[map_id2]]} : memref, vector<4x8xf32> + // VECT: %[[D:.*]] = vector.transfer_read %{{.*}}[%[[I2]], %[[I3]]], %{{.*}} : memref, vector<4x8xf32> // VECT-NEXT: %[[E:.*]] = addf %[[D]], %[[C]] : vector<4x8xf32> 
- // VECT-NEXT: vector.transfer_write %[[E]], %{{.*}}[%[[I2]], %[[I3]]] {permutation_map = #[[map_id2]]} : vector<4x8xf32>, memref + // VECT: vector.transfer_write %[[E]], %{{.*}}[%[[I2]], %[[I3]]] : vector<4x8xf32>, memref affine.for %i2 = affine_map<(d0) -> (d0)>(%c0) to affine_map<(d0) -> (d0)>(%M) { affine.for %i3 = affine_map<(d0) -> (d0)>(%c0) to affine_map<(d0) -> (d0)>(%N) { affine.for %i4 = affine_map<(d0) -> (d0)>(%c0) to affine_map<(d0) -> (d0)>(%K) { diff --git a/mlir/test/Dialect/Affine/SuperVectorize/vectorize_3d.mlir b/mlir/test/Dialect/Affine/SuperVectorize/vectorize_3d.mlir index 2980ee30d90868..5b6517ea390e5d 100644 --- a/mlir/test/Dialect/Affine/SuperVectorize/vectorize_3d.mlir +++ b/mlir/test/Dialect/Affine/SuperVectorize/vectorize_3d.mlir @@ -12,7 +12,7 @@ func @vec3d(%A : memref) { // CHECK: affine.for %{{.*}} = 0 to %{{.*}} step 32 { // CHECK: affine.for %{{.*}} = 0 to %{{.*}} step 64 { // CHECK: affine.for %{{.*}} = 0 to %{{.*}} step 256 { - // CHECK: %{{.*}} = vector.transfer_read %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}], %{{.*}} {permutation_map = #[[map_proj_d0d1d2_d0d1d2]]} : memref, vector<32x64x256xf32> + // CHECK: %{{.*}} = vector.transfer_read %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}], %{{.*}} : memref, vector<32x64x256xf32> affine.for %t0 = 0 to %0 { affine.for %t1 = 0 to %0 { affine.for %i0 = 0 to %0 { diff --git a/mlir/test/Dialect/Linalg/promote.mlir b/mlir/test/Dialect/Linalg/promote.mlir index 64534733846a13..27364b05f3bd90 100644 --- a/mlir/test/Dialect/Linalg/promote.mlir +++ b/mlir/test/Dialect/Linalg/promote.mlir @@ -56,14 +56,11 @@ func @matmul_f32(%A: memref, %M: index, %N: index, %K: index) { // DYNAMIC: std.view %{{.*}}[{{.*}}][{{.*}}] : memref to memref // CHECK: %[[partialC:.*]] = subview %[[fullC]]{{.*}} : memref to memref -// CHECK: linalg.fill(%[[fullA]], {{.*}}) : memref, f32 -// CHECK: linalg.fill(%[[fullB]], {{.*}}) : memref, f32 -// CHECK: linalg.fill(%[[fullC]], {{.*}}) : memref, f32 // CHECK: linalg.copy(%[[vA]], %[[partialA]]) : memref, memref // CHECK: linalg.copy(%[[vB]], %[[partialB]]) : memref, memref // CHECK: linalg.copy(%[[vC]], %[[partialC]]) : memref, memref // -// CHECK: linalg.matmul(%[[fullA]], %[[fullB]], %[[fullC]]) : memref, memref, memref +// CHECK: linalg.matmul(%[[partialA]], %[[partialB]], %[[partialC]]) : memref, memref, memref // // CHECK: linalg.copy(%[[partialC]], %[[vC]]) : memref, memref // @@ -121,14 +118,11 @@ func @matmul_f64(%A: memref, %M: index, %N: index, %K: index) { // DYNAMIC: std.view %{{.*}}[{{.*}}][{{.*}}] : memref to memref // CHECK: %[[partialC_f64:.*]] = subview %[[fullC_f64]][%{{.*}}, %{{.*}}] : memref to memref -// CHECK: linalg.fill(%[[fullA_f64]], {{.*}}) : memref, f64 -// CHECK: linalg.fill(%[[fullB_f64]], {{.*}}) : memref, f64 -// CHECK: linalg.fill(%[[fullC_f64]], {{.*}}) : memref, f64 // CHECK: linalg.copy(%[[vA_f64]], %[[partialA_f64]]) : memref, memref // CHECK: linalg.copy(%[[vB_f64]], %[[partialB_f64]]) : memref, memref // CHECK: linalg.copy(%[[vC_f64]], %[[partialC_f64]]) : memref, memref // -// CHECK: linalg.matmul(%[[fullA_f64]], %[[fullB_f64]], %[[fullC_f64]]) : memref, memref, memref +// CHECK: linalg.matmul(%[[partialA_f64]], %[[partialB_f64]], %[[partialC_f64]]) : memref, memref, memref // // CHECK: linalg.copy(%[[partialC_f64]], %[[vC_f64]]) : memref, memref // @@ -186,14 +180,11 @@ func @matmul_i32(%A: memref, %M: index, %N: index, %K: index) { // DYNAMIC: std.view %{{.*}}[{{.*}}][{{.*}}] : memref to memref // CHECK: %[[partialC_i32:.*]] = subview %[[fullC_i32]][%{{.*}}, %{{.*}}] : 
memref to memref -// CHECK: linalg.fill(%[[fullA_i32]], {{.*}}) : memref, i32 -// CHECK: linalg.fill(%[[fullB_i32]], {{.*}}) : memref, i32 -// CHECK: linalg.fill(%[[fullC_i32]], {{.*}}) : memref, i32 // CHECK: linalg.copy(%[[vA_i32]], %[[partialA_i32]]) : memref, memref // CHECK: linalg.copy(%[[vB_i32]], %[[partialB_i32]]) : memref, memref // CHECK: linalg.copy(%[[vC_i32]], %[[partialC_i32]]) : memref, memref // -// CHECK: linalg.matmul(%[[fullA_i32]], %[[fullB_i32]], %[[fullC_i32]]) : memref, memref, memref +// CHECK: linalg.matmul(%[[partialA_i32]], %[[partialB_i32]], %[[partialC_i32]]) : memref, memref, memref // // CHECK: linalg.copy(%[[partialC_i32]], %[[vC_i32]]) : memref, memref // diff --git a/mlir/test/Dialect/Vector/invalid.mlir b/mlir/test/Dialect/Vector/invalid.mlir index ab50c566f9b3a1..c18cf38edfc90f 100644 --- a/mlir/test/Dialect/Vector/invalid.mlir +++ b/mlir/test/Dialect/Vector/invalid.mlir @@ -238,17 +238,28 @@ func @outerproduct_operand_3_result_type_generic(%arg0: vector<4xf32>, %arg1: ve func @test_vector.transfer_read(%arg0: memref) { %c3 = constant 3 : index %cst = constant 3.0 : f32 - // expected-error@+1 {{two types required}} + // expected-error@+1 {{requires two types}} %0 = vector.transfer_read %arg0[%c3, %c3], %cst { permutation_map = affine_map<()->(0)> } : memref } // ----- -func @test_vector.transfer_read(%arg0: memref) { +func @test_vector.transfer_read(%arg0: vector<4x3xf32>) { %c3 = constant 3 : index - %cst = constant 3.0 : f32 - // expected-error@+1 {{requires 2 indices}} - %0 = vector.transfer_read %arg0[%c3, %c3, %c3], %cst { permutation_map = affine_map<()->(0)> } : memref, vector<128xf32> + %f0 = constant 0.0 : f32 + %vf0 = splat %f0 : vector<4x3xf32> + // expected-error@+1 {{ requires memref type}} + %0 = vector.transfer_read %arg0[%c3, %c3], %vf0 : vector<4x3xf32>, vector<1x1x2x3xf32> +} + +// ----- + +func @test_vector.transfer_read(%arg0: memref<4x3xf32>) { + %c3 = constant 3 : index + %f0 = constant 0.0 : f32 + %vf0 = splat %f0 : vector<4x3xf32> + // expected-error@+1 {{ requires vector type}} + %0 = vector.transfer_read %arg0[%c3, %c3], %vf0 : memref<4x3xf32>, f32 } // ----- @@ -256,8 +267,8 @@ func @test_vector.transfer_read(%arg0: memref) { func @test_vector.transfer_read(%arg0: memref) { %c3 = constant 3 : index %cst = constant 3.0 : f32 - // expected-error@+1 {{requires attribute 'permutation_map'}} - %0 = vector.transfer_read %arg0[%c3, %c3], %cst {perm = affine_map<(d0)->(d0)>} : memref, vector<128xf32> + // expected-error@+1 {{requires 2 indices}} + %0 = vector.transfer_read %arg0[%c3, %c3, %c3], %cst { permutation_map = affine_map<()->(0)> } : memref, vector<128xf32> } // ----- @@ -337,11 +348,41 @@ func @test_vector.transfer_read(%arg0: memref>) { // ----- +func @test_vector.transfer_read(%arg0: memref>) { + %c3 = constant 3 : index + %f0 = constant 0.0 : f32 + %vf0 = splat %f0 : vector<2x3xf32> + // expected-error@+1 {{ expects the optional masked attr of same rank as permutation_map results: affine_map<(d0, d1) -> (d0, d1)>}} + %0 = vector.transfer_read %arg0[%c3, %c3], %vf0 {masked = [false], permutation_map = affine_map<(d0, d1)->(d0, d1)>} : memref>, vector<1x1x2x3xf32> +} + +// ----- + func @test_vector.transfer_write(%arg0: memref) { %c3 = constant 3 : index - %cst = constant dense<3.0> : vector<128 x f32> - // expected-error@+1 {{expected 5 operand types but had 4}} - %0 = "vector.transfer_write"(%cst, %arg0, %c3, %c3, %c3) {permutation_map = affine_map<()->(0)>} : (vector<128xf32>, memref, index, index) -> () + %cst = 
constant 3.0 : f32 + // expected-error@+1 {{requires two types}} + vector.transfer_write %arg0, %arg0[%c3, %c3] : memref +} + +// ----- + +func @test_vector.transfer_write(%arg0: memref>) { + %c3 = constant 3 : index + %f0 = constant 0.0 : f32 + %vf0 = splat %f0 : vector<4x3xf32> + // expected-error@+1 {{ requires vector type}} + vector.transfer_write %arg0, %arg0[%c3, %c3] : memref>, vector<4x3xf32> +} + +// ----- + +func @test_vector.transfer_write(%arg0: vector<4x3xf32>) { + %c3 = constant 3 : index + %f0 = constant 0.0 : f32 + %vf0 = splat %f0 : vector<4x3xf32> + // expected-error@+1 {{ requires memref type}} + vector.transfer_write %arg0, %arg0[%c3, %c3] : vector<4x3xf32>, f32 } // ----- @@ -349,8 +390,8 @@ func @test_vector.transfer_write(%arg0: memref) { func @test_vector.transfer_write(%arg0: memref) { %c3 = constant 3 : index %cst = constant dense<3.0> : vector<128 x f32> - // expected-error@+1 {{requires 2 indices}} - vector.transfer_write %cst, %arg0[%c3, %c3, %c3] {permutation_map = affine_map<()->(0)>} : vector<128xf32>, memref + // expected-error@+1 {{expected 5 operand types but had 4}} + %0 = "vector.transfer_write"(%cst, %arg0, %c3, %c3, %c3) {permutation_map = affine_map<()->(0)>} : (vector<128xf32>, memref, index, index) -> () } // ----- @@ -358,8 +399,8 @@ func @test_vector.transfer_write(%arg0: memref) { func @test_vector.transfer_write(%arg0: memref) { %c3 = constant 3 : index %cst = constant dense<3.0> : vector<128 x f32> - // expected-error@+1 {{requires attribute 'permutation_map'}} - vector.transfer_write %cst, %arg0[%c3, %c3] {perm = affine_map<(d0)->(d0)>} : vector<128xf32>, memref + // expected-error@+1 {{requires 2 indices}} + vector.transfer_write %cst, %arg0[%c3, %c3, %c3] {permutation_map = affine_map<()->(0)>} : vector<128xf32>, memref } // ----- diff --git a/mlir/test/Dialect/Vector/ops.mlir b/mlir/test/Dialect/Vector/ops.mlir index 73690d6ebcc86a..c194cbe2381172 100644 --- a/mlir/test/Dialect/Vector/ops.mlir +++ b/mlir/test/Dialect/Vector/ops.mlir @@ -20,15 +20,19 @@ func @vector_transfer_ops(%arg0: memref, %2 = vector.transfer_read %arg0[%c3, %c3], %cst {permutation_map = affine_map<(d0, d1)->(d0)>} : memref, vector<128xf32> // CHECK: vector.transfer_read %3 = vector.transfer_read %arg0[%c3, %c3], %cst {permutation_map = affine_map<(d0, d1)->(d1)>} : memref, vector<128xf32> - // CHECK: vector.transfer_read %{{.*}}[%[[C3]], %[[C3]]], %{{.*}} {permutation_map = #[[MAP0]]} : memref>, vector<1x1x4x3xf32> + // CHECK: vector.transfer_read %{{.*}}[%[[C3]], %[[C3]]], %{{.*}} : memref>, vector<1x1x4x3xf32> %4 = vector.transfer_read %arg1[%c3, %c3], %vf0 {permutation_map = affine_map<(d0, d1)->(d0, d1)>} : memref>, vector<1x1x4x3xf32> + // CHECK: vector.transfer_read %{{.*}}[%[[C3]], %[[C3]]], %{{.*}} {masked = [true, false]} : memref>, vector<1x1x4x3xf32> + %5 = vector.transfer_read %arg1[%c3, %c3], %vf0 {masked = [true, false]} : memref>, vector<1x1x4x3xf32> // CHECK: vector.transfer_write vector.transfer_write %0, %arg0[%c3, %c3] {permutation_map = affine_map<(d0, d1)->(d0)>} : vector<128xf32>, memref // CHECK: vector.transfer_write vector.transfer_write %1, %arg0[%c3, %c3] {permutation_map = affine_map<(d0, d1)->(d1, d0)>} : vector<3x7xf32>, memref - // CHECK: vector.transfer_write %{{.*}}, %{{.*}}[%[[C3]], %[[C3]]] {permutation_map = #[[MAP0]]} : vector<1x1x4x3xf32>, memref> + // CHECK: vector.transfer_write %{{.*}}, %{{.*}}[%[[C3]], %[[C3]]] : vector<1x1x4x3xf32>, memref> vector.transfer_write %4, %arg1[%c3, %c3] {permutation_map = affine_map<(d0, d1)->(d0, 
d1)>} : vector<1x1x4x3xf32>, memref> + // CHECK: vector.transfer_write %{{.*}}, %{{.*}}[%[[C3]], %[[C3]]] : vector<1x1x4x3xf32>, memref> + vector.transfer_write %5, %arg1[%c3, %c3] {masked = [true, true]} : vector<1x1x4x3xf32>, memref> return } diff --git a/mlir/test/Dialect/Vector/vector-transforms.mlir b/mlir/test/Dialect/Vector/vector-transforms.mlir index 2e4e9033fb81e6..8de153adf73108 100644 --- a/mlir/test/Dialect/Vector/vector-transforms.mlir +++ b/mlir/test/Dialect/Vector/vector-transforms.mlir @@ -231,26 +231,26 @@ func @contraction4x4_ikj(%arg0 : vector<4x2xf32>, %arg1 : vector<2x4xf32>, // Check LHS vector.transfer read is split for each user. -// CHECK: %[[VTR0:.*]] = vector.transfer_read %{{.*}}[%[[C0]], %[[C0]]], %{{.*}} {permutation_map = #[[MAP0]]} : memref<4x2xf32>, vector<2x2xf32> -// CHECK-NEXT: %[[VTR1:.*]] = vector.transfer_read %{{.*}}[%[[C2]], %[[C0]]], %{{.*}} {permutation_map = #[[MAP0]]} : memref<4x2xf32>, vector<2x2xf32> +// CHECK: %[[VTR0:.*]] = vector.transfer_read %{{.*}}[%[[C0]], %[[C0]]], %{{.*}} : memref<4x2xf32>, vector<2x2xf32> +// CHECK-NEXT: %[[VTR1:.*]] = vector.transfer_read %{{.*}}[%[[C2]], %[[C0]]], %{{.*}} : memref<4x2xf32>, vector<2x2xf32> -// CHECK-NEXT: %[[VTR2:.*]] = vector.transfer_read %{{.*}}[%[[C0]], %[[C0]]], %{{.*}} {permutation_map = #[[MAP0]]} : memref<2x4xf32>, vector<2x2xf32> -// CHECK-NEXT: %[[VTR3:.*]] = vector.transfer_read %{{.*}}[%[[C0]], %[[C2]]], %{{.*}} {permutation_map = #[[MAP0]]} : memref<2x4xf32>, vector<2x2xf32> +// CHECK-NEXT: %[[VTR2:.*]] = vector.transfer_read %{{.*}}[%[[C0]], %[[C0]]], %{{.*}} : memref<2x4xf32>, vector<2x2xf32> +// CHECK-NEXT: %[[VTR3:.*]] = vector.transfer_read %{{.*}}[%[[C0]], %[[C2]]], %{{.*}} : memref<2x4xf32>, vector<2x2xf32> -// CHECK-NEXT: %[[VTR4:.*]] = vector.transfer_read %{{.*}}[%[[C0]], %[[C0]]], %{{.*}} {permutation_map = #[[MAP0]]} : memref<4x4xf32>, vector<2x2xf32> -// CHECK-NEXT: %[[VTR5:.*]] = vector.transfer_read %{{.*}}[%[[C0]], %[[C2]]], %{{.*}} {permutation_map = #[[MAP0]]} : memref<4x4xf32>, vector<2x2xf32> -// CHECK-NEXT: %[[VTR6:.*]] = vector.transfer_read %{{.*}}[%[[C2]], %[[C0]]], %{{.*}} {permutation_map = #[[MAP0]]} : memref<4x4xf32>, vector<2x2xf32> -// CHECK-NEXT: %[[VTR7:.*]] = vector.transfer_read %{{.*}}[%[[C2]], %[[C2]]], %{{.*}} {permutation_map = #[[MAP0]]} : memref<4x4xf32>, vector<2x2xf32> +// CHECK-NEXT: %[[VTR4:.*]] = vector.transfer_read %{{.*}}[%[[C0]], %[[C0]]], %{{.*}} : memref<4x4xf32>, vector<2x2xf32> +// CHECK-NEXT: %[[VTR5:.*]] = vector.transfer_read %{{.*}}[%[[C0]], %[[C2]]], %{{.*}} : memref<4x4xf32>, vector<2x2xf32> +// CHECK-NEXT: %[[VTR6:.*]] = vector.transfer_read %{{.*}}[%[[C2]], %[[C0]]], %{{.*}} : memref<4x4xf32>, vector<2x2xf32> +// CHECK-NEXT: %[[VTR7:.*]] = vector.transfer_read %{{.*}}[%[[C2]], %[[C2]]], %{{.*}} : memref<4x4xf32>, vector<2x2xf32> // CHECK-NEXT: %[[R0:.*]] = vector.contract {indexing_maps = [#map2, #map3, #map0], iterator_types = ["parallel", "reduction", "parallel"]} %[[VTR0]], %[[VTR2]], %[[VTR4]] : vector<2x2xf32>, vector<2x2xf32> into vector<2x2xf32> // CHECK-NEXT: %[[R1:.*]] = vector.contract {indexing_maps = [#map2, #map3, #map0], iterator_types = ["parallel", "reduction", "parallel"]} %[[VTR0]], %[[VTR3]], %[[VTR5]] : vector<2x2xf32>, vector<2x2xf32> into vector<2x2xf32> // CHECK-NEXT: %[[R2:.*]] = vector.contract {indexing_maps = [#map2, #map3, #map0], iterator_types = ["parallel", "reduction", "parallel"]} %[[VTR1]], %[[VTR2]], %[[VTR6]] : vector<2x2xf32>, vector<2x2xf32> into vector<2x2xf32> // CHECK-NEXT: %[[R3:.*]] 
= vector.contract {indexing_maps = [#map2, #map3, #map0], iterator_types = ["parallel", "reduction", "parallel"]} %[[VTR1]], %[[VTR3]], %[[VTR7]] : vector<2x2xf32>, vector<2x2xf32> into vector<2x2xf32> -// CHECK-NEXT: vector.transfer_write %[[R0]], %{{.*}}[%[[C0]], %[[C0]]] {permutation_map = #[[MAP0]]} : vector<2x2xf32>, memref<4x4xf32> -// CHECK-NEXT: vector.transfer_write %[[R1]], %{{.*}}[%[[C0]], %[[C2]]] {permutation_map = #[[MAP0]]} : vector<2x2xf32>, memref<4x4xf32> -// CHECK-NEXT: vector.transfer_write %[[R2]], %{{.*}}[%[[C2]], %[[C0]]] {permutation_map = #[[MAP0]]} : vector<2x2xf32>, memref<4x4xf32> -// CHECK-NEXT: vector.transfer_write %[[R3]], %{{.*}}[%[[C2]], %[[C2]]] {permutation_map = #[[MAP0]]} : vector<2x2xf32>, memref<4x4xf32> +// CHECK-NEXT: vector.transfer_write %[[R0]], %{{.*}}[%[[C0]], %[[C0]]] : vector<2x2xf32>, memref<4x4xf32> +// CHECK-NEXT: vector.transfer_write %[[R1]], %{{.*}}[%[[C0]], %[[C2]]] : vector<2x2xf32>, memref<4x4xf32> +// CHECK-NEXT: vector.transfer_write %[[R2]], %{{.*}}[%[[C2]], %[[C0]]] : vector<2x2xf32>, memref<4x4xf32> +// CHECK-NEXT: vector.transfer_write %[[R3]], %{{.*}}[%[[C2]], %[[C2]]] : vector<2x2xf32>, memref<4x4xf32> // CHECK-NEXT: return func @contraction4x4_ikj_xfer_read(%arg0 : memref<4x2xf32>, @@ -425,10 +425,10 @@ func @cancelling_shape_cast_ops(%arg0 : vector<2x4xf32>) -> vector<2x4xf32> { // CHECK-LABEL: func @vector_transfers_vector_element_type // CHECK: %[[C0:.*]] = constant 0 : index // CHECK: %[[C1:.*]] = constant 1 : index -// CHECK: %[[VTR0:.*]] = vector.transfer_read %{{.*}}[%[[C0]], %[[C0]], %[[C0]]], %{{.*}} {permutation_map = #[[MAP1]]} : memref<6x2x1xvector<2x4xf32>>, vector<1x1x2x4xf32> -// CHECK-NEXT: %[[VTR1:.*]] = vector.transfer_read %{{.*}}[%[[C0]], %[[C1]], %[[C0]]], %{{.*}} {permutation_map = #[[MAP1]]} : memref<6x2x1xvector<2x4xf32>>, vector<1x1x2x4xf32> -// CHECK-NEXT: vector.transfer_write %[[VTR0]], %{{.*}}[%[[C0]], %[[C0]], %[[C0]]] {permutation_map = #[[MAP1]]} : vector<1x1x2x4xf32>, memref<6x2x1xvector<2x4xf32>> -// CHECK-NEXT: vector.transfer_write %[[VTR1]], %{{.*}}[%[[C0]], %[[C1]], %[[C0]]] {permutation_map = #[[MAP1]]} : vector<1x1x2x4xf32>, memref<6x2x1xvector<2x4xf32>> +// CHECK: %[[VTR0:.*]] = vector.transfer_read %{{.*}}[%[[C0]], %[[C0]], %[[C0]]], %{{.*}} : memref<6x2x1xvector<2x4xf32>>, vector<1x1x2x4xf32> +// CHECK-NEXT: %[[VTR1:.*]] = vector.transfer_read %{{.*}}[%[[C0]], %[[C1]], %[[C0]]], %{{.*}} : memref<6x2x1xvector<2x4xf32>>, vector<1x1x2x4xf32> +// CHECK-NEXT: vector.transfer_write %[[VTR0]], %{{.*}}[%[[C0]], %[[C0]], %[[C0]]] : vector<1x1x2x4xf32>, memref<6x2x1xvector<2x4xf32>> +// CHECK-NEXT: vector.transfer_write %[[VTR1]], %{{.*}}[%[[C0]], %[[C1]], %[[C0]]] : vector<1x1x2x4xf32>, memref<6x2x1xvector<2x4xf32>> func @vector_transfers_vector_element_type() { %c0 = constant 0 : index diff --git a/mlir/test/lib/Dialect/Test/TestOps.td b/mlir/test/lib/Dialect/Test/TestOps.td index f3bbf0e50dca70..eb2ff83fdddfb8 100644 --- a/mlir/test/lib/Dialect/Test/TestOps.td +++ b/mlir/test/lib/Dialect/Test/TestOps.td @@ -1214,9 +1214,15 @@ def FormatAttrOp : TEST_Op<"format_attr_op"> { let assemblyFormat = "$attr attr-dict"; } +// Test that we elide optional attributes that are within the syntax. +def FormatOptAttrOp : TEST_Op<"format_opt_attr_op"> { + let arguments = (ins OptionalAttr:$opt_attr); + let assemblyFormat = "(`(`$opt_attr^`)`)? attr-dict"; +} + // Test that we elide attributes that are within the syntax. 
def FormatAttrDictWithKeywordOp : TEST_Op<"format_attr_dict_w_keyword"> { - let arguments = (ins I64Attr:$attr); + let arguments = (ins I64Attr:$attr, OptionalAttr:$opt_attr); let assemblyFormat = "attr-dict-with-keyword"; } diff --git a/mlir/test/lib/Transforms/TestLinalgTransforms.cpp b/mlir/test/lib/Transforms/TestLinalgTransforms.cpp index 0390ac945d2f7d..87191d3e87d2b4 100644 --- a/mlir/test/lib/Transforms/TestLinalgTransforms.cpp +++ b/mlir/test/lib/Transforms/TestLinalgTransforms.cpp @@ -132,13 +132,20 @@ static void applyPatterns(FuncOp funcOp) { // Linalg subview operands promotion. //===--------------------------------------------------------------------===// patterns.insert>( - ctx, LinalgPromotionOptions(), + ctx, LinalgPromotionOptions().useFullTileBuffersByDefault(), LinalgMarker({"_promote_views_"}, "_views_promoted_")); patterns.insert>( - ctx, LinalgPromotionOptions().setOperandsToPromote({0}), + ctx, + LinalgPromotionOptions() + .setOperandsToPromote({0}) + .useFullTileBuffersByDefault(), LinalgMarker({"_promote_first_view_"}, "_first_view_promoted_")); patterns.insert>( - ctx, LinalgPromotionOptions().setOperandsToPromote({0}).setAlignment(32), + ctx, + LinalgPromotionOptions() + .setOperandsToPromote({0}) + .setUseFullTileBuffers({true}) + .setAlignment(32), LinalgMarker({"_promote_views_aligned_"}, "_views_aligned_promoted_")); applyPatternsAndFoldGreedily(funcOp, patterns); @@ -171,7 +178,8 @@ void fillL1TilingAndMatmulToVectorPatterns( LinalgMarker({startMarker}, "L1"))); patternsVector.emplace_back(LinalgPromotionPattern( - context, LinalgPromotionOptions(), LinalgMarker({"L1"}, "VEC"))); + context, LinalgPromotionOptions().useFullTileBuffersByDefault(), + LinalgMarker({"L1"}, "VEC"))); patternsVector.emplace_back( LinalgVectorizationPattern(context, LinalgMarker({"VEC"}))); diff --git a/mlir/test/mlir-tblgen/op-format.mlir b/mlir/test/mlir-tblgen/op-format.mlir index 8d55768aced79b..066e548e17083e 100644 --- a/mlir/test/mlir-tblgen/op-format.mlir +++ b/mlir/test/mlir-tblgen/op-format.mlir @@ -12,9 +12,16 @@ test.format_literal_op keyword_$. 
-> :, = <> () [] {foo.some_attr} // CHECK-NOT: {attr test.format_attr_op 10 +// CHECK: test.format_opt_attr_op(10) +// CHECK-NOT: {opt_attr +test.format_opt_attr_op(10) + // CHECK: test.format_attr_dict_w_keyword attributes {attr = 10 : i64} test.format_attr_dict_w_keyword attributes {attr = 10 : i64} +// CHECK: test.format_attr_dict_w_keyword attributes {attr = 10 : i64, opt_attr = 10 : i64} +test.format_attr_dict_w_keyword attributes {attr = 10 : i64, opt_attr = 10 : i64} + // CHECK: test.format_buildable_type_op %[[I64]] %ignored = test.format_buildable_type_op %i64 diff --git a/mlir/tools/mlir-opt/CMakeLists.txt b/mlir/tools/mlir-opt/CMakeLists.txt index 3e8ed0ebee7b51..d509b23505d12b 100644 --- a/mlir/tools/mlir-opt/CMakeLists.txt +++ b/mlir/tools/mlir-opt/CMakeLists.txt @@ -10,24 +10,30 @@ set(LLVM_LINK_COMPONENTS AsmParser ) +if(MLIR_INCLUDE_TESTS) + set(test_libs + MLIRAffineTransformsTestPasses + MLIRSPIRVTestPasses + MLIRTestDialect + MLIRTestIR + MLIRTestPass + MLIRTestTransforms + ) +endif() + set(LIBS ${dialect_libs} ${conversion_libs} + ${test_libs} MLIRLoopAnalysis - MLIRAffineTransformsTestPasses MLIRAnalysis MLIRDialect MLIREDSC MLIROptLib MLIRParser MLIRPass - MLIRSPIRVTestPasses MLIRTransforms MLIRTransformUtils - MLIRTestDialect - MLIRTestIR - MLIRTestPass - MLIRTestTransforms MLIRSupport MLIRIR ) diff --git a/mlir/tools/mlir-opt/mlir-opt.cpp b/mlir/tools/mlir-opt/mlir-opt.cpp index 218d6c03b4b80d..69b1d8d57bc56b 100644 --- a/mlir/tools/mlir-opt/mlir-opt.cpp +++ b/mlir/tools/mlir-opt/mlir-opt.cpp @@ -93,6 +93,7 @@ static cl::opt allowUnregisteredDialects( "allow-unregistered-dialect", cl::desc("Allow operation with no registered dialects"), cl::init(false)); +#ifdef MLIR_INCLUDE_TESTS void registerTestPasses() { registerConvertToTargetEnvPass(); registerInliner(); @@ -131,6 +132,7 @@ void registerTestPasses() { registerTestVectorToSCFPass(); registerVectorizerTestPass(); } +#endif static cl::opt showDialects("show-dialects", @@ -140,7 +142,9 @@ static cl::opt int main(int argc, char **argv) { registerAllDialects(); registerAllPasses(); +#ifdef MLIR_INCLUDE_TESTS registerTestPasses(); +#endif InitLLVM y(argc, argv); // Register any command line options. diff --git a/mlir/tools/mlir-tblgen/OpFormatGen.cpp b/mlir/tools/mlir-tblgen/OpFormatGen.cpp index 127b6b976cd53d..9fa87e3a842771 100644 --- a/mlir/tools/mlir-tblgen/OpFormatGen.cpp +++ b/mlir/tools/mlir-tblgen/OpFormatGen.cpp @@ -886,9 +886,17 @@ static void genAttrDictPrinter(OperationFormat &fmt, Operator &op, OpMethodBody &body, bool withKeyword) { // Collect all of the attributes used in the format, these will be elided. SmallVector usedAttributes; - for (auto &it : fmt.elements) + for (auto &it : fmt.elements) { if (auto *attr = dyn_cast(it.get())) usedAttributes.push_back(attr->getVar()); + // Collect the optional attributes. + if (auto *opt = dyn_cast(it.get())) { + for (auto &elem : opt->getElements()) { + if (auto *attr = dyn_cast(&elem)) + usedAttributes.push_back(attr->getVar()); + } + } + } body << " p.printOptionalAttrDict" << (withKeyword ? "WithKeyword" : "") << "(getAttrs(), /*elidedAttrs=*/{";