@@ -635,11 +635,34 @@ getVectorDeinterleaveFactor(IntrinsicInst *II,
635635  return  true ;
636636}
637637
638+ // / Check the interleaved mask
639+ // /
640+ // / - if a value within the optional is non-nullptr, the value corresponds to
641+ // /   deinterleaved mask
642+ // / - if a value within the option is nullptr, the value corresponds to all-true
643+ // /   mask
644+ // / - return nullopt if mask cannot be deinterleaved
645+ static  std::optional<Value *> getMask (Value *WideMask, unsigned  Factor) {
646+   using  namespace  llvm ::PatternMatch; 
647+   if  (auto  *IMI = dyn_cast<IntrinsicInst>(WideMask)) {
648+     SmallVector<Value *, 8 > Operands;
649+     SmallVector<Instruction *, 8 > DeadInsts;
650+     if  (getVectorInterleaveFactor (IMI, Operands, DeadInsts)) {
651+       assert (!Operands.empty ());
652+       if  (Operands.size () == Factor &&
653+           std::equal (Operands.begin (), Operands.end (), Operands.begin ()))
654+         return  Operands.front ();
655+     }
656+   }
657+   if  (match (WideMask, m_AllOnes ()))
658+     return  nullptr ;
659+   return  std::nullopt ;
660+ }
661+ 
638662bool  InterleavedAccessImpl::lowerDeinterleaveIntrinsic (
639663    IntrinsicInst *DI, SmallSetVector<Instruction *, 32 > &DeadInsts) {
640-   LoadInst *LI = dyn_cast<LoadInst>(DI->getOperand (0 ));
641- 
642-   if  (!LI || !LI->hasOneUse () || !LI->isSimple ())
664+   Value *LoadedVal = DI->getOperand (0 );
665+   if  (!LoadedVal->hasOneUse () || !isa<LoadInst, VPIntrinsic>(LoadedVal))
643666    return  false ;
644667
645668  SmallVector<Value *, 8 > DeinterleaveValues;
@@ -648,43 +671,92 @@ bool InterleavedAccessImpl::lowerDeinterleaveIntrinsic(
648671                                   DeinterleaveDeadInsts))
649672    return  false ;
650673
651-   LLVM_DEBUG (dbgs () << " IA: Found a deinterleave intrinsic: "   << *DI
652-                     << "  with factor = "   << DeinterleaveValues.size () << " \n "  );
674+   const  unsigned  Factor = DeinterleaveValues.size ();
653675
654-   //  Try and match this with target specific intrinsics.
655-   if  (!TLI->lowerDeinterleaveIntrinsicToLoad (DI, LI, DeinterleaveValues))
656-     return  false ;
676+   if  (auto  *VPLoad = dyn_cast<VPIntrinsic>(LoadedVal)) {
677+     if  (VPLoad->getIntrinsicID () != Intrinsic::vp_load)
678+       return  false ;
679+     //  Check mask operand. Handle both all-true and interleaved mask.
680+     Value *WideMask = VPLoad->getOperand (1 );
681+     std::optional<Value *> Mask = getMask (WideMask, Factor);
682+     if  (!Mask)
683+       return  false ;
684+ 
685+     LLVM_DEBUG (dbgs () << " IA: Found a vp.load with deinterleave intrinsic " 
686+                       << *DI << "  and factor = "   << Factor << " \n "  );
687+ 
688+     //  Since lowerInterleaveLoad expects Shuffles and LoadInst, use special
689+     //  TLI function to emit target-specific interleaved instruction.
690+     if  (!TLI->lowerInterleavedScalableLoad (VPLoad, *Mask, DI,
691+                                            DeinterleaveValues))
692+       return  false ;
693+ 
694+   } else  {
695+     auto  *LI = cast<LoadInst>(LoadedVal);
696+     if  (!LI->isSimple ())
697+       return  false ;
698+ 
699+     LLVM_DEBUG (dbgs () << " IA: Found a load with deinterleave intrinsic "   << *DI
700+                       << "  and factor = "   << Factor << " \n "  );
701+ 
702+     //  Try and match this with target specific intrinsics.
703+     if  (!TLI->lowerDeinterleaveIntrinsicToLoad (DI, LI, DeinterleaveValues))
704+       return  false ;
705+   }
657706
658707  DeadInsts.insert (DeinterleaveDeadInsts.begin (), DeinterleaveDeadInsts.end ());
659708  //  We now have a target-specific load, so delete the old one.
660-   DeadInsts.insert (LI );
709+   DeadInsts.insert (cast<Instruction>(LoadedVal) );
661710  return  true ;
662711}
663712
664713bool  InterleavedAccessImpl::lowerInterleaveIntrinsic (
665714    IntrinsicInst *II, SmallSetVector<Instruction *, 32 > &DeadInsts) {
666715  if  (!II->hasOneUse ())
667716    return  false ;
668- 
669-   StoreInst *SI = dyn_cast<StoreInst>(*(II->users ().begin ()));
670- 
671-   if  (!SI || !SI->isSimple ())
717+   Value *StoredBy = II->user_back ();
718+   if  (!isa<StoreInst, VPIntrinsic>(StoredBy))
672719    return  false ;
673720
674721  SmallVector<Value *, 8 > InterleaveValues;
675722  SmallVector<Instruction *, 8 > InterleaveDeadInsts;
676723  if  (!getVectorInterleaveFactor (II, InterleaveValues, InterleaveDeadInsts))
677724    return  false ;
678725
679-   LLVM_DEBUG (dbgs () << " IA: Found an interleave intrinsic: "   << *II
680-                     << "  with factor = "   << InterleaveValues.size () << " \n "  );
726+   const  unsigned  Factor = InterleaveValues.size ();
681727
682-   //  Try and match this with target specific intrinsics.
683-   if  (!TLI->lowerInterleaveIntrinsicToStore (II, SI, InterleaveValues))
684-     return  false ;
728+   if  (auto  *VPStore = dyn_cast<VPIntrinsic>(StoredBy)) {
729+     if  (VPStore->getIntrinsicID () != Intrinsic::vp_store)
730+       return  false ;
731+ 
732+     Value *WideMask = VPStore->getOperand (2 );
733+     std::optional<Value *> Mask = getMask (WideMask, Factor);
734+     if  (!Mask)
735+       return  false ;
736+ 
737+     LLVM_DEBUG (dbgs () << " IA: Found a vp.store with interleave intrinsic " 
738+                       << *II << "  and factor = "   << Factor << " \n "  );
739+ 
740+     //  Since lowerInterleavedStore expects Shuffle and StoreInst, use special
741+     //  TLI function to emit target-specific interleaved instruction.
742+     if  (!TLI->lowerInterleavedScalableStore (VPStore, *Mask, II,
743+                                             InterleaveValues))
744+       return  false ;
745+   } else  {
746+     auto  *SI = cast<StoreInst>(StoredBy);
747+     if  (!SI->isSimple ())
748+       return  false ;
749+ 
750+     LLVM_DEBUG (dbgs () << " IA: Found a store with interleave intrinsic "   << *II
751+                       << "  and factor = "   << Factor << " \n "  );
752+ 
753+     //  Try and match this with target specific intrinsics.
754+     if  (!TLI->lowerInterleaveIntrinsicToStore (II, SI, InterleaveValues))
755+       return  false ;
756+   }
685757
686758  //  We now have a target-specific store, so delete the old one.
687-   DeadInsts.insert (SI );
759+   DeadInsts.insert (cast<Instruction>(StoredBy) );
688760  DeadInsts.insert (InterleaveDeadInsts.begin (), InterleaveDeadInsts.end ());
689761  return  true ;
690762}
0 commit comments