@@ -15710,48 +15710,51 @@ bool AArch64TargetLowering::shouldSinkOperands(
15710
15710
return false;
15711
15711
}
15712
15712
15713
// Diff note: the `-` lines directly below are the header of the previous
// createTblShuffleForZExt, which took the ZExtInst itself and derived Op,
// SrcTy, SrcWidth and DstWidth from it. The `+` lines introduce the mask
// computation as a standalone helper.
- static bool createTblShuffleForZExt(ZExtInst *ZExt, FixedVectorType *DstTy,
15714
- bool IsLittleEndian) {
15715
- Value *Op = ZExt->getOperand(0);
15716
- auto *SrcTy = cast<FixedVectorType>(Op->getType());
15717
- auto SrcWidth = cast<IntegerType>(SrcTy->getElementType())->getBitWidth();
15718
- auto DstWidth = cast<IntegerType>(DstTy->getElementType())->getBitWidth();
// createTblShuffleMask: fill Mask with shuffle indices that spread NumElts
// source elements of SrcWidth bits over groups of DstWidth / SrcWidth slots.
//   SrcWidth, DstWidth - element bit widths of the source and destination.
//   NumElts            - number of source elements.
//   IsLittleEndian     - selects which slot of each group holds the source
//                        element (first slot if true, last slot if false).
//   Mask               - output; cleared and rewritten only on success.
// Returns false, leaving Mask untouched, when DstWidth is not a multiple of
// 8, is <= 16 or >= 64, or is not a multiple of SrcWidth. Returns true
// otherwise.
15713
+ static bool createTblShuffleMask(unsigned SrcWidth, unsigned DstWidth,
15714
+ unsigned NumElts, bool IsLittleEndian,
15715
+ SmallVectorImpl<int> &Mask) {
15719
15716
if (DstWidth % 8 != 0 || DstWidth <= 16 || DstWidth >= 64)
15720
15717
return false;
15721
15718
// The old code asserted that DstWidth is a multiple of SrcWidth; the new
// code reports the mismatch to the caller by returning false instead.
15722
- assert(DstWidth % SrcWidth == 0 &&
15723
- "TBL lowering is not supported for a ZExt instruction with this "
15724
- "source & destination element type.");
15725
- unsigned ZExtFactor = DstWidth / SrcWidth;
15719
+ if (DstWidth % SrcWidth != 0)
15720
+ return false;
15721
+
15722
+ unsigned Factor = DstWidth / SrcWidth;
15723
+ unsigned MaskLen = NumElts * Factor;
15724
+
// Every slot starts as the filler index NumElts. In a two-operand shuffle of
// NumElts-wide vectors that index names element 0 of the second operand,
// which createTblShuffleForZExt (later in this hunk) sets to zero.
15725
+ Mask.clear();
15726
+ Mask.resize(MaskLen, NumElts);
15727
+
// Source element k is written to slot k * Factor, the first slot of group k.
15728
+ unsigned SrcIndex = 0;
15729
+ for (unsigned I = 0; I < MaskLen; I += Factor)
15730
+ Mask[I] = SrcIndex++;
15731
+
// Big-endian: rotating the reversed range left by Factor - 1 shifts every
// entry Factor - 1 slots towards the end, so source element k ends up in the
// last slot of its group (k * Factor + Factor - 1). The entries that wrap
// around to the front are all filler, so the wrap is harmless. This matches
// the (i + 1) % ZExtFactor == 0 placement of the removed loop.
15732
+ if (!IsLittleEndian)
15733
+ std::rotate(Mask.rbegin(), Mask.rbegin() + Factor - 1, Mask.rend());
15734
+
15735
+ return true;
15736
+ }
15737
+
15738
// createTblShuffleForZExt (new form): emit, through Builder, a shuffle-based
// replacement for a zero-extension of the vector Op and return it.
//   Builder        - builder used for every instruction created here; the
//                    insertion point is chosen by the caller.
//   Op             - vector value being extended; must have FixedVectorType.
//   ZExtTy         - type the returned value must have.
//   DstTy          - type the shuffle result is bitcast to; when it differs
//                    from ZExtTy a further zext to ZExtTy is emitted.
//   IsLittleEndian - forwarded to createTblShuffleMask.
// Returns the new value, or nullptr when createTblShuffleMask rejects the
// width combination; that check runs before any instruction is created.
// Unlike the removed version, this function neither constructs its own
// IRBuilder nor replaces/erases the original instruction; the callers visible
// later in this diff do that themselves.
+ static Value *createTblShuffleForZExt(IRBuilderBase &Builder, Value *Op,
15739
+ FixedVectorType *ZExtTy,
15740
+ FixedVectorType *DstTy,
15741
+ bool IsLittleEndian) {
15742
+ auto *SrcTy = cast<FixedVectorType>(Op->getType());
15726
15743
unsigned NumElts = SrcTy->getNumElements();
15727
- IRBuilder<> Builder(ZExt);
15744
+ auto SrcWidth = cast<IntegerType>(SrcTy->getElementType())->getBitWidth();
15745
+ auto DstWidth = cast<IntegerType>(DstTy->getElementType())->getBitWidth();
15746
+
15728
15747
SmallVector<int> Mask;
15729
- // Create a mask that selects <0,...,Op[i]> for each lane of the destination
15730
- // vector to replace the original ZExt. This can later be lowered to a set of
15731
- // tbl instructions.
15732
- for (unsigned i = 0; i < NumElts * ZExtFactor; i++) {
15733
- if (IsLittleEndian) {
15734
- if (i % ZExtFactor == 0)
15735
- Mask.push_back(i / ZExtFactor);
15736
- else
15737
- Mask.push_back(NumElts);
15738
- } else {
15739
- if ((i + 1) % ZExtFactor == 0)
15740
- Mask.push_back((i - ZExtFactor + 1) / ZExtFactor);
15741
- else
15742
- Mask.push_back(NumElts);
15743
- }
15744
- }
// The inline mask loop above is replaced by the shared helper; a false
// result is passed on to the caller as nullptr.
15748
+ if (!createTblShuffleMask(SrcWidth, DstWidth, NumElts, IsLittleEndian, Mask))
15749
+ return nullptr;
15745
15750
15746
15751
// Second shuffle operand: a SrcTy vector whose lane 0 is zero and whose
// other lanes are poison. Mask entries equal to NumElts select that lane 0.
// NOTE(review): the zero is an i8 constant, so this only type-checks when the
// element type of SrcTy is i8. The UIToFP caller in this diff checks
// isIntegerTy(8); the guard for the ZExt caller is outside this hunk, so
// confirm it before relying on other source widths.
auto *FirstEltZero = Builder.CreateInsertElement(
15747
15752
PoisonValue::get(SrcTy), Builder.getInt8(0), uint64_t(0));
15748
15753
Value *Result = Builder.CreateShuffleVector(Op, FirstEltZero, Mask);
15749
15754
// Reinterpret each group of Factor narrow lanes as one DstTy element.
Result = Builder.CreateBitCast(Result, DstTy);
15750
- if (DstTy != ZExt->getType())
15751
- Result = Builder.CreateZExt(Result, ZExt->getType());
15752
- ZExt->replaceAllUsesWith(Result);
15753
- ZExt->eraseFromParent();
15754
- return true;
// Widen to the requested type when DstTy differs from it, and hand the value
// back instead of rewriting uses here.
15755
+ if (DstTy != ZExtTy)
15756
+ Result = Builder.CreateZExt(Result, ZExtTy);
15757
+ return Result;
15755
15758
}
15756
15759
15757
15760
static void createTblForTrunc(TruncInst *TI, bool IsLittleEndian) {
@@ -15916,21 +15919,30 @@ bool AArch64TargetLowering::optimizeExtendOrTruncateConversion(
15916
15919
15917
15920
DstTy = TruncDstType;
15918
15921
}
15919
-
15920
- return createTblShuffleForZExt(ZExt, DstTy, Subtarget->isLittleEndian());
15922
+ IRBuilder<> Builder(ZExt);
15923
+ Value *Result = createTblShuffleForZExt(
15924
+ Builder, ZExt->getOperand(0), cast<FixedVectorType>(ZExt->getType()),
15925
+ DstTy, Subtarget->isLittleEndian());
15926
+ if (!Result)
15927
+ return false;
15928
+ ZExt->replaceAllUsesWith(Result);
15929
+ ZExt->eraseFromParent();
15930
+ return true;
15921
15931
}
15922
15932
15923
15933
auto *UIToFP = dyn_cast<UIToFPInst>(I);
15924
15934
if (UIToFP && SrcTy->getElementType()->isIntegerTy(8) &&
15925
15935
DstTy->getElementType()->isFloatTy()) {
15926
15936
IRBuilder<> Builder(I);
15927
- auto *ZExt = cast<ZExtInst>(
15928
- Builder.CreateZExt(I->getOperand(0), VectorType::getInteger(DstTy)));
15937
+ Value *ZExt = createTblShuffleForZExt(
15938
+ Builder, I->getOperand(0), FixedVectorType::getInteger(DstTy),
15939
+ FixedVectorType::getInteger(DstTy), Subtarget->isLittleEndian());
15940
+ if (!ZExt)
15941
+ return false;
15929
15942
auto *UI = Builder.CreateUIToFP(ZExt, DstTy);
15930
15943
I->replaceAllUsesWith(UI);
15931
15944
I->eraseFromParent();
15932
- return createTblShuffleForZExt(ZExt, cast<FixedVectorType>(ZExt->getType()),
15933
- Subtarget->isLittleEndian());
15945
+ return true;
15934
15946
}
15935
15947
15936
15948
// Convert 'fptoui <(8|16) x float> to <(8|16) x i8>' to a wide fptoui
0 commit comments