@@ -15854,48 +15854,51 @@ bool AArch64TargetLowering::shouldSinkOperands(
15854
15854
return false;
15855
15855
}
15856
15856
15857
- static bool createTblShuffleForZExt(ZExtInst *ZExt, FixedVectorType *DstTy,
15858
- bool IsLittleEndian) {
15859
- Value *Op = ZExt->getOperand(0);
15860
- auto *SrcTy = cast<FixedVectorType>(Op->getType());
15861
- auto SrcWidth = cast<IntegerType>(SrcTy->getElementType())->getBitWidth();
15862
- auto DstWidth = cast<IntegerType>(DstTy->getElementType())->getBitWidth();
15857
+ static bool createTblShuffleMask(unsigned SrcWidth, unsigned DstWidth,
15858
+ unsigned NumElts, bool IsLittleEndian,
15859
+ SmallVectorImpl<int> &Mask) {
15863
15860
if (DstWidth % 8 != 0 || DstWidth <= 16 || DstWidth >= 64)
15864
15861
return false;
15865
15862
15866
- assert(DstWidth % SrcWidth == 0 &&
15867
- "TBL lowering is not supported for a ZExt instruction with this "
15868
- "source & destination element type.");
15869
- unsigned ZExtFactor = DstWidth / SrcWidth;
15863
+ if (DstWidth % SrcWidth != 0)
15864
+ return false;
15865
+
15866
+ unsigned Factor = DstWidth / SrcWidth;
15867
+ unsigned MaskLen = NumElts * Factor;
15868
+
15869
+ Mask.clear();
15870
+ Mask.resize(MaskLen, NumElts);
15871
+
15872
+ unsigned SrcIndex = 0;
15873
+ for (unsigned I = 0; I < MaskLen; I += Factor)
15874
+ Mask[I] = SrcIndex++;
15875
+
15876
+ if (!IsLittleEndian)
15877
+ std::rotate(Mask.rbegin(), Mask.rbegin() + Factor - 1, Mask.rend());
15878
+
15879
+ return true;
15880
+ }
15881
+
15882
+ static Value *createTblShuffleForZExt(IRBuilderBase &Builder, Value *Op,
15883
+ FixedVectorType *ZExtTy,
15884
+ FixedVectorType *DstTy,
15885
+ bool IsLittleEndian) {
15886
+ auto *SrcTy = cast<FixedVectorType>(Op->getType());
15870
15887
unsigned NumElts = SrcTy->getNumElements();
15871
- IRBuilder<> Builder(ZExt);
15888
+ auto SrcWidth = cast<IntegerType>(SrcTy->getElementType())->getBitWidth();
15889
+ auto DstWidth = cast<IntegerType>(DstTy->getElementType())->getBitWidth();
15890
+
15872
15891
SmallVector<int> Mask;
15873
- // Create a mask that selects <0,...,Op[i]> for each lane of the destination
15874
- // vector to replace the original ZExt. This can later be lowered to a set of
15875
- // tbl instructions.
15876
- for (unsigned i = 0; i < NumElts * ZExtFactor; i++) {
15877
- if (IsLittleEndian) {
15878
- if (i % ZExtFactor == 0)
15879
- Mask.push_back(i / ZExtFactor);
15880
- else
15881
- Mask.push_back(NumElts);
15882
- } else {
15883
- if ((i + 1) % ZExtFactor == 0)
15884
- Mask.push_back((i - ZExtFactor + 1) / ZExtFactor);
15885
- else
15886
- Mask.push_back(NumElts);
15887
- }
15888
- }
15892
+ if (!createTblShuffleMask(SrcWidth, DstWidth, NumElts, IsLittleEndian, Mask))
15893
+ return nullptr;
15889
15894
15890
15895
auto *FirstEltZero = Builder.CreateInsertElement(
15891
15896
PoisonValue::get(SrcTy), Builder.getInt8(0), uint64_t(0));
15892
15897
Value *Result = Builder.CreateShuffleVector(Op, FirstEltZero, Mask);
15893
15898
Result = Builder.CreateBitCast(Result, DstTy);
15894
- if (DstTy != ZExt->getType())
15895
- Result = Builder.CreateZExt(Result, ZExt->getType());
15896
- ZExt->replaceAllUsesWith(Result);
15897
- ZExt->eraseFromParent();
15898
- return true;
15899
+ if (DstTy != ZExtTy)
15900
+ Result = Builder.CreateZExt(Result, ZExtTy);
15901
+ return Result;
15899
15902
}
15900
15903
15901
15904
static void createTblForTrunc(TruncInst *TI, bool IsLittleEndian) {
@@ -16060,21 +16063,30 @@ bool AArch64TargetLowering::optimizeExtendOrTruncateConversion(
16060
16063
16061
16064
DstTy = TruncDstType;
16062
16065
}
16063
-
16064
- return createTblShuffleForZExt(ZExt, DstTy, Subtarget->isLittleEndian());
16066
+ IRBuilder<> Builder(ZExt);
16067
+ Value *Result = createTblShuffleForZExt(
16068
+ Builder, ZExt->getOperand(0), cast<FixedVectorType>(ZExt->getType()),
16069
+ DstTy, Subtarget->isLittleEndian());
16070
+ if (!Result)
16071
+ return false;
16072
+ ZExt->replaceAllUsesWith(Result);
16073
+ ZExt->eraseFromParent();
16074
+ return true;
16065
16075
}
16066
16076
16067
16077
auto *UIToFP = dyn_cast<UIToFPInst>(I);
16068
16078
if (UIToFP && SrcTy->getElementType()->isIntegerTy(8) &&
16069
16079
DstTy->getElementType()->isFloatTy()) {
16070
16080
IRBuilder<> Builder(I);
16071
- auto *ZExt = cast<ZExtInst>(
16072
- Builder.CreateZExt(I->getOperand(0), VectorType::getInteger(DstTy)));
16081
+ Value *ZExt = createTblShuffleForZExt(
16082
+ Builder, I->getOperand(0), FixedVectorType::getInteger(DstTy),
16083
+ FixedVectorType::getInteger(DstTy), Subtarget->isLittleEndian());
16084
+ if (!ZExt)
16085
+ return false;
16073
16086
auto *UI = Builder.CreateUIToFP(ZExt, DstTy);
16074
16087
I->replaceAllUsesWith(UI);
16075
16088
I->eraseFromParent();
16076
- return createTblShuffleForZExt(ZExt, cast<FixedVectorType>(ZExt->getType()),
16077
- Subtarget->isLittleEndian());
16089
+ return true;
16078
16090
}
16079
16091
16080
16092
// Convert 'fptoui <(8|16) x float> to <(8|16) x i8>' to a wide fptoui
0 commit comments