@@ -490,6 +490,10 @@ static void migrateDebugInfo(AllocaInst *OldAlloca, bool IsSplit,
490
490
for_each (DVRAssignMarkerRange, MigrateDbgAssign);
491
491
}
492
492
493
+ static Type *getTypePartition (const DataLayout &DL, Type *Ty, uint64_t Offset,
494
+ uint64_t Size);
495
+ static Type *getTypePartition (const AllocaInst &AI, const Partition &P);
496
+
493
497
namespace {
494
498
495
499
/// A custom IRBuilder inserter which prefixes all names, but only in
@@ -1011,37 +1015,33 @@ static Value *foldPHINodeOrSelectInst(Instruction &I) {
1011
1015
return foldSelectInst (cast<SelectInst>(I));
1012
1016
}
1013
1017
1014
- static constexpr size_t getMaxNumFixedVectorElements () {
1015
- // FIXME: hack. Do we have a named constant for this?
1016
- // SDAG SDNode can't have more than 65535 operands.
1017
- return std::numeric_limits<unsigned short >::max ();
1018
- }
1019
-
1020
1018
// / Returns a fixed vector type equivalent to the memory set by II or nullptr if
1021
- // / unable to do so.
1022
- static FixedVectorType *getVectorTypeFor (const MemSetInst &II,
1023
- const DataLayout &DL) {
1019
+ // / not viable.
1020
+ static FixedVectorType *getVectorTypeFor (const DataLayout &DL, Type *PartTy,
1021
+ const MemSetInst &II) {
1022
+ auto *PartVecTy = dyn_cast_or_null<FixedVectorType>(PartTy);
1023
+ if (!PartVecTy)
1024
+ return nullptr ;
1025
+
1026
+ const uint64_t PartVecSize = DL.getTypeStoreSize (PartVecTy).getFixedValue ();
1027
+
1024
1028
const ConstantInt *Length = dyn_cast<ConstantInt>(II.getLength ());
1025
1029
if (!Length)
1026
1030
return nullptr ;
1027
1031
1028
1032
const APInt &Val = Length->getValue ();
1029
- if (Val.ugt (getMaxNumFixedVectorElements () ))
1033
+ if (Val.ugt (PartVecSize ))
1030
1034
return nullptr ;
1031
1035
1032
1036
// Element type will always be i8. TODO: Support
1033
1037
// llvm.experimental.memset.pattern?
1034
- uint64_t MemSetLen = Val.getZExtValue ();
1035
- auto *VTy = FixedVectorType::get (II.getValue ()->getType (), MemSetLen);
1036
-
1037
- // FIXME: This is a workaround. Vector promotion sometimes inhibits our
1038
- // ability to merge constant stores. It seems to be related to the presence of
1039
- // alignment bytes. See
1040
- // test/Transforms/PhaseOrdering/X86/store-constant-merge.ll
1041
- if (MemSetLen != DL.getTypeAllocSize (VTy).getFixedValue ())
1042
- return nullptr ;
1038
+ return FixedVectorType::get (II.getValue ()->getType (), Val.getZExtValue ());
1039
+ }
1043
1040
1044
- return VTy;
1041
+ static FixedVectorType *getVectorTypeFor (const AllocaInst &AI,
1042
+ const Partition &P,
1043
+ const MemSetInst &II) {
1044
+ return getVectorTypeFor (AI.getDataLayout (), getTypePartition (AI, P), II);
1045
1045
}
1046
1046
1047
1047
/// Builder for the alloca slices.
@@ -1055,6 +1055,7 @@ class AllocaSlices::SliceBuilder : public PtrUseVisitor<SliceBuilder> {
1055
1055
using Base = PtrUseVisitor<SliceBuilder>;
1056
1056
1057
1057
const uint64_t AllocSize;
1058
+ const AllocaInst &AI;
1058
1059
AllocaSlices &AS;
1059
1060
1060
1061
SmallDenseMap<Instruction *, unsigned > MemTransferSliceMap;
@@ -1067,7 +1068,7 @@ class AllocaSlices::SliceBuilder : public PtrUseVisitor<SliceBuilder> {
1067
1068
SliceBuilder (const DataLayout &DL, AllocaInst &AI, AllocaSlices &AS)
1068
1069
: PtrUseVisitor<SliceBuilder>(DL),
1069
1070
AllocSize (DL.getTypeAllocSize(AI.getAllocatedType()).getFixedValue()),
1070
- AS (AS) {}
1071
+ AI (AI), AS(AS) {}
1071
1072
1072
1073
private:
1073
1074
void markAsDead (Instruction &I) {
@@ -1132,16 +1133,15 @@ class AllocaSlices::SliceBuilder : public PtrUseVisitor<SliceBuilder> {
1132
1133
return Base::visitGetElementPtrInst (GEPI);
1133
1134
}
1134
1135
1135
- bool isSplittableMemOp (Type *Ty, bool IsVolatile) {
1136
- return Ty->isIntegerTy () && !IsVolatile && DL.typeSizeEqualsStoreSize (Ty);
1137
- }
1138
-
1139
1136
void handleLoadOrStore (Type *Ty, Instruction &I, const APInt &Offset,
1140
1137
uint64_t Size, bool IsVolatile) {
1141
1138
// We allow splitting of non-volatile loads and stores where the type is an
1142
1139
// integer type. These may be used to implement 'memcpy' or other "transfer
1143
1140
// of bits" patterns.
1144
- insertUse (I, Offset, Size, isSplittableMemOp (Ty, IsVolatile));
1141
+ bool IsSplittable =
1142
+ Ty->isIntegerTy () && !IsVolatile && DL.typeSizeEqualsStoreSize (Ty);
1143
+
1144
+ insertUse (I, Offset, Size, IsSplittable);
1145
1145
}
1146
1146
1147
1147
void visitLoadInst (LoadInst &LI) {
@@ -1206,17 +1206,17 @@ class AllocaSlices::SliceBuilder : public PtrUseVisitor<SliceBuilder> {
1206
1206
if (!IsOffsetKnown)
1207
1207
return PI.setAborted (&II);
1208
1208
1209
- bool Splittable;
1210
-
1211
- if ( getVectorTypeFor (II, DL))
1212
- Splittable = isSplittableMemOp (AS. AI . getAllocatedType (), II. isVolatile ());
1213
- else
1214
- Splittable = ( bool )Length;
1215
-
1216
- insertUse (II, Offset,
1217
- Length ? Length-> getLimitedValue ()
1218
- : AllocSize - Offset. getLimitedValue (),
1219
- Splittable);
1209
+ uint64_t Size = Length ? Length-> getLimitedValue ()
1210
+ : AllocSize - Offset. getLimitedValue ();
1211
+ bool Splittable = ( bool )Length;
1212
+ if (Splittable) {
1213
+ // Encourage the use of vector types by making this non-splittable if the
1214
+ // memset corresponds to viable vector type.
1215
+ Type *PartTy = getTypePartition (DL, AI. getAllocatedType (),
1216
+ Offset. getLimitedValue (), Size);
1217
+ Splittable = ! getVectorTypeFor (DL, PartTy, II);
1218
+ }
1219
+ insertUse (II, Offset, Size, Splittable);
1220
1220
}
1221
1221
1222
1222
void visitMemTransferInst (MemTransferInst &II) {
@@ -2084,10 +2084,11 @@ static Value *convertValue(const DataLayout &DL, IRBuilderTy &IRB, Value *V,
2084
2084
// /
2085
2085
// / This function is called to test each entry in a partition which is slated
2086
2086
// / for a single slice.
2087
- static bool isVectorPromotionViableForSlice (Partition &P, const Slice &S,
2088
- VectorType *Ty,
2089
- uint64_t ElementSize,
2090
- const DataLayout &DL) {
2087
+ static bool isVectorPromotionViableForSlice (const AllocaInst &AI, Partition &P,
2088
+ const Slice &S, VectorType *Ty,
2089
+ uint64_t ElementSize) {
2090
+ const DataLayout &DL = AI.getDataLayout ();
2091
+
2091
2092
// First validate the slice offsets.
2092
2093
uint64_t BeginOffset =
2093
2094
std::max (S.beginOffset (), P.beginOffset ()) - P.beginOffset ();
@@ -2116,14 +2117,14 @@ static bool isVectorPromotionViableForSlice(Partition &P, const Slice &S,
2116
2117
if (MI->isVolatile ())
2117
2118
return false ;
2118
2119
2119
- auto *II = dyn_cast<MemSetInst>(U->getUser ());
2120
- if (!II && !S.isSplittable ()) {
2120
+ if (!S.isSplittable ()) {
2121
2121
// Skip any non-memset unsplittable intrinsics.
2122
- return false ;
2123
- }
2124
- if (II) {
2125
- // For memset, allow if we have a suitable vector type
2126
- Type *VTy = getVectorTypeFor (*II, DL);
2122
+ auto *II = dyn_cast<MemSetInst>(U->getUser ());
2123
+ if (!II)
2124
+ return false ;
2125
+
2126
+ // For memset, allow if we have a viable vector type
2127
+ Type *VTy = getVectorTypeFor (AI, P, *II);
2127
2128
if (!VTy)
2128
2129
return false ;
2129
2130
if (!canConvertValue (DL, SliceTy, VTy))
@@ -2170,8 +2171,9 @@ static bool isVectorPromotionViableForSlice(Partition &P, const Slice &S,
2170
2171
// / This implements the necessary checking for \c checkVectorTypesForPromotion
2171
2172
// / (and thus isVectorPromotionViable) over all slices of the alloca for the
2172
2173
// / given VectorType.
2173
- static bool checkVectorTypeForPromotion (Partition &P, VectorType *VTy,
2174
- const DataLayout &DL) {
2174
+ static bool checkVectorTypeForPromotion (const AllocaInst &AI, Partition &P,
2175
+ VectorType *VTy) {
2176
+ const DataLayout &DL = AI.getDataLayout ();
2175
2177
uint64_t ElementSize =
2176
2178
DL.getTypeSizeInBits (VTy->getElementType ()).getFixedValue ();
2177
2179
@@ -2184,11 +2186,11 @@ static bool checkVectorTypeForPromotion(Partition &P, VectorType *VTy,
2184
2186
ElementSize /= 8 ;
2185
2187
2186
2188
for (const Slice &S : P)
2187
- if (!isVectorPromotionViableForSlice (P, S, VTy, ElementSize, DL ))
2189
+ if (!isVectorPromotionViableForSlice (AI, P, S, VTy, ElementSize))
2188
2190
return false ;
2189
2191
2190
2192
for (const Slice *S : P.splitSliceTails ())
2191
- if (!isVectorPromotionViableForSlice (P, *S, VTy, ElementSize, DL ))
2193
+ if (!isVectorPromotionViableForSlice (AI, P, *S, VTy, ElementSize))
2192
2194
return false ;
2193
2195
2194
2196
return true ;
@@ -2199,11 +2201,12 @@ static bool checkVectorTypeForPromotion(Partition &P, VectorType *VTy,
2199
2201
// / This implements the necessary checking for \c isVectorPromotionViable over
2200
2202
// / all slices of the alloca for the given VectorType.
2201
2203
static VectorType *
2202
- checkVectorTypesForPromotion (Partition &P, const DataLayout &DL ,
2204
+ checkVectorTypesForPromotion (const AllocaInst &AI, Partition &P ,
2203
2205
SmallVectorImpl<VectorType *> &CandidateTys,
2204
2206
bool HaveCommonEltTy, Type *CommonEltTy,
2205
2207
bool HaveVecPtrTy, bool HaveCommonVecPtrTy,
2206
2208
VectorType *CommonVecPtrTy) {
2209
+ const DataLayout &DL = AI.getDataLayout ();
2207
2210
// If we didn't find a vector type, nothing to do here.
2208
2211
if (CandidateTys.empty ())
2209
2212
return nullptr ;
@@ -2271,24 +2274,27 @@ checkVectorTypesForPromotion(Partition &P, const DataLayout &DL,
2271
2274
CandidateTys.resize (1 );
2272
2275
}
2273
2276
2277
+ // FIXME: hack. Do we have a named constant for this?
2278
+ // SDAG SDNode can't have more than 65535 operands.
2274
2279
llvm::erase_if (CandidateTys, [](VectorType *VTy) {
2275
2280
return cast<FixedVectorType>(VTy)->getNumElements () >
2276
- getMaxNumFixedVectorElements ();
2281
+ std::numeric_limits< unsigned short >:: max ();
2277
2282
});
2278
2283
2279
2284
for (VectorType *VTy : CandidateTys)
2280
- if (checkVectorTypeForPromotion (P, VTy, DL ))
2285
+ if (checkVectorTypeForPromotion (AI, P, VTy ))
2281
2286
return VTy;
2282
2287
2283
2288
return nullptr ;
2284
2289
}
2285
2290
2286
2291
static VectorType *createAndCheckVectorTypesForPromotion (
2287
2292
SetVector<Type *> &OtherTys, ArrayRef<VectorType *> CandidateTysCopy,
2288
- function_ref<void (Type *)> CheckCandidateType, Partition &P ,
2289
- const DataLayout &DL , SmallVectorImpl<VectorType *> &CandidateTys,
2293
+ function_ref<void (Type *)> CheckCandidateType, const AllocaInst &AI ,
2294
+ Partition &P , SmallVectorImpl<VectorType *> &CandidateTys,
2290
2295
bool &HaveCommonEltTy, Type *&CommonEltTy, bool &HaveVecPtrTy,
2291
2296
bool &HaveCommonVecPtrTy, VectorType *&CommonVecPtrTy) {
2297
+ const DataLayout &DL = AI.getDataLayout ();
2292
2298
[[maybe_unused]] VectorType *OriginalElt =
2293
2299
CandidateTysCopy.size () ? CandidateTysCopy[0 ] : nullptr ;
2294
2300
// Consider additional vector types where the element type size is a
@@ -2313,7 +2319,7 @@ static VectorType *createAndCheckVectorTypesForPromotion(
2313
2319
}
2314
2320
}
2315
2321
2316
- return checkVectorTypesForPromotion (P, DL , CandidateTys, HaveCommonEltTy,
2322
+ return checkVectorTypesForPromotion (AI, P , CandidateTys, HaveCommonEltTy,
2317
2323
CommonEltTy, HaveVecPtrTy,
2318
2324
HaveCommonVecPtrTy, CommonVecPtrTy);
2319
2325
}
@@ -2327,9 +2333,10 @@ static VectorType *createAndCheckVectorTypesForPromotion(
2327
2333
// / SSA value. We only can ensure this for a limited set of operations, and we
2328
2334
// / don't want to do the rewrites unless we are confident that the result will
2329
2335
// / be promotable, so we have an early test here.
2330
- static VectorType *isVectorPromotionViable (Partition &P, const DataLayout &DL ) {
2336
+ static VectorType *isVectorPromotionViable (const AllocaInst &AI, Partition &P ) {
2331
2337
// Collect the candidate types for vector-based promotion. Also track whether
2332
2338
// we have different element types.
2339
+ const DataLayout &DL = AI.getDataLayout ();
2333
2340
SmallVector<VectorType *, 4 > CandidateTys;
2334
2341
SetVector<Type *> LoadStoreTys;
2335
2342
SetVector<Type *> DeferredTys;
@@ -2375,7 +2382,7 @@ static VectorType *isVectorPromotionViable(Partition &P, const DataLayout &DL) {
2375
2382
else if (auto *SI = dyn_cast<StoreInst>(S.getUse ()->getUser ()))
2376
2383
Ty = SI->getValueOperand ()->getType ();
2377
2384
else if (auto *II = dyn_cast<MemSetInst>(S.getUse ()->getUser ())) {
2378
- Ty = getVectorTypeFor (*II, DL );
2385
+ Ty = getVectorTypeFor (AI, P, *II );
2379
2386
if (!Ty)
2380
2387
continue ;
2381
2388
} else
@@ -2396,14 +2403,14 @@ static VectorType *isVectorPromotionViable(Partition &P, const DataLayout &DL) {
2396
2403
2397
2404
SmallVector<VectorType *, 4 > CandidateTysCopy = CandidateTys;
2398
2405
if (auto *VTy = createAndCheckVectorTypesForPromotion (
2399
- LoadStoreTys, CandidateTysCopy, CheckCandidateType, P, DL ,
2406
+ LoadStoreTys, CandidateTysCopy, CheckCandidateType, AI, P ,
2400
2407
CandidateTys, HaveCommonEltTy, CommonEltTy, HaveVecPtrTy,
2401
2408
HaveCommonVecPtrTy, CommonVecPtrTy))
2402
2409
return VTy;
2403
2410
2404
2411
CandidateTys.clear ();
2405
2412
return createAndCheckVectorTypesForPromotion (
2406
- DeferredTys, CandidateTysCopy, CheckCandidateType, P, DL , CandidateTys,
2413
+ DeferredTys, CandidateTysCopy, CheckCandidateType, AI, P , CandidateTys,
2407
2414
HaveCommonEltTy, CommonEltTy, HaveVecPtrTy, HaveCommonVecPtrTy,
2408
2415
CommonVecPtrTy);
2409
2416
}
@@ -4386,6 +4393,13 @@ static Type *getTypePartition(const DataLayout &DL, Type *Ty, uint64_t Offset,
4386
4393
return SubTy;
4387
4394
}
4388
4395
4396
+ static Type *getTypePartition (const AllocaInst &AI, const Partition &P) {
4397
+ if (P.empty ())
4398
+ return nullptr ;
4399
+ return getTypePartition (AI.getDataLayout (), AI.getAllocatedType (),
4400
+ P.beginOffset (), P.size ());
4401
+ }
4402
+
4389
4403
// / Pre-split loads and stores to simplify rewriting.
4390
4404
// /
4391
4405
// / We want to break up the splittable load+store pairs as much as
@@ -4929,12 +4943,12 @@ AllocaInst *SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS,
4929
4943
4930
4944
// If the common use types are not viable for promotion then attempt to find
4931
4945
// another type that is viable.
4932
- if (SliceVecTy && !checkVectorTypeForPromotion (P, SliceVecTy, DL ))
4946
+ if (SliceVecTy && !checkVectorTypeForPromotion (AI, P, SliceVecTy ))
4933
4947
if (Type *TypePartitionTy = getTypePartition (DL, AI.getAllocatedType (),
4934
4948
P.beginOffset (), P.size ())) {
4935
4949
VectorType *TypePartitionVecTy = dyn_cast<VectorType>(TypePartitionTy);
4936
4950
if (TypePartitionVecTy &&
4937
- checkVectorTypeForPromotion (P, TypePartitionVecTy, DL ))
4951
+ checkVectorTypeForPromotion (AI, P, TypePartitionVecTy ))
4938
4952
SliceTy = TypePartitionTy;
4939
4953
}
4940
4954
@@ -4945,7 +4959,7 @@ AllocaInst *SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS,
4945
4959
bool IsIntegerPromotable = isIntegerWideningViable (P, SliceTy, DL);
4946
4960
4947
4961
VectorType *VecTy =
4948
- IsIntegerPromotable ? nullptr : isVectorPromotionViable (P, DL );
4962
+ IsIntegerPromotable ? nullptr : isVectorPromotionViable (AI, P );
4949
4963
if (VecTy)
4950
4964
SliceTy = VecTy;
4951
4965
0 commit comments