@@ -1190,16 +1190,16 @@ class LoopVectorizationCostModel {
 
   /// Returns true if the target machine supports masked store operation
   /// for the given \p DataType and kind of access to \p Ptr.
-  bool isLegalMaskedStore(Type *DataType, Value *Ptr, unsigned Alignment) {
+  bool isLegalMaskedStore(Type *DataType, Value *Ptr, MaybeAlign Alignment) {
     return Legal->isConsecutivePtr(Ptr) &&
-           TTI.isLegalMaskedStore(DataType, MaybeAlign(Alignment));
+           TTI.isLegalMaskedStore(DataType, Alignment);
   }
 
   /// Returns true if the target machine supports masked load operation
   /// for the given \p DataType and kind of access to \p Ptr.
-  bool isLegalMaskedLoad(Type *DataType, Value *Ptr, unsigned Alignment) {
+  bool isLegalMaskedLoad(Type *DataType, Value *Ptr, MaybeAlign Alignment) {
     return Legal->isConsecutivePtr(Ptr) &&
-           TTI.isLegalMaskedLoad(DataType, MaybeAlign(Alignment));
+           TTI.isLegalMaskedLoad(DataType, Alignment);
   }
 
   /// Returns true if the target machine supports masked scatter operation
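
Note (not part of the patch): a minimal, standalone C++17 sketch of what the hunk above changes. MaybeAlignment, the >= 16 rule, and both helper names are illustrative stand-ins rather than llvm::MaybeAlign or the real TTI hooks; the point is only that the legality helpers now accept and forward an optional alignment instead of re-wrapping an unsigned whose value 0 meant "unknown".

#include <cstdint>
#include <iostream>
#include <optional>

// Stand-in for llvm::MaybeAlign: an empty optional plays the role of
// "no alignment known".
using MaybeAlignment = std::optional<uint64_t>;

// Before: the helper took an unsigned and had to re-wrap it, treating 0 as
// "unknown".
bool isLegalMaskedStoreOld(unsigned Alignment) {
  MaybeAlignment A =
      (Alignment == 0) ? MaybeAlignment{} : MaybeAlignment{Alignment};
  return A.has_value() && *A >= 16; // placeholder target rule
}

// After: the optional flows through unchanged; wrapping happens once, where
// the alignment is read off the instruction.
bool isLegalMaskedStoreNew(MaybeAlignment Alignment) {
  return Alignment.has_value() && *Alignment >= 16; // same placeholder rule
}

int main() {
  std::cout << isLegalMaskedStoreOld(0) << " "
            << isLegalMaskedStoreNew(std::nullopt) << "\n"; // 0 0
  std::cout << isLegalMaskedStoreOld(32) << " "
            << isLegalMaskedStoreNew(32) << "\n";           // 1 1
}
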
@@ -2359,12 +2359,11 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr,
   Type *ScalarDataTy = getMemInstValueType(Instr);
   Type *DataTy = VectorType::get(ScalarDataTy, VF);
   Value *Ptr = getLoadStorePointerOperand(Instr);
-  unsigned Alignment = getLoadStoreAlignment(Instr);
   // An alignment of 0 means target abi alignment. We need to use the scalar's
   // target abi alignment in such a case.
   const DataLayout &DL = Instr->getModule()->getDataLayout();
-  if (!Alignment)
-    Alignment = DL.getABITypeAlignment(ScalarDataTy);
+  const Align Alignment =
+      DL.getValueOrABITypeAlignment(getLoadStoreAlignment(Instr), ScalarDataTy);
   unsigned AddressSpace = getLoadStoreAddressSpace(Instr);
 
   // Determine if the pointer operand of the access is either consecutive or
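
Note (not part of the patch): the removed if (!Alignment) fallback and the new DL.getValueOrABITypeAlignment(...) call express the same rule, "use the recorded alignment, otherwise the ABI alignment of the accessed type". A standalone sketch of that rule, with valueOrABITypeAlignment and MaybeAlignment as stand-in names rather than the real DataLayout API:

#include <cassert>
#include <cstdint>
#include <optional>

using MaybeAlignment = std::optional<uint64_t>; // stand-in for llvm::MaybeAlign

// Hypothetical stand-in for DataLayout::getValueOrABITypeAlignment(): prefer
// the alignment recorded on the instruction, else the ABI alignment of the
// accessed type.
uint64_t valueOrABITypeAlignment(MaybeAlignment Recorded, uint64_t ABIAlign) {
  return Recorded ? *Recorded : ABIAlign;
}

int main() {
  // No explicit alignment on the access: fall back to the ABI value.
  assert(valueOrABITypeAlignment(std::nullopt, /*ABIAlign=*/8) == 8);
  // An explicit alignment wins over the ABI default.
  assert(valueOrABITypeAlignment(uint64_t{16}, /*ABIAlign=*/8) == 16);
  return 0;
}
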
@@ -2428,8 +2427,8 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr,
       if (CreateGatherScatter) {
         Value *MaskPart = isMaskRequired ? Mask[Part] : nullptr;
         Value *VectorGep = getOrCreateVectorValue(Ptr, Part);
-        NewSI = Builder.CreateMaskedScatter(StoredVal, VectorGep, Alignment,
-                                            MaskPart);
+        NewSI = Builder.CreateMaskedScatter(StoredVal, VectorGep,
+                                            Alignment.value(), MaskPart);
       } else {
         if (Reverse) {
           // If we store to reverse consecutive memory locations, then we need
@@ -2440,10 +2439,11 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr,
         }
         auto *VecPtr = CreateVecPtr(Part, Ptr);
         if (isMaskRequired)
-          NewSI = Builder.CreateMaskedStore(StoredVal, VecPtr, Alignment,
-                                            Mask[Part]);
+          NewSI = Builder.CreateMaskedStore(StoredVal, VecPtr,
+                                            Alignment.value(), Mask[Part]);
         else
-          NewSI = Builder.CreateAlignedStore(StoredVal, VecPtr, Alignment);
+          NewSI =
+              Builder.CreateAlignedStore(StoredVal, VecPtr, Alignment.value());
       }
       addMetadata(NewSI, SI);
     }
@@ -2458,18 +2458,18 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr,
     if (CreateGatherScatter) {
       Value *MaskPart = isMaskRequired ? Mask[Part] : nullptr;
       Value *VectorGep = getOrCreateVectorValue(Ptr, Part);
-      NewLI = Builder.CreateMaskedGather(VectorGep, Alignment, MaskPart,
+      NewLI = Builder.CreateMaskedGather(VectorGep, Alignment.value(), MaskPart,
                                          nullptr, "wide.masked.gather");
       addMetadata(NewLI, LI);
     } else {
       auto *VecPtr = CreateVecPtr(Part, Ptr);
       if (isMaskRequired)
-        NewLI = Builder.CreateMaskedLoad(VecPtr, Alignment, Mask[Part],
+        NewLI = Builder.CreateMaskedLoad(VecPtr, Alignment.value(), Mask[Part],
                                          UndefValue::get(DataTy),
                                          "wide.masked.load");
       else
-        NewLI =
-            Builder.CreateAlignedLoad(DataTy, VecPtr, Alignment, "wide.load");
+        NewLI = Builder.CreateAlignedLoad(DataTy, VecPtr, Alignment.value(),
+                                          "wide.load");
 
       // Add metadata to the load, but setVectorValue to the reverse shuffle.
       addMetadata(NewLI, LI);
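
Note (not part of the patch): in the three hunks above the vectorizer now holds an Align that is known by construction, while the builder calls in this snapshot still take a plain unsigned, hence the .value() unwrapping at each call site. A standalone sketch of that boundary, with KnownAlign and createAlignedStoreLegacy as hypothetical stand-ins for llvm::Align and the builder API:

#include <cstdint>
#include <iostream>

// Stand-in for llvm::Align: by construction it always holds a known value,
// so there is no "0 means unknown" state left to check at the call sites.
class KnownAlign {
  uint64_t Value;

public:
  explicit KnownAlign(uint64_t V) : Value(V) {}
  uint64_t value() const { return Value; }
};

// Hypothetical legacy builder entry point that still takes a plain unsigned.
void createAlignedStoreLegacy(unsigned AlignInBytes) {
  std::cout << "store aligned to " << AlignInBytes << " bytes\n";
}

int main() {
  const KnownAlign A(16);              // resolved once, up front
  createAlignedStoreLegacy(A.value()); // unwrap only at the legacy boundary
}
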
@@ -4553,7 +4553,6 @@ bool LoopVectorizationCostModel::isScalarWithPredication(Instruction *I, unsigne
     return false;
   auto *Ptr = getLoadStorePointerOperand(I);
   auto *Ty = getMemInstValueType(I);
-  unsigned Alignment = getLoadStoreAlignment(I);
   // We have already decided how to vectorize this instruction, get that
   // result.
   if (VF > 1) {
@@ -4562,6 +4561,7 @@ bool LoopVectorizationCostModel::isScalarWithPredication(Instruction *I, unsigne
            "Widening decision should be ready at this moment");
     return WideningDecision == CM_Scalarize;
   }
+  const MaybeAlign Alignment = getLoadStoreAlignment(I);
   return isa<LoadInst>(I) ?
          !(isLegalMaskedLoad(Ty, Ptr, Alignment) || isLegalMaskedGather(Ty))
        : !(isLegalMaskedStore(Ty, Ptr, Alignment) || isLegalMaskedScatter(Ty));
@@ -4607,9 +4607,9 @@ bool LoopVectorizationCostModel::interleavedAccessCanBeWidened(Instruction *I,
          "Masked interleave-groups for predicated accesses are not enabled.");
 
   auto *Ty = getMemInstValueType(I);
-  unsigned Alignment = getLoadStoreAlignment(I);
-  return isa<LoadInst>(I) ? TTI.isLegalMaskedLoad(Ty, MaybeAlign(Alignment))
-                          : TTI.isLegalMaskedStore(Ty, MaybeAlign(Alignment));
+  const MaybeAlign Alignment = getLoadStoreAlignment(I);
+  return isa<LoadInst>(I) ? TTI.isLegalMaskedLoad(Ty, Alignment)
+                          : TTI.isLegalMaskedStore(Ty, Alignment);
 }
 
 bool LoopVectorizationCostModel::memoryInstructionCanBeWidened(Instruction *I,
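
Note (not part of the patch): the legality-check hunks above hoist the wrapping out of the TTI call sites and into the value returned by getLoadStoreAlignment(I). Assuming the old encoding where an unsigned 0 meant "no alignment recorded" (as the comment in the earlier hunk states), wrapping at either point describes the same state, which is why the change is mechanical. A standalone sketch with stand-in types:

#include <cassert>
#include <cstdint>
#include <optional>

using MaybeAlignment = std::optional<uint64_t>; // stand-in for llvm::MaybeAlign

// Old call sites wrapped the legacy unsigned right before the TTI query.
MaybeAlignment wrapLegacy(unsigned AlignInBytes) {
  return AlignInBytes == 0 ? MaybeAlignment{} : MaybeAlignment{AlignInBytes};
}

int main() {
  // Wrapping 0 and starting from "no value" describe the same state, so the
  // query sees identical inputs whether the wrapping happens at the call site
  // or where the alignment is first read.
  assert(wrapLegacy(0) == MaybeAlignment{});
  assert(wrapLegacy(8) == MaybeAlignment{uint64_t{8}});
  return 0;
}
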
@@ -5731,7 +5731,6 @@ unsigned LoopVectorizationCostModel::getMemInstScalarizationCost(Instruction *I,
   Type *ValTy = getMemInstValueType(I);
   auto SE = PSE.getSE();
 
-  unsigned Alignment = getLoadStoreAlignment(I);
   unsigned AS = getLoadStoreAddressSpace(I);
   Value *Ptr = getLoadStorePointerOperand(I);
   Type *PtrTy = ToVectorTy(Ptr->getType(), VF);
@@ -5745,9 +5744,9 @@ unsigned LoopVectorizationCostModel::getMemInstScalarizationCost(Instruction *I,
 
   // Don't pass *I here, since it is scalar but will actually be part of a
   // vectorized loop where the user of it is a vectorized instruction.
-  Cost += VF *
-          TTI.getMemoryOpCost(I->getOpcode(), ValTy->getScalarType(), Alignment,
-                              AS);
+  const MaybeAlign Alignment = getLoadStoreAlignment(I);
+  Cost += VF * TTI.getMemoryOpCost(I->getOpcode(), ValTy->getScalarType(),
+                                   Alignment ? Alignment->value() : 0, AS);
 
   // Get the overhead of the extractelement and insertelement instructions
   // we might create due to scalarization.
@@ -5772,18 +5771,20 @@ unsigned LoopVectorizationCostModel::getConsecutiveMemOpCost(Instruction *I,
                                                              unsigned VF) {
   Type *ValTy = getMemInstValueType(I);
   Type *VectorTy = ToVectorTy(ValTy, VF);
-  unsigned Alignment = getLoadStoreAlignment(I);
   Value *Ptr = getLoadStorePointerOperand(I);
   unsigned AS = getLoadStoreAddressSpace(I);
   int ConsecutiveStride = Legal->isConsecutivePtr(Ptr);
 
   assert((ConsecutiveStride == 1 || ConsecutiveStride == -1) &&
          "Stride should be 1 or -1 for consecutive memory access");
+  const MaybeAlign Alignment = getLoadStoreAlignment(I);
   unsigned Cost = 0;
   if (Legal->isMaskRequired(I))
-    Cost += TTI.getMaskedMemoryOpCost(I->getOpcode(), VectorTy, Alignment, AS);
+    Cost += TTI.getMaskedMemoryOpCost(I->getOpcode(), VectorTy,
+                                      Alignment ? Alignment->value() : 0, AS);
   else
-    Cost += TTI.getMemoryOpCost(I->getOpcode(), VectorTy, Alignment, AS, I);
+    Cost += TTI.getMemoryOpCost(I->getOpcode(), VectorTy,
+                                Alignment ? Alignment->value() : 0, AS, I);
 
   bool Reverse = ConsecutiveStride < 0;
   if (Reverse)
@@ -5795,33 +5796,37 @@ unsigned LoopVectorizationCostModel::getUniformMemOpCost(Instruction *I,
                                                          unsigned VF) {
   Type *ValTy = getMemInstValueType(I);
   Type *VectorTy = ToVectorTy(ValTy, VF);
-  unsigned Alignment = getLoadStoreAlignment(I);
+  const MaybeAlign Alignment = getLoadStoreAlignment(I);
   unsigned AS = getLoadStoreAddressSpace(I);
   if (isa<LoadInst>(I)) {
     return TTI.getAddressComputationCost(ValTy) +
-           TTI.getMemoryOpCost(Instruction::Load, ValTy, Alignment, AS) +
+           TTI.getMemoryOpCost(Instruction::Load, ValTy,
+                               Alignment ? Alignment->value() : 0, AS) +
            TTI.getShuffleCost(TargetTransformInfo::SK_Broadcast, VectorTy);
   }
   StoreInst *SI = cast<StoreInst>(I);
 
   bool isLoopInvariantStoreValue = Legal->isUniform(SI->getValueOperand());
   return TTI.getAddressComputationCost(ValTy) +
-         TTI.getMemoryOpCost(Instruction::Store, ValTy, Alignment, AS) +
-         (isLoopInvariantStoreValue ? 0 : TTI.getVectorInstrCost(
-                                              Instruction::ExtractElement,
-                                              VectorTy, VF - 1));
+         TTI.getMemoryOpCost(Instruction::Store, ValTy,
+                             Alignment ? Alignment->value() : 0, AS) +
+         (isLoopInvariantStoreValue
+              ? 0
+              : TTI.getVectorInstrCost(Instruction::ExtractElement, VectorTy,
+                                       VF - 1));
 }
 
 unsigned LoopVectorizationCostModel::getGatherScatterCost(Instruction *I,
                                                           unsigned VF) {
   Type *ValTy = getMemInstValueType(I);
   Type *VectorTy = ToVectorTy(ValTy, VF);
-  unsigned Alignment = getLoadStoreAlignment(I);
+  const MaybeAlign Alignment = getLoadStoreAlignment(I);
   Value *Ptr = getLoadStorePointerOperand(I);
 
   return TTI.getAddressComputationCost(VectorTy) +
          TTI.getGatherScatterOpCost(I->getOpcode(), VectorTy, Ptr,
-                                    Legal->isMaskRequired(I), Alignment);
+                                    Legal->isMaskRequired(I),
+                                    Alignment ? Alignment->value() : 0);
 }
 
 unsigned LoopVectorizationCostModel::getInterleaveGroupCost(Instruction *I,
@@ -5868,11 +5873,12 @@ unsigned LoopVectorizationCostModel::getMemoryInstructionCost(Instruction *I,
   // moment.
   if (VF == 1) {
     Type *ValTy = getMemInstValueType(I);
-    unsigned Alignment = getLoadStoreAlignment(I);
+    const MaybeAlign Alignment = getLoadStoreAlignment(I);
     unsigned AS = getLoadStoreAddressSpace(I);
 
     return TTI.getAddressComputationCost(ValTy) +
-           TTI.getMemoryOpCost(I->getOpcode(), ValTy, Alignment, AS, I);
+           TTI.getMemoryOpCost(I->getOpcode(), ValTy,
+                               Alignment ? Alignment->value() : 0, AS, I);
   }
   return getWideningCost(I, VF);
 }
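
Note (not part of the patch): throughout the cost-model hunks the value flows in the opposite direction: a MaybeAlign is converted back to the legacy "0 means unknown" unsigned for cost hooks that still take an integer, hence the repeated Alignment ? Alignment->value() : 0. A standalone sketch of that conversion, with toLegacyAlignment and memoryOpCost as hypothetical stand-ins rather than the TTI interface:

#include <cassert>
#include <cstdint>
#include <optional>

using MaybeAlignment = std::optional<uint64_t>; // stand-in for llvm::MaybeAlign

// Convert back to the legacy integer encoding expected by an older interface;
// 0 still means "not specified".
unsigned toLegacyAlignment(MaybeAlignment A) {
  return A ? static_cast<unsigned>(*A) : 0;
}

// Hypothetical legacy cost hook taking the integer encoding.
unsigned memoryOpCost(unsigned AlignInBytes) {
  return AlignInBytes >= 16 ? 1 : 2; // placeholder cost rule
}

int main() {
  assert(memoryOpCost(toLegacyAlignment(uint64_t{32})) == 1);
  assert(memoryOpCost(toLegacyAlignment(std::nullopt)) == 2);
  return 0;
}
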