@@ -173,15 +173,38 @@ namespace {
173
173
}
174
174
};
175
175
176
- struct PredicatedMI {
177
- MachineInstr *MI = nullptr ;
178
- SetVector<MachineInstr*> Predicates;
176
+ class VPTBlock {
177
+ SmallVector<MachineInstr *, 4 > Insts;
179
178
180
179
public:
181
- PredicatedMI (MachineInstr *I, SetVector<MachineInstr *> &Preds) : MI(I) {
182
- assert (I && " Instruction must not be null!" );
183
- Predicates.insert (Preds.begin (), Preds.end ());
180
+ VPTBlock (MachineInstr *MI) { Insts.push_back (MI); }
181
+
182
+ // Have we found an instruction within the block which defines the vpr? If
183
+ // so, not all the instructions in the block will have the same predicate.
184
+ bool hasUniformPredicate () { return getDivergent () == nullptr ; }
185
+
186
+ // If it exists, return the first internal instruction which modifies the
187
+ // VPR.
188
+ MachineInstr *getDivergent () {
189
+ SmallVectorImpl<MachineInstr *> &Insts = getInsts ();
190
+ for (unsigned i = 1 ; i < Insts.size (); ++i) {
191
+ MachineInstr *Next = Insts[i];
192
+ if (isVectorPredicate (Next))
193
+ return Next; // Found an instruction altering the vpr.
194
+ }
195
+ return nullptr ;
184
196
}
197
+
198
+ void insert (MachineInstr *MI) {
199
+ Insts.push_back (MI);
200
+ // VPT/VPST + 4 predicated instructions.
201
+ assert (Insts.size () <= 5 && " Too many instructions in VPT block!" );
202
+ }
203
+
204
+ bool containsVCTP () const { return llvm::any_of (Insts, isVCTP); }
205
+
206
+ unsigned size () const { return Insts.size (); }
207
+ SmallVectorImpl<MachineInstr *> &getInsts () { return Insts; }
185
208
};
186
209
187
210
// Represent the current state of the VPR and hold all instances which
@@ -192,86 +215,55 @@ namespace {
192
215
class VPTState {
193
216
friend struct LowOverheadLoop ;
194
217
195
- SmallVector<MachineInstr *, 4 > Insts;
196
-
197
- static SmallVector<VPTState, 4 > Blocks;
198
- static SetVector<MachineInstr *> CurrentPredicates;
199
- static std::map<MachineInstr *,
200
- std::unique_ptr<PredicatedMI>> PredicatedInsts;
218
+ SmallVector<VPTBlock, 4 > Blocks;
219
+ SetVector<MachineInstr *> CurrentPredicates;
220
+ std::map<MachineInstr *, SetVector<MachineInstr *>> PredicatedInsts;
201
221
202
- static void CreateVPTBlock (MachineInstr *MI) {
222
+ void CreateVPTBlock (MachineInstr *MI) {
203
223
assert ((CurrentPredicates.size () || MI->getParent ()->isLiveIn (ARM::VPR))
204
224
&& " Can't begin VPT without predicate" );
205
225
Blocks.emplace_back (MI);
206
226
// The execution of MI is predicated upon the current set of instructions
207
227
// that are AND'ed together to form the VPR predicate value. In the case
208
228
// that MI is a VPT, CurrentPredicates will also just be MI.
209
- PredicatedInsts.emplace (
210
- MI, std::make_unique<PredicatedMI>(MI, CurrentPredicates));
229
+ PredicatedInsts[MI] = CurrentPredicates;
211
230
}
212
231
213
- static void reset () {
214
- Blocks.clear ();
215
- PredicatedInsts.clear ();
216
- CurrentPredicates.clear ();
217
- }
218
-
219
- static void addInst (MachineInstr *MI) {
232
+ void addInst (MachineInstr *MI) {
220
233
Blocks.back ().insert (MI);
221
- PredicatedInsts.emplace (
222
- MI, std::make_unique<PredicatedMI>(MI, CurrentPredicates));
234
+ PredicatedInsts[MI] = CurrentPredicates;
223
235
}
224
236
225
- static void addPredicate (MachineInstr *MI) {
237
+ void addPredicate (MachineInstr *MI) {
226
238
LLVM_DEBUG (dbgs () << " ARM Loops: Adding VPT Predicate: " << *MI);
227
239
CurrentPredicates.insert (MI);
228
240
}
229
241
230
- static void resetPredicate (MachineInstr *MI) {
242
+ void resetPredicate (MachineInstr *MI) {
231
243
LLVM_DEBUG (dbgs () << " ARM Loops: Resetting VPT Predicate: " << *MI);
232
244
CurrentPredicates.clear ();
233
245
CurrentPredicates.insert (MI);
234
246
}
235
247
236
248
public:
237
- // Have we found an instruction within the block which defines the vpr? If
238
- // so, not all the instructions in the block will have the same predicate.
239
- static bool hasUniformPredicate (VPTState &Block) {
240
- return getDivergent (Block) == nullptr ;
241
- }
242
-
243
- // If it exists, return the first internal instruction which modifies the
244
- // VPR.
245
- static MachineInstr *getDivergent (VPTState &Block) {
246
- SmallVectorImpl<MachineInstr *> &Insts = Block.getInsts ();
247
- for (unsigned i = 1 ; i < Insts.size (); ++i) {
248
- MachineInstr *Next = Insts[i];
249
- if (isVectorPredicate (Next))
250
- return Next; // Found an instruction altering the vpr.
251
- }
252
- return nullptr ;
253
- }
254
-
255
249
// Return whether the given instruction is predicated upon a VCTP.
256
- static bool isPredicatedOnVCTP (MachineInstr *MI, bool Exclusive = false ) {
257
- SetVector<MachineInstr *> &Predicates = PredicatedInsts[MI]-> Predicates ;
250
+ bool isPredicatedOnVCTP (MachineInstr *MI, bool Exclusive = false ) {
251
+ SetVector<MachineInstr *> &Predicates = PredicatedInsts[MI];
258
252
if (Exclusive && Predicates.size () != 1 )
259
253
return false ;
260
254
return llvm::any_of (Predicates, isVCTP);
261
255
}
262
256
263
257
// Is the VPST, controlling the block entry, predicated upon a VCTP?
264
- static bool isEntryPredicatedOnVCTP (VPTState &Block,
265
- bool Exclusive = false ) {
258
+ bool isEntryPredicatedOnVCTP (VPTBlock &Block, bool Exclusive = false ) {
266
259
SmallVectorImpl<MachineInstr *> &Insts = Block.getInsts ();
267
260
return isPredicatedOnVCTP (Insts.front (), Exclusive);
268
261
}
269
262
270
263
// If this block begins with a VPT, we can check whether it's using
271
264
// at least one predicated input, possibly together with loop-invariant
272
265
// ones, which would result in it being implicitly predicated.
273
- static bool hasImplicitlyValidVPT (VPTState &Block,
274
- ReachingDefAnalysis &RDA) {
266
+ bool hasImplicitlyValidVPT (VPTBlock &Block, ReachingDefAnalysis &RDA) {
275
267
SmallVectorImpl<MachineInstr *> &Insts = Block.getInsts ();
276
268
MachineInstr *VPT = Insts.front ();
277
269
assert (isVPTOpcode (VPT->getOpcode ()) &&
@@ -308,7 +300,7 @@ namespace {
308
300
(IsOperandPredicated (VPT, 2 ) || IsOperandInvariant (VPT, 2 ));
309
301
}
310
302
311
- static bool isValid (ReachingDefAnalysis &RDA) {
303
+ bool isValid (ReachingDefAnalysis &RDA) {
312
304
// All predication within the loop should be based on vctp. If the block
313
305
// isn't predicated on entry, check whether the vctp is within the block
314
306
// and that all other instructions are then predicated on it.
@@ -344,21 +336,6 @@ namespace {
344
336
}
345
337
return true ;
346
338
}
347
-
348
- VPTState (MachineInstr *MI) { Insts.push_back (MI); }
349
-
350
- void insert (MachineInstr *MI) {
351
- Insts.push_back (MI);
352
- // VPT/VPST + 4 predicated instructions.
353
- assert (Insts.size () <= 5 && " Too many instructions in VPT block!" );
354
- }
355
-
356
- bool containsVCTP () const {
357
- return llvm::any_of (Insts, isVCTP);
358
- }
359
-
360
- unsigned size () const { return Insts.size (); }
361
- SmallVectorImpl<MachineInstr *> &getInsts () { return Insts; }
362
339
};
363
340
364
341
struct LowOverheadLoop {
@@ -383,6 +360,7 @@ namespace {
383
360
SmallPtrSet<MachineInstr *, 4 > VMOVCopies;
384
361
bool Revert = false ;
385
362
bool CannotTailPredicate = false ;
363
+ VPTState VPTstate;
386
364
387
365
LowOverheadLoop (MachineLoop &ML, MachineLoopInfo &MLI,
388
366
ReachingDefAnalysis &RDA, const TargetRegisterInfo &TRI,
@@ -394,7 +372,6 @@ namespace {
394
372
Preheader = MBB;
395
373
else if (auto *MBB = MLI.findLoopPreheader (&ML, true , true ))
396
374
Preheader = MBB;
397
- VPTState::reset ();
398
375
}
399
376
400
377
// If this is an MVE instruction, check that we know how to use tail
@@ -434,9 +411,7 @@ namespace {
434
411
return Start && Dec && End;
435
412
}
436
413
437
- SmallVectorImpl<VPTState> &getVPTBlocks () {
438
- return VPTState::Blocks;
439
- }
414
+ SmallVectorImpl<VPTBlock> &getVPTBlocks () { return VPTstate.Blocks ; }
440
415
441
416
// Return the operand for the loop start instruction. This will be the loop
442
417
// iteration count, or the number of elements if we're tail predicating.
@@ -529,11 +504,6 @@ namespace {
529
504
530
505
char ARMLowOverheadLoops::ID = 0 ;
531
506
532
- SmallVector<VPTState, 4 > VPTState::Blocks;
533
- SetVector<MachineInstr *> VPTState::CurrentPredicates;
534
- std::map<MachineInstr *,
535
- std::unique_ptr<PredicatedMI>> VPTState::PredicatedInsts;
536
-
537
507
INITIALIZE_PASS (ARMLowOverheadLoops, DEBUG_TYPE, ARM_LOW_OVERHEAD_LOOPS_NAME,
538
508
false , false )
539
509
@@ -622,7 +592,7 @@ bool LowOverheadLoop::ValidateTailPredicate() {
622
592
return false ;
623
593
}
624
594
625
- if (!VPTState:: isValid (RDA)) {
595
+ if (!VPTstate. isValid (RDA)) {
626
596
LLVM_DEBUG (dbgs () << " ARM Loops: Invalid VPT state.\n " );
627
597
return false ;
628
598
}
@@ -1250,7 +1220,7 @@ bool LowOverheadLoop::ValidateMVEInst(MachineInstr *MI) {
1250
1220
continue ;
1251
1221
1252
1222
if (ARM::isVpred (Op.value ().OperandType )) {
1253
- VPTState:: addInst (MI);
1223
+ VPTstate. addInst (MI);
1254
1224
IsUse = true ;
1255
1225
} else if (MI->getOpcode () != ARM::MVE_VPST) {
1256
1226
LLVM_DEBUG (dbgs () << " ARM Loops: Found instruction using vpr: " << *MI);
@@ -1287,15 +1257,15 @@ bool LowOverheadLoop::ValidateMVEInst(MachineInstr *MI) {
1287
1257
// Clear the existing predicate when we're not in VPT Active state,
1288
1258
// otherwise we add to it.
1289
1259
if (!isVectorPredicated (MI))
1290
- VPTState:: resetPredicate (MI);
1260
+ VPTstate. resetPredicate (MI);
1291
1261
else
1292
- VPTState:: addPredicate (MI);
1262
+ VPTstate. addPredicate (MI);
1293
1263
}
1294
1264
1295
1265
// Finally once the predicate has been modified, we can start a new VPT
1296
1266
// block if necessary.
1297
1267
if (isVPTOpcode (MI->getOpcode ()))
1298
- VPTState:: CreateVPTBlock (MI);
1268
+ VPTstate. CreateVPTBlock (MI);
1299
1269
1300
1270
return true ;
1301
1271
}
@@ -1328,7 +1298,6 @@ bool ARMLowOverheadLoops::runOnMachineFunction(MachineFunction &mf) {
1328
1298
}
1329
1299
1330
1300
bool ARMLowOverheadLoops::ProcessLoop (MachineLoop *ML) {
1331
-
1332
1301
bool Changed = false ;
1333
1302
1334
1303
// Process inner loops first.
@@ -1611,9 +1580,9 @@ void ARMLowOverheadLoops::ConvertVPTBlocks(LowOverheadLoop &LoLoop) {
1611
1580
TheVCMP = nullptr ;
1612
1581
};
1613
1582
1614
- if (VPTState:: isEntryPredicatedOnVCTP (Block, /* exclusive*/ true )) {
1583
+ if (LoLoop. VPTstate . isEntryPredicatedOnVCTP (Block, /* exclusive*/ true )) {
1615
1584
MachineInstr *VPST = Insts.front ();
1616
- if (VPTState:: hasUniformPredicate (Block )) {
1585
+ if (Block. hasUniformPredicate ()) {
1617
1586
// A vpt block starting with VPST, is only predicated upon vctp and has no
1618
1587
// internal vpr defs:
1619
1588
// - Remove vpst.
@@ -1629,7 +1598,7 @@ void ARMLowOverheadLoops::ConvertVPTBlocks(LowOverheadLoop &LoLoop) {
1629
1598
// we come across the divergent vpr def.
1630
1599
// - Insert a new vpst to predicate the instruction(s) that follow
1631
1600
// the divergent vpr def.
1632
- MachineInstr *Divergent = VPTState:: getDivergent (Block );
1601
+ MachineInstr *Divergent = Block. getDivergent ();
1633
1602
MachineBasicBlock *MBB = Divergent->getParent ();
1634
1603
auto DivergentNext = ++MachineBasicBlock::iterator (Divergent);
1635
1604
while (DivergentNext != MBB->end () && DivergentNext->isDebugInstr ())
0 commit comments