@@ -23441,72 +23441,47 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget,
23441
23441
return DAG.getNode(ISD::MUL, dl, VT, R, Scale);
23442
23442
23443
23443
// If possible, lower this shift as a sequence of two shifts by
23444
- // constant plus a MOVSS/MOVSD/PBLEND instead of scalarizing it.
23444
+ // constant plus a BLENDing shuffle instead of scalarizing it.
23445
23445
// Example:
23446
23446
// (v4i32 (srl A, (build_vector < X, Y, Y, Y>)))
23447
23447
//
23448
23448
// Could be rewritten as:
23449
23449
// (v4i32 (MOVSS (srl A, <Y,Y,Y,Y>), (srl A, <X,X,X,X>)))
23450
23450
//
23451
23451
// The advantage is that the two shifts from the example would be
23452
- // lowered as X86ISD::VSRLI nodes. This would be cheaper than scalarizing
23453
- // the vector shift into four scalar shifts plus four pairs of vector
23454
- // insert/extract.
23452
+ // lowered as X86ISD::VSRLI nodes in parallel before blending.
23455
23453
if (ConstantAmt && (VT == MVT::v8i16 || VT == MVT::v4i32)) {
23456
- bool UseMOVSD = false;
23457
- bool CanBeSimplified;
23458
- // The splat value for the first packed shift (the 'X' from the example).
23459
- SDValue Amt1 = Amt->getOperand(0);
23460
- // The splat value for the second packed shift (the 'Y' from the example).
23461
- SDValue Amt2 = (VT == MVT::v4i32) ? Amt->getOperand(1) : Amt->getOperand(2);
23462
-
23463
- // See if it is possible to replace this node with a sequence of
23464
- // two shifts followed by a MOVSS/MOVSD/PBLEND.
23465
- if (VT == MVT::v4i32) {
23466
- // Check if it is legal to use a MOVSS.
23467
- CanBeSimplified = Amt2 == Amt->getOperand(2) &&
23468
- Amt2 == Amt->getOperand(3);
23469
- if (!CanBeSimplified) {
23470
- // Otherwise, check if we can still simplify this node using a MOVSD.
23471
- CanBeSimplified = Amt1 == Amt->getOperand(1) &&
23472
- Amt->getOperand(2) == Amt->getOperand(3);
23473
- UseMOVSD = true;
23474
- Amt2 = Amt->getOperand(2);
23454
+ SDValue Amt1, Amt2;
23455
+ unsigned NumElts = VT.getVectorNumElements();
23456
+ SmallVector<int, 8> ShuffleMask;
23457
+ for (unsigned i = 0; i != NumElts; ++i) {
23458
+ SDValue A = Amt->getOperand(i);
23459
+ if (A.isUndef()) {
23460
+ ShuffleMask.push_back(SM_SentinelUndef);
23461
+ continue;
23475
23462
}
23476
- } else {
23477
- // Do similar checks for the case where the machine value type
23478
- // is MVT::v8i16.
23479
- CanBeSimplified = Amt1 == Amt->getOperand(1);
23480
- for (unsigned i=3; i != 8 && CanBeSimplified; ++i)
23481
- CanBeSimplified = Amt2 == Amt->getOperand(i);
23482
-
23483
- if (!CanBeSimplified) {
23484
- UseMOVSD = true;
23485
- CanBeSimplified = true;
23486
- Amt2 = Amt->getOperand(4);
23487
- for (unsigned i=0; i != 4 && CanBeSimplified; ++i)
23488
- CanBeSimplified = Amt1 == Amt->getOperand(i);
23489
- for (unsigned j=4; j != 8 && CanBeSimplified; ++j)
23490
- CanBeSimplified = Amt2 == Amt->getOperand(j);
23463
+ if (!Amt1 || Amt1 == A) {
23464
+ ShuffleMask.push_back(i);
23465
+ Amt1 = A;
23466
+ continue;
23467
+ }
23468
+ if (!Amt2 || Amt2 == A) {
23469
+ ShuffleMask.push_back(i + NumElts);
23470
+ Amt2 = A;
23471
+ continue;
23491
23472
}
23473
+ break;
23492
23474
}
23493
23475
23494
- if (CanBeSimplified && isa<ConstantSDNode>(Amt1) &&
23476
+ if (ShuffleMask.size() == NumElts && isa<ConstantSDNode>(Amt1) &&
23495
23477
isa<ConstantSDNode>(Amt2)) {
23496
- // Replace this node with two shifts followed by a MOVSS/MOVSD/PBLEND.
23497
23478
SDValue Splat1 =
23498
23479
DAG.getConstant(cast<ConstantSDNode>(Amt1)->getAPIntValue(), dl, VT);
23499
23480
SDValue Shift1 = DAG.getNode(Op->getOpcode(), dl, VT, R, Splat1);
23500
23481
SDValue Splat2 =
23501
23482
DAG.getConstant(cast<ConstantSDNode>(Amt2)->getAPIntValue(), dl, VT);
23502
23483
SDValue Shift2 = DAG.getNode(Op->getOpcode(), dl, VT, R, Splat2);
23503
- SDValue BitCast1 = DAG.getBitcast(MVT::v4i32, Shift1);
23504
- SDValue BitCast2 = DAG.getBitcast(MVT::v4i32, Shift2);
23505
- if (UseMOVSD)
23506
- return DAG.getBitcast(VT, DAG.getVectorShuffle(MVT::v4i32, dl, BitCast1,
23507
- BitCast2, {0, 1, 6, 7}));
23508
- return DAG.getBitcast(VT, DAG.getVectorShuffle(MVT::v4i32, dl, BitCast1,
23509
- BitCast2, {0, 5, 6, 7}));
23484
+ return DAG.getVectorShuffle(VT, dl, Shift1, Shift2, ShuffleMask);
23510
23485
}
23511
23486
}
23512
23487
0 commit comments