
Commit

Merged master:702ccb40e2d7 into amd-gfx:121871dddd87
Local branch amd-gfx 121871d Merged master:5d330f435e12 into amd-gfx:3c5789a6ff0c
Remote branch master 702ccb4 [InstCombine] getLogBase2(undef) -> 0.
Sw authored and Sw committed Oct 10, 2020
2 parents 121871d + 702ccb4 commit 4a7e6e5
Showing 7 changed files with 578 additions and 320 deletions.
1 change: 1 addition & 0 deletions llvm/cmake/modules/CheckAtomic.cmake
@@ -32,6 +32,7 @@ function(check_working_cxx_atomics64 varname)
std::atomic<uint64_t> x (0);
int main() {
uint64_t i = x.load(std::memory_order_relaxed);
(void)i;
return 0;
}
" ${varname})
1 change: 1 addition & 0 deletions llvm/include/llvm/Target/TargetSelectionDAG.td
@@ -638,6 +638,7 @@ def ist : SDNode<"ISD::STORE" , SDTIStore,

def vector_shuffle : SDNode<"ISD::VECTOR_SHUFFLE", SDTVecShuffle, []>;
def build_vector : SDNode<"ISD::BUILD_VECTOR", SDTypeProfile<1, -1, []>, []>;
def splat_vector : SDNode<"ISD::SPLAT_VECTOR", SDTypeProfile<1, 1, []>, []>;
def scalar_to_vector : SDNode<"ISD::SCALAR_TO_VECTOR", SDTypeProfile<1, 1, []>,
[]>;

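For illustration only (hypothetical IR, not part of this commit): a scalable-vector splat written with the usual insertelement + shufflevector idiom is the kind of input SelectionDAG builds as an ISD::SPLAT_VECTOR node, which backend pattern files can now refer to through the new splat_vector record.

; Hypothetical example: splat a scalar %s across a scalable vector.
; SelectionDAG represents this splat as ISD::SPLAT_VECTOR, the node
; exposed to TableGen patterns by the splat_vector definition above.
define <vscale x 4 x i32> @splat_i32(i32 %s) {
  %ins = insertelement <vscale x 4 x i32> undef, i32 %s, i32 0
  %splat = shufflevector <vscale x 4 x i32> %ins, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
  ret <vscale x 4 x i32> %splat
}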
4 changes: 4 additions & 0 deletions llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -11040,6 +11040,10 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
Results.push_back(Lowered);
return;
}
case ISD::FSHL:
case ISD::FSHR:
// Don't handle funnel shifts here.
return;
case ISD::BITCAST:
// Don't handle bitcast here.
return;
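Like the ISD::BITCAST case next to them, the new FSHL/FSHR entries make ReplaceNodeResults decline the node, presumably so that a funnel shift whose result type is illegal is left to the generic expander rather than falling through to the default case. A minimal sketch of the kind of input that takes this path on a 32-bit target (hypothetical, but it mirrors the rotl_i64 test updated below):

; An i64 rotate expressed as a funnel shift. On ppc32 the i64 result type is
; illegal, so the node has to be expanded generically rather than handled here.
declare i64 @llvm.fshl.i64(i64, i64, i64)

define i64 @rotl_i64_example(i64 %x, i64 %z) {
  %r = call i64 @llvm.fshl.i64(i64 %x, i64 %x, i64 %z)
  ret i64 %r
}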
23 changes: 9 additions & 14 deletions llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -99,12 +99,10 @@ static Value *simplifyValueKnownNonZero(Value *V, InstCombinerImpl &IC,
///
/// If C is a scalar/fixed width vector of known powers of 2, then this
/// function returns a new scalar/fixed width vector obtained from logBase2
/// of C.
/// of C. Undef elements in a vector are set to zero.
/// Return a null pointer otherwise.
static Constant *getLogBase2(Type *Ty, Constant *C) {
// Note that log2(iN undef) is *NOT* iN undef, because log2(iN undef) u< N.
// FIXME: just assert that C there is no undef in \p C.

static Constant *getLogBase2(Constant *C) {
Type *Ty = C->getType();
const APInt *IVal;
if (match(C, m_APInt(IVal)) && IVal->isPowerOf2())
return ConstantInt::get(Ty, IVal->logBase2());
@@ -119,8 +117,9 @@ static Constant *getLogBase2(Type *Ty, Constant *C) {
Constant *Elt = C->getAggregateElement(I);
if (!Elt)
return nullptr;
// Note that log2(iN undef) is *NOT* iN undef, because log2(iN undef) u< N.
if (isa<UndefValue>(Elt)) {
Elts.push_back(UndefValue::get(Ty->getScalarType()));
Elts.push_back(Constant::getNullValue(Ty->getScalarType()));
continue;
}
if (!match(Elt, m_APInt(IVal)) || !IVal->isPowerOf2())
@@ -220,11 +219,7 @@ Instruction *InstCombinerImpl::visitMul(BinaryOperator &I) {

if (match(&I, m_Mul(m_Value(NewOp), m_Constant(C1)))) {
// Replace X*(2^C) with X << C, where C is either a scalar or a vector.
// Note that we need to sanitize undef multipliers to 1,
// to avoid introducing poison.
Constant *SafeC1 = Constant::replaceUndefsWith(
C1, ConstantInt::get(C1->getType()->getScalarType(), 1));
if (Constant *NewCst = getLogBase2(NewOp->getType(), SafeC1)) {
if (Constant *NewCst = getLogBase2(C1)) {
BinaryOperator *Shl = BinaryOperator::CreateShl(NewOp, NewCst);

if (I.hasNoUnsignedWrap())
@@ -913,7 +908,7 @@ struct UDivFoldAction {
static Instruction *foldUDivPow2Cst(Value *Op0, Value *Op1,
const BinaryOperator &I,
InstCombinerImpl &IC) {
Constant *C1 = getLogBase2(Op0->getType(), cast<Constant>(Op1));
Constant *C1 = getLogBase2(cast<Constant>(Op1));
if (!C1)
llvm_unreachable("Failed to constant fold udiv -> logbase2");
BinaryOperator *LShr = BinaryOperator::CreateLShr(Op0, C1);
@@ -934,7 +929,7 @@ static Instruction *foldUDivShl(Value *Op0, Value *Op1, const BinaryOperator &I,
Value *N;
if (!match(ShiftLeft, m_Shl(m_Constant(CI), m_Value(N))))
llvm_unreachable("match should never fail here!");
Constant *Log2Base = getLogBase2(N->getType(), CI);
Constant *Log2Base = getLogBase2(CI);
if (!Log2Base)
llvm_unreachable("getLogBase2 should never fail here!");
N = IC.Builder.CreateAdd(N, Log2Base);
@@ -1151,7 +1146,7 @@ Instruction *InstCombinerImpl::visitSDiv(BinaryOperator &I) {
if (DivisorWasNegative)
Op1 = ConstantExpr::getNeg(cast<Constant>(Op1));
auto *AShr = BinaryOperator::CreateExactAShr(
Op0, getLogBase2(Ty, cast<Constant>(Op1)), I.getName());
Op0, getLogBase2(cast<Constant>(Op1)), I.getName());
if (!DivisorWasNegative)
return AShr;
Builder.Insert(AShr);
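For illustration only (hypothetical IR, not taken from this commit's tests): because getLogBase2 now maps undef elements to zero itself, visitMul no longer needs to pre-sanitize undef multiplier lanes before turning a multiply by a power-of-two vector into a shift.

; Multiply by a power-of-two vector with an undef lane.
define <2 x i32> @mul_pow2_undef_lane(<2 x i32> %x) {
  ; getLogBase2(<i32 4, i32 undef>) is now <i32 2, i32 0>, so instcombine can
  ; rewrite this directly as: %r = shl <2 x i32> %x, <i32 2, i32 0>
  %r = mul <2 x i32> %x, <i32 4, i32 undef>
  ret <2 x i32> %r
}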
239 changes: 158 additions & 81 deletions llvm/test/CodeGen/PowerPC/funnel-shift-rot.ll
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=ppc32-- | FileCheck %s --check-prefixes=CHECK,CHECK32
; RUN: llc < %s -mtriple=ppc32-- | FileCheck %s --check-prefixes=CHECK,CHECK32,CHECK32_32
; RUN: llc < %s -mtriple=ppc32-- -mcpu=ppc64 | FileCheck %s --check-prefixes=CHECK,CHECK32,CHECK32_64
; RUN: llc < %s -mtriple=powerpc64le-- | FileCheck %s --check-prefixes=CHECK,CHECK64

declare i8 @llvm.fshl.i8(i8, i8, i8)
@@ -84,30 +85,55 @@ define i32 @rotl_i32(i32 %x, i32 %z) {
}

define i64 @rotl_i64(i64 %x, i64 %z) {
; CHECK32-LABEL: rotl_i64:
; CHECK32: # %bb.0:
; CHECK32-NEXT: clrlwi 5, 6, 26
; CHECK32-NEXT: subfic 8, 5, 32
; CHECK32-NEXT: neg 6, 6
; CHECK32-NEXT: slw 7, 3, 5
; CHECK32-NEXT: addi 9, 5, -32
; CHECK32-NEXT: srw 8, 4, 8
; CHECK32-NEXT: clrlwi 6, 6, 26
; CHECK32-NEXT: slw 9, 4, 9
; CHECK32-NEXT: or 7, 7, 8
; CHECK32-NEXT: subfic 8, 6, 32
; CHECK32-NEXT: or 7, 7, 9
; CHECK32-NEXT: addi 9, 6, -32
; CHECK32-NEXT: slw 8, 3, 8
; CHECK32-NEXT: srw 9, 3, 9
; CHECK32-NEXT: srw 3, 3, 6
; CHECK32-NEXT: srw 6, 4, 6
; CHECK32-NEXT: or 6, 6, 8
; CHECK32-NEXT: or 6, 6, 9
; CHECK32-NEXT: slw 4, 4, 5
; CHECK32-NEXT: or 3, 7, 3
; CHECK32-NEXT: or 4, 4, 6
; CHECK32-NEXT: blr
; CHECK32_32-LABEL: rotl_i64:
; CHECK32_32: # %bb.0:
; CHECK32_32-NEXT: clrlwi 5, 6, 26
; CHECK32_32-NEXT: subfic 8, 5, 32
; CHECK32_32-NEXT: neg 6, 6
; CHECK32_32-NEXT: slw 7, 3, 5
; CHECK32_32-NEXT: addi 9, 5, -32
; CHECK32_32-NEXT: srw 8, 4, 8
; CHECK32_32-NEXT: clrlwi 6, 6, 26
; CHECK32_32-NEXT: slw 9, 4, 9
; CHECK32_32-NEXT: or 7, 7, 8
; CHECK32_32-NEXT: subfic 8, 6, 32
; CHECK32_32-NEXT: or 7, 7, 9
; CHECK32_32-NEXT: addi 9, 6, -32
; CHECK32_32-NEXT: slw 8, 3, 8
; CHECK32_32-NEXT: srw 9, 3, 9
; CHECK32_32-NEXT: srw 3, 3, 6
; CHECK32_32-NEXT: srw 6, 4, 6
; CHECK32_32-NEXT: or 6, 6, 8
; CHECK32_32-NEXT: or 6, 6, 9
; CHECK32_32-NEXT: slw 4, 4, 5
; CHECK32_32-NEXT: or 3, 7, 3
; CHECK32_32-NEXT: or 4, 4, 6
; CHECK32_32-NEXT: blr
;
; CHECK32_64-LABEL: rotl_i64:
; CHECK32_64: # %bb.0:
; CHECK32_64-NEXT: clrlwi 5, 6, 26
; CHECK32_64-NEXT: neg 6, 6
; CHECK32_64-NEXT: subfic 8, 5, 32
; CHECK32_64-NEXT: slw 7, 3, 5
; CHECK32_64-NEXT: clrlwi 6, 6, 26
; CHECK32_64-NEXT: srw 8, 4, 8
; CHECK32_64-NEXT: addi 9, 5, -32
; CHECK32_64-NEXT: or 7, 7, 8
; CHECK32_64-NEXT: subfic 8, 6, 32
; CHECK32_64-NEXT: slw 5, 4, 5
; CHECK32_64-NEXT: slw 9, 4, 9
; CHECK32_64-NEXT: srw 10, 3, 6
; CHECK32_64-NEXT: srw 4, 4, 6
; CHECK32_64-NEXT: addi 6, 6, -32
; CHECK32_64-NEXT: slw 8, 3, 8
; CHECK32_64-NEXT: srw 3, 3, 6
; CHECK32_64-NEXT: or 4, 4, 8
; CHECK32_64-NEXT: or 6, 7, 9
; CHECK32_64-NEXT: or 4, 4, 3
; CHECK32_64-NEXT: or 3, 6, 10
; CHECK32_64-NEXT: or 4, 5, 4
; CHECK32_64-NEXT: blr
;
; CHECK64-LABEL: rotl_i64:
; CHECK64: # %bb.0:
@@ -120,13 +146,18 @@ define i64 @rotl_i64(i64 %x, i64 %z) {
; Vector rotate.

define <4 x i32> @rotl_v4i32(<4 x i32> %x, <4 x i32> %z) {
; CHECK32-LABEL: rotl_v4i32:
; CHECK32: # %bb.0:
; CHECK32-NEXT: rotlw 3, 3, 7
; CHECK32-NEXT: rotlw 4, 4, 8
; CHECK32-NEXT: rotlw 5, 5, 9
; CHECK32-NEXT: rotlw 6, 6, 10
; CHECK32-NEXT: blr
; CHECK32_32-LABEL: rotl_v4i32:
; CHECK32_32: # %bb.0:
; CHECK32_32-NEXT: rotlw 3, 3, 7
; CHECK32_32-NEXT: rotlw 4, 4, 8
; CHECK32_32-NEXT: rotlw 5, 5, 9
; CHECK32_32-NEXT: rotlw 6, 6, 10
; CHECK32_32-NEXT: blr
;
; CHECK32_64-LABEL: rotl_v4i32:
; CHECK32_64: # %bb.0:
; CHECK32_64-NEXT: vrlw 2, 2, 3
; CHECK32_64-NEXT: blr
;
; CHECK64-LABEL: rotl_v4i32:
; CHECK64: # %bb.0:
@@ -139,13 +170,19 @@ define <4 x i32> @rotl_v4i32(<4 x i32> %x, <4 x i32> %z) {
; Vector rotate by constant splat amount.

define <4 x i32> @rotl_v4i32_const_shift(<4 x i32> %x) {
; CHECK32-LABEL: rotl_v4i32_const_shift:
; CHECK32: # %bb.0:
; CHECK32-NEXT: rotlwi 3, 3, 3
; CHECK32-NEXT: rotlwi 4, 4, 3
; CHECK32-NEXT: rotlwi 5, 5, 3
; CHECK32-NEXT: rotlwi 6, 6, 3
; CHECK32-NEXT: blr
; CHECK32_32-LABEL: rotl_v4i32_const_shift:
; CHECK32_32: # %bb.0:
; CHECK32_32-NEXT: rotlwi 3, 3, 3
; CHECK32_32-NEXT: rotlwi 4, 4, 3
; CHECK32_32-NEXT: rotlwi 5, 5, 3
; CHECK32_32-NEXT: rotlwi 6, 6, 3
; CHECK32_32-NEXT: blr
;
; CHECK32_64-LABEL: rotl_v4i32_const_shift:
; CHECK32_64: # %bb.0:
; CHECK32_64-NEXT: vspltisw 3, 3
; CHECK32_64-NEXT: vrlw 2, 2, 3
; CHECK32_64-NEXT: blr
;
; CHECK64-LABEL: rotl_v4i32_const_shift:
; CHECK64: # %bb.0:
@@ -217,30 +254,55 @@ define i32 @rotr_i32(i32 %x, i32 %z) {
}

define i64 @rotr_i64(i64 %x, i64 %z) {
; CHECK32-LABEL: rotr_i64:
; CHECK32: # %bb.0:
; CHECK32-NEXT: clrlwi 5, 6, 26
; CHECK32-NEXT: subfic 8, 5, 32
; CHECK32-NEXT: neg 6, 6
; CHECK32-NEXT: srw 7, 4, 5
; CHECK32-NEXT: addi 9, 5, -32
; CHECK32-NEXT: slw 8, 3, 8
; CHECK32-NEXT: clrlwi 6, 6, 26
; CHECK32-NEXT: srw 9, 3, 9
; CHECK32-NEXT: or 7, 7, 8
; CHECK32-NEXT: subfic 8, 6, 32
; CHECK32-NEXT: or 7, 7, 9
; CHECK32-NEXT: addi 9, 6, -32
; CHECK32-NEXT: srw 8, 4, 8
; CHECK32-NEXT: slw 9, 4, 9
; CHECK32-NEXT: slw 4, 4, 6
; CHECK32-NEXT: slw 6, 3, 6
; CHECK32-NEXT: or 6, 6, 8
; CHECK32-NEXT: or 6, 6, 9
; CHECK32-NEXT: srw 3, 3, 5
; CHECK32-NEXT: or 4, 7, 4
; CHECK32-NEXT: or 3, 3, 6
; CHECK32-NEXT: blr
; CHECK32_32-LABEL: rotr_i64:
; CHECK32_32: # %bb.0:
; CHECK32_32-NEXT: clrlwi 5, 6, 26
; CHECK32_32-NEXT: subfic 8, 5, 32
; CHECK32_32-NEXT: neg 6, 6
; CHECK32_32-NEXT: srw 7, 4, 5
; CHECK32_32-NEXT: addi 9, 5, -32
; CHECK32_32-NEXT: slw 8, 3, 8
; CHECK32_32-NEXT: clrlwi 6, 6, 26
; CHECK32_32-NEXT: srw 9, 3, 9
; CHECK32_32-NEXT: or 7, 7, 8
; CHECK32_32-NEXT: subfic 8, 6, 32
; CHECK32_32-NEXT: or 7, 7, 9
; CHECK32_32-NEXT: addi 9, 6, -32
; CHECK32_32-NEXT: srw 8, 4, 8
; CHECK32_32-NEXT: slw 9, 4, 9
; CHECK32_32-NEXT: slw 4, 4, 6
; CHECK32_32-NEXT: slw 6, 3, 6
; CHECK32_32-NEXT: or 6, 6, 8
; CHECK32_32-NEXT: or 6, 6, 9
; CHECK32_32-NEXT: srw 3, 3, 5
; CHECK32_32-NEXT: or 4, 7, 4
; CHECK32_32-NEXT: or 3, 3, 6
; CHECK32_32-NEXT: blr
;
; CHECK32_64-LABEL: rotr_i64:
; CHECK32_64: # %bb.0:
; CHECK32_64-NEXT: clrlwi 5, 6, 26
; CHECK32_64-NEXT: neg 6, 6
; CHECK32_64-NEXT: subfic 8, 5, 32
; CHECK32_64-NEXT: srw 7, 4, 5
; CHECK32_64-NEXT: clrlwi 6, 6, 26
; CHECK32_64-NEXT: slw 8, 3, 8
; CHECK32_64-NEXT: addi 9, 5, -32
; CHECK32_64-NEXT: or 7, 7, 8
; CHECK32_64-NEXT: subfic 8, 6, 32
; CHECK32_64-NEXT: srw 5, 3, 5
; CHECK32_64-NEXT: srw 9, 3, 9
; CHECK32_64-NEXT: slw 10, 4, 6
; CHECK32_64-NEXT: slw 3, 3, 6
; CHECK32_64-NEXT: addi 6, 6, -32
; CHECK32_64-NEXT: srw 8, 4, 8
; CHECK32_64-NEXT: slw 4, 4, 6
; CHECK32_64-NEXT: or 3, 3, 8
; CHECK32_64-NEXT: or 6, 7, 9
; CHECK32_64-NEXT: or 3, 3, 4
; CHECK32_64-NEXT: or 4, 6, 10
; CHECK32_64-NEXT: or 3, 5, 3
; CHECK32_64-NEXT: blr
;
; CHECK64-LABEL: rotr_i64:
; CHECK64: # %bb.0:
@@ -254,17 +316,24 @@ define i64 @rotr_i64(i64 %x, i64 %z) {
; Vector rotate.

define <4 x i32> @rotr_v4i32(<4 x i32> %x, <4 x i32> %z) {
; CHECK32-LABEL: rotr_v4i32:
; CHECK32: # %bb.0:
; CHECK32-NEXT: neg 7, 7
; CHECK32-NEXT: neg 8, 8
; CHECK32-NEXT: neg 9, 9
; CHECK32-NEXT: neg 10, 10
; CHECK32-NEXT: rotlw 3, 3, 7
; CHECK32-NEXT: rotlw 4, 4, 8
; CHECK32-NEXT: rotlw 5, 5, 9
; CHECK32-NEXT: rotlw 6, 6, 10
; CHECK32-NEXT: blr
; CHECK32_32-LABEL: rotr_v4i32:
; CHECK32_32: # %bb.0:
; CHECK32_32-NEXT: neg 7, 7
; CHECK32_32-NEXT: neg 8, 8
; CHECK32_32-NEXT: neg 9, 9
; CHECK32_32-NEXT: neg 10, 10
; CHECK32_32-NEXT: rotlw 3, 3, 7
; CHECK32_32-NEXT: rotlw 4, 4, 8
; CHECK32_32-NEXT: rotlw 5, 5, 9
; CHECK32_32-NEXT: rotlw 6, 6, 10
; CHECK32_32-NEXT: blr
;
; CHECK32_64-LABEL: rotr_v4i32:
; CHECK32_64: # %bb.0:
; CHECK32_64-NEXT: vxor 4, 4, 4
; CHECK32_64-NEXT: vsubuwm 3, 4, 3
; CHECK32_64-NEXT: vrlw 2, 2, 3
; CHECK32_64-NEXT: blr
;
; CHECK64-LABEL: rotr_v4i32:
; CHECK64: # %bb.0:
Expand All @@ -279,13 +348,21 @@ define <4 x i32> @rotr_v4i32(<4 x i32> %x, <4 x i32> %z) {
; Vector rotate by constant splat amount.

define <4 x i32> @rotr_v4i32_const_shift(<4 x i32> %x) {
; CHECK32-LABEL: rotr_v4i32_const_shift:
; CHECK32: # %bb.0:
; CHECK32-NEXT: rotlwi 3, 3, 29
; CHECK32-NEXT: rotlwi 4, 4, 29
; CHECK32-NEXT: rotlwi 5, 5, 29
; CHECK32-NEXT: rotlwi 6, 6, 29
; CHECK32-NEXT: blr
; CHECK32_32-LABEL: rotr_v4i32_const_shift:
; CHECK32_32: # %bb.0:
; CHECK32_32-NEXT: rotlwi 3, 3, 29
; CHECK32_32-NEXT: rotlwi 4, 4, 29
; CHECK32_32-NEXT: rotlwi 5, 5, 29
; CHECK32_32-NEXT: rotlwi 6, 6, 29
; CHECK32_32-NEXT: blr
;
; CHECK32_64-LABEL: rotr_v4i32_const_shift:
; CHECK32_64: # %bb.0:
; CHECK32_64-NEXT: vspltisw 3, -16
; CHECK32_64-NEXT: vspltisw 4, 13
; CHECK32_64-NEXT: vsubuwm 3, 4, 3
; CHECK32_64-NEXT: vrlw 2, 2, 3
; CHECK32_64-NEXT: blr
;
; CHECK64-LABEL: rotr_v4i32_const_shift:
; CHECK64: # %bb.0:
