From 50c743fa713002fe4e0c76d23043e6c1f9e9fe6f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?D=C3=A1vid=20Bolvansk=C3=BD?= Date: Thu, 13 Aug 2020 12:45:14 +0200 Subject: [PATCH 01/23] [BPI] Improve static heuristics for integer comparisons Similarly as for pointers, even for integers a == b is usually false. GCC also uses this heuristic. Reviewed By: ebrevnov Differential Revision: https://reviews.llvm.org/D85781 --- .../test/profile/Linux/counter_promo_for.c | 16 +- .../test/profile/Linux/counter_promo_while.c | 14 +- .../llvm/Analysis/BranchProbabilityInfo.h | 2 +- llvm/lib/Analysis/BranchProbabilityInfo.cpp | 27 +- ...ro_heuristics.ll => integer_heuristics.ll} | 51 + .../Analysis/BranchProbabilityInfo/loop.ll | 16 +- llvm/test/CodeGen/AArch64/aarch64-gep-opt.ll | 1 - .../CodeGen/AArch64/branch-relax-alignment.ll | 27 +- .../AArch64/combine-comparisons-by-cse.ll | 441 +++++++-- llvm/test/CodeGen/AArch64/cond-br-tuning.ll | 139 ++- .../CodeGen/AArch64/fast-isel-cmp-branch.ll | 347 +++++-- .../CodeGen/ARM/2011-12-14-machine-sink.ll | 7 +- llvm/test/CodeGen/ARM/cmpxchg-weak.ll | 53 +- llvm/test/CodeGen/ARM/lsr-unfolded-offset.ll | 8 +- llvm/test/CodeGen/ARM/machine-cse-cmp.ll | 2 +- llvm/test/CodeGen/Hexagon/newvaluejump2.ll | 30 +- llvm/test/CodeGen/Mips/brcongt.ll | 25 +- llvm/test/CodeGen/Mips/brconlt.ll | 45 +- llvm/test/CodeGen/Mips/brconne.ll | 26 +- .../Mips/compactbranches/no-beqzc-bnezc.ll | 377 +++++++- llvm/test/CodeGen/Mips/lcb2.ll | 300 +++++- llvm/test/CodeGen/Mips/lcb5.ll | 352 ++++++- .../compact-branches-long-branch.ll | 233 ++++- llvm/test/CodeGen/Mips/seleq.ll | 84 +- llvm/test/CodeGen/Mips/selle.ll | 82 +- llvm/test/CodeGen/PowerPC/brcond.ll | 583 +++++++++--- .../memCmpUsedInZeroEqualityComparison.ll | 18 +- .../PowerPC/redundant-copy-after-tail-dup.ll | 28 +- llvm/test/CodeGen/RISCV/branch.ll | 45 +- .../RISCV/rv64m-w-insts-legalization.ll | 10 +- llvm/test/CodeGen/SystemZ/int-cmp-37.ll | 78 +- llvm/test/CodeGen/SystemZ/int-cmp-40.ll | 78 +- 
.../LowOverheadLoops/mve-float-loops.ll | 3 +- .../varying-outer-2d-reduction.ll | 21 +- .../CodeGen/Thumb2/mve-postinc-distribute.ll | 3 +- llvm/test/CodeGen/Thumb2/mve-postinc-lsr.ll | 139 ++- llvm/test/CodeGen/Thumb2/mve-satmul-loops.ll | 113 ++- llvm/test/CodeGen/Thumb2/thumb2-branch.ll | 4 +- llvm/test/CodeGen/X86/3addr-16bit.ll | 80 +- llvm/test/CodeGen/X86/absolute-cmp.ll | 48 +- llvm/test/CodeGen/X86/atomic-flags.ll | 97 +- llvm/test/CodeGen/X86/atomic-unordered.ll | 20 +- llvm/test/CodeGen/X86/bmi.ll | 34 +- llvm/test/CodeGen/X86/bt.ll | 56 +- .../CodeGen/X86/conditional-tailcall-pgso.ll | 48 +- llvm/test/CodeGen/X86/conditional-tailcall.ll | 48 +- .../test/CodeGen/X86/fast-isel-cmp-branch2.ll | 317 +++++-- llvm/test/CodeGen/X86/funnel-shift.ll | 18 +- .../X86/indirect-branch-tracking-eh2.ll | 220 ++++- llvm/test/CodeGen/X86/jump_sign.ll | 14 +- llvm/test/CodeGen/X86/lsr-negative-stride.ll | 9 +- llvm/test/CodeGen/X86/machine-cse.ll | 7 +- .../CodeGen/X86/memcmp-more-load-pairs.ll | 875 +++++++++--------- llvm/test/CodeGen/X86/memcmp-optsize.ll | 158 ++-- llvm/test/CodeGen/X86/memcmp-pgso.ll | 166 ++-- llvm/test/CodeGen/X86/memcmp.ll | 433 ++++----- llvm/test/CodeGen/X86/neg_cmp.ll | 16 +- llvm/test/CodeGen/X86/nobt.ll | 14 +- llvm/test/CodeGen/X86/pr29170.ll | 10 +- llvm/test/CodeGen/X86/wide-integer-cmp.ll | 20 +- llvm/test/DebugInfo/COFF/fpo-shrink-wrap.ll | 23 +- .../PGOProfile/counter_promo_mexits.ll | 127 ++- llvm/test/Transforms/PGOProfile/landingpad.ll | 3 +- 63 files changed, 4757 insertions(+), 1932 deletions(-) rename llvm/test/Analysis/BranchProbabilityInfo/{zero_heuristics.ll => integer_heuristics.ll} (67%) diff --git a/compiler-rt/test/profile/Linux/counter_promo_for.c b/compiler-rt/test/profile/Linux/counter_promo_for.c index 0efebdc95271db..7cab70b08773bc 100644 --- a/compiler-rt/test/profile/Linux/counter_promo_for.c +++ b/compiler-rt/test/profile/Linux/counter_promo_for.c @@ -2,7 +2,7 @@ // RUN: rm -fr %t.nopromo.prof // RUN: 
%clang_pgogen=%t.promo.prof/ -o %t.promo.gen -O2 %s // RUN: %clang_pgogen=%t.promo.prof/ -o %t.promo.gen.ll -emit-llvm -S -O2 %s -// RUN: cat %t.promo.gen.ll | FileCheck --check-prefix=PROMO %s +// RUN: cp %t.promo.gen.ll /tmp/d.txt ; cat %t.promo.gen.ll | FileCheck --check-prefix=PROMO %s // RUN: %run %t.promo.gen // RUN: llvm-profdata merge -o %t.promo.profdata %t.promo.prof/ // RUN: llvm-profdata show --counts --all-functions %t.promo.profdata > %t.promo.dump @@ -22,23 +22,23 @@ __attribute__((noinline)) void foo(int n, int N) { // PROMO: load{{.*}}@__profc_foo{{.*}} 3){{.*}} // PROMO-NEXT: add // PROMO-NEXT: store{{.*}}@__profc_foo{{.*}} 3){{.*}} -// PROMO: load{{.*}}@__profc_foo{{.*}} 0){{.*}} -// PROMO-NEXT: add -// PROMO-NEXT: store{{.*}}@__profc_foo{{.*}} 0){{.*}} -// PROMO-NEXT: load{{.*}}@__profc_foo{{.*}} 1){{.*}} +// PROMO: load{{.*}}@__profc_foo{{.*}} 1){{.*}} // PROMO-NEXT: add // PROMO-NEXT: store{{.*}}@__profc_foo{{.*}} 1){{.*}} +// PROMO-NEXT: load{{.*}}@__profc_foo{{.*}} 0){{.*}} +// PROMO-NEXT: add +// PROMO-NEXT: store{{.*}}@__profc_foo{{.*}} 0){{.*}} // PROMO: load{{.*}}@__profc_foo{{.*}} 2){{.*}} // PROMO-NEXT: add // PROMO-NEXT: store{{.*}}@__profc_foo{{.*}} 2){{.*}} // // NOPROMO-LABEL: @foo -// NOPROMO: load{{.*}}@__profc_foo{{.*}} 0){{.*}} -// NOPROMO-NEXT: add -// NOPROMO-NEXT: store{{.*}}@__profc_foo{{.*}} 0){{.*}} // NOPROMO: load{{.*}}@__profc_foo{{.*}} 1){{.*}} // NOPROMO-NEXT: add // NOPROMO-NEXT: store{{.*}}@__profc_foo{{.*}} 1){{.*}} +// NOPROMO: load{{.*}}@__profc_foo{{.*}} 0){{.*}} +// NOPROMO-NEXT: add +// NOPROMO-NEXT: store{{.*}}@__profc_foo{{.*}} 0){{.*}} // NOPROMO: load{{.*}}@__profc_foo{{.*}} 2){{.*}} // NOPROMO-NEXT: add // NOPROMO-NEXT: store{{.*}}@__profc_foo{{.*}} 2){{.*}} diff --git a/compiler-rt/test/profile/Linux/counter_promo_while.c b/compiler-rt/test/profile/Linux/counter_promo_while.c index 183ef8543affb5..fdd77982c2bf69 100644 --- a/compiler-rt/test/profile/Linux/counter_promo_while.c +++ 
b/compiler-rt/test/profile/Linux/counter_promo_while.c @@ -17,23 +17,23 @@ int g; __attribute__((noinline)) void bar(int i) { g += i; } __attribute__((noinline)) void foo(int n, int N) { // PROMO-LABEL: @foo -// PROMO: load{{.*}}@__profc_foo{{.*}} 0){{.*}} -// PROMO-NEXT: add -// PROMO-NEXT: store{{.*}}@__profc_foo{{.*}} 0){{.*}} -// PROMO-NEXT: load{{.*}}@__profc_foo{{.*}} 1){{.*}} +// PROMO: load{{.*}}@__profc_foo{{.*}} 1){{.*}} // PROMO-NEXT: add // PROMO-NEXT: store{{.*}}@__profc_foo{{.*}} 1){{.*}} +// PROMO-NEXT: load{{.*}}@__profc_foo{{.*}} 0){{.*}} +// PROMO-NEXT: add +// PROMO-NEXT: store{{.*}}@__profc_foo{{.*}} 0){{.*}} // PROMO-NEXT: load{{.*}}@__profc_foo{{.*}} 2){{.*}} // PROMO-NEXT: add // PROMO-NEXT: store{{.*}}@__profc_foo{{.*}} 2){{.*}} // // NOPROMO-LABEL: @foo -// NOPROMO: load{{.*}}@__profc_foo{{.*}} 0){{.*}} -// NOPROMO-NEXT: add -// NOPROMO-NEXT: store{{.*}}@__profc_foo{{.*}} 0){{.*}} // NOPROMO: load{{.*}}@__profc_foo{{.*}} 1){{.*}} // NOPROMO-NEXT: add // NOPROMO-NEXT: store{{.*}}@__profc_foo{{.*}} 1){{.*}} +// NOPROMO: load{{.*}}@__profc_foo{{.*}} 0){{.*}} +// NOPROMO-NEXT: add +// NOPROMO-NEXT: store{{.*}}@__profc_foo{{.*}} 0){{.*}} // NOPROMO: load{{.*}}@__profc_foo{{.*}} 2){{.*}} // NOPROMO-NEXT: add // NOPROMO-NEXT: store{{.*}}@__profc_foo{{.*}} 2){{.*}} diff --git a/llvm/include/llvm/Analysis/BranchProbabilityInfo.h b/llvm/include/llvm/Analysis/BranchProbabilityInfo.h index 447f14501cb65d..2c736fe9c1f07a 100644 --- a/llvm/include/llvm/Analysis/BranchProbabilityInfo.h +++ b/llvm/include/llvm/Analysis/BranchProbabilityInfo.h @@ -315,7 +315,7 @@ class BranchProbabilityInfo { bool calcColdCallHeuristics(const BasicBlock *BB); bool calcPointerHeuristics(const BasicBlock *BB); bool calcLoopBranchHeuristics(const BasicBlock *BB, const LoopInfo &LI); - bool calcZeroHeuristics(const BasicBlock *BB, const TargetLibraryInfo *TLI); + bool calcIntegerHeuristics(const BasicBlock *BB, const TargetLibraryInfo *TLI); bool 
calcFloatingPointHeuristics(const BasicBlock *BB); bool calcInvokeHeuristics(const BasicBlock *BB); }; diff --git a/llvm/lib/Analysis/BranchProbabilityInfo.cpp b/llvm/lib/Analysis/BranchProbabilityInfo.cpp index eae2c4ea9da887..6df90e66b84b8c 100644 --- a/llvm/lib/Analysis/BranchProbabilityInfo.cpp +++ b/llvm/lib/Analysis/BranchProbabilityInfo.cpp @@ -122,8 +122,8 @@ static const uint32_t CC_NONTAKEN_WEIGHT = 64; static const uint32_t PH_TAKEN_WEIGHT = 20; static const uint32_t PH_NONTAKEN_WEIGHT = 12; -static const uint32_t ZH_TAKEN_WEIGHT = 20; -static const uint32_t ZH_NONTAKEN_WEIGHT = 12; +static const uint32_t INTH_TAKEN_WEIGHT = 20; +static const uint32_t INTH_NONTAKEN_WEIGHT = 12; static const uint32_t FPH_TAKEN_WEIGHT = 20; static const uint32_t FPH_NONTAKEN_WEIGHT = 12; @@ -856,7 +856,7 @@ bool BranchProbabilityInfo::calcLoopBranchHeuristics(const BasicBlock *BB, return true; } -bool BranchProbabilityInfo::calcZeroHeuristics(const BasicBlock *BB, +bool BranchProbabilityInfo::calcIntegerHeuristics(const BasicBlock *BB, const TargetLibraryInfo *TLI) { const BranchInst *BI = dyn_cast(BB->getTerminator()); if (!BI || !BI->isConditional()) @@ -873,10 +873,21 @@ bool BranchProbabilityInfo::calcZeroHeuristics(const BasicBlock *BB, return dyn_cast(V); }; + BranchProbability TakenProb(INTH_TAKEN_WEIGHT, + INTH_TAKEN_WEIGHT + INTH_NONTAKEN_WEIGHT); + BranchProbability UntakenProb(INTH_NONTAKEN_WEIGHT, + INTH_TAKEN_WEIGHT + INTH_NONTAKEN_WEIGHT); Value *RHS = CI->getOperand(1); ConstantInt *CV = GetConstantInt(RHS); - if (!CV) - return false; + if (!CV) { + // X == Y -> Unlikely + // Otherwise -> Likely + if (CI->isTrueWhenEqual()) + std::swap(TakenProb, UntakenProb); + setEdgeProbability( + BB, SmallVector({TakenProb, UntakenProb})); + return true; + } // If the LHS is the result of AND'ing a value with a single bit bitmask, // we don't have information about probabilities. 
@@ -964,10 +975,6 @@ bool BranchProbabilityInfo::calcZeroHeuristics(const BasicBlock *BB, return false; } - BranchProbability TakenProb(ZH_TAKEN_WEIGHT, - ZH_TAKEN_WEIGHT + ZH_NONTAKEN_WEIGHT); - BranchProbability UntakenProb(ZH_NONTAKEN_WEIGHT, - ZH_TAKEN_WEIGHT + ZH_NONTAKEN_WEIGHT); if (!isProb) std::swap(TakenProb, UntakenProb); @@ -1221,7 +1228,7 @@ void BranchProbabilityInfo::calculate(const Function &F, const LoopInfo &LI, continue; if (calcPointerHeuristics(BB)) continue; - if (calcZeroHeuristics(BB, TLI)) + if (calcIntegerHeuristics(BB, TLI)) continue; if (calcFloatingPointHeuristics(BB)) continue; diff --git a/llvm/test/Analysis/BranchProbabilityInfo/zero_heuristics.ll b/llvm/test/Analysis/BranchProbabilityInfo/integer_heuristics.ll similarity index 67% rename from llvm/test/Analysis/BranchProbabilityInfo/zero_heuristics.ll rename to llvm/test/Analysis/BranchProbabilityInfo/integer_heuristics.ll index c6e1cb8c265a3b..c1d894a712fcfa 100644 --- a/llvm/test/Analysis/BranchProbabilityInfo/zero_heuristics.ll +++ b/llvm/test/Analysis/BranchProbabilityInfo/integer_heuristics.ll @@ -101,3 +101,54 @@ for.inc: exit: ret void } + +declare void @foo() + +; CHECK-LABEL: foo1 +define i32 @foo1(i32 %x, i32 %y, i8 signext %z, i8 signext %w) { +entry: + %c = icmp eq i32 %x, %y + br i1 %c, label %then, label %else +; CHECK: edge entry -> then probability is 0x30000000 / 0x80000000 = 37.50% +; CHECK: edge entry -> else probability is 0x50000000 / 0x80000000 = 62.50% +then: + tail call void @foo() + br label %else +; CHECK: edge then -> else probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] +else: + %v = phi i8 [ %z, %then ], [ %w, %entry ] + %r = sext i8 %v to i32 + ret i32 %r +} + +; CHECK-LABEL: foo2 +define i32 @foo2(i32 %x, i32 %y, i8 signext %z, i8 signext %w) { +entry: + %c = icmp ne i32 %x, %y + br i1 %c, label %then, label %else +; CHECK: edge entry -> then probability is 0x50000000 / 0x80000000 = 62.50% +; CHECK: edge entry -> else probability is 
0x30000000 / 0x80000000 = 37.50% +then: + br label %else +; CHECK: edge then -> else probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] +else: + %v = phi i8 [ %z, %then ], [ %w, %entry ] + %r = sext i8 %v to i32 + ret i32 %r +} + +; CHECK-LABEL: foo3 +define i32 @foo3(i32 %x, i32 %y, i8 signext %z, i8 signext %w) { +entry: + %c = icmp ult i32 %x, %y + br i1 %c, label %then, label %else +; CHECK: edge entry -> then probability is 0x50000000 / 0x80000000 = 62.50% +; CHECK: edge entry -> else probability is 0x30000000 / 0x80000000 = 37.50% +then: + br label %else +; CHECK: edge then -> else probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] +else: + %v = phi i8 [ %z, %then ], [ %w, %entry ] + %r = sext i8 %v to i32 + ret i32 %r +} diff --git a/llvm/test/Analysis/BranchProbabilityInfo/loop.ll b/llvm/test/Analysis/BranchProbabilityInfo/loop.ll index 63377e3ba955d2..fde631a273c57d 100644 --- a/llvm/test/Analysis/BranchProbabilityInfo/loop.ll +++ b/llvm/test/Analysis/BranchProbabilityInfo/loop.ll @@ -263,8 +263,8 @@ for.body: %0 = load i32, i32* %c, align 4 %cmp1 = icmp eq i32 %0, %i.011 br i1 %cmp1, label %for.inc5, label %if.end -; CHECK: edge for.body -> for.inc5 probability is 0x40000000 / 0x80000000 = 50.00% -; CHECK: edge for.body -> if.end probability is 0x40000000 / 0x80000000 = 50.00% +; CHECK: edge for.body -> for.inc5 probability is 0x30000000 / 0x80000000 = 37.50% +; CHECK: edge for.body -> if.end probability is 0x50000000 / 0x80000000 = 62.50% if.end: call void @g1() @@ -324,22 +324,22 @@ for.body3: %0 = load i32, i32* %c, align 4 %cmp4 = icmp eq i32 %0, %j.017 br i1 %cmp4, label %for.inc, label %if.end -; CHECK: edge for.body3 -> for.inc probability is 0x40000000 / 0x80000000 = 50.00% -; CHECK: edge for.body3 -> if.end probability is 0x40000000 / 0x80000000 = 50.00% +; CHECK: edge for.body3 -> for.inc probability is 0x30000000 / 0x80000000 = 37.50% +; CHECK: edge for.body3 -> if.end probability is 0x50000000 / 0x80000000 = 62.50% if.end: %1 
= load i32, i32* %arrayidx5, align 4 %cmp6 = icmp eq i32 %1, %j.017 br i1 %cmp6, label %for.inc, label %if.end8 -; CHECK: edge if.end -> for.inc probability is 0x40000000 / 0x80000000 = 50.00% -; CHECK: edge if.end -> if.end8 probability is 0x40000000 / 0x80000000 = 50.00% +; CHECK: edge if.end -> for.inc probability is 0x30000000 / 0x80000000 = 37.50% +; CHECK: edge if.end -> if.end8 probability is 0x50000000 / 0x80000000 = 62.50% if.end8: %2 = load i32, i32* %arrayidx9, align 4 %cmp10 = icmp eq i32 %2, %j.017 br i1 %cmp10, label %for.inc, label %if.end12 -; CHECK: edge if.end8 -> for.inc probability is 0x40000000 / 0x80000000 = 50.00% -; CHECK: edge if.end8 -> if.end12 probability is 0x40000000 / 0x80000000 = 50.00% +; CHECK: edge if.end8 -> for.inc probability is 0x30000000 / 0x80000000 = 37.50% +; CHECK: edge if.end8 -> if.end12 probability is 0x50000000 / 0x80000000 = 62.50% if.end12: call void @g2() diff --git a/llvm/test/CodeGen/AArch64/aarch64-gep-opt.ll b/llvm/test/CodeGen/AArch64/aarch64-gep-opt.ll index df9534ffde0973..b74873c61748b5 100644 --- a/llvm/test/CodeGen/AArch64/aarch64-gep-opt.ll +++ b/llvm/test/CodeGen/AArch64/aarch64-gep-opt.ll @@ -88,7 +88,6 @@ exit: ; CHECK-LABEL: test_GEP_across_BB: ; CHECK: ldr {{w[0-9]+}}, [{{x[0-9]+}}, #528] ; CHECK: ldr {{w[0-9]+}}, [{{x[0-9]+}}, #532] -; CHECK-NOT: add ; CHECK: str {{w[0-9]+}}, [{{x[0-9]+}}, #532] ; CHECK: str {{w[0-9]+}}, [{{x[0-9]+}}, #528] diff --git a/llvm/test/CodeGen/AArch64/branch-relax-alignment.ll b/llvm/test/CodeGen/AArch64/branch-relax-alignment.ll index 7135dff7f5732b..308917be00152d 100644 --- a/llvm/test/CodeGen/AArch64/branch-relax-alignment.ll +++ b/llvm/test/CodeGen/AArch64/branch-relax-alignment.ll @@ -1,19 +1,26 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=aarch64-apple-darwin -aarch64-bcc-offset-bits=4 -align-all-nofallthru-blocks=4 < %s | FileCheck %s ; Long branch is assumed because the block has a higher alignment ; 
requirement than the function. -; CHECK-LABEL: invert_bcc_block_align_higher_func: -; CHECK: b.eq [[JUMP_BB1:LBB[0-9]+_[0-9]+]] -; CHECK-NEXT: b [[JUMP_BB2:LBB[0-9]+_[0-9]+]] - -; CHECK: [[JUMP_BB1]]: -; CHECK: ret -; CHECK: .p2align 4 - -; CHECK: [[JUMP_BB2]]: -; CHECK: ret define i32 @invert_bcc_block_align_higher_func(i32 %x, i32 %y) align 4 #0 { +; CHECK-LABEL: invert_bcc_block_align_higher_func: +; CHECK: ; %bb.0: +; CHECK-NEXT: cmp w0, w1 +; CHECK-NEXT: b.ne LBB0_1 +; CHECK-NEXT: b LBB0_2 +; CHECK-NEXT: LBB0_1: ; %bb2 +; CHECK-NEXT: mov w8, #9 +; CHECK-NEXT: mov w0, #1 +; CHECK-NEXT: str w8, [x8] +; CHECK-NEXT: ret +; CHECK-NEXT: .p2align 4 +; CHECK-NEXT: LBB0_2: ; %bb1 +; CHECK-NEXT: mov w8, #42 +; CHECK-NEXT: mov w0, wzr +; CHECK-NEXT: str w8, [x8] +; CHECK-NEXT: ret %1 = icmp eq i32 %x, %y br i1 %1, label %bb1, label %bb2 diff --git a/llvm/test/CodeGen/AArch64/combine-comparisons-by-cse.ll b/llvm/test/CodeGen/AArch64/combine-comparisons-by-cse.ll index e7c6e3b5ef7b16..f8aab08da1cdcc 100644 --- a/llvm/test/CodeGen/AArch64/combine-comparisons-by-cse.ll +++ b/llvm/test/CodeGen/AArch64/combine-comparisons-by-cse.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -debugify-and-strip-all-safe < %s -mtriple=aarch64-linux-gnu | FileCheck %s ; marked as external to prevent possible optimizations @@ -8,12 +9,34 @@ ; (a > 10 && b == c) || (a >= 10 && b == d) define i32 @combine_gt_ge_10() #0 { -; CHECK-LABEL: combine_gt_ge_10 -; CHECK: cmp -; CHECK: b.le -; CHECK: ret -; CHECK-NOT: cmp -; CHECK: b.lt +; CHECK-LABEL: combine_gt_ge_10: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: adrp x8, a +; CHECK-NEXT: ldr w8, [x8, :lo12:a] +; CHECK-NEXT: cmp w8, #10 // =10 +; CHECK-NEXT: adrp x8, b +; CHECK-NEXT: b.le .LBB0_2 +; CHECK-NEXT: // %bb.1: // %land.lhs.true +; CHECK-NEXT: adrp x10, c +; CHECK-NEXT: ldr w9, [x8, :lo12:b] +; CHECK-NEXT: ldr w10, [x10, :lo12:c] +; CHECK-NEXT: cmp w9, w10 +; CHECK-NEXT: b.ne .LBB0_3 
+; CHECK-NEXT: b .LBB0_5 +; CHECK-NEXT: .LBB0_2: // %lor.lhs.false +; CHECK-NEXT: b.lt .LBB0_4 +; CHECK-NEXT: .LBB0_3: // %land.lhs.true3 +; CHECK-NEXT: adrp x9, d +; CHECK-NEXT: ldr w8, [x8, :lo12:b] +; CHECK-NEXT: ldr w9, [x9, :lo12:d] +; CHECK-NEXT: cmp w8, w9 +; CHECK-NEXT: b.eq .LBB0_5 +; CHECK-NEXT: .LBB0_4: // %if.end +; CHECK-NEXT: mov w0, wzr +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB0_5: +; CHECK-NEXT: mov w0, #1 +; CHECK-NEXT: ret entry: %0 = load i32, i32* @a, align 4 %cmp = icmp sgt i32 %0, 10 @@ -45,12 +68,35 @@ return: ; preds = %if.end, %land.lhs.t ; (a > 5 && b == c) || (a < 5 && b == d) define i32 @combine_gt_lt_5() #0 { -; CHECK-LABEL: combine_gt_lt_5 -; CHECK: cmp -; CHECK: b.le -; CHECK: ret -; CHECK-NOT: cmp -; CHECK: b.ge +; CHECK-LABEL: combine_gt_lt_5: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: adrp x8, a +; CHECK-NEXT: ldr w8, [x8, :lo12:a] +; CHECK-NEXT: cmp w8, #5 // =5 +; CHECK-NEXT: b.le .LBB1_2 +; CHECK-NEXT: // %bb.1: // %land.lhs.true +; CHECK-NEXT: adrp x8, b +; CHECK-NEXT: adrp x9, c +; CHECK-NEXT: ldr w8, [x8, :lo12:b] +; CHECK-NEXT: ldr w9, [x9, :lo12:c] +; CHECK-NEXT: cmp w8, w9 +; CHECK-NEXT: b.ne .LBB1_4 +; CHECK-NEXT: b .LBB1_5 +; CHECK-NEXT: .LBB1_2: // %lor.lhs.false +; CHECK-NEXT: b.ge .LBB1_4 +; CHECK-NEXT: // %bb.3: // %land.lhs.true3 +; CHECK-NEXT: adrp x8, b +; CHECK-NEXT: adrp x9, d +; CHECK-NEXT: ldr w8, [x8, :lo12:b] +; CHECK-NEXT: ldr w9, [x9, :lo12:d] +; CHECK-NEXT: cmp w8, w9 +; CHECK-NEXT: b.eq .LBB1_5 +; CHECK-NEXT: .LBB1_4: // %if.end +; CHECK-NEXT: mov w0, wzr +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB1_5: +; CHECK-NEXT: mov w0, #1 +; CHECK-NEXT: ret entry: %0 = load i32, i32* @a, align 4 %cmp = icmp sgt i32 %0, 5 @@ -82,12 +128,34 @@ return: ; preds = %if.end, %land.lhs.t ; (a < 5 && b == c) || (a <= 5 && b == d) define i32 @combine_lt_ge_5() #0 { -; CHECK-LABEL: combine_lt_ge_5 -; CHECK: cmp -; CHECK: b.ge -; CHECK: ret -; CHECK-NOT: cmp -; CHECK: b.gt +; CHECK-LABEL: combine_lt_ge_5: +; CHECK: // %bb.0: // 
%entry +; CHECK-NEXT: adrp x8, a +; CHECK-NEXT: ldr w8, [x8, :lo12:a] +; CHECK-NEXT: cmp w8, #5 // =5 +; CHECK-NEXT: adrp x8, b +; CHECK-NEXT: b.ge .LBB2_2 +; CHECK-NEXT: // %bb.1: // %land.lhs.true +; CHECK-NEXT: adrp x10, c +; CHECK-NEXT: ldr w9, [x8, :lo12:b] +; CHECK-NEXT: ldr w10, [x10, :lo12:c] +; CHECK-NEXT: cmp w9, w10 +; CHECK-NEXT: b.ne .LBB2_3 +; CHECK-NEXT: b .LBB2_5 +; CHECK-NEXT: .LBB2_2: // %lor.lhs.false +; CHECK-NEXT: b.gt .LBB2_4 +; CHECK-NEXT: .LBB2_3: // %land.lhs.true3 +; CHECK-NEXT: adrp x9, d +; CHECK-NEXT: ldr w8, [x8, :lo12:b] +; CHECK-NEXT: ldr w9, [x9, :lo12:d] +; CHECK-NEXT: cmp w8, w9 +; CHECK-NEXT: b.eq .LBB2_5 +; CHECK-NEXT: .LBB2_4: // %if.end +; CHECK-NEXT: mov w0, wzr +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB2_5: +; CHECK-NEXT: mov w0, #1 +; CHECK-NEXT: ret entry: %0 = load i32, i32* @a, align 4 %cmp = icmp slt i32 %0, 5 @@ -119,12 +187,35 @@ return: ; preds = %if.end, %land.lhs.t ; (a < 5 && b == c) || (a > 5 && b == d) define i32 @combine_lt_gt_5() #0 { -; CHECK-LABEL: combine_lt_gt_5 -; CHECK: cmp -; CHECK: b.ge -; CHECK: ret -; CHECK-NOT: cmp -; CHECK: b.le +; CHECK-LABEL: combine_lt_gt_5: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: adrp x8, a +; CHECK-NEXT: ldr w8, [x8, :lo12:a] +; CHECK-NEXT: cmp w8, #5 // =5 +; CHECK-NEXT: b.ge .LBB3_2 +; CHECK-NEXT: // %bb.1: // %land.lhs.true +; CHECK-NEXT: adrp x8, b +; CHECK-NEXT: adrp x9, c +; CHECK-NEXT: ldr w8, [x8, :lo12:b] +; CHECK-NEXT: ldr w9, [x9, :lo12:c] +; CHECK-NEXT: cmp w8, w9 +; CHECK-NEXT: b.ne .LBB3_4 +; CHECK-NEXT: b .LBB3_5 +; CHECK-NEXT: .LBB3_2: // %lor.lhs.false +; CHECK-NEXT: b.le .LBB3_4 +; CHECK-NEXT: // %bb.3: // %land.lhs.true3 +; CHECK-NEXT: adrp x8, b +; CHECK-NEXT: adrp x9, d +; CHECK-NEXT: ldr w8, [x8, :lo12:b] +; CHECK-NEXT: ldr w9, [x9, :lo12:d] +; CHECK-NEXT: cmp w8, w9 +; CHECK-NEXT: b.eq .LBB3_5 +; CHECK-NEXT: .LBB3_4: // %if.end +; CHECK-NEXT: mov w0, wzr +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB3_5: +; CHECK-NEXT: mov w0, #1 +; CHECK-NEXT: ret entry: %0 = 
load i32, i32* @a, align 4 %cmp = icmp slt i32 %0, 5 @@ -156,12 +247,35 @@ return: ; preds = %if.end, %land.lhs.t ; (a > -5 && b == c) || (a < -5 && b == d) define i32 @combine_gt_lt_n5() #0 { -; CHECK-LABEL: combine_gt_lt_n5 -; CHECK: cmn -; CHECK: b.le -; CHECK: ret -; CHECK-NOT: cmn -; CHECK: b.ge +; CHECK-LABEL: combine_gt_lt_n5: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: adrp x8, a +; CHECK-NEXT: ldr w8, [x8, :lo12:a] +; CHECK-NEXT: cmn w8, #5 // =5 +; CHECK-NEXT: b.le .LBB4_2 +; CHECK-NEXT: // %bb.1: // %land.lhs.true +; CHECK-NEXT: adrp x8, b +; CHECK-NEXT: adrp x9, c +; CHECK-NEXT: ldr w8, [x8, :lo12:b] +; CHECK-NEXT: ldr w9, [x9, :lo12:c] +; CHECK-NEXT: cmp w8, w9 +; CHECK-NEXT: b.ne .LBB4_4 +; CHECK-NEXT: b .LBB4_5 +; CHECK-NEXT: .LBB4_2: // %lor.lhs.false +; CHECK-NEXT: b.ge .LBB4_4 +; CHECK-NEXT: // %bb.3: // %land.lhs.true3 +; CHECK-NEXT: adrp x8, b +; CHECK-NEXT: adrp x9, d +; CHECK-NEXT: ldr w8, [x8, :lo12:b] +; CHECK-NEXT: ldr w9, [x9, :lo12:d] +; CHECK-NEXT: cmp w8, w9 +; CHECK-NEXT: b.eq .LBB4_5 +; CHECK-NEXT: .LBB4_4: // %if.end +; CHECK-NEXT: mov w0, wzr +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB4_5: +; CHECK-NEXT: mov w0, #1 +; CHECK-NEXT: ret entry: %0 = load i32, i32* @a, align 4 %cmp = icmp sgt i32 %0, -5 @@ -193,12 +307,35 @@ return: ; preds = %if.end, %land.lhs.t ; (a < -5 && b == c) || (a > -5 && b == d) define i32 @combine_lt_gt_n5() #0 { -; CHECK-LABEL: combine_lt_gt_n5 -; CHECK: cmn -; CHECK: b.ge -; CHECK: ret -; CHECK-NOT: cmn -; CHECK: b.le +; CHECK-LABEL: combine_lt_gt_n5: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: adrp x8, a +; CHECK-NEXT: ldr w8, [x8, :lo12:a] +; CHECK-NEXT: cmn w8, #5 // =5 +; CHECK-NEXT: b.ge .LBB5_2 +; CHECK-NEXT: // %bb.1: // %land.lhs.true +; CHECK-NEXT: adrp x8, b +; CHECK-NEXT: adrp x9, c +; CHECK-NEXT: ldr w8, [x8, :lo12:b] +; CHECK-NEXT: ldr w9, [x9, :lo12:c] +; CHECK-NEXT: cmp w8, w9 +; CHECK-NEXT: b.ne .LBB5_4 +; CHECK-NEXT: b .LBB5_5 +; CHECK-NEXT: .LBB5_2: // %lor.lhs.false +; CHECK-NEXT: b.le 
.LBB5_4 +; CHECK-NEXT: // %bb.3: // %land.lhs.true3 +; CHECK-NEXT: adrp x8, b +; CHECK-NEXT: adrp x9, d +; CHECK-NEXT: ldr w8, [x8, :lo12:b] +; CHECK-NEXT: ldr w9, [x9, :lo12:d] +; CHECK-NEXT: cmp w8, w9 +; CHECK-NEXT: b.eq .LBB5_5 +; CHECK-NEXT: .LBB5_4: // %if.end +; CHECK-NEXT: mov w0, wzr +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB5_5: +; CHECK-NEXT: mov w0, #1 +; CHECK-NEXT: ret entry: %0 = load i32, i32* @a, align 4 %cmp = icmp slt i32 %0, -5 @@ -236,6 +373,38 @@ declare %struct.Struct* @Update(%struct.Struct*) #1 ; no checks for this case, it just should be processed without errors define void @combine_non_adjacent_cmp_br(%struct.Struct* nocapture readonly %hdCall) #0 { +; CHECK-LABEL: combine_non_adjacent_cmp_br: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: str x30, [sp, #-48]! // 8-byte Folded Spill +; CHECK-NEXT: stp x22, x21, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 48 +; CHECK-NEXT: .cfi_offset w19, -8 +; CHECK-NEXT: .cfi_offset w20, -16 +; CHECK-NEXT: .cfi_offset w21, -24 +; CHECK-NEXT: .cfi_offset w22, -32 +; CHECK-NEXT: .cfi_offset w30, -48 +; CHECK-NEXT: ldr x19, [x0] +; CHECK-NEXT: mov w20, #24 +; CHECK-NEXT: adrp x22, glob +; CHECK-NEXT: add x21, x19, #2 // =2 +; CHECK-NEXT: .LBB6_1: // %land.rhs +; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ldr x8, [x20] +; CHECK-NEXT: cmp x8, #1 // =1 +; CHECK-NEXT: b.lt .LBB6_3 +; CHECK-NEXT: // %bb.2: // %while.body +; CHECK-NEXT: // in Loop: Header=BB6_1 Depth=1 +; CHECK-NEXT: ldr x0, [x22, :lo12:glob] +; CHECK-NEXT: bl Update +; CHECK-NEXT: sub x21, x21, #2 // =2 +; CHECK-NEXT: cmp x19, x21 +; CHECK-NEXT: b.lt .LBB6_1 +; CHECK-NEXT: .LBB6_3: // %while.end +; CHECK-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: ldp x22, x21, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp], #48 // 8-byte Folded Reload +; CHECK-NEXT: ret entry: %size = getelementptr inbounds 
%struct.Struct, %struct.Struct* %hdCall, i64 0, i32 0 %0 = load i64, i64* %size, align 8 @@ -262,11 +431,49 @@ while.end: declare void @do_something() #1 define i32 @do_nothing_if_resultant_opcodes_would_differ() #0 { -; CHECK-LABEL: do_nothing_if_resultant_opcodes_would_differ -; CHECK: cmn -; CHECK: b.gt -; CHECK: cmp -; CHECK: b.gt +; CHECK-LABEL: do_nothing_if_resultant_opcodes_would_differ: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: str x30, [sp, #-32]! // 8-byte Folded Spill +; CHECK-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: .cfi_offset w19, -8 +; CHECK-NEXT: .cfi_offset w20, -16 +; CHECK-NEXT: .cfi_offset w30, -32 +; CHECK-NEXT: adrp x19, a +; CHECK-NEXT: ldr w8, [x19, :lo12:a] +; CHECK-NEXT: cmn w8, #2 // =2 +; CHECK-NEXT: b.le .LBB7_2 +; CHECK-NEXT: // %bb.1: // %while.end +; CHECK-NEXT: cmp w8, #1 // =1 +; CHECK-NEXT: b.le .LBB7_5 +; CHECK-NEXT: b .LBB7_6 +; CHECK-NEXT: .LBB7_2: // %while.body.preheader +; CHECK-NEXT: sub w20, w8, #1 // =1 +; CHECK-NEXT: .LBB7_3: // %while.body +; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: bl do_something +; CHECK-NEXT: adds w20, w20, #1 // =1 +; CHECK-NEXT: b.mi .LBB7_3 +; CHECK-NEXT: // %bb.4: // %while.cond.while.end_crit_edge +; CHECK-NEXT: ldr w8, [x19, :lo12:a] +; CHECK-NEXT: cmp w8, #1 // =1 +; CHECK-NEXT: b.gt .LBB7_6 +; CHECK-NEXT: .LBB7_5: // %land.lhs.true +; CHECK-NEXT: adrp x8, b +; CHECK-NEXT: adrp x9, d +; CHECK-NEXT: ldr w8, [x8, :lo12:b] +; CHECK-NEXT: ldr w9, [x9, :lo12:d] +; CHECK-NEXT: cmp w8, w9 +; CHECK-NEXT: b.eq .LBB7_7 +; CHECK-NEXT: .LBB7_6: // %if.end +; CHECK-NEXT: mov w0, wzr +; CHECK-NEXT: b .LBB7_8 +; CHECK-NEXT: .LBB7_7: +; CHECK-NEXT: mov w0, #123 +; CHECK-NEXT: .LBB7_8: // %return +; CHECK-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp], #32 // 8-byte Folded Reload +; CHECK-NEXT: ret entry: %0 = load i32, i32* @a, align 4 %cmp4 = icmp slt i32 %0, -1 @@ -306,11 
+513,43 @@ return: ; preds = %if.end, %land.lhs.t } define i32 @do_nothing_if_compares_can_not_be_adjusted_to_each_other() #0 { -; CHECK-LABEL: do_nothing_if_compares_can_not_be_adjusted_to_each_other -; CHECK: cmp -; CHECK: b.gt -; CHECK: cmn -; CHECK: b.lt +; CHECK-LABEL: do_nothing_if_compares_can_not_be_adjusted_to_each_other: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: stp x30, x19, [sp, #-16]! // 16-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w19, -8 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: adrp x8, a +; CHECK-NEXT: ldr w8, [x8, :lo12:a] +; CHECK-NEXT: cmp w8, #0 // =0 +; CHECK-NEXT: b.gt .LBB8_3 +; CHECK-NEXT: // %bb.1: // %while.body.preheader +; CHECK-NEXT: sub w19, w8, #1 // =1 +; CHECK-NEXT: .LBB8_2: // %while.body +; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: bl do_something +; CHECK-NEXT: adds w19, w19, #1 // =1 +; CHECK-NEXT: b.mi .LBB8_2 +; CHECK-NEXT: .LBB8_3: // %while.end +; CHECK-NEXT: adrp x8, c +; CHECK-NEXT: ldr w8, [x8, :lo12:c] +; CHECK-NEXT: cmn w8, #2 // =2 +; CHECK-NEXT: b.lt .LBB8_5 +; CHECK-NEXT: // %bb.4: // %land.lhs.true +; CHECK-NEXT: adrp x8, b +; CHECK-NEXT: adrp x9, d +; CHECK-NEXT: ldr w8, [x8, :lo12:b] +; CHECK-NEXT: ldr w9, [x9, :lo12:d] +; CHECK-NEXT: cmp w8, w9 +; CHECK-NEXT: b.eq .LBB8_6 +; CHECK-NEXT: .LBB8_5: // %if.end +; CHECK-NEXT: mov w0, wzr +; CHECK-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB8_6: +; CHECK-NEXT: mov w0, #123 +; CHECK-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload +; CHECK-NEXT: ret entry: %0 = load i32, i32* @a, align 4 %cmp4 = icmp slt i32 %0, 1 @@ -356,19 +595,44 @@ return: ; preds = %if.end, %land.lhs.t ; b.gt .LBB0_5 define i32 @fcmpri(i32 %argc, i8** nocapture readonly %argv) { - ; CHECK-LABEL: fcmpri: -; CHECK: cmp w0, #2 -; CHECK: b.lt .LBB9_3 -; CHECK-NOT: cmp w0, #1 -; CHECK-NOT: b.le .LBB9_3 - -; CHECK-LABEL-DAG: .LBB9_3 -; CHECK: cmp w19, #0 -; CHECK: fcmp d8, 
#0.0 -; CHECK-NOT: cmp w19, #1 -; CHECK-NOT: b.ge .LBB9_5 - +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: str d8, [sp, #-32]! // 8-byte Folded Spill +; CHECK-NEXT: stp x30, x19, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: .cfi_offset w19, -8 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: .cfi_offset b8, -32 +; CHECK-NEXT: cmp w0, #2 // =2 +; CHECK-NEXT: b.lt .LBB9_3 +; CHECK-NEXT: // %bb.1: // %land.lhs.true +; CHECK-NEXT: ldr x8, [x1, #8] +; CHECK-NEXT: cbz x8, .LBB9_3 +; CHECK-NEXT: // %bb.2: +; CHECK-NEXT: mov w0, #3 +; CHECK-NEXT: b .LBB9_4 +; CHECK-NEXT: .LBB9_3: // %if.end +; CHECK-NEXT: mov w0, #1 +; CHECK-NEXT: bl zoo +; CHECK-NEXT: mov w19, w0 +; CHECK-NEXT: mov w0, #-1 +; CHECK-NEXT: bl yoo +; CHECK-NEXT: cmp w19, #0 // =0 +; CHECK-NEXT: cinc w0, w19, gt +; CHECK-NEXT: mov w1, #2 +; CHECK-NEXT: mov v8.16b, v0.16b +; CHECK-NEXT: bl xoo +; CHECK-NEXT: fmov d0, #-1.00000000 +; CHECK-NEXT: fadd d0, d8, d0 +; CHECK-NEXT: fcmp d8, #0.0 +; CHECK-NEXT: fcsel d0, d8, d0, gt +; CHECK-NEXT: fmov d1, #-2.00000000 +; CHECK-NEXT: bl woo +; CHECK-NEXT: mov w0, #4 +; CHECK-NEXT: .LBB9_4: // %return +; CHECK-NEXT: ldp x30, x19, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: ldr d8, [sp], #32 // 8-byte Folded Reload +; CHECK-NEXT: ret entry: %cmp = icmp sgt i32 %argc, 1 br i1 %cmp, label %land.lhs.true, label %if.end @@ -405,10 +669,27 @@ return: ; preds = %land.lhs.true, %con define void @cmp_shifted(i32 %in, i32 %lhs, i32 %rhs) { ; CHECK-LABEL: cmp_shifted: -; CHECK: cmp w0, #2, lsl #12 -; [...] -; CHECK: cmp w0, #1 - +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: cmp w0, #2, lsl #12 // =8192 +; CHECK-NEXT: b.lt .LBB10_2 +; CHECK-NEXT: // %bb.1: // %true +; CHECK-NEXT: mov w0, #128 +; CHECK-NEXT: b .LBB10_5 +; CHECK-NEXT: .LBB10_2: // %false +; CHECK-NEXT: cmp w0, #1 // =1 +; CHECK-NEXT: b.lt .LBB10_4 +; CHECK-NEXT: // %bb.3: // %truer +; CHECK-NEXT: mov w0, #42 +; CHECK-NEXT: b .LBB10_5 +; CHECK-NEXT: .LBB10_4: // %falser +; CHECK-NEXT: mov w0, #1 +; CHECK-NEXT: .LBB10_5: // %true +; CHECK-NEXT: bl zoo +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %tst_low = icmp sgt i32 %in, 8191 br i1 %tst_low, label %true, label %false @@ -430,10 +711,38 @@ falser: } define i32 @combine_gt_ge_sel(i64 %v, i64* %p) #0 { -; CHECK-LABEL: combine_gt_ge_sel -; CHECK: ldr [[reg1:w[0-9]*]], -; CHECK: cmp [[reg1]], #0 -; CHECK: csel {{.*}}, gt +; CHECK-LABEL: combine_gt_ge_sel: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: adrp x8, a +; CHECK-NEXT: ldr w8, [x8, :lo12:a] +; CHECK-NEXT: cmp w8, #0 // =0 +; CHECK-NEXT: csel x9, x0, xzr, gt +; CHECK-NEXT: str x9, [x1] +; CHECK-NEXT: b.le .LBB11_2 +; CHECK-NEXT: // %bb.1: // %lor.lhs.false +; CHECK-NEXT: cmp w8, #2 // =2 +; CHECK-NEXT: b.ge .LBB11_3 +; CHECK-NEXT: b .LBB11_4 +; CHECK-NEXT: .LBB11_2: // %land.lhs.true +; CHECK-NEXT: adrp x8, b +; CHECK-NEXT: adrp x9, c +; CHECK-NEXT: ldr w8, [x8, :lo12:b] +; CHECK-NEXT: ldr w9, [x9, :lo12:c] +; CHECK-NEXT: cmp w8, w9 +; CHECK-NEXT: b.eq .LBB11_5 +; CHECK-NEXT: .LBB11_3: // %land.lhs.true3 +; CHECK-NEXT: adrp x8, b +; CHECK-NEXT: adrp x9, d +; CHECK-NEXT: ldr w8, [x8, :lo12:b] +; CHECK-NEXT: ldr w9, [x9, :lo12:d] +; CHECK-NEXT: cmp w8, w9 +; CHECK-NEXT: b.eq .LBB11_5 +; CHECK-NEXT: .LBB11_4: // %if.end +; CHECK-NEXT: mov w0, wzr +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB11_5: +; CHECK-NEXT: mov w0, #1 +; CHECK-NEXT: ret entry: %0 = load i32, i32* @a, align 4 %cmp = icmp sgt i32 %0, 0 diff --git 
a/llvm/test/CodeGen/AArch64/cond-br-tuning.ll b/llvm/test/CodeGen/AArch64/cond-br-tuning.ll index e0b6a2f0503732..e98d4110fd2760 100644 --- a/llvm/test/CodeGen/AArch64/cond-br-tuning.ll +++ b/llvm/test/CodeGen/AArch64/cond-br-tuning.ll @@ -1,14 +1,22 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -debugify-and-strip-all-safe < %s -O3 -mtriple=aarch64-eabi -verify-machineinstrs | FileCheck %s target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" target triple = "aarch64-linaro-linux-gnueabi" ; CMN is an alias of ADDS. -; CHECK-LABEL: test_add_cbz: -; CHECK: cmn w0, w1 -; CHECK: b.eq -; CHECK: ret define void @test_add_cbz(i32 %a, i32 %b, i32* %ptr) { +; CHECK-LABEL: test_add_cbz: +; CHECK: // %bb.0: +; CHECK-NEXT: cmn w0, w1 +; CHECK-NEXT: b.eq .LBB0_2 +; CHECK-NEXT: // %bb.1: // %L1 +; CHECK-NEXT: str wzr, [x2] +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB0_2: // %L2 +; CHECK-NEXT: mov w8, #1 +; CHECK-NEXT: str w8, [x2] +; CHECK-NEXT: ret %c = add nsw i32 %a, %b %d = icmp ne i32 %c, 0 br i1 %d, label %L1, label %L2 @@ -20,11 +28,17 @@ L2: ret void } -; CHECK-LABEL: test_add_cbz_multiple_use: -; CHECK: adds -; CHECK: b.eq -; CHECK: ret define void @test_add_cbz_multiple_use(i32 %a, i32 %b, i32* %ptr) { +; CHECK-LABEL: test_add_cbz_multiple_use: +; CHECK: // %bb.0: +; CHECK-NEXT: adds w8, w0, w1 +; CHECK-NEXT: b.eq .LBB1_2 +; CHECK-NEXT: // %bb.1: // %L1 +; CHECK-NEXT: str wzr, [x2] +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB1_2: // %L2 +; CHECK-NEXT: str w8, [x2] +; CHECK-NEXT: ret %c = add nsw i32 %a, %b %d = icmp ne i32 %c, 0 br i1 %d, label %L1, label %L2 @@ -36,10 +50,18 @@ L2: ret void } -; CHECK-LABEL: test_add_cbz_64: -; CHECK: cmn x0, x1 -; CHECK: b.eq define void @test_add_cbz_64(i64 %a, i64 %b, i64* %ptr) { +; CHECK-LABEL: test_add_cbz_64: +; CHECK: // %bb.0: +; CHECK-NEXT: cmn x0, x1 +; CHECK-NEXT: b.eq .LBB2_2 +; CHECK-NEXT: // %bb.1: // %L1 +; CHECK-NEXT: str xzr, [x2] +; CHECK-NEXT: ret +; 
CHECK-NEXT: .LBB2_2: // %L2 +; CHECK-NEXT: mov w8, #1 +; CHECK-NEXT: str x8, [x2] +; CHECK-NEXT: ret %c = add nsw i64 %a, %b %d = icmp ne i64 %c, 0 br i1 %d, label %L1, label %L2 @@ -51,10 +73,18 @@ L2: ret void } -; CHECK-LABEL: test_and_cbz: -; CHECK: tst w0, #0x6 -; CHECK: b.eq define void @test_and_cbz(i32 %a, i32* %ptr) { +; CHECK-LABEL: test_and_cbz: +; CHECK: // %bb.0: +; CHECK-NEXT: tst w0, #0x6 +; CHECK-NEXT: b.eq .LBB3_2 +; CHECK-NEXT: // %bb.1: // %L1 +; CHECK-NEXT: str wzr, [x1] +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB3_2: // %L2 +; CHECK-NEXT: mov w8, #1 +; CHECK-NEXT: str w8, [x1] +; CHECK-NEXT: ret %c = and i32 %a, 6 %d = icmp ne i32 %c, 0 br i1 %d, label %L1, label %L2 @@ -66,10 +96,18 @@ L2: ret void } -; CHECK-LABEL: test_bic_cbnz: -; CHECK: bics wzr, w1, w0 -; CHECK: b.ne define void @test_bic_cbnz(i32 %a, i32 %b, i32* %ptr) { +; CHECK-LABEL: test_bic_cbnz: +; CHECK: // %bb.0: +; CHECK-NEXT: bics wzr, w1, w0 +; CHECK-NEXT: b.eq .LBB4_2 +; CHECK-NEXT: // %bb.1: // %L2 +; CHECK-NEXT: mov w8, #1 +; CHECK-NEXT: str w8, [x2] +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB4_2: // %L1 +; CHECK-NEXT: str wzr, [x2] +; CHECK-NEXT: ret %c = and i32 %a, %b %d = icmp eq i32 %c, %b br i1 %d, label %L1, label %L2 @@ -81,11 +119,15 @@ L2: ret void } -; CHECK-LABEL: test_add_tbz: -; CHECK: adds -; CHECK: b.pl -; CHECK: ret define void @test_add_tbz(i32 %a, i32 %b, i32* %ptr) { +; CHECK-LABEL: test_add_tbz: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: adds w8, w0, w1 +; CHECK-NEXT: b.pl .LBB5_2 +; CHECK-NEXT: // %bb.1: // %L1 +; CHECK-NEXT: str w8, [x2] +; CHECK-NEXT: .LBB5_2: // %L2 +; CHECK-NEXT: ret entry: %add = add nsw i32 %a, %b %cmp36 = icmp sge i32 %add, 0 @@ -97,11 +139,15 @@ L2: ret void } -; CHECK-LABEL: test_subs_tbz: -; CHECK: subs -; CHECK: b.pl -; CHECK: ret define void @test_subs_tbz(i32 %a, i32 %b, i32* %ptr) { +; CHECK-LABEL: test_subs_tbz: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: subs w8, w0, w1 +; CHECK-NEXT: b.pl .LBB6_2 +; CHECK-NEXT: // %bb.1: // 
%L1 +; CHECK-NEXT: str w8, [x2] +; CHECK-NEXT: .LBB6_2: // %L2 +; CHECK-NEXT: ret entry: %sub = sub nsw i32 %a, %b %cmp36 = icmp sge i32 %sub, 0 @@ -113,11 +159,15 @@ L2: ret void } -; CHECK-LABEL: test_add_tbnz -; CHECK: adds -; CHECK: b.mi -; CHECK: ret define void @test_add_tbnz(i32 %a, i32 %b, i32* %ptr) { +; CHECK-LABEL: test_add_tbnz: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: adds w8, w0, w1 +; CHECK-NEXT: b.mi .LBB7_2 +; CHECK-NEXT: // %bb.1: // %L1 +; CHECK-NEXT: str w8, [x2] +; CHECK-NEXT: .LBB7_2: // %L2 +; CHECK-NEXT: ret entry: %add = add nsw i32 %a, %b %cmp36 = icmp slt i32 %add, 0 @@ -129,11 +179,15 @@ L2: ret void } -; CHECK-LABEL: test_subs_tbnz -; CHECK: subs -; CHECK: b.mi -; CHECK: ret define void @test_subs_tbnz(i32 %a, i32 %b, i32* %ptr) { +; CHECK-LABEL: test_subs_tbnz: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: subs w8, w0, w1 +; CHECK-NEXT: b.mi .LBB8_2 +; CHECK-NEXT: // %bb.1: // %L1 +; CHECK-NEXT: str w8, [x2] +; CHECK-NEXT: .LBB8_2: // %L2 +; CHECK-NEXT: ret entry: %sub = sub nsw i32 %a, %b %cmp36 = icmp slt i32 %sub, 0 @@ -149,11 +203,22 @@ declare void @foo() declare void @bar(i32) ; Don't transform since the call will clobber the NZCV bits. -; CHECK-LABEL: test_call_clobber: -; CHECK: and w[[DST:[0-9]+]], w1, #0x6 -; CHECK: bl bar -; CHECK: cbnz w[[DST]] define void @test_call_clobber(i32 %unused, i32 %a) { +; CHECK-LABEL: test_call_clobber: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: stp x30, x19, [sp, #-16]! 
// 16-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w19, -8 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: and w19, w1, #0x6 +; CHECK-NEXT: mov w0, w19 +; CHECK-NEXT: bl bar +; CHECK-NEXT: cbnz w19, .LBB9_2 +; CHECK-NEXT: // %bb.1: // %if.end +; CHECK-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB9_2: // %if.then +; CHECK-NEXT: bl foo entry: %c = and i32 %a, 6 call void @bar(i32 %c) diff --git a/llvm/test/CodeGen/AArch64/fast-isel-cmp-branch.ll b/llvm/test/CodeGen/AArch64/fast-isel-cmp-branch.ll index ce47bc42453c8c..d1c762585a9e84 100644 --- a/llvm/test/CodeGen/AArch64/fast-isel-cmp-branch.ll +++ b/llvm/test/CodeGen/AArch64/fast-isel-cmp-branch.ll @@ -1,10 +1,18 @@ -; RUN: llc -aarch64-enable-atomic-cfg-tidy=0 -mtriple=aarch64-apple-darwin < %s | FileCheck %s -; RUN: llc -fast-isel -fast-isel-abort=1 -aarch64-enable-atomic-cfg-tidy=0 -mtriple=aarch64-apple-darwin < %s | FileCheck %s +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -aarch64-enable-atomic-cfg-tidy=0 -mtriple=aarch64-apple-darwin < %s | FileCheck %s --check-prefixes=CHECK,NOFASTISEL +; RUN: llc -fast-isel -fast-isel-abort=1 -aarch64-enable-atomic-cfg-tidy=0 -mtriple=aarch64-apple-darwin < %s | FileCheck %s --check-prefixes=CHECK,FASTISEL define i32 @fcmp_oeq(float %x, float %y) { -; CHECK-LABEL: fcmp_oeq -; CHECK: fcmp s0, s1 -; CHECK-NEXT: b.ne {{LBB.+_2}} +; CHECK-LABEL: fcmp_oeq: +; CHECK: ; %bb.0: +; CHECK-NEXT: fcmp s0, s1 +; CHECK-NEXT: b.ne LBB0_2 +; CHECK-NEXT: ; %bb.1: ; %bb1 +; CHECK-NEXT: mov w0, wzr +; CHECK-NEXT: ret +; CHECK-NEXT: LBB0_2: ; %bb2 +; CHECK-NEXT: mov w0, #1 +; CHECK-NEXT: ret %1 = fcmp oeq float %x, %y br i1 %1, label %bb1, label %bb2 bb2: @@ -14,9 +22,16 @@ bb1: } define i32 @fcmp_ogt(float %x, float %y) { -; CHECK-LABEL: fcmp_ogt -; CHECK: fcmp s0, s1 -; CHECK-NEXT: b.le {{LBB.+_2}} +; CHECK-LABEL: fcmp_ogt: +; CHECK: ; %bb.0: +; CHECK-NEXT: 
fcmp s0, s1 +; CHECK-NEXT: b.le LBB1_2 +; CHECK-NEXT: ; %bb.1: ; %bb1 +; CHECK-NEXT: mov w0, wzr +; CHECK-NEXT: ret +; CHECK-NEXT: LBB1_2: ; %bb2 +; CHECK-NEXT: mov w0, #1 +; CHECK-NEXT: ret %1 = fcmp ogt float %x, %y br i1 %1, label %bb1, label %bb2 bb2: @@ -26,9 +41,16 @@ bb1: } define i32 @fcmp_oge(float %x, float %y) { -; CHECK-LABEL: fcmp_oge -; CHECK: fcmp s0, s1 -; CHECK-NEXT: b.lt {{LBB.+_2}} +; CHECK-LABEL: fcmp_oge: +; CHECK: ; %bb.0: +; CHECK-NEXT: fcmp s0, s1 +; CHECK-NEXT: b.lt LBB2_2 +; CHECK-NEXT: ; %bb.1: ; %bb1 +; CHECK-NEXT: mov w0, wzr +; CHECK-NEXT: ret +; CHECK-NEXT: LBB2_2: ; %bb2 +; CHECK-NEXT: mov w0, #1 +; CHECK-NEXT: ret %1 = fcmp oge float %x, %y br i1 %1, label %bb1, label %bb2 bb2: @@ -38,9 +60,16 @@ bb1: } define i32 @fcmp_olt(float %x, float %y) { -; CHECK-LABEL: fcmp_olt -; CHECK: fcmp s0, s1 -; CHECK-NEXT: b.pl {{LBB.+_2}} +; CHECK-LABEL: fcmp_olt: +; CHECK: ; %bb.0: +; CHECK-NEXT: fcmp s0, s1 +; CHECK-NEXT: b.pl LBB3_2 +; CHECK-NEXT: ; %bb.1: ; %bb1 +; CHECK-NEXT: mov w0, wzr +; CHECK-NEXT: ret +; CHECK-NEXT: LBB3_2: ; %bb2 +; CHECK-NEXT: mov w0, #1 +; CHECK-NEXT: ret %1 = fcmp olt float %x, %y br i1 %1, label %bb1, label %bb2 bb2: @@ -50,9 +79,16 @@ bb1: } define i32 @fcmp_ole(float %x, float %y) { -; CHECK-LABEL: fcmp_ole -; CHECK: fcmp s0, s1 -; CHECK-NEXT: b.hi {{LBB.+_2}} +; CHECK-LABEL: fcmp_ole: +; CHECK: ; %bb.0: +; CHECK-NEXT: fcmp s0, s1 +; CHECK-NEXT: b.hi LBB4_2 +; CHECK-NEXT: ; %bb.1: ; %bb1 +; CHECK-NEXT: mov w0, wzr +; CHECK-NEXT: ret +; CHECK-NEXT: LBB4_2: ; %bb2 +; CHECK-NEXT: mov w0, #1 +; CHECK-NEXT: ret %1 = fcmp ole float %x, %y br i1 %1, label %bb1, label %bb2 bb2: @@ -62,10 +98,30 @@ bb1: } define i32 @fcmp_one(float %x, float %y) { -; CHECK-LABEL: fcmp_one -; CHECK: fcmp s0, s1 -; CHECK-NEXT: b.mi -; CHECK-NEXT: b.gt +; NOFASTISEL-LABEL: fcmp_one: +; NOFASTISEL: ; %bb.0: +; NOFASTISEL-NEXT: fcmp s0, s1 +; NOFASTISEL-NEXT: b.mi LBB5_1 +; NOFASTISEL-NEXT: b.gt LBB5_1 +; NOFASTISEL-NEXT: b LBB5_2 +; 
NOFASTISEL-NEXT: LBB5_1: ; %bb1 +; NOFASTISEL-NEXT: mov w0, wzr +; NOFASTISEL-NEXT: ret +; NOFASTISEL-NEXT: LBB5_2: ; %bb2 +; NOFASTISEL-NEXT: mov w0, #1 +; NOFASTISEL-NEXT: ret +; +; FASTISEL-LABEL: fcmp_one: +; FASTISEL: ; %bb.0: +; FASTISEL-NEXT: fcmp s0, s1 +; FASTISEL-NEXT: b.mi LBB5_2 +; FASTISEL-NEXT: b.gt LBB5_2 +; FASTISEL-NEXT: ; %bb.1: ; %bb2 +; FASTISEL-NEXT: mov w0, #1 +; FASTISEL-NEXT: ret +; FASTISEL-NEXT: LBB5_2: ; %bb1 +; FASTISEL-NEXT: mov w0, wzr +; FASTISEL-NEXT: ret %1 = fcmp one float %x, %y br i1 %1, label %bb1, label %bb2 bb2: @@ -75,9 +131,16 @@ bb1: } define i32 @fcmp_ord(float %x, float %y) { -; CHECK-LABEL: fcmp_ord -; CHECK: fcmp s0, s1 -; CHECK-NEXT: b.vs {{LBB.+_2}} +; CHECK-LABEL: fcmp_ord: +; CHECK: ; %bb.0: +; CHECK-NEXT: fcmp s0, s1 +; CHECK-NEXT: b.vs LBB6_2 +; CHECK-NEXT: ; %bb.1: ; %bb1 +; CHECK-NEXT: mov w0, wzr +; CHECK-NEXT: ret +; CHECK-NEXT: LBB6_2: ; %bb2 +; CHECK-NEXT: mov w0, #1 +; CHECK-NEXT: ret %1 = fcmp ord float %x, %y br i1 %1, label %bb1, label %bb2 bb2: @@ -87,9 +150,16 @@ bb1: } define i32 @fcmp_uno(float %x, float %y) { -; CHECK-LABEL: fcmp_uno -; CHECK: fcmp s0, s1 -; CHECK-NEXT: b.vs {{LBB.+_2}} +; CHECK-LABEL: fcmp_uno: +; CHECK: ; %bb.0: +; CHECK-NEXT: fcmp s0, s1 +; CHECK-NEXT: b.vs LBB7_2 +; CHECK-NEXT: ; %bb.1: ; %bb2 +; CHECK-NEXT: mov w0, #1 +; CHECK-NEXT: ret +; CHECK-NEXT: LBB7_2: ; %bb1 +; CHECK-NEXT: mov w0, wzr +; CHECK-NEXT: ret %1 = fcmp uno float %x, %y br i1 %1, label %bb1, label %bb2 bb2: @@ -99,10 +169,30 @@ bb1: } define i32 @fcmp_ueq(float %x, float %y) { -; CHECK-LABEL: fcmp_ueq -; CHECK: fcmp s0, s1 -; CHECK-NEXT: b.eq {{LBB.+_2}} -; CHECK-NEXT: b.vs {{LBB.+_2}} +; NOFASTISEL-LABEL: fcmp_ueq: +; NOFASTISEL: ; %bb.0: +; NOFASTISEL-NEXT: fcmp s0, s1 +; NOFASTISEL-NEXT: b.eq LBB8_2 +; NOFASTISEL-NEXT: b.vs LBB8_2 +; NOFASTISEL-NEXT: b LBB8_1 +; NOFASTISEL-NEXT: LBB8_1: ; %bb2 +; NOFASTISEL-NEXT: mov w0, #1 +; NOFASTISEL-NEXT: ret +; NOFASTISEL-NEXT: LBB8_2: ; %bb1 +; NOFASTISEL-NEXT: mov 
w0, wzr +; NOFASTISEL-NEXT: ret +; +; FASTISEL-LABEL: fcmp_ueq: +; FASTISEL: ; %bb.0: +; FASTISEL-NEXT: fcmp s0, s1 +; FASTISEL-NEXT: b.eq LBB8_2 +; FASTISEL-NEXT: b.vs LBB8_2 +; FASTISEL-NEXT: ; %bb.1: ; %bb2 +; FASTISEL-NEXT: mov w0, #1 +; FASTISEL-NEXT: ret +; FASTISEL-NEXT: LBB8_2: ; %bb1 +; FASTISEL-NEXT: mov w0, wzr +; FASTISEL-NEXT: ret %1 = fcmp ueq float %x, %y br i1 %1, label %bb1, label %bb2 bb2: @@ -112,9 +202,16 @@ bb1: } define i32 @fcmp_ugt(float %x, float %y) { -; CHECK-LABEL: fcmp_ugt -; CHECK: fcmp s0, s1 -; CHECK-NEXT: b.ls {{LBB.+_2}} +; CHECK-LABEL: fcmp_ugt: +; CHECK: ; %bb.0: +; CHECK-NEXT: fcmp s0, s1 +; CHECK-NEXT: b.ls LBB9_2 +; CHECK-NEXT: ; %bb.1: ; %bb1 +; CHECK-NEXT: mov w0, wzr +; CHECK-NEXT: ret +; CHECK-NEXT: LBB9_2: ; %bb2 +; CHECK-NEXT: mov w0, #1 +; CHECK-NEXT: ret %1 = fcmp ugt float %x, %y br i1 %1, label %bb1, label %bb2 bb2: @@ -124,9 +221,16 @@ bb1: } define i32 @fcmp_uge(float %x, float %y) { -; CHECK-LABEL: fcmp_uge -; CHECK: fcmp s0, s1 -; CHECK-NEXT: b.mi {{LBB.+_2}} +; CHECK-LABEL: fcmp_uge: +; CHECK: ; %bb.0: +; CHECK-NEXT: fcmp s0, s1 +; CHECK-NEXT: b.mi LBB10_2 +; CHECK-NEXT: ; %bb.1: ; %bb1 +; CHECK-NEXT: mov w0, wzr +; CHECK-NEXT: ret +; CHECK-NEXT: LBB10_2: ; %bb2 +; CHECK-NEXT: mov w0, #1 +; CHECK-NEXT: ret %1 = fcmp uge float %x, %y br i1 %1, label %bb1, label %bb2 bb2: @@ -136,9 +240,16 @@ bb1: } define i32 @fcmp_ult(float %x, float %y) { -; CHECK-LABEL: fcmp_ult -; CHECK: fcmp s0, s1 -; CHECK-NEXT: b.ge {{LBB.+_2}} +; CHECK-LABEL: fcmp_ult: +; CHECK: ; %bb.0: +; CHECK-NEXT: fcmp s0, s1 +; CHECK-NEXT: b.ge LBB11_2 +; CHECK-NEXT: ; %bb.1: ; %bb1 +; CHECK-NEXT: mov w0, wzr +; CHECK-NEXT: ret +; CHECK-NEXT: LBB11_2: ; %bb2 +; CHECK-NEXT: mov w0, #1 +; CHECK-NEXT: ret %1 = fcmp ult float %x, %y br i1 %1, label %bb1, label %bb2 bb2: @@ -148,9 +259,16 @@ bb1: } define i32 @fcmp_ule(float %x, float %y) { -; CHECK-LABEL: fcmp_ule -; CHECK: fcmp s0, s1 -; CHECK-NEXT: b.gt {{LBB.+_2}} +; CHECK-LABEL: fcmp_ule: +; CHECK: 
; %bb.0: +; CHECK-NEXT: fcmp s0, s1 +; CHECK-NEXT: b.gt LBB12_2 +; CHECK-NEXT: ; %bb.1: ; %bb1 +; CHECK-NEXT: mov w0, wzr +; CHECK-NEXT: ret +; CHECK-NEXT: LBB12_2: ; %bb2 +; CHECK-NEXT: mov w0, #1 +; CHECK-NEXT: ret %1 = fcmp ule float %x, %y br i1 %1, label %bb1, label %bb2 bb2: @@ -160,9 +278,16 @@ bb1: } define i32 @fcmp_une(float %x, float %y) { -; CHECK-LABEL: fcmp_une -; CHECK: fcmp s0, s1 -; CHECK-NEXT: b.eq {{LBB.+_2}} +; CHECK-LABEL: fcmp_une: +; CHECK: ; %bb.0: +; CHECK-NEXT: fcmp s0, s1 +; CHECK-NEXT: b.eq LBB13_2 +; CHECK-NEXT: ; %bb.1: ; %bb1 +; CHECK-NEXT: mov w0, wzr +; CHECK-NEXT: ret +; CHECK-NEXT: LBB13_2: ; %bb2 +; CHECK-NEXT: mov w0, #1 +; CHECK-NEXT: ret %1 = fcmp une float %x, %y br i1 %1, label %bb1, label %bb2 bb2: @@ -172,9 +297,16 @@ bb1: } define i32 @icmp_eq(i32 %x, i32 %y) { -; CHECK-LABEL: icmp_eq -; CHECK: cmp w0, w1 -; CHECK-NEXT: b.ne {{LBB.+_2}} +; CHECK-LABEL: icmp_eq: +; CHECK: ; %bb.0: +; CHECK-NEXT: cmp w0, w1 +; CHECK-NEXT: b.eq LBB14_2 +; CHECK-NEXT: ; %bb.1: ; %bb2 +; CHECK-NEXT: mov w0, #1 +; CHECK-NEXT: ret +; CHECK-NEXT: LBB14_2: ; %bb1 +; CHECK-NEXT: mov w0, wzr +; CHECK-NEXT: ret %1 = icmp eq i32 %x, %y br i1 %1, label %bb1, label %bb2 bb2: @@ -184,9 +316,16 @@ bb1: } define i32 @icmp_ne(i32 %x, i32 %y) { -; CHECK-LABEL: icmp_ne -; CHECK: cmp w0, w1 -; CHECK-NEXT: b.eq {{LBB.+_2}} +; CHECK-LABEL: icmp_ne: +; CHECK: ; %bb.0: +; CHECK-NEXT: cmp w0, w1 +; CHECK-NEXT: b.eq LBB15_2 +; CHECK-NEXT: ; %bb.1: ; %bb1 +; CHECK-NEXT: mov w0, wzr +; CHECK-NEXT: ret +; CHECK-NEXT: LBB15_2: ; %bb2 +; CHECK-NEXT: mov w0, #1 +; CHECK-NEXT: ret %1 = icmp ne i32 %x, %y br i1 %1, label %bb1, label %bb2 bb2: @@ -196,9 +335,16 @@ bb1: } define i32 @icmp_ugt(i32 %x, i32 %y) { -; CHECK-LABEL: icmp_ugt -; CHECK: cmp w0, w1 -; CHECK-NEXT: b.ls {{LBB.+_2}} +; CHECK-LABEL: icmp_ugt: +; CHECK: ; %bb.0: +; CHECK-NEXT: cmp w0, w1 +; CHECK-NEXT: b.ls LBB16_2 +; CHECK-NEXT: ; %bb.1: ; %bb1 +; CHECK-NEXT: mov w0, wzr +; CHECK-NEXT: ret +; CHECK-NEXT: 
LBB16_2: ; %bb2 +; CHECK-NEXT: mov w0, #1 +; CHECK-NEXT: ret %1 = icmp ugt i32 %x, %y br i1 %1, label %bb1, label %bb2 bb2: @@ -208,9 +354,16 @@ bb1: } define i32 @icmp_uge(i32 %x, i32 %y) { -; CHECK-LABEL: icmp_uge -; CHECK: cmp w0, w1 -; CHECK-NEXT: b.lo {{LBB.+_2}} +; CHECK-LABEL: icmp_uge: +; CHECK: ; %bb.0: +; CHECK-NEXT: cmp w0, w1 +; CHECK-NEXT: b.hs LBB17_2 +; CHECK-NEXT: ; %bb.1: ; %bb2 +; CHECK-NEXT: mov w0, #1 +; CHECK-NEXT: ret +; CHECK-NEXT: LBB17_2: ; %bb1 +; CHECK-NEXT: mov w0, wzr +; CHECK-NEXT: ret %1 = icmp uge i32 %x, %y br i1 %1, label %bb1, label %bb2 bb2: @@ -220,9 +373,16 @@ bb1: } define i32 @icmp_ult(i32 %x, i32 %y) { -; CHECK-LABEL: icmp_ult -; CHECK: cmp w0, w1 -; CHECK-NEXT: b.hs {{LBB.+_2}} +; CHECK-LABEL: icmp_ult: +; CHECK: ; %bb.0: +; CHECK-NEXT: cmp w0, w1 +; CHECK-NEXT: b.hs LBB18_2 +; CHECK-NEXT: ; %bb.1: ; %bb1 +; CHECK-NEXT: mov w0, wzr +; CHECK-NEXT: ret +; CHECK-NEXT: LBB18_2: ; %bb2 +; CHECK-NEXT: mov w0, #1 +; CHECK-NEXT: ret %1 = icmp ult i32 %x, %y br i1 %1, label %bb1, label %bb2 bb2: @@ -232,9 +392,16 @@ bb1: } define i32 @icmp_ule(i32 %x, i32 %y) { -; CHECK-LABEL: icmp_ule -; CHECK: cmp w0, w1 -; CHECK-NEXT: b.hi {{LBB.+_2}} +; CHECK-LABEL: icmp_ule: +; CHECK: ; %bb.0: +; CHECK-NEXT: cmp w0, w1 +; CHECK-NEXT: b.ls LBB19_2 +; CHECK-NEXT: ; %bb.1: ; %bb2 +; CHECK-NEXT: mov w0, #1 +; CHECK-NEXT: ret +; CHECK-NEXT: LBB19_2: ; %bb1 +; CHECK-NEXT: mov w0, wzr +; CHECK-NEXT: ret %1 = icmp ule i32 %x, %y br i1 %1, label %bb1, label %bb2 bb2: @@ -244,9 +411,16 @@ bb1: } define i32 @icmp_sgt(i32 %x, i32 %y) { -; CHECK-LABEL: icmp_sgt -; CHECK: cmp w0, w1 -; CHECK-NEXT: b.le {{LBB.+_2}} +; CHECK-LABEL: icmp_sgt: +; CHECK: ; %bb.0: +; CHECK-NEXT: cmp w0, w1 +; CHECK-NEXT: b.le LBB20_2 +; CHECK-NEXT: ; %bb.1: ; %bb1 +; CHECK-NEXT: mov w0, wzr +; CHECK-NEXT: ret +; CHECK-NEXT: LBB20_2: ; %bb2 +; CHECK-NEXT: mov w0, #1 +; CHECK-NEXT: ret %1 = icmp sgt i32 %x, %y br i1 %1, label %bb1, label %bb2 bb2: @@ -256,9 +430,16 @@ bb1: } define 
i32 @icmp_sge(i32 %x, i32 %y) { -; CHECK-LABEL: icmp_sge -; CHECK: cmp w0, w1 -; CHECK-NEXT: b.lt {{LBB.+_2}} +; CHECK-LABEL: icmp_sge: +; CHECK: ; %bb.0: +; CHECK-NEXT: cmp w0, w1 +; CHECK-NEXT: b.ge LBB21_2 +; CHECK-NEXT: ; %bb.1: ; %bb2 +; CHECK-NEXT: mov w0, #1 +; CHECK-NEXT: ret +; CHECK-NEXT: LBB21_2: ; %bb1 +; CHECK-NEXT: mov w0, wzr +; CHECK-NEXT: ret %1 = icmp sge i32 %x, %y br i1 %1, label %bb1, label %bb2 bb2: @@ -268,9 +449,16 @@ bb1: } define i32 @icmp_slt(i32 %x, i32 %y) { -; CHECK-LABEL: icmp_slt -; CHECK: cmp w0, w1 -; CHECK-NEXT: b.ge {{LBB.+_2}} +; CHECK-LABEL: icmp_slt: +; CHECK: ; %bb.0: +; CHECK-NEXT: cmp w0, w1 +; CHECK-NEXT: b.ge LBB22_2 +; CHECK-NEXT: ; %bb.1: ; %bb1 +; CHECK-NEXT: mov w0, wzr +; CHECK-NEXT: ret +; CHECK-NEXT: LBB22_2: ; %bb2 +; CHECK-NEXT: mov w0, #1 +; CHECK-NEXT: ret %1 = icmp slt i32 %x, %y br i1 %1, label %bb1, label %bb2 bb2: @@ -280,9 +468,16 @@ bb1: } define i32 @icmp_sle(i32 %x, i32 %y) { -; CHECK-LABEL: icmp_sle -; CHECK: cmp w0, w1 -; CHECK-NEXT: b.gt {{LBB.+_2}} +; CHECK-LABEL: icmp_sle: +; CHECK: ; %bb.0: +; CHECK-NEXT: cmp w0, w1 +; CHECK-NEXT: b.le LBB23_2 +; CHECK-NEXT: ; %bb.1: ; %bb2 +; CHECK-NEXT: mov w0, #1 +; CHECK-NEXT: ret +; CHECK-NEXT: LBB23_2: ; %bb1 +; CHECK-NEXT: mov w0, wzr +; CHECK-NEXT: ret %1 = icmp sle i32 %x, %y br i1 %1, label %bb1, label %bb2 bb2: diff --git a/llvm/test/CodeGen/ARM/2011-12-14-machine-sink.ll b/llvm/test/CodeGen/ARM/2011-12-14-machine-sink.ll index 88019f450e3663..5716d7dbc40574 100644 --- a/llvm/test/CodeGen/ARM/2011-12-14-machine-sink.ll +++ b/llvm/test/CodeGen/ARM/2011-12-14-machine-sink.ll @@ -1,9 +1,14 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; REQUIRES: asserts ; RUN: llc < %s -o /dev/null -stats 2>&1 | FileCheck %s -check-prefix=STATS ; Radar 10266272 target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32" target triple = 
"thumbv7-apple-ios4.0.0" -; STATS-NOT: machine-sink + +; STATS: 1 machine-cse - Number of common subexpression eliminated +; STATS: 1 machine-sink - Number of critical edges split +; STATS: 1 machine-sink - Number of machine instructions sunk + define i32 @foo(i32 %h, i32 %arg1) nounwind readonly ssp { entry: diff --git a/llvm/test/CodeGen/ARM/cmpxchg-weak.ll b/llvm/test/CodeGen/ARM/cmpxchg-weak.ll index 5ee07828526c56..17f2ed74ecc05b 100644 --- a/llvm/test/CodeGen/ARM/cmpxchg-weak.ll +++ b/llvm/test/CodeGen/ARM/cmpxchg-weak.ll @@ -5,27 +5,27 @@ define void @test_cmpxchg_weak(i32 *%addr, i32 %desired, i32 %new) { %pair = cmpxchg weak i32* %addr, i32 %desired, i32 %new seq_cst monotonic %oldval = extractvalue { i32, i1 } %pair, 0 -; CHECK-NEXT: %bb.0: -; CHECK-NEXT: ldrex [[LOADED:r[0-9]+]], [r0] -; CHECK-NEXT: cmp [[LOADED]], r1 -; CHECK-NEXT: bne [[LDFAILBB:LBB[0-9]+_[0-9]+]] -; CHECK-NEXT: %bb.1: -; CHECK-NEXT: dmb ish -; CHECK-NEXT: strex [[SUCCESS:r[0-9]+]], r2, [r0] -; CHECK-NEXT: cmp [[SUCCESS]], #0 -; CHECK-NEXT: beq [[SUCCESSBB:LBB[0-9]+_[0-9]+]] -; CHECK-NEXT: %bb.2: -; CHECK-NEXT: str r3, [r0] -; CHECK-NEXT: bx lr -; CHECK-NEXT: [[LDFAILBB]]: +; CHECK-NEXT: @ %bb.0: @ %cmpxchg.start +; CHECK-NEXT: ldrex r3, [r0] +; CHECK-NEXT: cmp r3, r1 +; CHECK-NEXT: beq LBB0_2 +; CHECK-NEXT: @ %bb.1: @ %cmpxchg.nostore ; CHECK-NEXT: clrex +; CHECK-NEXT: b LBB0_3 +; CHECK-NEXT: LBB0_2: @ %cmpxchg.fencedstore +; CHECK-NEXT: dmb ish +; CHECK-NEXT: strex r1, r2, [r0] +; CHECK-NEXT: cmp r1, #0 +; CHECK-NEXT: beq LBB0_4 +; CHECK-NEXT: LBB0_3: @ %cmpxchg.end ; CHECK-NEXT: str r3, [r0] ; CHECK-NEXT: bx lr -; CHECK-NEXT: [[SUCCESSBB]]: +; CHECK-NEXT: LBB0_4: @ %cmpxchg.success ; CHECK-NEXT: dmb ish ; CHECK-NEXT: str r3, [r0] ; CHECK-NEXT: bx lr + store i32 %oldval, i32* %addr ret void } @@ -37,23 +37,24 @@ define i1 @test_cmpxchg_weak_to_bool(i32, i32 *%addr, i32 %desired, i32 %new) { %pair = cmpxchg weak i32* %addr, i32 %desired, i32 %new seq_cst monotonic %success = 
extractvalue { i32, i1 } %pair, 1 -; CHECK-NEXT: %bb.0: -; CHECK-NEXT: ldrex [[LOADED:r[0-9]+]], [r1] -; CHECK-NEXT: cmp [[LOADED]], r2 -; CHECK-NEXT: bne [[LDFAILBB:LBB[0-9]+_[0-9]+]] -; CHECK-NEXT: %bb.1: -; CHECK-NEXT: dmb ish +; CHECK-NEXT: @ %bb.0: @ %cmpxchg.start +; CHECK-NEXT: ldrex r0, [r1] +; CHECK-NEXT: cmp r0, r2 +; CHECK-NEXT: beq LBB1_2 +; CHECK-NEXT: @ %bb.1: @ %cmpxchg.nostore ; CHECK-NEXT: mov r0, #0 -; CHECK-NEXT: strex [[SUCCESS:r[0-9]+]], r3, [r1] -; CHECK-NEXT: cmp [[SUCCESS]], #0 +; CHECK-NEXT: clrex +; CHECK-NEXT: bx lr +; CHECK-NEXT: LBB1_2: @ %cmpxchg.fencedstore +; CHECK-NEXT: dmb ish +; CHECK-NEXT: mov r0, #0 +; CHECK-NEXT: strex r2, r3, [r1] +; CHECK-NEXT: cmp r2, #0 ; CHECK-NEXT: bxne lr ; CHECK-NEXT: mov r0, #1 ; CHECK-NEXT: dmb ish ; CHECK-NEXT: bx lr -; CHECK-NEXT: [[LDFAILBB]]: -; CHECK-NEXT: mov r0, #0 -; CHECK-NEXT: clrex -; CHECK-NEXT: bx lr + ret i1 %success } diff --git a/llvm/test/CodeGen/ARM/lsr-unfolded-offset.ll b/llvm/test/CodeGen/ARM/lsr-unfolded-offset.ll index a9a353cad57586..8656c858be5a08 100644 --- a/llvm/test/CodeGen/ARM/lsr-unfolded-offset.ll +++ b/llvm/test/CodeGen/ARM/lsr-unfolded-offset.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -regalloc=greedy -arm-atomic-cfg-tidy=0 < %s | FileCheck %s ; LSR shouldn't introduce more induction variables than needed, increasing @@ -7,8 +8,11 @@ ; CHECK: sub sp, #{{40|36|32|28|24}} ; CHECK: %for.inc -; CHECK-NOT: ldr -; CHECK: add +; CHECK: adds r6, #1 +; CHECK: adds r4, #24 +; CHECK: cmp r1, r6 +; CHECK: bne LBB0_3 + target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:64-v128:32:128-a0:0:32-n32" target triple = "thumbv7-apple-ios" diff --git a/llvm/test/CodeGen/ARM/machine-cse-cmp.ll b/llvm/test/CodeGen/ARM/machine-cse-cmp.ll index 49dbb03135f5a4..e079f9fd965768 100644 --- a/llvm/test/CodeGen/ARM/machine-cse-cmp.ll +++ 
b/llvm/test/CodeGen/ARM/machine-cse-cmp.ll @@ -52,7 +52,7 @@ entry: ; CHECK-LABEL: f3: ; CHECK-NOT: sub ; CHECK: cmp -; CHECK: blt +; CHECK: bge %0 = load i32, i32* %offset, align 4 %cmp = icmp slt i32 %0, %size %s = sub nsw i32 %0, %size diff --git a/llvm/test/CodeGen/Hexagon/newvaluejump2.ll b/llvm/test/CodeGen/Hexagon/newvaluejump2.ll index 99c9d1a60af7c4..f03c7b40703988 100644 --- a/llvm/test/CodeGen/Hexagon/newvaluejump2.ll +++ b/llvm/test/CodeGen/Hexagon/newvaluejump2.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -march=hexagon -mcpu=hexagonv5 -disable-hexagon-misched < %s \ ; RUN: | FileCheck %s ; Check that we generate new value jump, both registers, with one @@ -5,8 +6,35 @@ @Reg = common global i32 0, align 4 define i32 @main() nounwind { +; CHECK-LABEL: main: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: { +; CHECK-NEXT: r1 = memw(gp+#Reg) +; CHECK-NEXT: allocframe(r29,#8):raw +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r0 = memw(r29+#4) +; CHECK-NEXT: if (!cmp.gt(r0.new,r1)) jump:nt .LBB0_1 +; CHECK-NEXT: } +; CHECK-NEXT: // %bb.2: // %if.else +; CHECK-NEXT: { +; CHECK-NEXT: call baz +; CHECK-NEXT: r1:0 = combine(#20,#10) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r0 = #0 +; CHECK-NEXT: dealloc_return +; CHECK-NEXT: } +; CHECK-NEXT: .LBB0_1: // %if.then +; CHECK-NEXT: { +; CHECK-NEXT: call bar +; CHECK-NEXT: r1:0 = combine(#2,#1) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r0 = #0 +; CHECK-NEXT: dealloc_return +; CHECK-NEXT: } entry: -; CHECK: if (cmp.gt(r{{[0-9]+}}.new,r{{[0-9]+}})) jump:{{[t|nt]}} .LBB{{[0-9]+}}_{{[0-9]+}} %Reg2 = alloca i32, align 4 %0 = load i32, i32* %Reg2, align 4 %1 = load i32, i32* @Reg, align 4 diff --git a/llvm/test/CodeGen/Mips/brcongt.ll b/llvm/test/CodeGen/Mips/brcongt.ll index 7dffdb4112118b..223245bc622de0 100644 --- a/llvm/test/CodeGen/Mips/brcongt.ll +++ b/llvm/test/CodeGen/Mips/brcongt.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been 
autogenerated by utils/update_llc_test_checks.py ; RUN: llc -march=mipsel -mattr=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16 @i = global i32 5, align 4 @@ -6,14 +7,32 @@ @result = global i32 0, align 4 define void @test() nounwind { +; 16-LABEL: test: +; 16: # %bb.0: # %entry +; 16-NEXT: lui $2, %hi(_gp_disp) +; 16-NEXT: addiu $2, $2, %lo(_gp_disp) +; 16-NEXT: li $2, %hi(_gp_disp) +; 16-NEXT: addiu $3, $pc, %lo(_gp_disp) +; 16-NEXT: sll $2, $2, 16 +; 16-NEXT: addu $2, $3, $2 +; 16-NEXT: lw $3, %got(i)($2) +; 16-NEXT: lw $4, %got(j)($2) +; 16-NEXT: lw $3, 0($3) +; 16-NEXT: lw $4, 0($4) +; 16-NEXT: slt $4, $3 +; 16-NEXT: bteqz $BB0_2 # 16 bit inst +; 16-NEXT: # %bb.1: # %if.end +; 16-NEXT: jrc $ra +; 16-NEXT: $BB0_2: # %if.then +; 16-NEXT: lw $2, %got(result)($2) +; 16-NEXT: li $3, 1 +; 16-NEXT: sw $3, 0($2) +; 16-NEXT: jrc $ra entry: %0 = load i32, i32* @i, align 4 %1 = load i32, i32* @j, align 4 %cmp = icmp sgt i32 %0, %1 br i1 %cmp, label %if.end, label %if.then -; 16: slt ${{[0-9]+}}, ${{[0-9]+}} -; 16: btnez $[[LABEL:[0-9A-Ba-b_]+]] -; 16: $[[LABEL]]: if.then: ; preds = %entry store i32 1, i32* @result, align 4 br label %if.end diff --git a/llvm/test/CodeGen/Mips/brconlt.ll b/llvm/test/CodeGen/Mips/brconlt.ll index 65f6c347b6710d..3b4ea80d020558 100644 --- a/llvm/test/CodeGen/Mips/brconlt.ll +++ b/llvm/test/CodeGen/Mips/brconlt.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -march=mipsel -mattr=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16 ; RUN: llc -march=mips -mattr=micromips -mcpu=mips32r6 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=MM32R6 @@ -7,16 +8,52 @@ @result = global i32 0, align 4 define void @test() nounwind { +; 16-LABEL: test: +; 16: # %bb.0: # %entry +; 16-NEXT: lui $2, %hi(_gp_disp) +; 16-NEXT: addiu $2, $2, %lo(_gp_disp) +; 16-NEXT: li $2, %hi(_gp_disp) +; 16-NEXT: addiu $3, $pc, %lo(_gp_disp) +; 16-NEXT: sll $2, $2, 
16 +; 16-NEXT: addu $2, $3, $2 +; 16-NEXT: lw $3, %got(i)($2) +; 16-NEXT: lw $4, %got(j)($2) +; 16-NEXT: lw $3, 0($3) +; 16-NEXT: lw $4, 0($4) +; 16-NEXT: slt $4, $3 +; 16-NEXT: bteqz $BB0_2 # 16 bit inst +; 16-NEXT: # %bb.1: # %if.end +; 16-NEXT: jrc $ra +; 16-NEXT: $BB0_2: # %if.then +; 16-NEXT: lw $2, %got(result)($2) +; 16-NEXT: li $3, 1 +; 16-NEXT: sw $3, 0($2) +; 16-NEXT: jrc $ra +; +; MM32R6-LABEL: test: +; MM32R6: # %bb.0: # %entry +; MM32R6-NEXT: lui $2, %hi(_gp_disp) +; MM32R6-NEXT: addiu $2, $2, %lo(_gp_disp) +; MM32R6-NEXT: addu $2, $2, $25 +; MM32R6-NEXT: lw $3, %got(i)($2) +; MM32R6-NEXT: lw $4, %got(j)($2) +; MM32R6-NEXT: lw16 $3, 0($3) +; MM32R6-NEXT: lw16 $4, 0($4) +; MM32R6-NEXT: slt $1, $4, $3 +; MM32R6-NEXT: beqzc $1, $BB0_2 +; MM32R6-NEXT: # %bb.1: # %if.end +; MM32R6-NEXT: jrc $ra +; MM32R6-NEXT: $BB0_2: # %if.then +; MM32R6-NEXT: lw $2, %got(result)($2) +; MM32R6-NEXT: li16 $3, 1 +; MM32R6-NEXT: sw16 $3, 0($2) +; MM32R6-NEXT: jrc $ra entry: %0 = load i32, i32* @j, align 4 %1 = load i32, i32* @i, align 4 %cmp = icmp slt i32 %0, %1 br i1 %cmp, label %if.end, label %if.then -; 16: slt ${{[0-9]+}}, ${{[0-9]+}} -; MM32R6: slt ${{[0-9]+}}, ${{[0-9]+}} -; 16: btnez $[[LABEL:[0-9A-Ba-b_]+]] -; 16: $[[LABEL]]: if.then: ; preds = %entry store i32 1, i32* @result, align 4 diff --git a/llvm/test/CodeGen/Mips/brconne.ll b/llvm/test/CodeGen/Mips/brconne.ll index e0cbe378fe3c6a..da11e842ada77c 100644 --- a/llvm/test/CodeGen/Mips/brconne.ll +++ b/llvm/test/CodeGen/Mips/brconne.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -march=mipsel -mattr=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16 @i = global i32 5, align 4 @@ -5,15 +6,32 @@ @result = global i32 0, align 4 define void @test() nounwind { +; 16-LABEL: test: +; 16: # %bb.0: # %entry +; 16-NEXT: lui $2, %hi(_gp_disp) +; 16-NEXT: addiu $2, $2, %lo(_gp_disp) +; 16-NEXT: li $2, %hi(_gp_disp) +; 16-NEXT: addiu $3, $pc, 
%lo(_gp_disp) +; 16-NEXT: sll $2, $2, 16 +; 16-NEXT: addu $2, $3, $2 +; 16-NEXT: lw $3, %got(i)($2) +; 16-NEXT: lw $4, %got(j)($2) +; 16-NEXT: lw $3, 0($3) +; 16-NEXT: lw $4, 0($4) +; 16-NEXT: cmp $4, $3 +; 16-NEXT: bteqz $BB0_2 # 16 bit inst +; 16-NEXT: # %bb.1: # %if.end +; 16-NEXT: jrc $ra +; 16-NEXT: $BB0_2: # %if.then +; 16-NEXT: lw $2, %got(result)($2) +; 16-NEXT: li $3, 1 +; 16-NEXT: sw $3, 0($2) +; 16-NEXT: jrc $ra entry: %0 = load i32, i32* @j, align 4 %1 = load i32, i32* @i, align 4 %cmp = icmp eq i32 %0, %1 br i1 %cmp, label %if.then, label %if.end -; 16: cmp ${{[0-9]+}}, ${{[0-9]+}} -; 16: btnez $[[LABEL:[0-9A-Ba-b_]+]] -; 16: lw ${{[0-9]+}}, %got(result)(${{[0-9]+}}) -; 16: $[[LABEL]]: if.then: ; preds = %entry store i32 1, i32* @result, align 4 diff --git a/llvm/test/CodeGen/Mips/compactbranches/no-beqzc-bnezc.ll b/llvm/test/CodeGen/Mips/compactbranches/no-beqzc-bnezc.ll index 5e607c2639f3b4..27194ef77f7c24 100644 --- a/llvm/test/CodeGen/Mips/compactbranches/no-beqzc-bnezc.ll +++ b/llvm/test/CodeGen/Mips/compactbranches/no-beqzc-bnezc.ll @@ -1,16 +1,57 @@ -; RUN: llc -march=mipsel -mcpu=mips32r6 -disable-mips-delay-filler < %s | FileCheck %s -; RUN: llc -march=mips -mcpu=mips32r6 -disable-mips-delay-filler < %s -filetype=obj \ -; RUN: -o - | llvm-objdump -d - | FileCheck %s --check-prefix=ENCODING -; RUN: llc -march=mipsel -mcpu=mips64r6 -disable-mips-delay-filler -target-abi=n64 < %s | FileCheck %s -; RUN: llc -march=mips -mcpu=mips64r6 -disable-mips-delay-filler -target-abi=n64 < %s -filetype=obj \ -; RUN: -o - | llvm-objdump -d - | FileCheck %s --check-prefix=ENCODING +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -march=mipsel -mcpu=mips32r6 -disable-mips-delay-filler < %s | FileCheck %s --check-prefixes=ENCODING,MIPSEL32 +; RUN: llc -march=mips -mcpu=mips32r6 -disable-mips-delay-filler < %s | FileCheck %s --check-prefixes=ENCODING,MIPS32 +; RUN: llc -march=mipsel -mcpu=mips64r6 
-disable-mips-delay-filler -target-abi=n64 < %s | FileCheck %s --check-prefixes=ENCODING,MIPSEL64 +; RUN: llc -march=mips -mcpu=mips64r6 -disable-mips-delay-filler -target-abi=n64 < %s | FileCheck %s --check-prefixes=ENCODING,MIPS64 ; bnezc and beqzc have restriction that $rt != 0 define i32 @f() { ; CHECK-LABEL: f: ; CHECK-NOT: bnezc $0 - +; MIPSEL32-LABEL: f: +; MIPSEL32: # %bb.0: +; MIPSEL32-NEXT: bnez $zero, $BB0_2 +; MIPSEL32-NEXT: nop +; MIPSEL32-NEXT: # %bb.1: # %if.then +; MIPSEL32-NEXT: addiu $2, $zero, 1 +; MIPSEL32-NEXT: jrc $ra +; MIPSEL32-NEXT: $BB0_2: # %if.end +; MIPSEL32-NEXT: addiu $2, $zero, 0 +; MIPSEL32-NEXT: jrc $ra +; +; MIPS32-LABEL: f: +; MIPS32: # %bb.0: +; MIPS32-NEXT: bnez $zero, $BB0_2 +; MIPS32-NEXT: nop +; MIPS32-NEXT: # %bb.1: # %if.then +; MIPS32-NEXT: addiu $2, $zero, 1 +; MIPS32-NEXT: jrc $ra +; MIPS32-NEXT: $BB0_2: # %if.end +; MIPS32-NEXT: addiu $2, $zero, 0 +; MIPS32-NEXT: jrc $ra +; +; MIPSEL64-LABEL: f: +; MIPSEL64: # %bb.0: +; MIPSEL64-NEXT: bnez $zero, .LBB0_2 +; MIPSEL64-NEXT: nop +; MIPSEL64-NEXT: # %bb.1: # %if.then +; MIPSEL64-NEXT: addiu $2, $zero, 1 +; MIPSEL64-NEXT: jrc $ra +; MIPSEL64-NEXT: .LBB0_2: # %if.end +; MIPSEL64-NEXT: addiu $2, $zero, 0 +; MIPSEL64-NEXT: jrc $ra +; +; MIPS64-LABEL: f: +; MIPS64: # %bb.0: +; MIPS64-NEXT: bnez $zero, .LBB0_2 +; MIPS64-NEXT: nop +; MIPS64-NEXT: # %bb.1: # %if.then +; MIPS64-NEXT: addiu $2, $zero, 1 +; MIPS64-NEXT: jrc $ra +; MIPS64-NEXT: .LBB0_2: # %if.end +; MIPS64-NEXT: addiu $2, $zero, 0 +; MIPS64-NEXT: jrc $ra %cmp = icmp eq i32 1, 1 br i1 %cmp, label %if.then, label %if.end @@ -24,7 +65,49 @@ define i32 @f() { define i32 @f1() { ; CHECK-LABEL: f1: ; CHECK-NOT: beqzc $0 - +; MIPSEL32-LABEL: f1: +; MIPSEL32: # %bb.0: +; MIPSEL32-NEXT: b $BB1_2 +; MIPSEL32-NEXT: nop +; MIPSEL32-NEXT: # %bb.1: # %if.end +; MIPSEL32-NEXT: addiu $2, $zero, 0 +; MIPSEL32-NEXT: jrc $ra +; MIPSEL32-NEXT: $BB1_2: # %if.then +; MIPSEL32-NEXT: addiu $2, $zero, 1 +; MIPSEL32-NEXT: jrc $ra +; +; 
MIPS32-LABEL: f1: +; MIPS32: # %bb.0: +; MIPS32-NEXT: b $BB1_2 +; MIPS32-NEXT: nop +; MIPS32-NEXT: # %bb.1: # %if.end +; MIPS32-NEXT: addiu $2, $zero, 0 +; MIPS32-NEXT: jrc $ra +; MIPS32-NEXT: $BB1_2: # %if.then +; MIPS32-NEXT: addiu $2, $zero, 1 +; MIPS32-NEXT: jrc $ra +; +; MIPSEL64-LABEL: f1: +; MIPSEL64: # %bb.0: +; MIPSEL64-NEXT: b .LBB1_2 +; MIPSEL64-NEXT: nop +; MIPSEL64-NEXT: # %bb.1: # %if.end +; MIPSEL64-NEXT: addiu $2, $zero, 0 +; MIPSEL64-NEXT: jrc $ra +; MIPSEL64-NEXT: .LBB1_2: # %if.then +; MIPSEL64-NEXT: addiu $2, $zero, 1 +; MIPSEL64-NEXT: jrc $ra +; +; MIPS64-LABEL: f1: +; MIPS64: # %bb.0: +; MIPS64-NEXT: b .LBB1_2 +; MIPS64-NEXT: nop +; MIPS64-NEXT: # %bb.1: # %if.end +; MIPS64-NEXT: addiu $2, $zero, 0 +; MIPS64-NEXT: jrc $ra +; MIPS64-NEXT: .LBB1_2: # %if.then +; MIPS64-NEXT: addiu $2, $zero, 1 +; MIPS64-NEXT: jrc $ra %cmp = icmp eq i32 0, 0 br i1 %cmp, label %if.then, label %if.end @@ -40,9 +123,49 @@ define i32 @f1() { ; beqc and bnec have the restriction that $rs < $rt. 
define i32 @f2(i32 %a, i32 %b) { -; ENCODING-LABEL: : -; ENCODING-NOT: beqc $5, $4 -; ENCODING-NOT: bnec $5, $4 +; MIPSEL32-LABEL: f2: +; MIPSEL32: # %bb.0: +; MIPSEL32-NEXT: beqc $5, $4, $BB2_2 +; MIPSEL32-NEXT: # %bb.1: # %if.end +; MIPSEL32-NEXT: addiu $2, $zero, 0 +; MIPSEL32-NEXT: jrc $ra +; MIPSEL32-NEXT: $BB2_2: # %if.then +; MIPSEL32-NEXT: addiu $2, $zero, 1 +; MIPSEL32-NEXT: jrc $ra +; +; MIPS32-LABEL: f2: +; MIPS32: # %bb.0: +; MIPS32-NEXT: beqc $5, $4, $BB2_2 +; MIPS32-NEXT: # %bb.1: # %if.end +; MIPS32-NEXT: addiu $2, $zero, 0 +; MIPS32-NEXT: jrc $ra +; MIPS32-NEXT: $BB2_2: # %if.then +; MIPS32-NEXT: addiu $2, $zero, 1 +; MIPS32-NEXT: jrc $ra +; +; MIPSEL64-LABEL: f2: +; MIPSEL64: # %bb.0: +; MIPSEL64-NEXT: sll $1, $4, 0 +; MIPSEL64-NEXT: sll $2, $5, 0 +; MIPSEL64-NEXT: beqc $2, $1, .LBB2_2 +; MIPSEL64-NEXT: # %bb.1: # %if.end +; MIPSEL64-NEXT: addiu $2, $zero, 0 +; MIPSEL64-NEXT: jrc $ra +; MIPSEL64-NEXT: .LBB2_2: # %if.then +; MIPSEL64-NEXT: addiu $2, $zero, 1 +; MIPSEL64-NEXT: jrc $ra +; +; MIPS64-LABEL: f2: +; MIPS64: # %bb.0: +; MIPS64-NEXT: sll $1, $4, 0 +; MIPS64-NEXT: sll $2, $5, 0 +; MIPS64-NEXT: beqc $2, $1, .LBB2_2 +; MIPS64-NEXT: # %bb.1: # %if.end +; MIPS64-NEXT: addiu $2, $zero, 0 +; MIPS64-NEXT: jrc $ra +; MIPS64-NEXT: .LBB2_2: # %if.then +; MIPS64-NEXT: addiu $2, $zero, 1 +; MIPS64-NEXT: jrc $ra %cmp = icmp eq i32 %b, %a br i1 %cmp, label %if.then, label %if.end @@ -57,7 +180,53 @@ define i32 @f2(i32 %a, i32 %b) { define i64 @f3() { ; CHECK-LABEL: f3: ; CHECK-NOT: bnezc $0 - +; MIPSEL32-LABEL: f3: +; MIPSEL32: # %bb.0: +; MIPSEL32-NEXT: bnez $zero, $BB3_2 +; MIPSEL32-NEXT: nop +; MIPSEL32-NEXT: # %bb.1: # %if.then +; MIPSEL32-NEXT: addiu $2, $zero, 1 +; MIPSEL32-NEXT: addiu $3, $zero, 0 +; MIPSEL32-NEXT: jrc $ra +; MIPSEL32-NEXT: $BB3_2: # %if.end +; MIPSEL32-NEXT: addiu $2, $zero, 0 +; MIPSEL32-NEXT: addiu $3, $zero, 0 +; MIPSEL32-NEXT: jrc $ra +; +; MIPS32-LABEL: f3: +; MIPS32: # %bb.0: +; MIPS32-NEXT: bnez $zero, $BB3_2 +; 
MIPS32-NEXT: nop +; MIPS32-NEXT: # %bb.1: # %if.then +; MIPS32-NEXT: addiu $2, $zero, 0 +; MIPS32-NEXT: addiu $3, $zero, 1 +; MIPS32-NEXT: jrc $ra +; MIPS32-NEXT: $BB3_2: # %if.end +; MIPS32-NEXT: addiu $2, $zero, 0 +; MIPS32-NEXT: addiu $3, $zero, 0 +; MIPS32-NEXT: jrc $ra +; +; MIPSEL64-LABEL: f3: +; MIPSEL64: # %bb.0: +; MIPSEL64-NEXT: bnez $zero, .LBB3_2 +; MIPSEL64-NEXT: nop +; MIPSEL64-NEXT: # %bb.1: # %if.then +; MIPSEL64-NEXT: daddiu $2, $zero, 1 +; MIPSEL64-NEXT: jrc $ra +; MIPSEL64-NEXT: .LBB3_2: # %if.end +; MIPSEL64-NEXT: daddiu $2, $zero, 0 +; MIPSEL64-NEXT: jrc $ra +; +; MIPS64-LABEL: f3: +; MIPS64: # %bb.0: +; MIPS64-NEXT: bnez $zero, .LBB3_2 +; MIPS64-NEXT: nop +; MIPS64-NEXT: # %bb.1: # %if.then +; MIPS64-NEXT: daddiu $2, $zero, 1 +; MIPS64-NEXT: jrc $ra +; MIPS64-NEXT: .LBB3_2: # %if.end +; MIPS64-NEXT: daddiu $2, $zero, 0 +; MIPS64-NEXT: jrc $ra %cmp = icmp eq i64 1, 1 br i1 %cmp, label %if.then, label %if.end @@ -71,7 +240,53 @@ define i64 @f3() { define i64 @f4() { ; CHECK-LABEL: f4: ; CHECK-NOT: beqzc $0 - +; MIPSEL32-LABEL: f4: +; MIPSEL32: # %bb.0: +; MIPSEL32-NEXT: b $BB4_2 +; MIPSEL32-NEXT: nop +; MIPSEL32-NEXT: # %bb.1: # %if.end +; MIPSEL32-NEXT: addiu $2, $zero, 0 +; MIPSEL32-NEXT: addiu $3, $zero, 0 +; MIPSEL32-NEXT: jrc $ra +; MIPSEL32-NEXT: $BB4_2: # %if.then +; MIPSEL32-NEXT: addiu $2, $zero, 1 +; MIPSEL32-NEXT: addiu $3, $zero, 0 +; MIPSEL32-NEXT: jrc $ra +; +; MIPS32-LABEL: f4: +; MIPS32: # %bb.0: +; MIPS32-NEXT: b $BB4_2 +; MIPS32-NEXT: nop +; MIPS32-NEXT: # %bb.1: # %if.end +; MIPS32-NEXT: addiu $2, $zero, 0 +; MIPS32-NEXT: addiu $3, $zero, 0 +; MIPS32-NEXT: jrc $ra +; MIPS32-NEXT: $BB4_2: # %if.then +; MIPS32-NEXT: addiu $2, $zero, 0 +; MIPS32-NEXT: addiu $3, $zero, 1 +; MIPS32-NEXT: jrc $ra +; +; MIPSEL64-LABEL: f4: +; MIPSEL64: # %bb.0: +; MIPSEL64-NEXT: b .LBB4_2 +; MIPSEL64-NEXT: nop +; MIPSEL64-NEXT: # %bb.1: # %if.end +; MIPSEL64-NEXT: daddiu $2, $zero, 0 +; MIPSEL64-NEXT: jrc $ra +; MIPSEL64-NEXT: .LBB4_2: # %if.then +; 
MIPSEL64-NEXT: daddiu $2, $zero, 1 +; MIPSEL64-NEXT: jrc $ra +; +; MIPS64-LABEL: f4: +; MIPS64: # %bb.0: +; MIPS64-NEXT: b .LBB4_2 +; MIPS64-NEXT: nop +; MIPS64-NEXT: # %bb.1: # %if.end +; MIPS64-NEXT: daddiu $2, $zero, 0 +; MIPS64-NEXT: jrc $ra +; MIPS64-NEXT: .LBB4_2: # %if.then +; MIPS64-NEXT: daddiu $2, $zero, 1 +; MIPS64-NEXT: jrc $ra %cmp = icmp eq i64 0, 0 br i1 %cmp, label %if.then, label %if.end @@ -87,9 +302,55 @@ define i64 @f4() { ; beqc and bnec have the restriction that $rs < $rt. define i64 @f5(i64 %a, i64 %b) { -; ENCODING-LABEL: : -; ENCODING-NOT: beqc $5, $4 -; ENCODING-NOT: bnec $5, $4 +; MIPSEL32-LABEL: f5: +; MIPSEL32: # %bb.0: +; MIPSEL32-NEXT: xor $1, $7, $5 +; MIPSEL32-NEXT: xor $2, $6, $4 +; MIPSEL32-NEXT: or $1, $2, $1 +; MIPSEL32-NEXT: beqzc $1, $BB5_2 +; MIPSEL32-NEXT: # %bb.1: # %if.end +; MIPSEL32-NEXT: addiu $2, $zero, 0 +; MIPSEL32-NEXT: addiu $3, $zero, 0 +; MIPSEL32-NEXT: jrc $ra +; MIPSEL32-NEXT: $BB5_2: # %if.then +; MIPSEL32-NEXT: addiu $2, $zero, 1 +; MIPSEL32-NEXT: addiu $3, $zero, 0 +; MIPSEL32-NEXT: jrc $ra +; +; MIPS32-LABEL: f5: +; MIPS32: # %bb.0: +; MIPS32-NEXT: xor $1, $6, $4 +; MIPS32-NEXT: xor $2, $7, $5 +; MIPS32-NEXT: or $1, $2, $1 +; MIPS32-NEXT: beqzc $1, $BB5_2 +; MIPS32-NEXT: # %bb.1: # %if.end +; MIPS32-NEXT: addiu $2, $zero, 0 +; MIPS32-NEXT: addiu $3, $zero, 0 +; MIPS32-NEXT: jrc $ra +; MIPS32-NEXT: $BB5_2: # %if.then +; MIPS32-NEXT: addiu $2, $zero, 0 +; MIPS32-NEXT: addiu $3, $zero, 1 +; MIPS32-NEXT: jrc $ra +; +; MIPSEL64-LABEL: f5: +; MIPSEL64: # %bb.0: +; MIPSEL64-NEXT: beqc $5, $4, .LBB5_2 +; MIPSEL64-NEXT: # %bb.1: # %if.end +; MIPSEL64-NEXT: daddiu $2, $zero, 0 +; MIPSEL64-NEXT: jrc $ra +; MIPSEL64-NEXT: .LBB5_2: # %if.then +; MIPSEL64-NEXT: daddiu $2, $zero, 1 +; MIPSEL64-NEXT: jrc $ra +; +; MIPS64-LABEL: f5: +; MIPS64: # %bb.0: +; MIPS64-NEXT: beqc $5, $4, .LBB5_2 +; MIPS64-NEXT: # %bb.1: # %if.end +; MIPS64-NEXT: daddiu $2, $zero, 0 +; MIPS64-NEXT: jrc $ra +; MIPS64-NEXT: .LBB5_2: # %if.then +; 
MIPS64-NEXT: daddiu $2, $zero, 1 +; MIPS64-NEXT: jrc $ra %cmp = icmp eq i64 %b, %a br i1 %cmp, label %if.then, label %if.end @@ -104,7 +365,47 @@ define i64 @f5(i64 %a, i64 %b) { define i32 @f6(i32 %a) { ; CHECK-LABEL: f6: ; CHECK: beqzc ${{[0-9]+}}, {{((\$)|(\.L))}}BB - +; MIPSEL32-LABEL: f6: +; MIPSEL32: # %bb.0: +; MIPSEL32-NEXT: beqzc $4, $BB6_2 +; MIPSEL32-NEXT: # %bb.1: # %if.end +; MIPSEL32-NEXT: addiu $2, $zero, 0 +; MIPSEL32-NEXT: jrc $ra +; MIPSEL32-NEXT: $BB6_2: # %if.then +; MIPSEL32-NEXT: addiu $2, $zero, 1 +; MIPSEL32-NEXT: jrc $ra +; +; MIPS32-LABEL: f6: +; MIPS32: # %bb.0: +; MIPS32-NEXT: beqzc $4, $BB6_2 +; MIPS32-NEXT: # %bb.1: # %if.end +; MIPS32-NEXT: addiu $2, $zero, 0 +; MIPS32-NEXT: jrc $ra +; MIPS32-NEXT: $BB6_2: # %if.then +; MIPS32-NEXT: addiu $2, $zero, 1 +; MIPS32-NEXT: jrc $ra +; +; MIPSEL64-LABEL: f6: +; MIPSEL64: # %bb.0: +; MIPSEL64-NEXT: sll $1, $4, 0 +; MIPSEL64-NEXT: beqzc $1, .LBB6_2 +; MIPSEL64-NEXT: # %bb.1: # %if.end +; MIPSEL64-NEXT: addiu $2, $zero, 0 +; MIPSEL64-NEXT: jrc $ra +; MIPSEL64-NEXT: .LBB6_2: # %if.then +; MIPSEL64-NEXT: addiu $2, $zero, 1 +; MIPSEL64-NEXT: jrc $ra +; +; MIPS64-LABEL: f6: +; MIPS64: # %bb.0: +; MIPS64-NEXT: sll $1, $4, 0 +; MIPS64-NEXT: beqzc $1, .LBB6_2 +; MIPS64-NEXT: # %bb.1: # %if.end +; MIPS64-NEXT: addiu $2, $zero, 0 +; MIPS64-NEXT: jrc $ra +; MIPS64-NEXT: .LBB6_2: # %if.then +; MIPS64-NEXT: addiu $2, $zero, 1 +; MIPS64-NEXT: jrc $ra %cmp = icmp eq i32 %a, 0 br i1 %cmp, label %if.then, label %if.end @@ -118,7 +419,47 @@ define i32 @f6(i32 %a) { define i32 @f7(i32 %a) { ; CHECK-LABEL: f7: ; CHECK: bnezc ${{[0-9]+}}, {{((\$)|(\.L))}}BB - +; MIPSEL32-LABEL: f7: +; MIPSEL32: # %bb.0: +; MIPSEL32-NEXT: beqzc $4, $BB7_2 +; MIPSEL32-NEXT: # %bb.1: # %if.end +; MIPSEL32-NEXT: addiu $2, $zero, 0 +; MIPSEL32-NEXT: jrc $ra +; MIPSEL32-NEXT: $BB7_2: # %if.then +; MIPSEL32-NEXT: addiu $2, $zero, 1 +; MIPSEL32-NEXT: jrc $ra +; +; MIPS32-LABEL: f7: +; MIPS32: # %bb.0: +; MIPS32-NEXT: beqzc $4, $BB7_2 +; 
MIPS32-NEXT: # %bb.1: # %if.end +; MIPS32-NEXT: addiu $2, $zero, 0 +; MIPS32-NEXT: jrc $ra +; MIPS32-NEXT: $BB7_2: # %if.then +; MIPS32-NEXT: addiu $2, $zero, 1 +; MIPS32-NEXT: jrc $ra +; +; MIPSEL64-LABEL: f7: +; MIPSEL64: # %bb.0: +; MIPSEL64-NEXT: sll $1, $4, 0 +; MIPSEL64-NEXT: beqzc $1, .LBB7_2 +; MIPSEL64-NEXT: # %bb.1: # %if.end +; MIPSEL64-NEXT: addiu $2, $zero, 0 +; MIPSEL64-NEXT: jrc $ra +; MIPSEL64-NEXT: .LBB7_2: # %if.then +; MIPSEL64-NEXT: addiu $2, $zero, 1 +; MIPSEL64-NEXT: jrc $ra +; +; MIPS64-LABEL: f7: +; MIPS64: # %bb.0: +; MIPS64-NEXT: sll $1, $4, 0 +; MIPS64-NEXT: beqzc $1, .LBB7_2 +; MIPS64-NEXT: # %bb.1: # %if.end +; MIPS64-NEXT: addiu $2, $zero, 0 +; MIPS64-NEXT: jrc $ra +; MIPS64-NEXT: .LBB7_2: # %if.then +; MIPS64-NEXT: addiu $2, $zero, 1 +; MIPS64-NEXT: jrc $ra %cmp = icmp eq i32 0, %a br i1 %cmp, label %if.then, label %if.end diff --git a/llvm/test/CodeGen/Mips/lcb2.ll b/llvm/test/CodeGen/Mips/lcb2.ll index 4987c606e3300d..6a0be713c47f87 100644 --- a/llvm/test/CodeGen/Mips/lcb2.ll +++ b/llvm/test/CodeGen/Mips/lcb2.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mattr=mips16 -mattr=+soft-float -mips16-hard-float -relocation-model=static -mips16-constant-islands=true < %s | FileCheck %s -check-prefix=lcb ; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mattr=mips16 -mattr=+soft-float -mips16-hard-float -relocation-model=static -mips16-constant-islands=true < %s | FileCheck %s -check-prefix=lcbn @@ -8,6 +9,51 @@ ; Function Attrs: nounwind optsize define i32 @bnez() #0 { +; lcb-LABEL: bnez: +; lcb: # %bb.0: # %entry +; lcb-NEXT: li $2, %hi(i) +; lcb-NEXT: sll $2, $2, 16 +; lcb-NEXT: lw $3, %lo(i)($2) +; lcb-NEXT: bnez $3, $BB0_2 +; lcb-NEXT: # %bb.1: # %if.then +; lcb-NEXT: li $3, 0 +; lcb-NEXT: #APP +; lcb-NEXT: .set push +; lcb-NEXT: .set at +; lcb-NEXT: .set macro +; lcb-NEXT: .set reorder +; lcb-EMPTY: +; lcb-NEXT: .space 10000 +; 
lcb-EMPTY: +; lcb-NEXT: .set pop +; lcb-NEXT: #NO_APP +; lcb-NEXT: sw $3, %lo(i)($2) +; lcb-NEXT: $BB0_2: # %if.end +; lcb-NEXT: li $2, 0 +; lcb-NEXT: jrc $ra +; +; lcbn-LABEL: bnez: +; lcbn: # %bb.0: # %entry +; lcbn-NEXT: li $2, %hi(i) +; lcbn-NEXT: sll $2, $2, 16 +; lcbn-NEXT: lw $3, %lo(i)($2) +; lcbn-NEXT: bnez $3, $BB0_2 +; lcbn-NEXT: # %bb.1: # %if.then +; lcbn-NEXT: li $3, 0 +; lcbn-NEXT: #APP +; lcbn-NEXT: .set push +; lcbn-NEXT: .set at +; lcbn-NEXT: .set macro +; lcbn-NEXT: .set reorder +; lcbn-EMPTY: +; lcbn-NEXT: .space 10000 +; lcbn-EMPTY: +; lcbn-NEXT: .set pop +; lcbn-NEXT: #NO_APP +; lcbn-NEXT: sw $3, %lo(i)($2) +; lcbn-NEXT: $BB0_2: # %if.end +; lcbn-NEXT: li $2, 0 +; lcbn-NEXT: jrc $ra entry: %0 = load i32, i32* @i, align 4, !tbaa !1 %cmp = icmp eq i32 %0, 0 @@ -21,15 +67,90 @@ if.then: ; preds = %entry if.end: ; preds = %if.then, %entry ret i32 0 } -; lcb: .ent bnez -; lcbn: .ent bnez -; lcb: bnez ${{[0-9]+}}, $BB{{[0-9]+}}_{{[0-9]+}} -; lcbn-NOT: bnez ${{[0-9]+}}, $BB{{[0-9]+}}_{{[0-9]+}} # 16 bit inst -; lcb: .end bnez -; lcbn: .end bnez ; Function Attrs: nounwind optsize define i32 @beqz() #0 { +; lcb-LABEL: beqz: +; lcb: # %bb.0: # %entry +; lcb-NEXT: li $2, %hi(i) +; lcb-NEXT: sll $2, $2, 16 +; lcb-NEXT: lw $2, %lo(i)($2) +; lcb-NEXT: beqz $2, $BB1_2 +; lcb-NEXT: # %bb.1: # %if.else +; lcb-NEXT: li $2, %hi(j) +; lcb-NEXT: sll $2, $2, 16 +; lcb-NEXT: li $3, 55 +; lcb-NEXT: sw $3, %lo(j)($2) +; lcb-NEXT: #APP +; lcb-NEXT: .set push +; lcb-NEXT: .set at +; lcb-NEXT: .set macro +; lcb-NEXT: .set reorder +; lcb-EMPTY: +; lcb-NEXT: .space 10000 +; lcb-EMPTY: +; lcb-NEXT: .set pop +; lcb-NEXT: #NO_APP +; lcb-NEXT: b $BB1_3 +; lcb-NEXT: $BB1_2: # %if.then +; lcb-NEXT: li $2, %hi(j) +; lcb-NEXT: sll $2, $2, 16 +; lcb-NEXT: li $3, 10 +; lcb-NEXT: sw $3, %lo(j)($2) +; lcb-NEXT: #APP +; lcb-NEXT: .set push +; lcb-NEXT: .set at +; lcb-NEXT: .set macro +; lcb-NEXT: .set reorder +; lcb-EMPTY: +; lcb-NEXT: .space 10000 +; lcb-EMPTY: +; lcb-NEXT: .set pop 
+; lcb-NEXT: #NO_APP +; lcb-NEXT: $BB1_3: # %if.end +; lcb-NEXT: li $2, 0 +; lcb-NEXT: jrc $ra +; +; lcbn-LABEL: beqz: +; lcbn: # %bb.0: # %entry +; lcbn-NEXT: li $2, %hi(i) +; lcbn-NEXT: sll $2, $2, 16 +; lcbn-NEXT: lw $2, %lo(i)($2) +; lcbn-NEXT: beqz $2, $BB1_2 +; lcbn-NEXT: # %bb.1: # %if.else +; lcbn-NEXT: li $2, %hi(j) +; lcbn-NEXT: sll $2, $2, 16 +; lcbn-NEXT: li $3, 55 +; lcbn-NEXT: sw $3, %lo(j)($2) +; lcbn-NEXT: #APP +; lcbn-NEXT: .set push +; lcbn-NEXT: .set at +; lcbn-NEXT: .set macro +; lcbn-NEXT: .set reorder +; lcbn-EMPTY: +; lcbn-NEXT: .space 10000 +; lcbn-EMPTY: +; lcbn-NEXT: .set pop +; lcbn-NEXT: #NO_APP +; lcbn-NEXT: b $BB1_3 +; lcbn-NEXT: $BB1_2: # %if.then +; lcbn-NEXT: li $2, %hi(j) +; lcbn-NEXT: sll $2, $2, 16 +; lcbn-NEXT: li $3, 10 +; lcbn-NEXT: sw $3, %lo(j)($2) +; lcbn-NEXT: #APP +; lcbn-NEXT: .set push +; lcbn-NEXT: .set at +; lcbn-NEXT: .set macro +; lcbn-NEXT: .set reorder +; lcbn-EMPTY: +; lcbn-NEXT: .space 10000 +; lcbn-EMPTY: +; lcbn-NEXT: .set pop +; lcbn-NEXT: #NO_APP +; lcbn-NEXT: $BB1_3: # %if.end +; lcbn-NEXT: li $2, 0 +; lcbn-NEXT: jrc $ra entry: %0 = load i32, i32* @i, align 4, !tbaa !1 %cmp = icmp eq i32 %0, 0 @@ -49,16 +170,93 @@ if.end: ; preds = %if.else, %if.then ret i32 0 } -; lcb: .ent beqz -; lcbn: .ent beqz -; lcb: beqz ${{[0-9]+}}, $BB{{[0-9]+}}_{{[0-9]+}} -; lcbn-NOT: beqz ${{[0-9]+}}, $BB{{[0-9]+}}_{{[0-9]+}} # 16 bit inst -; lcb: .end beqz -; lcbn: .end beqz - - ; Function Attrs: nounwind optsize define void @bteqz() #0 { +; lcb-LABEL: bteqz: +; lcb: # %bb.0: # %entry +; lcb-NEXT: li $2, %hi(j) +; lcb-NEXT: sll $2, $2, 16 +; lcb-NEXT: lw $2, %lo(j)($2) +; lcb-NEXT: li $3, %hi(i) +; lcb-NEXT: sll $3, $3, 16 +; lcb-NEXT: lw $3, %lo(i)($3) +; lcb-NEXT: cmp $3, $2 +; lcb-NEXT: bteqz $BB2_2 +; lcb-NEXT: # %bb.1: # %if.else +; lcb-NEXT: li $2, %hi(k) +; lcb-NEXT: sll $2, $2, 16 +; lcb-NEXT: #APP +; lcb-NEXT: .set push +; lcb-NEXT: .set at +; lcb-NEXT: .set macro +; lcb-NEXT: .set reorder +; lcb-EMPTY: +; lcb-NEXT: 
.space 1000 +; lcb-EMPTY: +; lcb-NEXT: .set pop +; lcb-NEXT: #NO_APP +; lcb-NEXT: li $3, 2 +; lcb-NEXT: sw $3, %lo(k)($2) +; lcb-NEXT: jrc $ra +; lcb-NEXT: $BB2_2: # %if.then +; lcb-NEXT: li $2, %hi(k) +; lcb-NEXT: sll $2, $2, 16 +; lcb-NEXT: li $3, 1 +; lcb-NEXT: sw $3, %lo(k)($2) +; lcb-NEXT: #APP +; lcb-NEXT: .set push +; lcb-NEXT: .set at +; lcb-NEXT: .set macro +; lcb-NEXT: .set reorder +; lcb-EMPTY: +; lcb-NEXT: .space 1000 +; lcb-EMPTY: +; lcb-NEXT: .set pop +; lcb-NEXT: #NO_APP +; lcb-NEXT: jrc $ra +; +; lcbn-LABEL: bteqz: +; lcbn: # %bb.0: # %entry +; lcbn-NEXT: li $2, %hi(j) +; lcbn-NEXT: sll $2, $2, 16 +; lcbn-NEXT: lw $2, %lo(j)($2) +; lcbn-NEXT: li $3, %hi(i) +; lcbn-NEXT: sll $3, $3, 16 +; lcbn-NEXT: lw $3, %lo(i)($3) +; lcbn-NEXT: cmp $3, $2 +; lcbn-NEXT: bteqz $BB2_2 +; lcbn-NEXT: # %bb.1: # %if.else +; lcbn-NEXT: li $2, %hi(k) +; lcbn-NEXT: sll $2, $2, 16 +; lcbn-NEXT: #APP +; lcbn-NEXT: .set push +; lcbn-NEXT: .set at +; lcbn-NEXT: .set macro +; lcbn-NEXT: .set reorder +; lcbn-EMPTY: +; lcbn-NEXT: .space 1000 +; lcbn-EMPTY: +; lcbn-NEXT: .set pop +; lcbn-NEXT: #NO_APP +; lcbn-NEXT: li $3, 2 +; lcbn-NEXT: sw $3, %lo(k)($2) +; lcbn-NEXT: jrc $ra +; lcbn-NEXT: $BB2_2: # %if.then +; lcbn-NEXT: li $2, %hi(k) +; lcbn-NEXT: sll $2, $2, 16 +; lcbn-NEXT: li $3, 1 +; lcbn-NEXT: sw $3, %lo(k)($2) +; lcbn-NEXT: #APP +; lcbn-NEXT: .set push +; lcbn-NEXT: .set at +; lcbn-NEXT: .set macro +; lcbn-NEXT: .set reorder +; lcbn-EMPTY: +; lcbn-NEXT: .space 1000 +; lcbn-EMPTY: +; lcbn-NEXT: .set pop +; lcbn-NEXT: #NO_APP +; lcbn-NEXT: jrc $ra entry: %0 = load i32, i32* @i, align 4, !tbaa !1 %1 = load i32, i32* @j, align 4, !tbaa !1 @@ -79,16 +277,65 @@ if.end: ; preds = %if.else, %if.then ret void } -; lcb: .ent bteqz -; lcbn: .ent bteqz -; lcb: btnez $BB{{[0-9]+}}_{{[0-9]+}} -; lcbn-NOT: btnez $BB{{[0-9]+}}_{{[0-9]+}} # 16 bit inst -; lcb: .end bteqz -; lcbn: .end bteqz - - ; Function Attrs: nounwind optsize define void @btz() #0 { +; lcb-LABEL: btz: +; lcb: # %bb.0: 
# %entry +; lcb-NEXT: li $2, %hi(i) +; lcb-NEXT: sll $2, $2, 16 +; lcb-NEXT: lw $4, %lo(i)($2) +; lcb-NEXT: li $3, %hi(j) +; lcb-NEXT: sll $3, $3, 16 +; lcb-NEXT: lw $5, %lo(j)($3) +; lcb-NEXT: slt $5, $4 +; lcb-NEXT: bteqz $BB3_2 +; lcb-NEXT: $BB3_1: # %if.then +; lcb-NEXT: # =>This Inner Loop Header: Depth=1 +; lcb-NEXT: #APP +; lcb-NEXT: .set push +; lcb-NEXT: .set at +; lcb-NEXT: .set macro +; lcb-NEXT: .set reorder +; lcb-EMPTY: +; lcb-NEXT: .space 60000 +; lcb-EMPTY: +; lcb-NEXT: .set pop +; lcb-NEXT: #NO_APP +; lcb-NEXT: lw $4, %lo(i)($2) +; lcb-NEXT: lw $5, %lo(j)($3) +; lcb-NEXT: slt $5, $4 +; lcb-NEXT: btnez $BB3_1 +; lcb-NEXT: $BB3_2: # %if.end +; lcb-NEXT: jrc $ra +; +; lcbn-LABEL: btz: +; lcbn: # %bb.0: # %entry +; lcbn-NEXT: li $2, %hi(i) +; lcbn-NEXT: sll $2, $2, 16 +; lcbn-NEXT: lw $4, %lo(i)($2) +; lcbn-NEXT: li $3, %hi(j) +; lcbn-NEXT: sll $3, $3, 16 +; lcbn-NEXT: lw $5, %lo(j)($3) +; lcbn-NEXT: slt $5, $4 +; lcbn-NEXT: bteqz $BB3_2 +; lcbn-NEXT: $BB3_1: # %if.then +; lcbn-NEXT: # =>This Inner Loop Header: Depth=1 +; lcbn-NEXT: #APP +; lcbn-NEXT: .set push +; lcbn-NEXT: .set at +; lcbn-NEXT: .set macro +; lcbn-NEXT: .set reorder +; lcbn-EMPTY: +; lcbn-NEXT: .space 60000 +; lcbn-EMPTY: +; lcbn-NEXT: .set pop +; lcbn-NEXT: #NO_APP +; lcbn-NEXT: lw $4, %lo(i)($2) +; lcbn-NEXT: lw $5, %lo(j)($3) +; lcbn-NEXT: slt $5, $4 +; lcbn-NEXT: btnez $BB3_1 +; lcbn-NEXT: $BB3_2: # %if.end +; lcbn-NEXT: jrc $ra entry: %0 = load i32, i32* @i, align 4, !tbaa !1 %1 = load i32, i32* @j, align 4, !tbaa !1 @@ -106,15 +353,6 @@ if.end: ; preds = %if.then, %entry ret void } -; lcb: .ent btz -; lcbn: .ent btz -; lcb: bteqz $BB{{[0-9]+}}_{{[0-9]+}} -; lcbn-NOT: bteqz $BB{{[0-9]+}}_{{[0-9]+}} # 16 bit inst -; lcb: btnez $BB{{[0-9]+}}_{{[0-9]+}} -; lcbn-NOT: btnez $BB{{[0-9]+}}_{{[0-9]+}} # 16 bit inst -; lcb: .end btz -; lcbn: .end btz - attributes #0 = { nounwind optsize "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" 
"stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } attributes #1 = { nounwind } diff --git a/llvm/test/CodeGen/Mips/lcb5.ll b/llvm/test/CodeGen/Mips/lcb5.ll index 96e924a44f725c..6b8ebfd07e6ffc 100644 --- a/llvm/test/CodeGen/Mips/lcb5.ll +++ b/llvm/test/CodeGen/Mips/lcb5.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mattr=mips16 -mattr=+soft-float -mips16-hard-float -relocation-model=static < %s | FileCheck %s -check-prefix=ci @i = global i32 0, align 4 @@ -6,6 +7,41 @@ ; Function Attrs: nounwind optsize define i32 @x0() #0 { +; ci-LABEL: x0: +; ci: # %bb.0: # %entry +; ci-NEXT: li $2, %hi(i) +; ci-NEXT: sll $2, $2, 16 +; ci-NEXT: lw $3, %lo(i)($2) +; ci-NEXT: beqz $3, $BB0_2 +; ci-NEXT: # %bb.1: # %if.else +; ci-NEXT: li $3, 1 +; ci-NEXT: #APP +; ci-NEXT: .set push +; ci-NEXT: .set at +; ci-NEXT: .set macro +; ci-NEXT: .set reorder +; ci-EMPTY: +; ci-NEXT: .space 1004 +; ci-EMPTY: +; ci-NEXT: .set pop +; ci-NEXT: #NO_APP +; ci-NEXT: b $BB0_3 # 16 bit inst +; ci-NEXT: $BB0_2: # %if.then +; ci-NEXT: li $3, 0 +; ci-NEXT: #APP +; ci-NEXT: .set push +; ci-NEXT: .set at +; ci-NEXT: .set macro +; ci-NEXT: .set reorder +; ci-EMPTY: +; ci-NEXT: .space 1000 +; ci-EMPTY: +; ci-NEXT: .set pop +; ci-NEXT: #NO_APP +; ci-NEXT: $BB0_3: # %if.end +; ci-NEXT: sw $3, %lo(i)($2) +; ci-NEXT: li $2, 0 +; ci-NEXT: jrc $ra entry: %0 = load i32, i32* @i, align 4, !tbaa !1 %cmp = icmp eq i32 %0, 0 @@ -25,13 +61,48 @@ if.end: ; preds = %if.else, %if.then ret i32 0 } -; ci: .ent x0 -; ci: beqz $3, $BB0_2 -; ci: $BB0_2: -; ci: .end x0 - ; Function Attrs: nounwind optsize define i32 @x1() #0 { +; ci-LABEL: x1: +; ci: # %bb.0: # %entry +; ci-NEXT: li $2, %hi(i) +; ci-NEXT: sll $2, $2, 16 +; ci-NEXT: lw $3, %lo(i)($2) +; ci-NEXT: bnez $3, $BB1_1 # 16 bit inst +; ci-NEXT: jal $BB1_2 # branch +; ci-NEXT: nop +; ci-NEXT: $BB1_1: # %if.else +; ci-NEXT: li $3, 1 
+; ci-NEXT: #APP +; ci-NEXT: .set push +; ci-NEXT: .set at +; ci-NEXT: .set macro +; ci-NEXT: .set reorder +; ci-EMPTY: +; ci-NEXT: .space 1000004 +; ci-EMPTY: +; ci-NEXT: .set pop +; ci-NEXT: #NO_APP +; ci-NEXT: jal $BB1_3 # branch +; ci-NEXT: nop +; ci-NEXT: .p2align 2 +; ci-NEXT: $BB1_2: # %if.then +; ci-NEXT: li $3, 0 +; ci-NEXT: #APP +; ci-NEXT: .set push +; ci-NEXT: .set at +; ci-NEXT: .set macro +; ci-NEXT: .set reorder +; ci-EMPTY: +; ci-NEXT: .space 1000000 +; ci-EMPTY: +; ci-NEXT: .set pop +; ci-NEXT: #NO_APP +; ci-NEXT: .p2align 2 +; ci-NEXT: $BB1_3: # %if.end +; ci-NEXT: sw $3, %lo(i)($2) +; ci-NEXT: li $2, 0 +; ci-NEXT: jrc $ra entry: %0 = load i32, i32* @i, align 4, !tbaa !1 %cmp = icmp eq i32 %0, 0 @@ -51,15 +122,49 @@ if.end: ; preds = %if.else, %if.then ret i32 0 } -; ci: .ent x1 -; ci: bnez $3, $BB1_1 # 16 bit inst -; ci: jal $BB1_2 # branch -; ci: nop -; ci: $BB1_1: -; ci: .end x1 ; Function Attrs: nounwind optsize define i32 @y0() #0 { +; ci-LABEL: y0: +; ci: # %bb.0: # %entry +; ci-NEXT: li $2, %hi(i) +; ci-NEXT: sll $2, $2, 16 +; ci-NEXT: lw $2, %lo(i)($2) +; ci-NEXT: beqz $2, $BB2_2 +; ci-NEXT: # %bb.1: # %if.else +; ci-NEXT: li $2, %hi(j) +; ci-NEXT: sll $2, $2, 16 +; ci-NEXT: li $3, 55 +; ci-NEXT: sw $3, %lo(j)($2) +; ci-NEXT: #APP +; ci-NEXT: .set push +; ci-NEXT: .set at +; ci-NEXT: .set macro +; ci-NEXT: .set reorder +; ci-EMPTY: +; ci-NEXT: .space 1004 +; ci-EMPTY: +; ci-NEXT: .set pop +; ci-NEXT: #NO_APP +; ci-NEXT: b $BB2_3 # 16 bit inst +; ci-NEXT: $BB2_2: # %if.then +; ci-NEXT: li $2, %hi(j) +; ci-NEXT: sll $2, $2, 16 +; ci-NEXT: li $3, 10 +; ci-NEXT: sw $3, %lo(j)($2) +; ci-NEXT: #APP +; ci-NEXT: .set push +; ci-NEXT: .set at +; ci-NEXT: .set macro +; ci-NEXT: .set reorder +; ci-EMPTY: +; ci-NEXT: .space 1000 +; ci-EMPTY: +; ci-NEXT: .set pop +; ci-NEXT: #NO_APP +; ci-NEXT: $BB2_3: # %if.end +; ci-NEXT: li $2, 0 +; ci-NEXT: jrc $ra entry: %0 = load i32, i32* @i, align 4, !tbaa !1 %cmp = icmp eq i32 %0, 0 @@ -79,12 +184,53 @@ 
if.end: ; preds = %if.else, %if.then ret i32 0 } -; ci: .ent y0 -; ci: beqz $2, $BB2_2 -; ci: .end y0 - ; Function Attrs: nounwind optsize define i32 @y1() #0 { +; ci-LABEL: y1: +; ci: # %bb.0: # %entry +; ci-NEXT: li $2, %hi(i) +; ci-NEXT: sll $2, $2, 16 +; ci-NEXT: lw $2, %lo(i)($2) +; ci-NEXT: bnez $2, $BB3_1 # 16 bit inst +; ci-NEXT: jal $BB3_2 # branch +; ci-NEXT: nop +; ci-NEXT: $BB3_1: # %if.else +; ci-NEXT: li $2, %hi(j) +; ci-NEXT: sll $2, $2, 16 +; ci-NEXT: li $3, 55 +; ci-NEXT: sw $3, %lo(j)($2) +; ci-NEXT: #APP +; ci-NEXT: .set push +; ci-NEXT: .set at +; ci-NEXT: .set macro +; ci-NEXT: .set reorder +; ci-EMPTY: +; ci-NEXT: .space 1000004 +; ci-EMPTY: +; ci-NEXT: .set pop +; ci-NEXT: #NO_APP +; ci-NEXT: jal $BB3_3 # branch +; ci-NEXT: nop +; ci-NEXT: .p2align 2 +; ci-NEXT: $BB3_2: # %if.then +; ci-NEXT: li $2, %hi(j) +; ci-NEXT: sll $2, $2, 16 +; ci-NEXT: li $3, 10 +; ci-NEXT: sw $3, %lo(j)($2) +; ci-NEXT: #APP +; ci-NEXT: .set push +; ci-NEXT: .set at +; ci-NEXT: .set macro +; ci-NEXT: .set reorder +; ci-EMPTY: +; ci-NEXT: .space 1000000 +; ci-EMPTY: +; ci-NEXT: .set pop +; ci-NEXT: #NO_APP +; ci-NEXT: .p2align 2 +; ci-NEXT: $BB3_3: # %if.end +; ci-NEXT: li $2, 0 +; ci-NEXT: jrc $ra entry: %0 = load i32, i32* @i, align 4, !tbaa !1 %cmp = icmp eq i32 %0, 0 @@ -104,15 +250,51 @@ if.end: ; preds = %if.else, %if.then ret i32 0 } -; ci: .ent y1 -; ci: bnez $2, $BB3_1 # 16 bit inst -; ci: jal $BB3_2 # branch -; ci: nop -; ci: $BB3_1: -; ci: .end y1 ; Function Attrs: nounwind optsize define void @z0() #0 { +; ci-LABEL: z0: +; ci: # %bb.0: # %entry +; ci-NEXT: li $2, %hi(j) +; ci-NEXT: sll $2, $2, 16 +; ci-NEXT: lw $2, %lo(j)($2) +; ci-NEXT: li $3, %hi(i) +; ci-NEXT: sll $3, $3, 16 +; ci-NEXT: lw $3, %lo(i)($3) +; ci-NEXT: cmp $3, $2 +; ci-NEXT: bteqz $BB4_2 +; ci-NEXT: # %bb.1: # %if.else +; ci-NEXT: li $2, %hi(k) +; ci-NEXT: sll $2, $2, 16 +; ci-NEXT: #APP +; ci-NEXT: .set push +; ci-NEXT: .set at +; ci-NEXT: .set macro +; ci-NEXT: .set reorder +; ci-EMPTY: 
+; ci-NEXT: .space 10004 +; ci-EMPTY: +; ci-NEXT: .set pop +; ci-NEXT: #NO_APP +; ci-NEXT: li $3, 2 +; ci-NEXT: sw $3, %lo(k)($2) +; ci-NEXT: jrc $ra +; ci-NEXT: $BB4_2: # %if.then +; ci-NEXT: li $2, %hi(k) +; ci-NEXT: sll $2, $2, 16 +; ci-NEXT: li $3, 1 +; ci-NEXT: sw $3, %lo(k)($2) +; ci-NEXT: #APP +; ci-NEXT: .set push +; ci-NEXT: .set at +; ci-NEXT: .set macro +; ci-NEXT: .set reorder +; ci-EMPTY: +; ci-NEXT: .space 10000 +; ci-EMPTY: +; ci-NEXT: .set pop +; ci-NEXT: #NO_APP +; ci-NEXT: jrc $ra entry: %0 = load i32, i32* @i, align 4, !tbaa !1 %1 = load i32, i32* @j, align 4, !tbaa !1 @@ -133,12 +315,54 @@ if.end: ; preds = %if.else, %if.then ret void } -; ci: .ent z0 -; ci: btnez $BB4_2 -; ci: .end z0 ; Function Attrs: nounwind optsize define void @z1() #0 { +; ci-LABEL: z1: +; ci: # %bb.0: # %entry +; ci-NEXT: li $2, %hi(j) +; ci-NEXT: sll $2, $2, 16 +; ci-NEXT: lw $2, %lo(j)($2) +; ci-NEXT: li $3, %hi(i) +; ci-NEXT: sll $3, $3, 16 +; ci-NEXT: lw $3, %lo(i)($3) +; ci-NEXT: cmp $3, $2 +; ci-NEXT: btnez $BB5_1 # 16 bit inst +; ci-NEXT: jal $BB5_2 # branch +; ci-NEXT: nop +; ci-NEXT: $BB5_1: # %if.else +; ci-NEXT: li $2, %hi(k) +; ci-NEXT: sll $2, $2, 16 +; ci-NEXT: #APP +; ci-NEXT: .set push +; ci-NEXT: .set at +; ci-NEXT: .set macro +; ci-NEXT: .set reorder +; ci-EMPTY: +; ci-NEXT: .space 10000004 +; ci-EMPTY: +; ci-NEXT: .set pop +; ci-NEXT: #NO_APP +; ci-NEXT: li $3, 2 +; ci-NEXT: sw $3, %lo(k)($2) +; ci-NEXT: jrc $ra +; ci-NEXT: .p2align 2 +; ci-NEXT: $BB5_2: # %if.then +; ci-NEXT: li $2, %hi(k) +; ci-NEXT: sll $2, $2, 16 +; ci-NEXT: li $3, 1 +; ci-NEXT: sw $3, %lo(k)($2) +; ci-NEXT: #APP +; ci-NEXT: .set push +; ci-NEXT: .set at +; ci-NEXT: .set macro +; ci-NEXT: .set reorder +; ci-EMPTY: +; ci-NEXT: .space 10000000 +; ci-EMPTY: +; ci-NEXT: .set pop +; ci-NEXT: #NO_APP +; ci-NEXT: jrc $ra entry: %0 = load i32, i32* @i, align 4, !tbaa !1 %1 = load i32, i32* @j, align 4, !tbaa !1 @@ -159,15 +383,37 @@ if.end: ; preds = %if.else, %if.then ret void } -; ci: 
.ent z1 -; ci: bteqz $BB5_1 # 16 bit inst -; ci: jal $BB5_2 # branch -; ci: nop -; ci: $BB5_1: -; ci: .end z1 ; Function Attrs: nounwind optsize define void @z3() #0 { +; ci-LABEL: z3: +; ci: # %bb.0: # %entry +; ci-NEXT: li $2, %hi(i) +; ci-NEXT: sll $2, $2, 16 +; ci-NEXT: lw $4, %lo(i)($2) +; ci-NEXT: li $3, %hi(j) +; ci-NEXT: sll $3, $3, 16 +; ci-NEXT: lw $5, %lo(j)($3) +; ci-NEXT: slt $5, $4 +; ci-NEXT: bteqz $BB6_2 +; ci-NEXT: $BB6_1: # %if.then +; ci-NEXT: # =>This Inner Loop Header: Depth=1 +; ci-NEXT: #APP +; ci-NEXT: .set push +; ci-NEXT: .set at +; ci-NEXT: .set macro +; ci-NEXT: .set reorder +; ci-EMPTY: +; ci-NEXT: .space 10000 +; ci-EMPTY: +; ci-NEXT: .set pop +; ci-NEXT: #NO_APP +; ci-NEXT: lw $4, %lo(i)($2) +; ci-NEXT: lw $5, %lo(j)($3) +; ci-NEXT: slt $5, $4 +; ci-NEXT: btnez $BB6_1 +; ci-NEXT: $BB6_2: # %if.end +; ci-NEXT: jrc $ra entry: %0 = load i32, i32* @i, align 4, !tbaa !1 %1 = load i32, i32* @j, align 4, !tbaa !1 @@ -185,12 +431,42 @@ if.end: ; preds = %if.then, %entry ret void } -; ci: .ent z3 -; ci: bteqz $BB6_2 -; ci: .end z3 - ; Function Attrs: nounwind optsize define void @z4() #0 { +; ci-LABEL: z4: +; ci: # %bb.0: # %entry +; ci-NEXT: li $2, %hi(i) +; ci-NEXT: sll $2, $2, 16 +; ci-NEXT: lw $4, %lo(i)($2) +; ci-NEXT: li $3, %hi(j) +; ci-NEXT: sll $3, $3, 16 +; ci-NEXT: lw $5, %lo(j)($3) +; ci-NEXT: slt $5, $4 +; ci-NEXT: btnez $BB7_1 # 16 bit inst +; ci-NEXT: jal $BB7_2 # branch +; ci-NEXT: nop +; ci-NEXT: .p2align 2 +; ci-NEXT: $BB7_1: # %if.then +; ci-NEXT: # =>This Inner Loop Header: Depth=1 +; ci-NEXT: #APP +; ci-NEXT: .set push +; ci-NEXT: .set at +; ci-NEXT: .set macro +; ci-NEXT: .set reorder +; ci-EMPTY: +; ci-NEXT: .space 10000000 +; ci-EMPTY: +; ci-NEXT: .set pop +; ci-NEXT: #NO_APP +; ci-NEXT: lw $4, %lo(i)($2) +; ci-NEXT: lw $5, %lo(j)($3) +; ci-NEXT: slt $5, $4 +; ci-NEXT: bteqz $BB7_2 # 16 bit inst +; ci-NEXT: jal $BB7_1 # branch +; ci-NEXT: nop +; ci-NEXT: .p2align 2 +; ci-NEXT: $BB7_2: # %if.end +; ci-NEXT: jrc $ra 
entry: %0 = load i32, i32* @i, align 4, !tbaa !1 %1 = load i32, i32* @j, align 4, !tbaa !1 @@ -208,14 +484,6 @@ if.end: ; preds = %if.then, %entry ret void } -; ci: .ent z4 -; ci: btnez $BB7_1 # 16 bit inst -; ci: jal $BB7_2 # branch -; ci: nop -; ci: .p2align 2 -; ci: $BB7_1: -; ci: .end z4 - attributes #0 = { nounwind optsize "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } attributes #1 = { nounwind } diff --git a/llvm/test/CodeGen/Mips/longbranch/compact-branches-long-branch.ll b/llvm/test/CodeGen/Mips/longbranch/compact-branches-long-branch.ll index 709cd477a778e6..dbd071f81cbfed 100644 --- a/llvm/test/CodeGen/Mips/longbranch/compact-branches-long-branch.ll +++ b/llvm/test/CodeGen/Mips/longbranch/compact-branches-long-branch.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -march=mips -mcpu=mips32r6 -force-mips-long-branch | FileCheck %s ; Check that when MIPS32R6 with the static relocation model with the usage of @@ -9,11 +10,29 @@ declare i32 @f(i32) declare i32 @g() -; CHECK-LABEL: test1: -; CHECK: bnezc -; CHECK-NEXT: nop - define i32 @test1(i32 %a) { +; CHECK-LABEL: test1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addiu $sp, $sp, -24 +; CHECK-NEXT: .cfi_def_cfa_offset 24 +; CHECK-NEXT: sw $ra, 20($sp) # 4-byte Folded Spill +; CHECK-NEXT: .cfi_offset 31, -4 +; CHECK-NEXT: bnezc $4, $BB0_2 +; CHECK-NEXT: nop +; CHECK-NEXT: # %bb.1: # %entry +; CHECK-NEXT: bc $BB0_3 +; CHECK-NEXT: $BB0_2: # %cond.false +; CHECK-NEXT: jal g +; CHECK-NEXT: nop +; CHECK-NEXT: lw $ra, 20($sp) # 4-byte Folded Reload +; CHECK-NEXT: jr $ra +; CHECK-NEXT: addiu $sp, $sp, 24 +; CHECK-NEXT: $BB0_3: # %cond.true +; CHECK-NEXT: jal f +; CHECK-NEXT: nop +; CHECK-NEXT: lw $ra, 20($sp) # 4-byte Folded Reload +; CHECK-NEXT: jr $ra +; CHECK-NEXT: addiu $sp, $sp, 24 entry: %0 = icmp eq i32 
%a, 0 br i1 %0, label %cond.true, label %cond.false @@ -25,11 +44,30 @@ cond.false: ret i32 %2 } -; CHECK-LABEL: test2: -; CHECK: bgezc -; CHECK-NEXT: nop define i32 @test2(i32 %a) { +; CHECK-LABEL: test2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addiu $sp, $sp, -24 +; CHECK-NEXT: .cfi_def_cfa_offset 24 +; CHECK-NEXT: sw $ra, 20($sp) # 4-byte Folded Spill +; CHECK-NEXT: .cfi_offset 31, -4 +; CHECK-NEXT: bgezc $4, $BB1_2 +; CHECK-NEXT: nop +; CHECK-NEXT: # %bb.1: # %entry +; CHECK-NEXT: bc $BB1_3 +; CHECK-NEXT: $BB1_2: # %cond.true +; CHECK-NEXT: jal f +; CHECK-NEXT: nop +; CHECK-NEXT: lw $ra, 20($sp) # 4-byte Folded Reload +; CHECK-NEXT: jr $ra +; CHECK-NEXT: addiu $sp, $sp, 24 +; CHECK-NEXT: $BB1_3: # %cond.false +; CHECK-NEXT: jal g +; CHECK-NEXT: nop +; CHECK-NEXT: lw $ra, 20($sp) # 4-byte Folded Reload +; CHECK-NEXT: jr $ra +; CHECK-NEXT: addiu $sp, $sp, 24 entry: %0 = icmp sge i32 %a, 0 br i1 %0, label %cond.true, label %cond.false @@ -41,11 +79,30 @@ cond.false: ret i32 %2 } -; CHECK-LABEL: test3: -; CHECK: blezc -; CHECK-NEXT: nop define i32 @test3(i32 %a) { +; CHECK-LABEL: test3: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addiu $sp, $sp, -24 +; CHECK-NEXT: .cfi_def_cfa_offset 24 +; CHECK-NEXT: sw $ra, 20($sp) # 4-byte Folded Spill +; CHECK-NEXT: .cfi_offset 31, -4 +; CHECK-NEXT: blezc $4, $BB2_2 +; CHECK-NEXT: nop +; CHECK-NEXT: # %bb.1: # %entry +; CHECK-NEXT: bc $BB2_3 +; CHECK-NEXT: $BB2_2: # %cond.true +; CHECK-NEXT: jal f +; CHECK-NEXT: nop +; CHECK-NEXT: lw $ra, 20($sp) # 4-byte Folded Reload +; CHECK-NEXT: jr $ra +; CHECK-NEXT: addiu $sp, $sp, 24 +; CHECK-NEXT: $BB2_3: # %cond.false +; CHECK-NEXT: jal g +; CHECK-NEXT: nop +; CHECK-NEXT: lw $ra, 20($sp) # 4-byte Folded Reload +; CHECK-NEXT: jr $ra +; CHECK-NEXT: addiu $sp, $sp, 24 entry: %0 = icmp sle i32 %a, 0 br i1 %0, label %cond.true, label %cond.false @@ -57,11 +114,30 @@ cond.false: ret i32 %2 } -; CHECK-LABEL: test4: -; CHECK: bgtzc -; CHECK-NEXT: nop define i32 @test4(i32 %a) { +; 
CHECK-LABEL: test4: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addiu $sp, $sp, -24 +; CHECK-NEXT: .cfi_def_cfa_offset 24 +; CHECK-NEXT: sw $ra, 20($sp) # 4-byte Folded Spill +; CHECK-NEXT: .cfi_offset 31, -4 +; CHECK-NEXT: bgtzc $4, $BB3_2 +; CHECK-NEXT: nop +; CHECK-NEXT: # %bb.1: # %entry +; CHECK-NEXT: bc $BB3_3 +; CHECK-NEXT: $BB3_2: # %cond.true +; CHECK-NEXT: jal f +; CHECK-NEXT: nop +; CHECK-NEXT: lw $ra, 20($sp) # 4-byte Folded Reload +; CHECK-NEXT: jr $ra +; CHECK-NEXT: addiu $sp, $sp, 24 +; CHECK-NEXT: $BB3_3: # %cond.false +; CHECK-NEXT: jal g +; CHECK-NEXT: nop +; CHECK-NEXT: lw $ra, 20($sp) # 4-byte Folded Reload +; CHECK-NEXT: jr $ra +; CHECK-NEXT: addiu $sp, $sp, 24 entry: %0 = icmp sgt i32 %a, 0 br i1 %0, label %cond.true, label %cond.false @@ -73,11 +149,29 @@ cond.false: ret i32 %2 } -; CHECK-LABEL: test5: -; CHECK: bgezc -; CHECK-NEXT: nop - define i32 @test5(i32 %a) { +; CHECK-LABEL: test5: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addiu $sp, $sp, -24 +; CHECK-NEXT: .cfi_def_cfa_offset 24 +; CHECK-NEXT: sw $ra, 20($sp) # 4-byte Folded Spill +; CHECK-NEXT: .cfi_offset 31, -4 +; CHECK-NEXT: bgezc $4, $BB4_2 +; CHECK-NEXT: nop +; CHECK-NEXT: # %bb.1: # %entry +; CHECK-NEXT: bc $BB4_3 +; CHECK-NEXT: $BB4_2: # %cond.false +; CHECK-NEXT: jal g +; CHECK-NEXT: nop +; CHECK-NEXT: lw $ra, 20($sp) # 4-byte Folded Reload +; CHECK-NEXT: jr $ra +; CHECK-NEXT: addiu $sp, $sp, 24 +; CHECK-NEXT: $BB4_3: # %cond.true +; CHECK-NEXT: jal f +; CHECK-NEXT: nop +; CHECK-NEXT: lw $ra, 20($sp) # 4-byte Folded Reload +; CHECK-NEXT: jr $ra +; CHECK-NEXT: addiu $sp, $sp, 24 entry: %0 = icmp slt i32 %a, 0 br i1 %0, label %cond.true, label %cond.false @@ -89,11 +183,30 @@ cond.false: ret i32 %2 } -; CHECK-LABEL: test6: -; CHECK: bnezc -; CHECK-NEXT: nop - define i32 @test6(i32 %a, i32 %b) { +; CHECK-LABEL: test6: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addiu $sp, $sp, -24 +; CHECK-NEXT: .cfi_def_cfa_offset 24 +; CHECK-NEXT: sw $ra, 20($sp) # 4-byte Folded Spill +; 
CHECK-NEXT: .cfi_offset 31, -4 +; CHECK-NEXT: sltu $1, $5, $4 +; CHECK-NEXT: bnezc $1, $BB5_2 +; CHECK-NEXT: nop +; CHECK-NEXT: # %bb.1: # %entry +; CHECK-NEXT: bc $BB5_3 +; CHECK-NEXT: $BB5_2: # %cond.true +; CHECK-NEXT: jal f +; CHECK-NEXT: nop +; CHECK-NEXT: lw $ra, 20($sp) # 4-byte Folded Reload +; CHECK-NEXT: jr $ra +; CHECK-NEXT: addiu $sp, $sp, 24 +; CHECK-NEXT: $BB5_3: # %cond.false +; CHECK-NEXT: jal g +; CHECK-NEXT: nop +; CHECK-NEXT: lw $ra, 20($sp) # 4-byte Folded Reload +; CHECK-NEXT: jr $ra +; CHECK-NEXT: addiu $sp, $sp, 24 entry: %0 = icmp ugt i32 %a, %b br i1 %0, label %cond.true, label %cond.false @@ -105,11 +218,31 @@ cond.false: ret i32 %2 } -; CHECK-LABEL: test7: -; CHECK: beqzc -; CHECK-NEXT: nop define i32 @test7(i32 %a, i32 %b) { +; CHECK-LABEL: test7: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addiu $sp, $sp, -24 +; CHECK-NEXT: .cfi_def_cfa_offset 24 +; CHECK-NEXT: sw $ra, 20($sp) # 4-byte Folded Spill +; CHECK-NEXT: .cfi_offset 31, -4 +; CHECK-NEXT: sltu $1, $4, $5 +; CHECK-NEXT: bnezc $1, $BB6_2 +; CHECK-NEXT: nop +; CHECK-NEXT: # %bb.1: # %entry +; CHECK-NEXT: bc $BB6_3 +; CHECK-NEXT: $BB6_2: # %cond.false +; CHECK-NEXT: jal g +; CHECK-NEXT: nop +; CHECK-NEXT: lw $ra, 20($sp) # 4-byte Folded Reload +; CHECK-NEXT: jr $ra +; CHECK-NEXT: addiu $sp, $sp, 24 +; CHECK-NEXT: $BB6_3: # %cond.true +; CHECK-NEXT: jal f +; CHECK-NEXT: nop +; CHECK-NEXT: lw $ra, 20($sp) # 4-byte Folded Reload +; CHECK-NEXT: jr $ra +; CHECK-NEXT: addiu $sp, $sp, 24 entry: %0 = icmp uge i32 %a, %b br i1 %0, label %cond.true, label %cond.false @@ -121,11 +254,31 @@ cond.false: ret i32 %2 } -; CHECK-LABEL: test8: -; CHECK: bnezc -; CHECK-NEXT: nop define i32 @test8(i32 %a, i32 %b) { +; CHECK-LABEL: test8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addiu $sp, $sp, -24 +; CHECK-NEXT: .cfi_def_cfa_offset 24 +; CHECK-NEXT: sw $ra, 20($sp) # 4-byte Folded Spill +; CHECK-NEXT: .cfi_offset 31, -4 +; CHECK-NEXT: sltu $1, $4, $5 +; CHECK-NEXT: bnezc $1, $BB7_2 +; CHECK-NEXT: nop 
+; CHECK-NEXT: # %bb.1: # %entry +; CHECK-NEXT: bc $BB7_3 +; CHECK-NEXT: $BB7_2: # %cond.true +; CHECK-NEXT: jal f +; CHECK-NEXT: nop +; CHECK-NEXT: lw $ra, 20($sp) # 4-byte Folded Reload +; CHECK-NEXT: jr $ra +; CHECK-NEXT: addiu $sp, $sp, 24 +; CHECK-NEXT: $BB7_3: # %cond.false +; CHECK-NEXT: jal g +; CHECK-NEXT: nop +; CHECK-NEXT: lw $ra, 20($sp) # 4-byte Folded Reload +; CHECK-NEXT: jr $ra +; CHECK-NEXT: addiu $sp, $sp, 24 entry: %0 = icmp ult i32 %a, %b br i1 %0, label %cond.true, label %cond.false @@ -137,11 +290,31 @@ cond.false: ret i32 %2 } -; CHECK-LABEL: test9: -; CHECK: beqzc -; CHECK-NEXT: nop define i32 @test9(i32 %a, i32 %b) { +; CHECK-LABEL: test9: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addiu $sp, $sp, -24 +; CHECK-NEXT: .cfi_def_cfa_offset 24 +; CHECK-NEXT: sw $ra, 20($sp) # 4-byte Folded Spill +; CHECK-NEXT: .cfi_offset 31, -4 +; CHECK-NEXT: sltu $1, $5, $4 +; CHECK-NEXT: bnezc $1, $BB8_2 +; CHECK-NEXT: nop +; CHECK-NEXT: # %bb.1: # %entry +; CHECK-NEXT: bc $BB8_3 +; CHECK-NEXT: $BB8_2: # %cond.false +; CHECK-NEXT: jal g +; CHECK-NEXT: nop +; CHECK-NEXT: lw $ra, 20($sp) # 4-byte Folded Reload +; CHECK-NEXT: jr $ra +; CHECK-NEXT: addiu $sp, $sp, 24 +; CHECK-NEXT: $BB8_3: # %cond.true +; CHECK-NEXT: jal f +; CHECK-NEXT: nop +; CHECK-NEXT: lw $ra, 20($sp) # 4-byte Folded Reload +; CHECK-NEXT: jr $ra +; CHECK-NEXT: addiu $sp, $sp, 24 entry: %0 = icmp ule i32 %a, %b br i1 %0, label %cond.true, label %cond.false diff --git a/llvm/test/CodeGen/Mips/seleq.ll b/llvm/test/CodeGen/Mips/seleq.ll index 7d1e034d68c7c1..34565ea2727583 100644 --- a/llvm/test/CodeGen/Mips/seleq.ll +++ b/llvm/test/CodeGen/Mips/seleq.ll @@ -1,4 +1,5 @@ -; RUN: llc -march=mipsel -mattr=mips16 -relocation-model=pic < %s | FileCheck %s -check-prefix=16 +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -march=mipsel -mattr=mips16 -relocation-model=pic < %s | FileCheck %s -check-prefix=MIPS16 @t = global i32 10, align 4 @f = global i32 199, 
align 4 @@ -11,6 +12,74 @@ @z4 = common global i32 0, align 4 define void @calc_seleq() nounwind { +; MIPS16-LABEL: calc_seleq: +; MIPS16: # %bb.0: # %entry +; MIPS16-NEXT: lui $2, %hi(_gp_disp) +; MIPS16-NEXT: addiu $2, $2, %lo(_gp_disp) +; MIPS16-NEXT: li $2, %hi(_gp_disp) +; MIPS16-NEXT: addiu $3, $pc, %lo(_gp_disp) +; MIPS16-NEXT: sll $2, $2, 16 +; MIPS16-NEXT: addu $2, $3, $2 +; MIPS16-NEXT: lw $4, %got(b)($2) +; MIPS16-NEXT: lw $5, 0($4) +; MIPS16-NEXT: lw $3, %got(a)($2) +; MIPS16-NEXT: lw $6, 0($3) +; MIPS16-NEXT: cmp $6, $5 +; MIPS16-NEXT: bteqz $BB0_2 # 16 bit inst +; MIPS16-NEXT: # %bb.1: # %cond.false +; MIPS16-NEXT: lw $5, %got(t)($2) +; MIPS16-NEXT: lw $5, 0($5) +; MIPS16-NEXT: b $BB0_3 # 16 bit inst +; MIPS16-NEXT: $BB0_2: # %cond.true +; MIPS16-NEXT: lw $5, %got(f)($2) +; MIPS16-NEXT: lw $5, 0($5) +; MIPS16-NEXT: $BB0_3: # %cond.end +; MIPS16-NEXT: lw $6, %got(z1)($2) +; MIPS16-NEXT: sw $5, 0($6) +; MIPS16-NEXT: lw $5, 0($3) +; MIPS16-NEXT: lw $4, 0($4) +; MIPS16-NEXT: cmp $4, $5 +; MIPS16-NEXT: bteqz $BB0_5 # 16 bit inst +; MIPS16-NEXT: # %bb.4: # %cond.false3 +; MIPS16-NEXT: lw $4, %got(t)($2) +; MIPS16-NEXT: lw $4, 0($4) +; MIPS16-NEXT: b $BB0_6 # 16 bit inst +; MIPS16-NEXT: $BB0_5: # %cond.true2 +; MIPS16-NEXT: lw $4, %got(f)($2) +; MIPS16-NEXT: lw $4, 0($4) +; MIPS16-NEXT: $BB0_6: # %cond.end4 +; MIPS16-NEXT: lw $5, %got(z2)($2) +; MIPS16-NEXT: sw $4, 0($5) +; MIPS16-NEXT: lw $5, 0($3) +; MIPS16-NEXT: lw $4, %got(c)($2) +; MIPS16-NEXT: lw $6, 0($4) +; MIPS16-NEXT: cmp $6, $5 +; MIPS16-NEXT: bteqz $BB0_8 # 16 bit inst +; MIPS16-NEXT: # %bb.7: # %cond.false8 +; MIPS16-NEXT: lw $5, %got(f)($2) +; MIPS16-NEXT: lw $5, 0($5) +; MIPS16-NEXT: b $BB0_9 # 16 bit inst +; MIPS16-NEXT: $BB0_8: # %cond.true7 +; MIPS16-NEXT: lw $5, %got(t)($2) +; MIPS16-NEXT: lw $5, 0($5) +; MIPS16-NEXT: $BB0_9: # %cond.end9 +; MIPS16-NEXT: lw $6, %got(z3)($2) +; MIPS16-NEXT: sw $5, 0($6) +; MIPS16-NEXT: lw $4, 0($4) +; MIPS16-NEXT: lw $3, 0($3) +; MIPS16-NEXT: cmp $3, $4 +; 
MIPS16-NEXT: bteqz $BB0_11 # 16 bit inst +; MIPS16-NEXT: # %bb.10: # %cond.false13 +; MIPS16-NEXT: lw $3, %got(f)($2) +; MIPS16-NEXT: lw $3, 0($3) +; MIPS16-NEXT: b $BB0_12 # 16 bit inst +; MIPS16-NEXT: $BB0_11: # %cond.true12 +; MIPS16-NEXT: lw $3, %got(t)($2) +; MIPS16-NEXT: lw $3, 0($3) +; MIPS16-NEXT: $BB0_12: # %cond.end14 +; MIPS16-NEXT: lw $2, %got(z4)($2) +; MIPS16-NEXT: sw $3, 0($2) +; MIPS16-NEXT: jrc $ra entry: %0 = load i32, i32* @a, align 4 %1 = load i32, i32* @b, align 4 @@ -80,16 +149,3 @@ cond.end14: ; preds = %cond.false13, %cond } attributes #0 = { nounwind "target-cpu"="mips32" "target-features"="+o32,+mips32" } - -; 16: cmp ${{[0-9]+}}, ${{[0-9]+}} -; 16: btnez $BB{{[0-9]+}}_{{[0-9]}} - -; 16: cmp ${{[0-9]+}}, ${{[0-9]+}} -; 16: btnez $BB{{[0-9]+}}_{{[0-9]}} - -; 16: cmp ${{[0-9]+}}, ${{[0-9]+}} -; 16: btnez $BB{{[0-9]+}}_{{[0-9]}} - -; 16: cmp ${{[0-9]+}}, ${{[0-9]+}} -; 16: btnez $BB{{[0-9]+}}_{{[0-9]}} - diff --git a/llvm/test/CodeGen/Mips/selle.ll b/llvm/test/CodeGen/Mips/selle.ll index 8925aac10c4d19..ffad4ba1c349d9 100644 --- a/llvm/test/CodeGen/Mips/selle.ll +++ b/llvm/test/CodeGen/Mips/selle.ll @@ -1,4 +1,5 @@ -; RUN: llc -march=mipsel -mattr=mips16 -relocation-model=pic < %s | FileCheck %s -check-prefix=16 +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -march=mipsel -mattr=mips16 -relocation-model=pic < %s | FileCheck %s -check-prefix=MIPS16 @t = global i32 10, align 4 @f = global i32 199, align 4 @@ -12,6 +13,74 @@ @.str = private unnamed_addr constant [5 x i8] c"%i \0A\00", align 1 define void @calc_z() nounwind "target-cpu"="mips16" "target-features"="+mips16,+o32" { +; MIPS16-LABEL: calc_z: +; MIPS16: # %bb.0: # %entry +; MIPS16-NEXT: lui $2, %hi(_gp_disp) +; MIPS16-NEXT: addiu $2, $2, %lo(_gp_disp) +; MIPS16-NEXT: li $2, %hi(_gp_disp) +; MIPS16-NEXT: addiu $3, $pc, %lo(_gp_disp) +; MIPS16-NEXT: sll $2, $2, 16 +; MIPS16-NEXT: addu $2, $3, $2 +; MIPS16-NEXT: lw $3, %got(a)($2) +; 
MIPS16-NEXT: lw $5, 0($3) +; MIPS16-NEXT: lw $4, %got(b)($2) +; MIPS16-NEXT: lw $6, 0($4) +; MIPS16-NEXT: slt $6, $5 +; MIPS16-NEXT: bteqz $BB0_2 # 16 bit inst +; MIPS16-NEXT: # %bb.1: # %cond.false +; MIPS16-NEXT: lw $5, %got(f)($2) +; MIPS16-NEXT: lw $5, 0($5) +; MIPS16-NEXT: b $BB0_3 # 16 bit inst +; MIPS16-NEXT: $BB0_2: # %cond.true +; MIPS16-NEXT: lw $5, %got(t)($2) +; MIPS16-NEXT: lw $5, 0($5) +; MIPS16-NEXT: $BB0_3: # %cond.end +; MIPS16-NEXT: lw $6, %got(z1)($2) +; MIPS16-NEXT: sw $5, 0($6) +; MIPS16-NEXT: lw $4, 0($4) +; MIPS16-NEXT: lw $5, 0($3) +; MIPS16-NEXT: slt $5, $4 +; MIPS16-NEXT: bteqz $BB0_5 # 16 bit inst +; MIPS16-NEXT: # %bb.4: # %cond.false3 +; MIPS16-NEXT: lw $4, %got(t)($2) +; MIPS16-NEXT: lw $4, 0($4) +; MIPS16-NEXT: b $BB0_6 # 16 bit inst +; MIPS16-NEXT: $BB0_5: # %cond.true2 +; MIPS16-NEXT: lw $4, %got(f)($2) +; MIPS16-NEXT: lw $4, 0($4) +; MIPS16-NEXT: $BB0_6: # %cond.end4 +; MIPS16-NEXT: lw $5, %got(z2)($2) +; MIPS16-NEXT: sw $4, 0($5) +; MIPS16-NEXT: lw $4, %got(c)($2) +; MIPS16-NEXT: lw $5, 0($4) +; MIPS16-NEXT: lw $6, 0($3) +; MIPS16-NEXT: slt $6, $5 +; MIPS16-NEXT: bteqz $BB0_8 # 16 bit inst +; MIPS16-NEXT: # %bb.7: # %cond.false8 +; MIPS16-NEXT: lw $5, %got(f)($2) +; MIPS16-NEXT: lw $5, 0($5) +; MIPS16-NEXT: b $BB0_9 # 16 bit inst +; MIPS16-NEXT: $BB0_8: # %cond.true7 +; MIPS16-NEXT: lw $5, %got(t)($2) +; MIPS16-NEXT: lw $5, 0($5) +; MIPS16-NEXT: $BB0_9: # %cond.end9 +; MIPS16-NEXT: lw $6, %got(z3)($2) +; MIPS16-NEXT: sw $5, 0($6) +; MIPS16-NEXT: lw $3, 0($3) +; MIPS16-NEXT: lw $4, 0($4) +; MIPS16-NEXT: slt $4, $3 +; MIPS16-NEXT: bteqz $BB0_11 # 16 bit inst +; MIPS16-NEXT: # %bb.10: # %cond.false13 +; MIPS16-NEXT: lw $3, %got(f)($2) +; MIPS16-NEXT: lw $3, 0($3) +; MIPS16-NEXT: b $BB0_12 # 16 bit inst +; MIPS16-NEXT: $BB0_11: # %cond.true12 +; MIPS16-NEXT: lw $3, %got(t)($2) +; MIPS16-NEXT: lw $3, 0($3) +; MIPS16-NEXT: $BB0_12: # %cond.end14 +; MIPS16-NEXT: lw $2, %got(z4)($2) +; MIPS16-NEXT: sw $3, 0($2) +; MIPS16-NEXT: jrc $ra 
entry: %0 = load i32, i32* @a, align 4 %1 = load i32, i32* @b, align 4 @@ -80,17 +149,6 @@ cond.end14: ; preds = %cond.false13, %cond ret void } -; 16: slt ${{[0-9]+}}, ${{[0-9]+}} -; 16: btnez $BB{{[0-9]+}}_{{[0-9]}} - -; 16: slt ${{[0-9]+}}, ${{[0-9]+}} -; 16: btnez $BB{{[0-9]+}}_{{[0-9]}} - -; 16: slt ${{[0-9]+}}, ${{[0-9]+}} -; 16: btnez $BB{{[0-9]+}}_{{[0-9]}} - -; 16: slt ${{[0-9]+}}, ${{[0-9]+}} -; 16: btnez $BB{{[0-9]+}}_{{[0-9]}} attributes #0 = { nounwind "target-cpu"="mips16" "target-features"="+mips16,+o32" } attributes #1 = { "target-cpu"="mips16" "target-features"="+mips16,+o32" } diff --git a/llvm/test/CodeGen/PowerPC/brcond.ll b/llvm/test/CodeGen/PowerPC/brcond.ll index b8c98427f107f6..3df169dd64da03 100644 --- a/llvm/test/CodeGen/PowerPC/brcond.ll +++ b/llvm/test/CodeGen/PowerPC/brcond.ll @@ -1,12 +1,22 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-unknown \ ; RUN: -ppc-reduce-cr-logicals=false < %s | FileCheck %s ; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-unknown \ ; RUN: -ppc-reduce-cr-logicals=false < %s | FileCheck %s define signext i32 @testi32slt(i32 signext %c1, i32 signext %c2, i32 signext %c3, i32 signext %c4, i32 signext %a1, i32 signext %a2) #0 { -; CHECK-LABEL: testi32slt -; CHECK: crorc [[REG:[0-9]+]], 2, 6 -; CHECK: bc 12, [[REG]], {{\.[a-zA-Z0-9_]+}} +; CHECK-LABEL: testi32slt: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cmpw 5, 6 +; CHECK-NEXT: cmpw 1, 3, 4 +; CHECK-NEXT: crorc 20, 2, 6 +; CHECK-NEXT: bc 12, 20, .LBB0_2 +; CHECK-NEXT: # %bb.1: # %iftrue +; CHECK-NEXT: extsw 3, 7 +; CHECK-NEXT: blr +; CHECK-NEXT: .LBB0_2: # %iffalse +; CHECK-NEXT: extsw 3, 8 +; CHECK-NEXT: blr entry: %cmp1 = icmp eq i32 %c3, %c4 %cmp3tmp = icmp eq i32 %c1, %c2 @@ -19,9 +29,18 @@ iffalse: } define signext i32 @testi32ult(i32 signext %c1, i32 signext %c2, i32 signext %c3, i32 signext %c4, i32 signext %a1, i32 signext %a2) #0 { -; 
CHECK-LABEL: testi32ult -; CHECK: crorc [[REG:[0-9]+]], 6, 2 -; CHECK: bc 12, [[REG]], {{\.[a-zA-Z0-9_]+}} +; CHECK-LABEL: testi32ult: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cmpw 5, 6 +; CHECK-NEXT: cmpw 1, 3, 4 +; CHECK-NEXT: crorc 20, 6, 2 +; CHECK-NEXT: bc 12, 20, .LBB1_2 +; CHECK-NEXT: # %bb.1: # %iftrue +; CHECK-NEXT: extsw 3, 7 +; CHECK-NEXT: blr +; CHECK-NEXT: .LBB1_2: # %iffalse +; CHECK-NEXT: extsw 3, 8 +; CHECK-NEXT: blr entry: %cmp1 = icmp eq i32 %c3, %c4 %cmp3tmp = icmp eq i32 %c1, %c2 @@ -34,9 +53,18 @@ iffalse: } define signext i32 @testi32sle(i32 signext %c1, i32 signext %c2, i32 signext %c3, i32 signext %c4, i32 signext %a1, i32 signext %a2) #0 { -; CHECK-LABEL: testi32sle -; CHECK: crandc [[REG:[0-9]+]], 2, 6 -; CHECK: bc 12, [[REG]], {{\.[a-zA-Z0-9_]+}} +; CHECK-LABEL: testi32sle: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cmpw 5, 6 +; CHECK-NEXT: cmpw 1, 3, 4 +; CHECK-NEXT: crandc 20, 2, 6 +; CHECK-NEXT: bc 4, 20, .LBB2_2 +; CHECK-NEXT: # %bb.1: # %iffalse +; CHECK-NEXT: extsw 3, 8 +; CHECK-NEXT: blr +; CHECK-NEXT: .LBB2_2: # %iftrue +; CHECK-NEXT: extsw 3, 7 +; CHECK-NEXT: blr entry: %cmp1 = icmp eq i32 %c3, %c4 %cmp3tmp = icmp eq i32 %c1, %c2 @@ -49,9 +77,18 @@ iffalse: } define signext i32 @testi32ule(i32 signext %c1, i32 signext %c2, i32 signext %c3, i32 signext %c4, i32 signext %a1, i32 signext %a2) #0 { -; CHECK-LABEL: testi32ule -; CHECK: crandc [[REG:[0-9]+]], 6, 2 -; CHECK: bc 12, [[REG]], {{\.[a-zA-Z0-9_]+}} +; CHECK-LABEL: testi32ule: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cmpw 5, 6 +; CHECK-NEXT: cmpw 1, 3, 4 +; CHECK-NEXT: crandc 20, 6, 2 +; CHECK-NEXT: bc 4, 20, .LBB3_2 +; CHECK-NEXT: # %bb.1: # %iffalse +; CHECK-NEXT: extsw 3, 8 +; CHECK-NEXT: blr +; CHECK-NEXT: .LBB3_2: # %iftrue +; CHECK-NEXT: extsw 3, 7 +; CHECK-NEXT: blr entry: %cmp1 = icmp eq i32 %c3, %c4 %cmp3tmp = icmp eq i32 %c1, %c2 @@ -65,8 +102,17 @@ iffalse: define signext i32 @testi32eq(i32 signext %c1, i32 signext %c2, i32 signext %c3, i32 signext %c4, i32 
signext %a1, i32 signext %a2) #0 { ; CHECK-LABEL: testi32eq: -; CHECK: crxor [[REG:[0-9]+]], 6, 2 -; CHECK: bc 12, [[REG]], {{\.[a-zA-Z0-9_]+}} +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cmpw 5, 6 +; CHECK-NEXT: cmpw 1, 3, 4 +; CHECK-NEXT: crxor 20, 6, 2 +; CHECK-NEXT: bc 4, 20, .LBB4_2 +; CHECK-NEXT: # %bb.1: # %iffalse +; CHECK-NEXT: extsw 3, 8 +; CHECK-NEXT: blr +; CHECK-NEXT: .LBB4_2: # %iftrue +; CHECK-NEXT: extsw 3, 7 +; CHECK-NEXT: blr entry: %cmp1 = icmp eq i32 %c3, %c4 %cmp3tmp = icmp eq i32 %c1, %c2 @@ -80,8 +126,17 @@ iffalse: define signext i32 @testi32sge(i32 signext %c1, i32 signext %c2, i32 signext %c3, i32 signext %c4, i32 signext %a1, i32 signext %a2) #0 { ; CHECK-LABEL: testi32sge: -; CHECK: crandc [[REG:[0-9]+]], 6, 2 -; CHECK: bc 12, [[REG]], {{\.[a-zA-Z0-9_]+}} +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cmpw 5, 6 +; CHECK-NEXT: cmpw 1, 3, 4 +; CHECK-NEXT: crandc 20, 6, 2 +; CHECK-NEXT: bc 4, 20, .LBB5_2 +; CHECK-NEXT: # %bb.1: # %iffalse +; CHECK-NEXT: extsw 3, 8 +; CHECK-NEXT: blr +; CHECK-NEXT: .LBB5_2: # %iftrue +; CHECK-NEXT: extsw 3, 7 +; CHECK-NEXT: blr entry: %cmp1 = icmp eq i32 %c3, %c4 %cmp3tmp = icmp eq i32 %c1, %c2 @@ -95,8 +150,17 @@ iffalse: define signext i32 @testi32uge(i32 signext %c1, i32 signext %c2, i32 signext %c3, i32 signext %c4, i32 signext %a1, i32 signext %a2) #0 { ; CHECK-LABEL: testi32uge: -; CHECK: crandc [[REG:[0-9]+]], 2, 6 -; CHECK: bc 12, [[REG]], {{\.[a-zA-Z0-9_]+}} +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cmpw 5, 6 +; CHECK-NEXT: cmpw 1, 3, 4 +; CHECK-NEXT: crandc 20, 2, 6 +; CHECK-NEXT: bc 4, 20, .LBB6_2 +; CHECK-NEXT: # %bb.1: # %iffalse +; CHECK-NEXT: extsw 3, 8 +; CHECK-NEXT: blr +; CHECK-NEXT: .LBB6_2: # %iftrue +; CHECK-NEXT: extsw 3, 7 +; CHECK-NEXT: blr entry: %cmp1 = icmp eq i32 %c3, %c4 %cmp3tmp = icmp eq i32 %c1, %c2 @@ -110,8 +174,17 @@ iffalse: define signext i32 @testi32sgt(i32 signext %c1, i32 signext %c2, i32 signext %c3, i32 signext %c4, i32 signext %a1, i32 signext %a2) #0 { ; CHECK-LABEL: 
testi32sgt: -; CHECK: crorc [[REG:[0-9]+]], 6, 2 -; CHECK: bc 12, [[REG]], {{\.[a-zA-Z0-9_]+}} +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cmpw 5, 6 +; CHECK-NEXT: cmpw 1, 3, 4 +; CHECK-NEXT: crorc 20, 6, 2 +; CHECK-NEXT: bc 12, 20, .LBB7_2 +; CHECK-NEXT: # %bb.1: # %iftrue +; CHECK-NEXT: extsw 3, 7 +; CHECK-NEXT: blr +; CHECK-NEXT: .LBB7_2: # %iffalse +; CHECK-NEXT: extsw 3, 8 +; CHECK-NEXT: blr entry: %cmp1 = icmp eq i32 %c3, %c4 %cmp3tmp = icmp eq i32 %c1, %c2 @@ -125,8 +198,17 @@ iffalse: define signext i32 @testi32ugt(i32 signext %c1, i32 signext %c2, i32 signext %c3, i32 signext %c4, i32 signext %a1, i32 signext %a2) #0 { ; CHECK-LABEL: testi32ugt: -; CHECK: crorc [[REG:[0-9]+]], 2, 6 -; CHECK: bc 12, [[REG]], {{\.[a-zA-Z0-9_]+}} +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cmpw 5, 6 +; CHECK-NEXT: cmpw 1, 3, 4 +; CHECK-NEXT: crorc 20, 2, 6 +; CHECK-NEXT: bc 12, 20, .LBB8_2 +; CHECK-NEXT: # %bb.1: # %iftrue +; CHECK-NEXT: extsw 3, 7 +; CHECK-NEXT: blr +; CHECK-NEXT: .LBB8_2: # %iffalse +; CHECK-NEXT: extsw 3, 8 +; CHECK-NEXT: blr entry: %cmp1 = icmp eq i32 %c3, %c4 %cmp3tmp = icmp eq i32 %c1, %c2 @@ -140,8 +222,17 @@ iffalse: define signext i32 @testi32ne(i32 signext %c1, i32 signext %c2, i32 signext %c3, i32 signext %c4, i32 signext %a1, i32 signext %a2) #0 { ; CHECK-LABEL: testi32ne: -; CHECK: creqv [[REG:[0-9]+]], 6, 2 -; CHECK: bc 12, [[REG]], {{\.[a-zA-Z0-9_]+}} +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cmpw 5, 6 +; CHECK-NEXT: cmpw 1, 3, 4 +; CHECK-NEXT: creqv 20, 6, 2 +; CHECK-NEXT: bc 12, 20, .LBB9_2 +; CHECK-NEXT: # %bb.1: # %iftrue +; CHECK-NEXT: extsw 3, 7 +; CHECK-NEXT: blr +; CHECK-NEXT: .LBB9_2: # %iffalse +; CHECK-NEXT: extsw 3, 8 +; CHECK-NEXT: blr entry: %cmp1 = icmp eq i32 %c3, %c4 %cmp3tmp = icmp eq i32 %c1, %c2 @@ -154,9 +245,18 @@ iffalse: } define i64 @testi64slt(i64 %c1, i64 %c2, i64 %c3, i64 %c4, i64 %a1, i64 %a2) #0 { -; CHECK-LABEL: testi64slt -; CHECK: crorc [[REG:[0-9]+]], 2, 6 -; CHECK: bc 12, [[REG]], {{\.[a-zA-Z0-9_]+}} +; 
CHECK-LABEL: testi64slt: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cmpd 5, 6 +; CHECK-NEXT: cmpd 1, 3, 4 +; CHECK-NEXT: crorc 20, 2, 6 +; CHECK-NEXT: bc 12, 20, .LBB10_2 +; CHECK-NEXT: # %bb.1: # %iftrue +; CHECK-NEXT: mr 3, 7 +; CHECK-NEXT: blr +; CHECK-NEXT: .LBB10_2: # %iffalse +; CHECK-NEXT: mr 3, 8 +; CHECK-NEXT: blr entry: %cmp1 = icmp eq i64 %c3, %c4 %cmp3tmp = icmp eq i64 %c1, %c2 @@ -169,9 +269,18 @@ iffalse: } define i64 @testi64ult(i64 %c1, i64 %c2, i64 %c3, i64 %c4, i64 %a1, i64 %a2) #0 { -; CHECK-LABEL: testi64ult -; CHECK: crorc [[REG:[0-9]+]], 6, 2 -; CHECK: bc 12, [[REG]], {{\.[a-zA-Z0-9_]+}} +; CHECK-LABEL: testi64ult: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cmpd 5, 6 +; CHECK-NEXT: cmpd 1, 3, 4 +; CHECK-NEXT: crorc 20, 6, 2 +; CHECK-NEXT: bc 12, 20, .LBB11_2 +; CHECK-NEXT: # %bb.1: # %iftrue +; CHECK-NEXT: mr 3, 7 +; CHECK-NEXT: blr +; CHECK-NEXT: .LBB11_2: # %iffalse +; CHECK-NEXT: mr 3, 8 +; CHECK-NEXT: blr entry: %cmp1 = icmp eq i64 %c3, %c4 %cmp3tmp = icmp eq i64 %c1, %c2 @@ -184,9 +293,18 @@ iffalse: } define i64 @testi64sle(i64 %c1, i64 %c2, i64 %c3, i64 %c4, i64 %a1, i64 %a2) #0 { -; CHECK-LABEL: testi64sle -; CHECK: crandc [[REG:[0-9]+]], 2, 6 -; CHECK: bc 12, [[REG]], {{\.[a-zA-Z0-9_]+}} +; CHECK-LABEL: testi64sle: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cmpd 5, 6 +; CHECK-NEXT: cmpd 1, 3, 4 +; CHECK-NEXT: crandc 20, 2, 6 +; CHECK-NEXT: bc 4, 20, .LBB12_2 +; CHECK-NEXT: # %bb.1: # %iffalse +; CHECK-NEXT: mr 3, 8 +; CHECK-NEXT: blr +; CHECK-NEXT: .LBB12_2: # %iftrue +; CHECK-NEXT: mr 3, 7 +; CHECK-NEXT: blr entry: %cmp1 = icmp eq i64 %c3, %c4 %cmp3tmp = icmp eq i64 %c1, %c2 @@ -199,9 +317,18 @@ iffalse: } define i64 @testi64ule(i64 %c1, i64 %c2, i64 %c3, i64 %c4, i64 %a1, i64 %a2) #0 { -; CHECK-LABEL: testi64ule -; CHECK: crandc [[REG:[0-9]+]], 6, 2 -; CHECK: bc 12, [[REG]], {{\.[a-zA-Z0-9_]+}} +; CHECK-LABEL: testi64ule: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cmpd 5, 6 +; CHECK-NEXT: cmpd 1, 3, 4 +; CHECK-NEXT: crandc 20, 6, 2 
+; CHECK-NEXT: bc 4, 20, .LBB13_2 +; CHECK-NEXT: # %bb.1: # %iffalse +; CHECK-NEXT: mr 3, 8 +; CHECK-NEXT: blr +; CHECK-NEXT: .LBB13_2: # %iftrue +; CHECK-NEXT: mr 3, 7 +; CHECK-NEXT: blr entry: %cmp1 = icmp eq i64 %c3, %c4 %cmp3tmp = icmp eq i64 %c1, %c2 @@ -214,9 +341,18 @@ iffalse: } define i64 @testi64eq(i64 %c1, i64 %c2, i64 %c3, i64 %c4, i64 %a1, i64 %a2) #0 { -; CHECK-LABEL: testi64eq -; CHECK: crxor [[REG:[0-9]+]], 6, 2 -; CHECK: bc 12, [[REG]], {{\.[a-zA-Z0-9_]+}} +; CHECK-LABEL: testi64eq: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cmpd 5, 6 +; CHECK-NEXT: cmpd 1, 3, 4 +; CHECK-NEXT: crxor 20, 6, 2 +; CHECK-NEXT: bc 4, 20, .LBB14_2 +; CHECK-NEXT: # %bb.1: # %iffalse +; CHECK-NEXT: mr 3, 8 +; CHECK-NEXT: blr +; CHECK-NEXT: .LBB14_2: # %iftrue +; CHECK-NEXT: mr 3, 7 +; CHECK-NEXT: blr entry: %cmp1 = icmp eq i64 %c3, %c4 %cmp3tmp = icmp eq i64 %c1, %c2 @@ -229,9 +365,18 @@ iffalse: } define i64 @testi64sge(i64 %c1, i64 %c2, i64 %c3, i64 %c4, i64 %a1, i64 %a2) #0 { -; CHECK-LABEL: testi64sge -; CHECK: crandc [[REG:[0-9]+]], 6, 2 -; CHECK: bc 12, [[REG]], {{\.[a-zA-Z0-9_]+}} +; CHECK-LABEL: testi64sge: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cmpd 5, 6 +; CHECK-NEXT: cmpd 1, 3, 4 +; CHECK-NEXT: crandc 20, 6, 2 +; CHECK-NEXT: bc 4, 20, .LBB15_2 +; CHECK-NEXT: # %bb.1: # %iffalse +; CHECK-NEXT: mr 3, 8 +; CHECK-NEXT: blr +; CHECK-NEXT: .LBB15_2: # %iftrue +; CHECK-NEXT: mr 3, 7 +; CHECK-NEXT: blr entry: %cmp1 = icmp eq i64 %c3, %c4 %cmp3tmp = icmp eq i64 %c1, %c2 @@ -244,9 +389,18 @@ iffalse: } define i64 @testi64uge(i64 %c1, i64 %c2, i64 %c3, i64 %c4, i64 %a1, i64 %a2) #0 { -; CHECK-LABEL: testi64uge -; CHECK: crandc [[REG:[0-9]+]], 2, 6 -; CHECK: bc 12, [[REG]], {{\.[a-zA-Z0-9_]+}} +; CHECK-LABEL: testi64uge: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cmpd 5, 6 +; CHECK-NEXT: cmpd 1, 3, 4 +; CHECK-NEXT: crandc 20, 2, 6 +; CHECK-NEXT: bc 4, 20, .LBB16_2 +; CHECK-NEXT: # %bb.1: # %iffalse +; CHECK-NEXT: mr 3, 8 +; CHECK-NEXT: blr +; CHECK-NEXT: .LBB16_2: # 
%iftrue +; CHECK-NEXT: mr 3, 7 +; CHECK-NEXT: blr entry: %cmp1 = icmp eq i64 %c3, %c4 %cmp3tmp = icmp eq i64 %c1, %c2 @@ -259,9 +413,18 @@ iffalse: } define i64 @testi64sgt(i64 %c1, i64 %c2, i64 %c3, i64 %c4, i64 %a1, i64 %a2) #0 { -; CHECK-LABEL: testi64sgt -; CHECK: crorc [[REG:[0-9]+]], 6, 2 -; CHECK: bc 12, [[REG]], {{\.[a-zA-Z0-9_]+}} +; CHECK-LABEL: testi64sgt: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cmpd 5, 6 +; CHECK-NEXT: cmpd 1, 3, 4 +; CHECK-NEXT: crorc 20, 6, 2 +; CHECK-NEXT: bc 12, 20, .LBB17_2 +; CHECK-NEXT: # %bb.1: # %iftrue +; CHECK-NEXT: mr 3, 7 +; CHECK-NEXT: blr +; CHECK-NEXT: .LBB17_2: # %iffalse +; CHECK-NEXT: mr 3, 8 +; CHECK-NEXT: blr entry: %cmp1 = icmp eq i64 %c3, %c4 %cmp3tmp = icmp eq i64 %c1, %c2 @@ -274,9 +437,18 @@ iffalse: } define i64 @testi64ugt(i64 %c1, i64 %c2, i64 %c3, i64 %c4, i64 %a1, i64 %a2) #0 { -; CHECK-LABEL: testi64ugt -; CHECK: crorc [[REG:[0-9]+]], 2, 6 -; CHECK: bc 12, [[REG]], {{\.[a-zA-Z0-9_]+}} +; CHECK-LABEL: testi64ugt: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cmpd 5, 6 +; CHECK-NEXT: cmpd 1, 3, 4 +; CHECK-NEXT: crorc 20, 2, 6 +; CHECK-NEXT: bc 12, 20, .LBB18_2 +; CHECK-NEXT: # %bb.1: # %iftrue +; CHECK-NEXT: mr 3, 7 +; CHECK-NEXT: blr +; CHECK-NEXT: .LBB18_2: # %iffalse +; CHECK-NEXT: mr 3, 8 +; CHECK-NEXT: blr entry: %cmp1 = icmp eq i64 %c3, %c4 %cmp3tmp = icmp eq i64 %c1, %c2 @@ -289,9 +461,18 @@ iffalse: } define i64 @testi64ne(i64 %c1, i64 %c2, i64 %c3, i64 %c4, i64 %a1, i64 %a2) #0 { -; CHECK-LABEL: testi64ne -; CHECK: creqv [[REG:[0-9]+]], 6, 2 -; CHECK: bc 12, [[REG]], {{\.[a-zA-Z0-9_]+}} +; CHECK-LABEL: testi64ne: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cmpd 5, 6 +; CHECK-NEXT: cmpd 1, 3, 4 +; CHECK-NEXT: creqv 20, 6, 2 +; CHECK-NEXT: bc 12, 20, .LBB19_2 +; CHECK-NEXT: # %bb.1: # %iftrue +; CHECK-NEXT: mr 3, 7 +; CHECK-NEXT: blr +; CHECK-NEXT: .LBB19_2: # %iffalse +; CHECK-NEXT: mr 3, 8 +; CHECK-NEXT: blr entry: %cmp1 = icmp eq i64 %c3, %c4 %cmp3tmp = icmp eq i64 %c1, %c2 @@ -304,9 +485,18 @@ 
iffalse: } define float @testfloatslt(float %c1, float %c2, float %c3, float %c4, float %a1, float %a2) #0 { -; CHECK-LABEL: testfloatslt -; CHECK: crorc [[REG:[0-9]+]], 2, 6 -; CHECK: bc 12, [[REG]], {{\.[a-zA-Z0-9_]+}} +; CHECK-LABEL: testfloatslt: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fcmpu 0, 3, 4 +; CHECK-NEXT: fcmpu 1, 1, 2 +; CHECK-NEXT: crorc 20, 2, 6 +; CHECK-NEXT: bc 12, 20, .LBB20_2 +; CHECK-NEXT: # %bb.1: # %iftrue +; CHECK-NEXT: fmr 1, 5 +; CHECK-NEXT: blr +; CHECK-NEXT: .LBB20_2: # %iffalse +; CHECK-NEXT: fmr 1, 6 +; CHECK-NEXT: blr entry: %cmp1 = fcmp oeq float %c3, %c4 %cmp3tmp = fcmp oeq float %c1, %c2 @@ -319,9 +509,18 @@ iffalse: } define float @testfloatult(float %c1, float %c2, float %c3, float %c4, float %a1, float %a2) #0 { -; CHECK-LABEL: testfloatult -; CHECK: crorc [[REG:[0-9]+]], 6, 2 -; CHECK: bc 12, [[REG]], {{\.[a-zA-Z0-9_]+}} +; CHECK-LABEL: testfloatult: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fcmpu 0, 3, 4 +; CHECK-NEXT: fcmpu 1, 1, 2 +; CHECK-NEXT: crorc 20, 6, 2 +; CHECK-NEXT: bc 12, 20, .LBB21_2 +; CHECK-NEXT: # %bb.1: # %iftrue +; CHECK-NEXT: fmr 1, 5 +; CHECK-NEXT: blr +; CHECK-NEXT: .LBB21_2: # %iffalse +; CHECK-NEXT: fmr 1, 6 +; CHECK-NEXT: blr entry: %cmp1 = fcmp oeq float %c3, %c4 %cmp3tmp = fcmp oeq float %c1, %c2 @@ -334,9 +533,18 @@ iffalse: } define float @testfloatsle(float %c1, float %c2, float %c3, float %c4, float %a1, float %a2) #0 { -; CHECK-LABEL: testfloatsle -; CHECK: crandc [[REG:[0-9]+]], 2, 6 -; CHECK: bc 12, [[REG]], {{\.[a-zA-Z0-9_]+}} +; CHECK-LABEL: testfloatsle: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fcmpu 0, 3, 4 +; CHECK-NEXT: fcmpu 1, 1, 2 +; CHECK-NEXT: crandc 20, 2, 6 +; CHECK-NEXT: bc 4, 20, .LBB22_2 +; CHECK-NEXT: # %bb.1: # %iffalse +; CHECK-NEXT: fmr 1, 6 +; CHECK-NEXT: blr +; CHECK-NEXT: .LBB22_2: # %iftrue +; CHECK-NEXT: fmr 1, 5 +; CHECK-NEXT: blr entry: %cmp1 = fcmp oeq float %c3, %c4 %cmp3tmp = fcmp oeq float %c1, %c2 @@ -349,9 +557,18 @@ iffalse: } define float 
@testfloatule(float %c1, float %c2, float %c3, float %c4, float %a1, float %a2) #0 { -; CHECK-LABEL: testfloatule -; CHECK: crandc [[REG:[0-9]+]], 6, 2 -; CHECK: bc 12, [[REG]], {{\.[a-zA-Z0-9_]+}} +; CHECK-LABEL: testfloatule: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fcmpu 0, 3, 4 +; CHECK-NEXT: fcmpu 1, 1, 2 +; CHECK-NEXT: crandc 20, 6, 2 +; CHECK-NEXT: bc 4, 20, .LBB23_2 +; CHECK-NEXT: # %bb.1: # %iffalse +; CHECK-NEXT: fmr 1, 6 +; CHECK-NEXT: blr +; CHECK-NEXT: .LBB23_2: # %iftrue +; CHECK-NEXT: fmr 1, 5 +; CHECK-NEXT: blr entry: %cmp1 = fcmp oeq float %c3, %c4 %cmp3tmp = fcmp oeq float %c1, %c2 @@ -364,9 +581,18 @@ iffalse: } define float @testfloateq(float %c1, float %c2, float %c3, float %c4, float %a1, float %a2) #0 { -; CHECK-LABEL: testfloateq -; CHECK: crxor [[REG:[0-9]+]], 6, 2 -; CHECK: bc 12, [[REG]], {{\.[a-zA-Z0-9_]+}} +; CHECK-LABEL: testfloateq: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fcmpu 0, 3, 4 +; CHECK-NEXT: fcmpu 1, 1, 2 +; CHECK-NEXT: crxor 20, 6, 2 +; CHECK-NEXT: bc 4, 20, .LBB24_2 +; CHECK-NEXT: # %bb.1: # %iffalse +; CHECK-NEXT: fmr 1, 6 +; CHECK-NEXT: blr +; CHECK-NEXT: .LBB24_2: # %iftrue +; CHECK-NEXT: fmr 1, 5 +; CHECK-NEXT: blr entry: %cmp1 = fcmp oeq float %c3, %c4 %cmp3tmp = fcmp oeq float %c1, %c2 @@ -379,9 +605,18 @@ iffalse: } define float @testfloatsge(float %c1, float %c2, float %c3, float %c4, float %a1, float %a2) #0 { -; CHECK-LABEL: testfloatsge -; CHECK: crandc [[REG:[0-9]+]], 6, 2 -; CHECK: bc 12, [[REG]], {{\.[a-zA-Z0-9_]+}} +; CHECK-LABEL: testfloatsge: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fcmpu 0, 3, 4 +; CHECK-NEXT: fcmpu 1, 1, 2 +; CHECK-NEXT: crandc 20, 6, 2 +; CHECK-NEXT: bc 4, 20, .LBB25_2 +; CHECK-NEXT: # %bb.1: # %iffalse +; CHECK-NEXT: fmr 1, 6 +; CHECK-NEXT: blr +; CHECK-NEXT: .LBB25_2: # %iftrue +; CHECK-NEXT: fmr 1, 5 +; CHECK-NEXT: blr entry: %cmp1 = fcmp oeq float %c3, %c4 %cmp3tmp = fcmp oeq float %c1, %c2 @@ -394,9 +629,18 @@ iffalse: } define float @testfloatuge(float %c1, float %c2, float 
%c3, float %c4, float %a1, float %a2) #0 { -; CHECK-LABEL: testfloatuge -; CHECK: crandc [[REG:[0-9]+]], 2, 6 -; CHECK: bc 12, [[REG]], {{\.[a-zA-Z0-9_]+}} +; CHECK-LABEL: testfloatuge: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fcmpu 0, 3, 4 +; CHECK-NEXT: fcmpu 1, 1, 2 +; CHECK-NEXT: crandc 20, 2, 6 +; CHECK-NEXT: bc 4, 20, .LBB26_2 +; CHECK-NEXT: # %bb.1: # %iffalse +; CHECK-NEXT: fmr 1, 6 +; CHECK-NEXT: blr +; CHECK-NEXT: .LBB26_2: # %iftrue +; CHECK-NEXT: fmr 1, 5 +; CHECK-NEXT: blr entry: %cmp1 = fcmp oeq float %c3, %c4 %cmp3tmp = fcmp oeq float %c1, %c2 @@ -409,9 +653,18 @@ iffalse: } define float @testfloatsgt(float %c1, float %c2, float %c3, float %c4, float %a1, float %a2) #0 { -; CHECK-LABEL: testfloatsgt -; CHECK: crorc [[REG:[0-9]+]], 6, 2 -; CHECK: bc 12, [[REG]], {{\.[a-zA-Z0-9_]+}} +; CHECK-LABEL: testfloatsgt: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fcmpu 0, 3, 4 +; CHECK-NEXT: fcmpu 1, 1, 2 +; CHECK-NEXT: crorc 20, 6, 2 +; CHECK-NEXT: bc 12, 20, .LBB27_2 +; CHECK-NEXT: # %bb.1: # %iftrue +; CHECK-NEXT: fmr 1, 5 +; CHECK-NEXT: blr +; CHECK-NEXT: .LBB27_2: # %iffalse +; CHECK-NEXT: fmr 1, 6 +; CHECK-NEXT: blr entry: %cmp1 = fcmp oeq float %c3, %c4 %cmp3tmp = fcmp oeq float %c1, %c2 @@ -424,9 +677,18 @@ iffalse: } define float @testfloatugt(float %c1, float %c2, float %c3, float %c4, float %a1, float %a2) #0 { -; CHECK-LABEL: testfloatugt -; CHECK: crorc [[REG:[0-9]+]], 2, 6 -; CHECK: bc 12, [[REG]], {{\.[a-zA-Z0-9_]+}} +; CHECK-LABEL: testfloatugt: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fcmpu 0, 3, 4 +; CHECK-NEXT: fcmpu 1, 1, 2 +; CHECK-NEXT: crorc 20, 2, 6 +; CHECK-NEXT: bc 12, 20, .LBB28_2 +; CHECK-NEXT: # %bb.1: # %iftrue +; CHECK-NEXT: fmr 1, 5 +; CHECK-NEXT: blr +; CHECK-NEXT: .LBB28_2: # %iffalse +; CHECK-NEXT: fmr 1, 6 +; CHECK-NEXT: blr entry: %cmp1 = fcmp oeq float %c3, %c4 %cmp3tmp = fcmp oeq float %c1, %c2 @@ -439,9 +701,18 @@ iffalse: } define float @testfloatne(float %c1, float %c2, float %c3, float %c4, float %a1, float %a2) #0 
{ -; CHECK-LABEL: testfloatne -; CHECK: creqv [[REG:[0-9]+]], 6, 2 -; CHECK: bc 12, [[REG]], {{\.[a-zA-Z0-9_]+}} +; CHECK-LABEL: testfloatne: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fcmpu 0, 3, 4 +; CHECK-NEXT: fcmpu 1, 1, 2 +; CHECK-NEXT: creqv 20, 6, 2 +; CHECK-NEXT: bc 12, 20, .LBB29_2 +; CHECK-NEXT: # %bb.1: # %iftrue +; CHECK-NEXT: fmr 1, 5 +; CHECK-NEXT: blr +; CHECK-NEXT: .LBB29_2: # %iffalse +; CHECK-NEXT: fmr 1, 6 +; CHECK-NEXT: blr entry: %cmp1 = fcmp oeq float %c3, %c4 %cmp3tmp = fcmp oeq float %c1, %c2 @@ -454,9 +725,18 @@ iffalse: } define double @testdoubleslt(double %c1, double %c2, double %c3, double %c4, double %a1, double %a2) #0 { -; CHECK-LABEL: testdoubleslt -; CHECK: crorc [[REG:[0-9]+]], 2, 6 -; CHECK: bc 12, [[REG]], {{\.[a-zA-Z0-9_]+}} +; CHECK-LABEL: testdoubleslt: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fcmpu 0, 3, 4 +; CHECK-NEXT: fcmpu 1, 1, 2 +; CHECK-NEXT: crorc 20, 2, 6 +; CHECK-NEXT: bc 12, 20, .LBB30_2 +; CHECK-NEXT: # %bb.1: # %iftrue +; CHECK-NEXT: fmr 1, 5 +; CHECK-NEXT: blr +; CHECK-NEXT: .LBB30_2: # %iffalse +; CHECK-NEXT: fmr 1, 6 +; CHECK-NEXT: blr entry: %cmp1 = fcmp oeq double %c3, %c4 %cmp3tmp = fcmp oeq double %c1, %c2 @@ -470,8 +750,17 @@ iffalse: define double @testdoubleult(double %c1, double %c2, double %c3, double %c4, double %a1, double %a2) #0 { ; CHECK-LABEL: testdoubleult: -; CHECK: crorc [[REG:[0-9]+]], 6, 2 -; CHECK: bc 12, [[REG]], {{\.[a-zA-Z0-9_]+}} +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fcmpu 0, 3, 4 +; CHECK-NEXT: fcmpu 1, 1, 2 +; CHECK-NEXT: crorc 20, 6, 2 +; CHECK-NEXT: bc 12, 20, .LBB31_2 +; CHECK-NEXT: # %bb.1: # %iftrue +; CHECK-NEXT: fmr 1, 5 +; CHECK-NEXT: blr +; CHECK-NEXT: .LBB31_2: # %iffalse +; CHECK-NEXT: fmr 1, 6 +; CHECK-NEXT: blr entry: %cmp1 = fcmp oeq double %c3, %c4 %cmp3tmp = fcmp oeq double %c1, %c2 @@ -484,9 +773,18 @@ iffalse: } define double @testdoublesle(double %c1, double %c2, double %c3, double %c4, double %a1, double %a2) #0 { -; CHECK-LABEL: testdoublesle -; CHECK: 
crandc [[REG:[0-9]+]], 2, 6 -; CHECK: bc 12, [[REG]], {{\.[a-zA-Z0-9_]+}} +; CHECK-LABEL: testdoublesle: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fcmpu 0, 3, 4 +; CHECK-NEXT: fcmpu 1, 1, 2 +; CHECK-NEXT: crandc 20, 2, 6 +; CHECK-NEXT: bc 4, 20, .LBB32_2 +; CHECK-NEXT: # %bb.1: # %iffalse +; CHECK-NEXT: fmr 1, 6 +; CHECK-NEXT: blr +; CHECK-NEXT: .LBB32_2: # %iftrue +; CHECK-NEXT: fmr 1, 5 +; CHECK-NEXT: blr entry: %cmp1 = fcmp oeq double %c3, %c4 %cmp3tmp = fcmp oeq double %c1, %c2 @@ -500,8 +798,17 @@ iffalse: define double @testdoubleule(double %c1, double %c2, double %c3, double %c4, double %a1, double %a2) #0 { ; CHECK-LABEL: testdoubleule: -; CHECK: crandc [[REG:[0-9]+]], 6, 2 -; CHECK: bc 12, [[REG]], {{\.[a-zA-Z0-9_]+}} +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fcmpu 0, 3, 4 +; CHECK-NEXT: fcmpu 1, 1, 2 +; CHECK-NEXT: crandc 20, 6, 2 +; CHECK-NEXT: bc 4, 20, .LBB33_2 +; CHECK-NEXT: # %bb.1: # %iffalse +; CHECK-NEXT: fmr 1, 6 +; CHECK-NEXT: blr +; CHECK-NEXT: .LBB33_2: # %iftrue +; CHECK-NEXT: fmr 1, 5 +; CHECK-NEXT: blr entry: %cmp1 = fcmp oeq double %c3, %c4 %cmp3tmp = fcmp oeq double %c1, %c2 @@ -514,9 +821,18 @@ iffalse: } define double @testdoubleeq(double %c1, double %c2, double %c3, double %c4, double %a1, double %a2) #0 { -; CHECK-LABEL: testdoubleeq -; CHECK: crxor [[REG:[0-9]+]], 6, 2 -; CHECK: bc 12, [[REG]], {{\.[a-zA-Z0-9_]+}} +; CHECK-LABEL: testdoubleeq: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fcmpu 0, 3, 4 +; CHECK-NEXT: fcmpu 1, 1, 2 +; CHECK-NEXT: crxor 20, 6, 2 +; CHECK-NEXT: bc 4, 20, .LBB34_2 +; CHECK-NEXT: # %bb.1: # %iffalse +; CHECK-NEXT: fmr 1, 6 +; CHECK-NEXT: blr +; CHECK-NEXT: .LBB34_2: # %iftrue +; CHECK-NEXT: fmr 1, 5 +; CHECK-NEXT: blr entry: %cmp1 = fcmp oeq double %c3, %c4 %cmp3tmp = fcmp oeq double %c1, %c2 @@ -529,9 +845,18 @@ iffalse: } define double @testdoublesge(double %c1, double %c2, double %c3, double %c4, double %a1, double %a2) #0 { -; CHECK-LABEL: testdoublesge -; CHECK: crandc [[REG:[0-9]+]], 6, 2 -; CHECK: 
bc 12, [[REG]], {{\.[a-zA-Z0-9_]+}} +; CHECK-LABEL: testdoublesge: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fcmpu 0, 3, 4 +; CHECK-NEXT: fcmpu 1, 1, 2 +; CHECK-NEXT: crandc 20, 6, 2 +; CHECK-NEXT: bc 4, 20, .LBB35_2 +; CHECK-NEXT: # %bb.1: # %iffalse +; CHECK-NEXT: fmr 1, 6 +; CHECK-NEXT: blr +; CHECK-NEXT: .LBB35_2: # %iftrue +; CHECK-NEXT: fmr 1, 5 +; CHECK-NEXT: blr entry: %cmp1 = fcmp oeq double %c3, %c4 %cmp3tmp = fcmp oeq double %c1, %c2 @@ -544,9 +869,18 @@ iffalse: } define double @testdoubleuge(double %c1, double %c2, double %c3, double %c4, double %a1, double %a2) #0 { -; CHECK-LABEL: testdoubleuge -; CHECK: crandc [[REG:[0-9]+]], 2, 6 -; CHECK: bc 12, [[REG]], {{\.[a-zA-Z0-9_]+}} +; CHECK-LABEL: testdoubleuge: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fcmpu 0, 3, 4 +; CHECK-NEXT: fcmpu 1, 1, 2 +; CHECK-NEXT: crandc 20, 2, 6 +; CHECK-NEXT: bc 4, 20, .LBB36_2 +; CHECK-NEXT: # %bb.1: # %iffalse +; CHECK-NEXT: fmr 1, 6 +; CHECK-NEXT: blr +; CHECK-NEXT: .LBB36_2: # %iftrue +; CHECK-NEXT: fmr 1, 5 +; CHECK-NEXT: blr entry: %cmp1 = fcmp oeq double %c3, %c4 %cmp3tmp = fcmp oeq double %c1, %c2 @@ -560,8 +894,17 @@ iffalse: define double @testdoublesgt(double %c1, double %c2, double %c3, double %c4, double %a1, double %a2) #0 { ; CHECK-LABEL: testdoublesgt: -; CHECK: crorc [[REG:[0-9]+]], 6, 2 -; CHECK: bc 12, [[REG]], {{\.[a-zA-Z0-9_]+}} +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fcmpu 0, 3, 4 +; CHECK-NEXT: fcmpu 1, 1, 2 +; CHECK-NEXT: crorc 20, 6, 2 +; CHECK-NEXT: bc 12, 20, .LBB37_2 +; CHECK-NEXT: # %bb.1: # %iftrue +; CHECK-NEXT: fmr 1, 5 +; CHECK-NEXT: blr +; CHECK-NEXT: .LBB37_2: # %iffalse +; CHECK-NEXT: fmr 1, 6 +; CHECK-NEXT: blr entry: %cmp1 = fcmp oeq double %c3, %c4 %cmp3tmp = fcmp oeq double %c1, %c2 @@ -574,9 +917,18 @@ iffalse: } define double @testdoubleugt(double %c1, double %c2, double %c3, double %c4, double %a1, double %a2) #0 { -; CHECK-LABEL: testdoubleugt -; CHECK: crorc [[REG:[0-9]+]], 2, 6 -; CHECK: bc 12, [[REG]], {{\.[a-zA-Z0-9_]+}} 
+; CHECK-LABEL: testdoubleugt: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fcmpu 0, 3, 4 +; CHECK-NEXT: fcmpu 1, 1, 2 +; CHECK-NEXT: crorc 20, 2, 6 +; CHECK-NEXT: bc 12, 20, .LBB38_2 +; CHECK-NEXT: # %bb.1: # %iftrue +; CHECK-NEXT: fmr 1, 5 +; CHECK-NEXT: blr +; CHECK-NEXT: .LBB38_2: # %iffalse +; CHECK-NEXT: fmr 1, 6 +; CHECK-NEXT: blr entry: %cmp1 = fcmp oeq double %c3, %c4 %cmp3tmp = fcmp oeq double %c1, %c2 @@ -589,9 +941,18 @@ iffalse: } define double @testdoublene(double %c1, double %c2, double %c3, double %c4, double %a1, double %a2) #0 { -; CHECK-LABEL: testdoublene -; CHECK: creqv [[REG:[0-9]+]], 6, 2 -; CHECK: bc 12, [[REG]], {{\.[a-zA-Z0-9_]+}} +; CHECK-LABEL: testdoublene: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fcmpu 0, 3, 4 +; CHECK-NEXT: fcmpu 1, 1, 2 +; CHECK-NEXT: creqv 20, 6, 2 +; CHECK-NEXT: bc 12, 20, .LBB39_2 +; CHECK-NEXT: # %bb.1: # %iftrue +; CHECK-NEXT: fmr 1, 5 +; CHECK-NEXT: blr +; CHECK-NEXT: .LBB39_2: # %iffalse +; CHECK-NEXT: fmr 1, 6 +; CHECK-NEXT: blr entry: %cmp1 = fcmp oeq double %c3, %c4 %cmp3tmp = fcmp oeq double %c1, %c2 diff --git a/llvm/test/CodeGen/PowerPC/memCmpUsedInZeroEqualityComparison.ll b/llvm/test/CodeGen/PowerPC/memCmpUsedInZeroEqualityComparison.ll index ce2f93871359ae..c60efc2401609b 100644 --- a/llvm/test/CodeGen/PowerPC/memCmpUsedInZeroEqualityComparison.ll +++ b/llvm/test/CodeGen/PowerPC/memCmpUsedInZeroEqualityComparison.ll @@ -43,12 +43,12 @@ define signext i32 @zeroEqualityTest01(i8* %x, i8* %y) { ; CHECK-NEXT: ld 3, 8(3) ; CHECK-NEXT: ld 4, 8(4) ; CHECK-NEXT: cmpld 3, 4 -; CHECK-NEXT: li 3, 0 ; CHECK-NEXT: beq 0, .LBB1_3 ; CHECK-NEXT: .LBB1_2: # %res_block ; CHECK-NEXT: li 3, 1 -; CHECK-NEXT: .LBB1_3: # %endblock -; CHECK-NEXT: clrldi 3, 3, 32 +; CHECK-NEXT: blr +; CHECK-NEXT: .LBB1_3: +; CHECK-NEXT: li 3, 0 ; CHECK-NEXT: blr %call = tail call signext i32 @memcmp(i8* %x, i8* %y, i64 16) %not.tobool = icmp ne i32 %call, 0 @@ -73,12 +73,12 @@ define signext i32 @zeroEqualityTest03(i8* %x, i8* %y) { ; 
CHECK-NEXT: lbz 3, 6(3) ; CHECK-NEXT: lbz 4, 6(4) ; CHECK-NEXT: cmplw 3, 4 -; CHECK-NEXT: li 3, 0 ; CHECK-NEXT: beq 0, .LBB2_4 ; CHECK-NEXT: .LBB2_3: # %res_block ; CHECK-NEXT: li 3, 1 -; CHECK-NEXT: .LBB2_4: # %endblock -; CHECK-NEXT: clrldi 3, 3, 32 +; CHECK-NEXT: blr +; CHECK-NEXT: .LBB2_4: +; CHECK-NEXT: li 3, 0 ; CHECK-NEXT: blr %call = tail call signext i32 @memcmp(i8* %x, i8* %y, i64 7) %not.lnot = icmp ne i32 %call, 0 @@ -136,14 +136,16 @@ define signext i32 @equalityFoldOneConstant(i8* %X) { ; CHECK-NEXT: sldi 4, 4, 32 ; CHECK-NEXT: ori 4, 4, 2 ; CHECK-NEXT: cmpld 3, 4 -; CHECK-NEXT: li 3, 0 -; CHECK-NEXT: beq 0, .LBB6_3 +; CHECK-NEXT: beq 0, .LBB6_4 ; CHECK-NEXT: .LBB6_2: # %res_block ; CHECK-NEXT: li 3, 1 ; CHECK-NEXT: .LBB6_3: # %endblock ; CHECK-NEXT: cntlzw 3, 3 ; CHECK-NEXT: srwi 3, 3, 5 ; CHECK-NEXT: blr +; CHECK-NEXT: .LBB6_4: +; CHECK-NEXT: li 3, 0 +; CHECK-NEXT: b .LBB6_3 %call = tail call signext i32 @memcmp(i8* bitcast ([15 x i32]* @zeroEqualityTest04.buffer1 to i8*), i8* %X, i64 16) %not.tobool = icmp eq i32 %call, 0 %cond = zext i1 %not.tobool to i32 diff --git a/llvm/test/CodeGen/PowerPC/redundant-copy-after-tail-dup.ll b/llvm/test/CodeGen/PowerPC/redundant-copy-after-tail-dup.ll index f6506b3c87f869..c65acff5f0e106 100644 --- a/llvm/test/CodeGen/PowerPC/redundant-copy-after-tail-dup.ll +++ b/llvm/test/CodeGen/PowerPC/redundant-copy-after-tail-dup.ll @@ -17,24 +17,28 @@ define dso_local i1 @t(%class.A* %this, i32 %color, i32 %vertex) local_unnamed_addr { ; CHECK-P9-LABEL: t: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: li r5, 1 -; CHECK-P9-NEXT: bc 12, 4*cr5+lt, .LBB0_4 +; CHECK-P9-NEXT: bc 12, 4*cr5+lt, .LBB0_3 ; CHECK-P9-NEXT: # %bb.1: # %land.lhs.true -; CHECK-P9-NEXT: bc 12, 4*cr5+lt, .LBB0_5 -; CHECK-P9-NEXT: .LBB0_2: # %for.inc +; CHECK-P9-NEXT: li r4, 1 +; CHECK-P9-NEXT: bc 4, 4*cr5+lt, .LBB0_4 +; CHECK-P9-NEXT: # %bb.2: # %cleanup16 +; CHECK-P9-NEXT: mr r3, r4 +; CHECK-P9-NEXT: blr +; CHECK-P9-NEXT: .LBB0_3: # %lor.lhs.false +; 
CHECK-P9-NEXT: cmplwi r4, 0 +; CHECK-P9-NEXT: beq cr0, .LBB0_6 +; CHECK-P9-NEXT: .LBB0_4: # %for.inc ; CHECK-P9-NEXT: lhz r3, 5308(r3) ; CHECK-P9-NEXT: cmplwi r3, 2 -; CHECK-P9-NEXT: bge- cr0, .LBB0_6 -; CHECK-P9-NEXT: # %bb.3: # %land.lhs.true.1 +; CHECK-P9-NEXT: bge- cr0, .LBB0_7 +; CHECK-P9-NEXT: # %bb.5: # %land.lhs.true.1 ; CHECK-P9-NEXT: li r3, 0 ; CHECK-P9-NEXT: blr -; CHECK-P9-NEXT: .LBB0_4: # %lor.lhs.false -; CHECK-P9-NEXT: cmplwi r4, 0 -; CHECK-P9-NEXT: bne cr0, .LBB0_2 -; CHECK-P9-NEXT: .LBB0_5: # %cleanup16 -; CHECK-P9-NEXT: mr r3, r5 +; CHECK-P9-NEXT: .LBB0_6: +; CHECK-P9-NEXT: li r4, 1 +; CHECK-P9-NEXT: mr r3, r4 ; CHECK-P9-NEXT: blr -; CHECK-P9-NEXT: .LBB0_6: # %lor.lhs.false.1 +; CHECK-P9-NEXT: .LBB0_7: # %lor.lhs.false.1 entry: br i1 undef, label %land.lhs.true, label %lor.lhs.false diff --git a/llvm/test/CodeGen/RISCV/branch.ll b/llvm/test/CodeGen/RISCV/branch.ll index e834499280328d..562b0fe5cf07c5 100644 --- a/llvm/test/CodeGen/RISCV/branch.ll +++ b/llvm/test/CodeGen/RISCV/branch.ll @@ -6,41 +6,42 @@ define void @foo(i32 %a, i32 *%b, i1 %c) nounwind { ; RV32I-LABEL: foo: ; RV32I: # %bb.0: ; RV32I-NEXT: lw a3, 0(a1) -; RV32I-NEXT: beq a3, a0, .LBB0_12 +; RV32I-NEXT: beq a3, a0, .LBB0_2 ; RV32I-NEXT: # %bb.1: # %test2 ; RV32I-NEXT: lw a3, 0(a1) -; RV32I-NEXT: bne a3, a0, .LBB0_12 -; RV32I-NEXT: # %bb.2: # %test3 +; RV32I-NEXT: beq a3, a0, .LBB0_3 +; RV32I-NEXT: .LBB0_2: # %end +; RV32I-NEXT: ret +; RV32I-NEXT: .LBB0_3: # %test3 ; RV32I-NEXT: lw a3, 0(a1) -; RV32I-NEXT: blt a3, a0, .LBB0_12 -; RV32I-NEXT: # %bb.3: # %test4 +; RV32I-NEXT: blt a3, a0, .LBB0_2 +; RV32I-NEXT: # %bb.4: # %test4 ; RV32I-NEXT: lw a3, 0(a1) -; RV32I-NEXT: bge a3, a0, .LBB0_12 -; RV32I-NEXT: # %bb.4: # %test5 +; RV32I-NEXT: bge a3, a0, .LBB0_2 +; RV32I-NEXT: # %bb.5: # %test5 ; RV32I-NEXT: lw a3, 0(a1) -; RV32I-NEXT: bltu a3, a0, .LBB0_12 -; RV32I-NEXT: # %bb.5: # %test6 +; RV32I-NEXT: bltu a3, a0, .LBB0_2 +; RV32I-NEXT: # %bb.6: # %test6 ; RV32I-NEXT: lw a3, 0(a1) -; 
RV32I-NEXT: bgeu a3, a0, .LBB0_12 -; RV32I-NEXT: # %bb.6: # %test7 +; RV32I-NEXT: bgeu a3, a0, .LBB0_2 +; RV32I-NEXT: # %bb.7: # %test7 ; RV32I-NEXT: lw a3, 0(a1) -; RV32I-NEXT: blt a0, a3, .LBB0_12 -; RV32I-NEXT: # %bb.7: # %test8 +; RV32I-NEXT: blt a0, a3, .LBB0_2 +; RV32I-NEXT: # %bb.8: # %test8 ; RV32I-NEXT: lw a3, 0(a1) -; RV32I-NEXT: bge a0, a3, .LBB0_12 -; RV32I-NEXT: # %bb.8: # %test9 +; RV32I-NEXT: bge a0, a3, .LBB0_2 +; RV32I-NEXT: # %bb.9: # %test9 ; RV32I-NEXT: lw a3, 0(a1) -; RV32I-NEXT: bltu a0, a3, .LBB0_12 -; RV32I-NEXT: # %bb.9: # %test10 +; RV32I-NEXT: bltu a0, a3, .LBB0_2 +; RV32I-NEXT: # %bb.10: # %test10 ; RV32I-NEXT: lw a3, 0(a1) -; RV32I-NEXT: bgeu a0, a3, .LBB0_12 -; RV32I-NEXT: # %bb.10: # %test11 +; RV32I-NEXT: bgeu a0, a3, .LBB0_2 +; RV32I-NEXT: # %bb.11: # %test11 ; RV32I-NEXT: lw a0, 0(a1) ; RV32I-NEXT: andi a0, a2, 1 -; RV32I-NEXT: bnez a0, .LBB0_12 -; RV32I-NEXT: # %bb.11: # %test12 +; RV32I-NEXT: bnez a0, .LBB0_2 +; RV32I-NEXT: # %bb.12: # %test12 ; RV32I-NEXT: lw a0, 0(a1) -; RV32I-NEXT: .LBB0_12: # %end ; RV32I-NEXT: ret %val1 = load volatile i32, i32* %b %tst1 = icmp eq i32 %val1, %a diff --git a/llvm/test/CodeGen/RISCV/rv64m-w-insts-legalization.ll b/llvm/test/CodeGen/RISCV/rv64m-w-insts-legalization.ll index 8af41d2608449d..5635c9ee687244 100644 --- a/llvm/test/CodeGen/RISCV/rv64m-w-insts-legalization.ll +++ b/llvm/test/CodeGen/RISCV/rv64m-w-insts-legalization.ll @@ -5,15 +5,13 @@ define signext i32 @mulw(i32 signext %s, i32 signext %n, i32 signext %k) nounwin ; CHECK-LABEL: mulw: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: addi a2, zero, 1 -; CHECK-NEXT: bge a0, a1, .LBB0_3 -; CHECK-NEXT: # %bb.1: # %for.body.preheader -; CHECK-NEXT: addi a2, zero, 1 -; CHECK-NEXT: .LBB0_2: # %for.body +; CHECK-NEXT: bge a0, a1, .LBB0_2 +; CHECK-NEXT: .LBB0_1: # %for.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: mulw a2, a0, a2 ; CHECK-NEXT: addiw a0, a0, 1 -; CHECK-NEXT: blt a0, a1, .LBB0_2 -; CHECK-NEXT: .LBB0_3: # 
%for.cond.cleanup +; CHECK-NEXT: blt a0, a1, .LBB0_1 +; CHECK-NEXT: .LBB0_2: # %for.cond.cleanup ; CHECK-NEXT: mv a0, a2 ; CHECK-NEXT: ret entry: diff --git a/llvm/test/CodeGen/SystemZ/int-cmp-37.ll b/llvm/test/CodeGen/SystemZ/int-cmp-37.ll index 28cabf7f5bb713..6126000a897843 100644 --- a/llvm/test/CodeGen/SystemZ/int-cmp-37.ll +++ b/llvm/test/CodeGen/SystemZ/int-cmp-37.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; Test 32-bit comparisons in which the second operand is zero-extended ; from a PC-relative i16. ; @@ -9,9 +10,16 @@ ; Check unsigned comparison. define i32 @f1(i32 %src1) { ; CHECK-LABEL: f1: -; CHECK: clhrl %r2, g -; CHECK-NEXT: jl -; CHECK: br %r14 +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: clhrl %r2, g +; CHECK-NEXT: jhe .LBB0_2 +; CHECK-NEXT: # %bb.1: # %exit +; CHECK-NEXT: ahi %r2, 1 +; CHECK-NEXT: br %r14 +; CHECK-NEXT: .LBB0_2: # %mulb +; CHECK-NEXT: msr %r2, %r2 +; CHECK-NEXT: ahi %r2, 1 +; CHECK-NEXT: br %r14 entry: %val = load i16, i16 *@g %src2 = zext i16 %val to i32 @@ -29,8 +37,16 @@ exit: ; Check signed comparison. define i32 @f2(i32 %src1) { ; CHECK-LABEL: f2: -; CHECK-NOT: clhrl -; CHECK: br %r14 +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: llhrl %r0, g +; CHECK-NEXT: crjhe %r2, %r0, .LBB1_2 +; CHECK-NEXT: # %bb.1: # %exit +; CHECK-NEXT: ahi %r2, 1 +; CHECK-NEXT: br %r14 +; CHECK-NEXT: .LBB1_2: # %mulb +; CHECK-NEXT: msr %r2, %r2 +; CHECK-NEXT: ahi %r2, 1 +; CHECK-NEXT: br %r14 entry: %val = load i16, i16 *@g %src2 = zext i16 %val to i32 @@ -48,9 +64,14 @@ exit: ; Check equality. 
define i32 @f3(i32 %src1) { ; CHECK-LABEL: f3: -; CHECK: clhrl %r2, g -; CHECK-NEXT: je -; CHECK: br %r14 +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: clhrl %r2, g +; CHECK-NEXT: je .LBB2_2 +; CHECK-NEXT: # %bb.1: # %mulb +; CHECK-NEXT: msr %r2, %r2 +; CHECK-NEXT: .LBB2_2: # %exit +; CHECK-NEXT: ahi %r2, 1 +; CHECK-NEXT: br %r14 entry: %val = load i16, i16 *@g %src2 = zext i16 %val to i32 @@ -68,9 +89,16 @@ exit: ; Check inequality. define i32 @f4(i32 %src1) { ; CHECK-LABEL: f4: -; CHECK: clhrl %r2, g -; CHECK-NEXT: jlh -; CHECK: br %r14 +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: clhrl %r2, g +; CHECK-NEXT: je .LBB3_2 +; CHECK-NEXT: # %bb.1: # %exit +; CHECK-NEXT: ahi %r2, 1 +; CHECK-NEXT: br %r14 +; CHECK-NEXT: .LBB3_2: # %mulb +; CHECK-NEXT: msr %r2, %r2 +; CHECK-NEXT: ahi %r2, 1 +; CHECK-NEXT: br %r14 entry: %val = load i16, i16 *@g %src2 = zext i16 %val to i32 @@ -88,10 +116,17 @@ exit: ; Repeat f1 with an unaligned address. define i32 @f5(i32 %src1) { ; CHECK-LABEL: f5: -; CHECK: lgrl [[REG:%r[0-5]]], h@GOT -; CHECK: llh [[VAL:%r[0-5]]], 0([[REG]]) -; CHECK: clrjl %r2, [[VAL]], -; CHECK: br %r14 +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lgrl %r1, h@GOT +; CHECK-NEXT: llh %r0, 0(%r1) +; CHECK-NEXT: clrjhe %r2, %r0, .LBB4_2 +; CHECK-NEXT: # %bb.1: # %exit +; CHECK-NEXT: ahi %r2, 1 +; CHECK-NEXT: br %r14 +; CHECK-NEXT: .LBB4_2: # %mulb +; CHECK-NEXT: msr %r2, %r2 +; CHECK-NEXT: ahi %r2, 1 +; CHECK-NEXT: br %r14 entry: %val = load i16, i16 *@h, align 1 %src2 = zext i16 %val to i32 @@ -109,9 +144,16 @@ exit: ; Check the comparison can be reversed if that allows CLHRL to be used. 
define i32 @f6(i32 %src2) { ; CHECK-LABEL: f6: -; CHECK: clhrl %r2, g -; CHECK-NEXT: jh {{\.L.*}} -; CHECK: br %r14 +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: clhrl %r2, g +; CHECK-NEXT: jle .LBB5_2 +; CHECK-NEXT: # %bb.1: # %exit +; CHECK-NEXT: ahi %r2, 1 +; CHECK-NEXT: br %r14 +; CHECK-NEXT: .LBB5_2: # %mulb +; CHECK-NEXT: msr %r2, %r2 +; CHECK-NEXT: ahi %r2, 1 +; CHECK-NEXT: br %r14 entry: %val = load i16, i16 *@g %src1 = zext i16 %val to i32 diff --git a/llvm/test/CodeGen/SystemZ/int-cmp-40.ll b/llvm/test/CodeGen/SystemZ/int-cmp-40.ll index 09bf903be7b234..f14f48e1d3d03f 100644 --- a/llvm/test/CodeGen/SystemZ/int-cmp-40.ll +++ b/llvm/test/CodeGen/SystemZ/int-cmp-40.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; Test 64-bit comparisons in which the second operand is zero-extended ; from a PC-relative i16. ; @@ -9,9 +10,16 @@ ; Check unsigned comparison. define i64 @f1(i64 %src1) { ; CHECK-LABEL: f1: -; CHECK: clghrl %r2, g -; CHECK-NEXT: jl -; CHECK: br %r14 +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: clghrl %r2, g +; CHECK-NEXT: jhe .LBB0_2 +; CHECK-NEXT: # %bb.1: # %exit +; CHECK-NEXT: la %r2, 1(%r2) +; CHECK-NEXT: br %r14 +; CHECK-NEXT: .LBB0_2: # %mulb +; CHECK-NEXT: msgr %r2, %r2 +; CHECK-NEXT: la %r2, 1(%r2) +; CHECK-NEXT: br %r14 entry: %val = load i16, i16 *@g %src2 = zext i16 %val to i64 @@ -29,8 +37,16 @@ exit: ; Check signed comparison. define i64 @f2(i64 %src1) { ; CHECK-LABEL: f2: -; CHECK-NOT: clghrl -; CHECK: br %r14 +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: llghrl %r0, g +; CHECK-NEXT: cgrjhe %r2, %r0, .LBB1_2 +; CHECK-NEXT: # %bb.1: # %exit +; CHECK-NEXT: la %r2, 1(%r2) +; CHECK-NEXT: br %r14 +; CHECK-NEXT: .LBB1_2: # %mulb +; CHECK-NEXT: msgr %r2, %r2 +; CHECK-NEXT: la %r2, 1(%r2) +; CHECK-NEXT: br %r14 entry: %val = load i16, i16 *@g %src2 = zext i16 %val to i64 @@ -48,9 +64,14 @@ exit: ; Check equality. 
define i64 @f3(i64 %src1) { ; CHECK-LABEL: f3: -; CHECK: clghrl %r2, g -; CHECK-NEXT: je -; CHECK: br %r14 +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: clghrl %r2, g +; CHECK-NEXT: je .LBB2_2 +; CHECK-NEXT: # %bb.1: # %mulb +; CHECK-NEXT: msgr %r2, %r2 +; CHECK-NEXT: .LBB2_2: # %exit +; CHECK-NEXT: la %r2, 1(%r2) +; CHECK-NEXT: br %r14 entry: %val = load i16, i16 *@g %src2 = zext i16 %val to i64 @@ -68,9 +89,16 @@ exit: ; Check inequality. define i64 @f4(i64 %src1) { ; CHECK-LABEL: f4: -; CHECK: clghrl %r2, g -; CHECK-NEXT: jlh -; CHECK: br %r14 +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: clghrl %r2, g +; CHECK-NEXT: je .LBB3_2 +; CHECK-NEXT: # %bb.1: # %exit +; CHECK-NEXT: la %r2, 1(%r2) +; CHECK-NEXT: br %r14 +; CHECK-NEXT: .LBB3_2: # %mulb +; CHECK-NEXT: msgr %r2, %r2 +; CHECK-NEXT: la %r2, 1(%r2) +; CHECK-NEXT: br %r14 entry: %val = load i16, i16 *@g %src2 = zext i16 %val to i64 @@ -88,10 +116,17 @@ exit: ; Repeat f1 with an unaligned address. define i64 @f5(i64 %src1) { ; CHECK-LABEL: f5: -; CHECK: lgrl [[REG:%r[0-5]]], h@GOT -; CHECK: llgh [[VAL:%r[0-5]]], 0([[REG]]) -; CHECK: clgrjl %r2, [[VAL]], -; CHECK: br %r14 +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lgrl %r1, h@GOT +; CHECK-NEXT: llgh %r0, 0(%r1) +; CHECK-NEXT: clgrjhe %r2, %r0, .LBB4_2 +; CHECK-NEXT: # %bb.1: # %exit +; CHECK-NEXT: la %r2, 1(%r2) +; CHECK-NEXT: br %r14 +; CHECK-NEXT: .LBB4_2: # %mulb +; CHECK-NEXT: msgr %r2, %r2 +; CHECK-NEXT: la %r2, 1(%r2) +; CHECK-NEXT: br %r14 entry: %val = load i16, i16 *@h, align 1 %src2 = zext i16 %val to i64 @@ -109,9 +144,16 @@ exit: ; Check the comparison can be reversed if that allows CLGHRL to be used. 
define i64 @f6(i64 %src2) { ; CHECK-LABEL: f6: -; CHECK: clghrl %r2, g -; CHECK-NEXT: jh {{\.L.*}} -; CHECK: br %r14 +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: clghrl %r2, g +; CHECK-NEXT: jle .LBB5_2 +; CHECK-NEXT: # %bb.1: # %exit +; CHECK-NEXT: la %r2, 1(%r2) +; CHECK-NEXT: br %r14 +; CHECK-NEXT: .LBB5_2: # %mulb +; CHECK-NEXT: msgr %r2, %r2 +; CHECK-NEXT: la %r2, 1(%r2) +; CHECK-NEXT: br %r14 entry: %val = load i16, i16 *@g %src1 = zext i16 %val to i64 diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-float-loops.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-float-loops.ll index 261222f60f17af..9ad5cdf60ce343 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-float-loops.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-float-loops.ll @@ -903,8 +903,7 @@ define arm_aapcs_vfpcc void @float_int_int_mul(i32* nocapture readonly %a, i32* ; CHECK-NEXT: le lr, .LBB4_4 ; CHECK-NEXT: @ %bb.5: @ %middle.block ; CHECK-NEXT: cmp r12, r3 -; CHECK-NEXT: it eq -; CHECK-NEXT: popeq {r4, r5, r6, pc} +; CHECK-NEXT: beq .LBB4_8 ; CHECK-NEXT: .LBB4_6: @ %for.body.preheader11 ; CHECK-NEXT: sub.w lr, r3, r12 ; CHECK-NEXT: add.w r0, r0, r12, lsl #2 diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/varying-outer-2d-reduction.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/varying-outer-2d-reduction.ll index 32a1c17dbbff3b..0bd7ac870974d6 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/varying-outer-2d-reduction.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/varying-outer-2d-reduction.ll @@ -17,7 +17,7 @@ define dso_local void @varying_outer_2d_reduction(i16* nocapture readonly %Input, i16* nocapture %Output, i16 signext %Size, i16 signext %N, i16 signext %Scale) local_unnamed_addr { ; ENABLED-LABEL: varying_outer_2d_reduction: ; ENABLED: @ %bb.0: @ %entry -; ENABLED-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, lr} +; ENABLED-NEXT: push.w {r4, r5, r6, r7, r9, r10, r11, lr} ; ENABLED-NEXT: sub sp, #4 ; ENABLED-NEXT: cmp r3, #1 ; ENABLED-NEXT: str 
r0, [sp] @ 4-byte Spill @@ -54,7 +54,7 @@ define dso_local void @varying_outer_2d_reduction(i16* nocapture readonly %Input ; ENABLED-NEXT: subs r0, #4 ; ENABLED-NEXT: subs r4, r2, r6 ; ENABLED-NEXT: vmov.i32 q0, #0x0 -; ENABLED-NEXT: add.w r8, r7, r0, lsr #2 +; ENABLED-NEXT: add.w r11, r7, r0, lsr #2 ; ENABLED-NEXT: mov r7, r10 ; ENABLED-NEXT: dlstp.32 lr, r4 ; ENABLED-NEXT: ldr r0, [sp] @ 4-byte Reload @@ -63,9 +63,9 @@ define dso_local void @varying_outer_2d_reduction(i16* nocapture readonly %Input ; ENABLED-NEXT: @ => This Inner Loop Header: Depth=2 ; ENABLED-NEXT: vldrh.s32 q1, [r0], #8 ; ENABLED-NEXT: vldrh.s32 q2, [r7], #8 -; ENABLED-NEXT: mov lr, r8 +; ENABLED-NEXT: mov lr, r11 ; ENABLED-NEXT: vmul.i32 q1, q2, q1 -; ENABLED-NEXT: sub.w r8, r8, #1 +; ENABLED-NEXT: sub.w r11, r11, #1 ; ENABLED-NEXT: vshl.s32 q1, r5 ; ENABLED-NEXT: vadd.i32 q0, q1, q0 ; ENABLED-NEXT: letp lr, .LBB0_6 @@ -75,11 +75,11 @@ define dso_local void @varying_outer_2d_reduction(i16* nocapture readonly %Input ; ENABLED-NEXT: b .LBB0_3 ; ENABLED-NEXT: .LBB0_8: @ %for.end17 ; ENABLED-NEXT: add sp, #4 -; ENABLED-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, pc} +; ENABLED-NEXT: pop.w {r4, r5, r6, r7, r9, r10, r11, pc} ; ; NOREDUCTIONS-LABEL: varying_outer_2d_reduction: ; NOREDUCTIONS: @ %bb.0: @ %entry -; NOREDUCTIONS-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, lr} +; NOREDUCTIONS-NEXT: push.w {r4, r5, r6, r7, r9, r10, r11, lr} ; NOREDUCTIONS-NEXT: sub sp, #4 ; NOREDUCTIONS-NEXT: cmp r3, #1 ; NOREDUCTIONS-NEXT: str r0, [sp] @ 4-byte Spill @@ -116,7 +116,7 @@ define dso_local void @varying_outer_2d_reduction(i16* nocapture readonly %Input ; NOREDUCTIONS-NEXT: subs r0, #4 ; NOREDUCTIONS-NEXT: subs r4, r2, r6 ; NOREDUCTIONS-NEXT: vmov.i32 q0, #0x0 -; NOREDUCTIONS-NEXT: add.w r8, r7, r0, lsr #2 +; NOREDUCTIONS-NEXT: add.w r11, r7, r0, lsr #2 ; NOREDUCTIONS-NEXT: mov r7, r10 ; NOREDUCTIONS-NEXT: dlstp.32 lr, r4 ; NOREDUCTIONS-NEXT: ldr r0, [sp] @ 4-byte Reload @@ -125,9 +125,9 @@ define dso_local 
void @varying_outer_2d_reduction(i16* nocapture readonly %Input ; NOREDUCTIONS-NEXT: @ => This Inner Loop Header: Depth=2 ; NOREDUCTIONS-NEXT: vldrh.s32 q1, [r0], #8 ; NOREDUCTIONS-NEXT: vldrh.s32 q2, [r7], #8 -; NOREDUCTIONS-NEXT: mov lr, r8 +; NOREDUCTIONS-NEXT: mov lr, r11 ; NOREDUCTIONS-NEXT: vmul.i32 q1, q2, q1 -; NOREDUCTIONS-NEXT: sub.w r8, r8, #1 +; NOREDUCTIONS-NEXT: sub.w r11, r11, #1 ; NOREDUCTIONS-NEXT: vshl.s32 q1, r5 ; NOREDUCTIONS-NEXT: vadd.i32 q0, q1, q0 ; NOREDUCTIONS-NEXT: letp lr, .LBB0_6 @@ -137,8 +137,7 @@ define dso_local void @varying_outer_2d_reduction(i16* nocapture readonly %Input ; NOREDUCTIONS-NEXT: b .LBB0_3 ; NOREDUCTIONS-NEXT: .LBB0_8: @ %for.end17 ; NOREDUCTIONS-NEXT: add sp, #4 -; NOREDUCTIONS-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, pc} -; +; NOREDUCTIONS-NEXT: pop.w {r4, r5, r6, r7, r9, r10, r11, pc} entry: %conv = sext i16 %N to i32 %cmp36 = icmp sgt i16 %N, 0 diff --git a/llvm/test/CodeGen/Thumb2/mve-postinc-distribute.ll b/llvm/test/CodeGen/Thumb2/mve-postinc-distribute.ll index 2db5bf59ecfaeb..a3b88cfc23130d 100644 --- a/llvm/test/CodeGen/Thumb2/mve-postinc-distribute.ll +++ b/llvm/test/CodeGen/Thumb2/mve-postinc-distribute.ll @@ -302,8 +302,7 @@ define void @fma8(float* noalias nocapture readonly %A, float* noalias nocapture ; CHECK-NEXT: le lr, .LBB2_4 ; CHECK-NEXT: @ %bb.5: @ %middle.block ; CHECK-NEXT: cmp r12, r3 -; CHECK-NEXT: it eq -; CHECK-NEXT: popeq {r4, r5, r6, pc} +; CHECK-NEXT: beq .LBB2_8 ; CHECK-NEXT: .LBB2_6: @ %for.body.preheader12 ; CHECK-NEXT: sub.w lr, r3, r12 ; CHECK-NEXT: add.w r0, r0, r12, lsl #2 diff --git a/llvm/test/CodeGen/Thumb2/mve-postinc-lsr.ll b/llvm/test/CodeGen/Thumb2/mve-postinc-lsr.ll index 646124e0cf9836..c99bcf7dccbb2b 100644 --- a/llvm/test/CodeGen/Thumb2/mve-postinc-lsr.ll +++ b/llvm/test/CodeGen/Thumb2/mve-postinc-lsr.ll @@ -36,8 +36,7 @@ define void @fma(float* noalias nocapture readonly %A, float* noalias nocapture ; CHECK-NEXT: le lr, .LBB0_4 ; CHECK-NEXT: @ %bb.5: @ %middle.block 
; CHECK-NEXT: cmp r12, r3 -; CHECK-NEXT: it eq -; CHECK-NEXT: popeq {r4, r5, r6, pc} +; CHECK-NEXT: beq .LBB0_8 ; CHECK-NEXT: .LBB0_6: @ %for.body.preheader12 ; CHECK-NEXT: sub.w lr, r3, r12 ; CHECK-NEXT: add.w r0, r0, r12, lsl #2 @@ -220,7 +219,7 @@ define i8* @test(i8* nocapture readonly %input_row, i8* nocapture readonly %inpu ; CHECK-NEXT: ldr r3, [sp, #64] ; CHECK-NEXT: mov.w r11, #0 ; CHECK-NEXT: ldr r1, [sp, #16] @ 4-byte Reload -; CHECK-NEXT: ldr.w r9, [sp, #56] +; CHECK-NEXT: ldr r4, [sp, #56] ; CHECK-NEXT: add.w r0, r1, r3, lsl #1 ; CHECK-NEXT: str r0, [sp, #8] @ 4-byte Spill ; CHECK-NEXT: adds r0, r1, r3 @@ -229,56 +228,56 @@ define i8* @test(i8* nocapture readonly %input_row, i8* nocapture readonly %inpu ; CHECK-NEXT: add r0, r1 ; CHECK-NEXT: str r0, [sp] @ 4-byte Spill ; CHECK-NEXT: adds r0, r3, #7 -; CHECK-NEXT: lsrs r0, r0, #3 +; CHECK-NEXT: lsr.w r9, r0, #3 ; CHECK-NEXT: b .LBB2_5 ; CHECK-NEXT: .LBB2_3: @ in Loop: Header=BB2_5 Depth=1 -; CHECK-NEXT: mov r8, r12 -; CHECK-NEXT: mov r10, r12 -; CHECK-NEXT: mov r6, r12 +; CHECK-NEXT: mov r8, r10 +; CHECK-NEXT: mov r12, r10 +; CHECK-NEXT: mov r6, r10 ; CHECK-NEXT: .LBB2_4: @ %for.cond.cleanup23 ; CHECK-NEXT: @ in Loop: Header=BB2_5 Depth=1 -; CHECK-NEXT: ldr r3, [sp, #72] -; CHECK-NEXT: add.w r1, r10, r8 -; CHECK-NEXT: add r1, r6 -; CHECK-NEXT: add r1, r12 -; CHECK-NEXT: strb.w r1, [r3, r11] +; CHECK-NEXT: ldr r1, [sp, #72] +; CHECK-NEXT: add.w r0, r12, r8 +; CHECK-NEXT: add r0, r6 +; CHECK-NEXT: add r0, r10 +; CHECK-NEXT: strb.w r0, [r1, r11] ; CHECK-NEXT: add.w r11, r11, #1 ; CHECK-NEXT: cmp r11, r2 ; CHECK-NEXT: beq .LBB2_8 ; CHECK-NEXT: .LBB2_5: @ %for.body ; CHECK-NEXT: @ =>This Loop Header: Depth=1 ; CHECK-NEXT: @ Child Loop BB2_7 Depth 2 -; CHECK-NEXT: ldr r1, [sp, #68] -; CHECK-NEXT: subs.w lr, r0, r0 -; CHECK-NEXT: ldr.w r12, [r1, r11, lsl #2] +; CHECK-NEXT: ldr r0, [sp, #68] +; CHECK-NEXT: subs.w lr, r9, r9 +; CHECK-NEXT: ldr.w r10, [r0, r11, lsl #2] ; CHECK-NEXT: ble .LBB2_3 ; CHECK-NEXT: @ 
%bb.6: @ %for.body24.preheader ; CHECK-NEXT: @ in Loop: Header=BB2_5 Depth=1 -; CHECK-NEXT: ldr r3, [sp, #64] -; CHECK-NEXT: mov r6, r12 -; CHECK-NEXT: ldr r1, [sp, #12] @ 4-byte Reload +; CHECK-NEXT: ldr r1, [sp, #64] +; CHECK-NEXT: mov r6, r10 +; CHECK-NEXT: ldr r0, [sp, #12] @ 4-byte Reload ; CHECK-NEXT: dls lr, lr -; CHECK-NEXT: ldr r5, [sp, #8] @ 4-byte Reload -; CHECK-NEXT: mov r10, r12 -; CHECK-NEXT: mla r7, r11, r3, r1 +; CHECK-NEXT: ldr r3, [sp, #8] @ 4-byte Reload +; CHECK-NEXT: mov r12, r10 +; CHECK-NEXT: mla r7, r11, r1, r0 ; CHECK-NEXT: ldr r1, [sp, #16] @ 4-byte Reload -; CHECK-NEXT: ldrd r4, r3, [sp] @ 8-byte Folded Reload -; CHECK-NEXT: mov r8, r12 +; CHECK-NEXT: ldrd r5, r0, [sp] @ 8-byte Folded Reload +; CHECK-NEXT: mov r8, r10 ; CHECK-NEXT: .LBB2_7: @ %for.body24 ; CHECK-NEXT: @ Parent Loop BB2_5 Depth=1 ; CHECK-NEXT: @ => This Inner Loop Header: Depth=2 -; CHECK-NEXT: vldrb.s16 q0, [r4], #8 -; CHECK-NEXT: vadd.i16 q1, q0, r9 +; CHECK-NEXT: vldrb.s16 q0, [r5], #8 +; CHECK-NEXT: vadd.i16 q1, q0, r4 ; CHECK-NEXT: vldrb.s16 q0, [r7], #8 -; CHECK-NEXT: vmlava.s16 r12, q0, q1 -; CHECK-NEXT: vldrb.s16 q1, [r5], #8 -; CHECK-NEXT: vadd.i16 q1, q1, r9 -; CHECK-NEXT: vmlava.s16 r6, q0, q1 -; CHECK-NEXT: vldrb.s16 q1, [r3], #8 -; CHECK-NEXT: vadd.i16 q1, q1, r9 ; CHECK-NEXT: vmlava.s16 r10, q0, q1 +; CHECK-NEXT: vldrb.s16 q1, [r3], #8 +; CHECK-NEXT: vadd.i16 q1, q1, r4 +; CHECK-NEXT: vmlava.s16 r6, q0, q1 +; CHECK-NEXT: vldrb.s16 q1, [r0], #8 +; CHECK-NEXT: vadd.i16 q1, q1, r4 +; CHECK-NEXT: vmlava.s16 r12, q0, q1 ; CHECK-NEXT: vldrb.s16 q1, [r1], #8 -; CHECK-NEXT: vadd.i16 q1, q1, r9 +; CHECK-NEXT: vadd.i16 q1, q1, r4 ; CHECK-NEXT: vmlava.s16 r8, q0, q1 ; CHECK-NEXT: le lr, .LBB2_7 ; CHECK-NEXT: b .LBB2_4 @@ -403,7 +402,7 @@ define i8* @test_optsize(i8* nocapture readonly %input_row, i8* nocapture readon ; CHECK-NEXT: ldr r3, [sp, #64] ; CHECK-NEXT: mov.w r11, #0 ; CHECK-NEXT: ldr r1, [sp, #16] @ 4-byte Reload -; CHECK-NEXT: ldr.w r9, [sp, #56] +; 
CHECK-NEXT: ldr r4, [sp, #56] ; CHECK-NEXT: add.w r0, r1, r3, lsl #1 ; CHECK-NEXT: str r0, [sp, #8] @ 4-byte Spill ; CHECK-NEXT: adds r0, r1, r3 @@ -412,55 +411,55 @@ define i8* @test_optsize(i8* nocapture readonly %input_row, i8* nocapture readon ; CHECK-NEXT: add r0, r1 ; CHECK-NEXT: str r0, [sp] @ 4-byte Spill ; CHECK-NEXT: adds r0, r3, #7 -; CHECK-NEXT: lsrs r0, r0, #3 +; CHECK-NEXT: lsr.w r9, r0, #3 ; CHECK-NEXT: .LBB3_3: @ %for.body ; CHECK-NEXT: @ =>This Loop Header: Depth=1 ; CHECK-NEXT: @ Child Loop BB3_5 Depth 2 -; CHECK-NEXT: ldr r1, [sp, #68] -; CHECK-NEXT: subs.w lr, r0, r0 -; CHECK-NEXT: ldr.w r12, [r1, r11, lsl #2] +; CHECK-NEXT: ldr r0, [sp, #68] +; CHECK-NEXT: subs.w lr, r9, r9 +; CHECK-NEXT: ldr.w r10, [r0, r11, lsl #2] ; CHECK-NEXT: ble .LBB3_6 ; CHECK-NEXT: @ %bb.4: @ %for.body24.preheader ; CHECK-NEXT: @ in Loop: Header=BB3_3 Depth=1 -; CHECK-NEXT: ldr r3, [sp, #64] -; CHECK-NEXT: mov r6, r12 -; CHECK-NEXT: ldr r1, [sp, #12] @ 4-byte Reload +; CHECK-NEXT: ldr r1, [sp, #64] +; CHECK-NEXT: mov r6, r10 +; CHECK-NEXT: ldr r0, [sp, #12] @ 4-byte Reload ; CHECK-NEXT: dls lr, lr -; CHECK-NEXT: ldr r5, [sp, #8] @ 4-byte Reload -; CHECK-NEXT: mov r10, r12 -; CHECK-NEXT: mla r7, r11, r3, r1 +; CHECK-NEXT: ldr r3, [sp, #8] @ 4-byte Reload +; CHECK-NEXT: mov r12, r10 +; CHECK-NEXT: mla r7, r11, r1, r0 ; CHECK-NEXT: ldr r1, [sp, #16] @ 4-byte Reload -; CHECK-NEXT: ldrd r4, r3, [sp] @ 8-byte Folded Reload -; CHECK-NEXT: mov r8, r12 +; CHECK-NEXT: ldrd r5, r0, [sp] @ 8-byte Folded Reload +; CHECK-NEXT: mov r8, r10 ; CHECK-NEXT: .LBB3_5: @ %for.body24 ; CHECK-NEXT: @ Parent Loop BB3_3 Depth=1 ; CHECK-NEXT: @ => This Inner Loop Header: Depth=2 -; CHECK-NEXT: vldrb.s16 q0, [r4], #8 -; CHECK-NEXT: vadd.i16 q1, q0, r9 +; CHECK-NEXT: vldrb.s16 q0, [r5], #8 +; CHECK-NEXT: vadd.i16 q1, q0, r4 ; CHECK-NEXT: vldrb.s16 q0, [r7], #8 -; CHECK-NEXT: vmlava.s16 r12, q0, q1 -; CHECK-NEXT: vldrb.s16 q1, [r5], #8 -; CHECK-NEXT: vadd.i16 q1, q1, r9 -; CHECK-NEXT: vmlava.s16 r6, 
q0, q1 -; CHECK-NEXT: vldrb.s16 q1, [r3], #8 -; CHECK-NEXT: vadd.i16 q1, q1, r9 ; CHECK-NEXT: vmlava.s16 r10, q0, q1 +; CHECK-NEXT: vldrb.s16 q1, [r3], #8 +; CHECK-NEXT: vadd.i16 q1, q1, r4 +; CHECK-NEXT: vmlava.s16 r6, q0, q1 +; CHECK-NEXT: vldrb.s16 q1, [r0], #8 +; CHECK-NEXT: vadd.i16 q1, q1, r4 +; CHECK-NEXT: vmlava.s16 r12, q0, q1 ; CHECK-NEXT: vldrb.s16 q1, [r1], #8 -; CHECK-NEXT: vadd.i16 q1, q1, r9 +; CHECK-NEXT: vadd.i16 q1, q1, r4 ; CHECK-NEXT: vmlava.s16 r8, q0, q1 ; CHECK-NEXT: le lr, .LBB3_5 ; CHECK-NEXT: b .LBB3_7 ; CHECK-NEXT: .LBB3_6: @ in Loop: Header=BB3_3 Depth=1 -; CHECK-NEXT: mov r8, r12 -; CHECK-NEXT: mov r10, r12 -; CHECK-NEXT: mov r6, r12 +; CHECK-NEXT: mov r8, r10 +; CHECK-NEXT: mov r12, r10 +; CHECK-NEXT: mov r6, r10 ; CHECK-NEXT: .LBB3_7: @ %for.cond.cleanup23 ; CHECK-NEXT: @ in Loop: Header=BB3_3 Depth=1 -; CHECK-NEXT: ldr r3, [sp, #72] -; CHECK-NEXT: add.w r1, r10, r8 -; CHECK-NEXT: add r1, r6 -; CHECK-NEXT: add r1, r12 -; CHECK-NEXT: strb.w r1, [r3, r11] +; CHECK-NEXT: ldr r1, [sp, #72] +; CHECK-NEXT: add.w r0, r12, r8 +; CHECK-NEXT: add r0, r6 +; CHECK-NEXT: add r0, r10 +; CHECK-NEXT: strb.w r0, [r1, r11] ; CHECK-NEXT: add.w r11, r11, #1 ; CHECK-NEXT: cmp r11, r2 ; CHECK-NEXT: bne .LBB3_3 @@ -737,8 +736,8 @@ define i8* @signext(i8* %input_row, i8* %input_col, i16 zeroext %output_ch, i16 ; CHECK-NEXT: ldr r1, [sp, #16] @ 4-byte Reload ; CHECK-NEXT: mov r8, r12 ; CHECK-NEXT: mla r5, r11, r3, r0 -; CHECK-NEXT: ldr r0, [sp, #8] @ 4-byte Reload -; CHECK-NEXT: ldrd r4, r7, [sp] @ 8-byte Folded Reload +; CHECK-NEXT: ldr r7, [sp, #8] @ 4-byte Reload +; CHECK-NEXT: ldrd r4, r0, [sp] @ 8-byte Folded Reload ; CHECK-NEXT: mov r10, r12 ; CHECK-NEXT: .LBB5_7: @ %for.body24 ; CHECK-NEXT: @ Parent Loop BB5_5 Depth=1 @@ -747,10 +746,10 @@ define i8* @signext(i8* %input_row, i8* %input_col, i16 zeroext %output_ch, i16 ; CHECK-NEXT: vadd.i16 q2, q1, q0 ; CHECK-NEXT: vldrb.s16 q1, [r5], #8 ; CHECK-NEXT: vmlava.s16 r12, q1, q2 -; CHECK-NEXT: vldrb.s16 q2, 
[r0], #8 +; CHECK-NEXT: vldrb.s16 q2, [r7], #8 ; CHECK-NEXT: vadd.i16 q2, q2, q0 ; CHECK-NEXT: vmlava.s16 r6, q1, q2 -; CHECK-NEXT: vldrb.s16 q2, [r7], #8 +; CHECK-NEXT: vldrb.s16 q2, [r0], #8 ; CHECK-NEXT: vadd.i16 q2, q2, q0 ; CHECK-NEXT: vmlava.s16 r8, q1, q2 ; CHECK-NEXT: vldrb.s16 q2, [r1], #8 @@ -908,8 +907,8 @@ define i8* @signext_optsize(i8* %input_row, i8* %input_col, i16 zeroext %output_ ; CHECK-NEXT: ldr r1, [sp, #16] @ 4-byte Reload ; CHECK-NEXT: mov r8, r12 ; CHECK-NEXT: mla r5, r11, r3, r0 -; CHECK-NEXT: ldr r0, [sp, #8] @ 4-byte Reload -; CHECK-NEXT: ldrd r4, r7, [sp] @ 8-byte Folded Reload +; CHECK-NEXT: ldr r7, [sp, #8] @ 4-byte Reload +; CHECK-NEXT: ldrd r4, r0, [sp] @ 8-byte Folded Reload ; CHECK-NEXT: mov r10, r12 ; CHECK-NEXT: .LBB6_5: @ %for.body24 ; CHECK-NEXT: @ Parent Loop BB6_3 Depth=1 @@ -918,10 +917,10 @@ define i8* @signext_optsize(i8* %input_row, i8* %input_col, i16 zeroext %output_ ; CHECK-NEXT: vadd.i16 q2, q1, q0 ; CHECK-NEXT: vldrb.s16 q1, [r5], #8 ; CHECK-NEXT: vmlava.s16 r12, q1, q2 -; CHECK-NEXT: vldrb.s16 q2, [r0], #8 +; CHECK-NEXT: vldrb.s16 q2, [r7], #8 ; CHECK-NEXT: vadd.i16 q2, q2, q0 ; CHECK-NEXT: vmlava.s16 r6, q1, q2 -; CHECK-NEXT: vldrb.s16 q2, [r7], #8 +; CHECK-NEXT: vldrb.s16 q2, [r0], #8 ; CHECK-NEXT: vadd.i16 q2, q2, q0 ; CHECK-NEXT: vmlava.s16 r8, q1, q2 ; CHECK-NEXT: vldrb.s16 q2, [r1], #8 diff --git a/llvm/test/CodeGen/Thumb2/mve-satmul-loops.ll b/llvm/test/CodeGen/Thumb2/mve-satmul-loops.ll index 9897b607d6b3a5..3ec0e464427688 100644 --- a/llvm/test/CodeGen/Thumb2/mve-satmul-loops.ll +++ b/llvm/test/CodeGen/Thumb2/mve-satmul-loops.ll @@ -20,50 +20,50 @@ define arm_aapcs_vfpcc void @ssatmul_s_q31(i32* nocapture readonly %pSrcA, i32* ; CHECK-NEXT: @ %bb.2: ; CHECK-NEXT: movs r7, #0 ; CHECK-NEXT: mov r12, r0 -; CHECK-NEXT: mov r9, r1 +; CHECK-NEXT: mov r6, r1 ; CHECK-NEXT: mov r11, r2 ; CHECK-NEXT: b .LBB0_6 ; CHECK-NEXT: .LBB0_3: @ %vector.ph ; CHECK-NEXT: str r3, [sp, #4] @ 4-byte Spill ; CHECK-NEXT: bic r3, r3, 
#1 ; CHECK-NEXT: subs r7, r3, #2 -; CHECK-NEXT: adr r4, .LCPI0_0 ; CHECK-NEXT: movs r6, #1 +; CHECK-NEXT: adr r4, .LCPI0_0 ; CHECK-NEXT: add.w r11, r2, r3, lsl #2 ; CHECK-NEXT: add.w lr, r6, r7, lsr #1 -; CHECK-NEXT: add.w r9, r1, r3, lsl #2 +; CHECK-NEXT: add.w r6, r1, r3, lsl #2 ; CHECK-NEXT: add.w r12, r0, r3, lsl #2 ; CHECK-NEXT: vldrw.u32 q0, [r4] +; CHECK-NEXT: mvn r10, #-2147483648 ; CHECK-NEXT: vmvn.i32 q1, #0x80000000 -; CHECK-NEXT: mov.w r10, #-1 ; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: str r3, [sp] @ 4-byte Spill ; CHECK-NEXT: .LBB0_4: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: ldrd r4, r5, [r0] +; CHECK-NEXT: ldrd r4, r8, [r0] ; CHECK-NEXT: adds r0, #8 -; CHECK-NEXT: ldrd r7, r6, [r1] +; CHECK-NEXT: ldrd r7, r5, [r1] ; CHECK-NEXT: adds r1, #8 -; CHECK-NEXT: smull r8, r5, r6, r5 +; CHECK-NEXT: smull r8, r5, r5, r8 ; CHECK-NEXT: smull r4, r7, r7, r4 ; CHECK-NEXT: asrl r8, r5, #31 ; CHECK-NEXT: asrl r4, r7, #31 -; CHECK-NEXT: rsbs.w r3, r4, #-2147483648 +; CHECK-NEXT: rsbs.w r9, r4, #-2147483648 ; CHECK-NEXT: vmov.32 q4[0], r4 -; CHECK-NEXT: sbcs.w r3, r10, r7 -; CHECK-NEXT: vmov.32 q4[1], r7 +; CHECK-NEXT: mov.w r9, #-1 +; CHECK-NEXT: sbcs.w r3, r9, r7 ; CHECK-NEXT: mov.w r3, #0 -; CHECK-NEXT: vmov.32 q4[2], r8 +; CHECK-NEXT: vmov.32 q4[1], r7 ; CHECK-NEXT: it lt ; CHECK-NEXT: movlt r3, #1 ; CHECK-NEXT: cmp r3, #0 ; CHECK-NEXT: csetm r3, ne -; CHECK-NEXT: vmov.32 q4[3], r5 +; CHECK-NEXT: vmov.32 q4[2], r8 ; CHECK-NEXT: vmov.32 q2[0], r3 +; CHECK-NEXT: vmov.32 q4[3], r5 ; CHECK-NEXT: vmov.32 q2[1], r3 ; CHECK-NEXT: rsbs.w r3, r8, #-2147483648 -; CHECK-NEXT: sbcs.w r3, r10, r5 -; CHECK-NEXT: mvn r5, #-2147483648 +; CHECK-NEXT: sbcs.w r3, r9, r5 ; CHECK-NEXT: mov.w r3, #0 ; CHECK-NEXT: it lt ; CHECK-NEXT: movlt r3, #1 @@ -76,7 +76,7 @@ define arm_aapcs_vfpcc void @ssatmul_s_q31(i32* nocapture readonly %pSrcA, i32* ; CHECK-NEXT: vorr q2, q2, q3 ; CHECK-NEXT: vmov r4, s8 ; CHECK-NEXT: vmov r3, s9 -; CHECK-NEXT: subs r4, r4, 
r5 +; CHECK-NEXT: subs.w r4, r4, r10 ; CHECK-NEXT: sbcs r3, r3, #0 ; CHECK-NEXT: vmov r4, s10 ; CHECK-NEXT: mov.w r3, #0 @@ -87,7 +87,7 @@ define arm_aapcs_vfpcc void @ssatmul_s_q31(i32* nocapture readonly %pSrcA, i32* ; CHECK-NEXT: vmov.32 q3[0], r3 ; CHECK-NEXT: vmov.32 q3[1], r3 ; CHECK-NEXT: vmov r3, s11 -; CHECK-NEXT: subs r4, r4, r5 +; CHECK-NEXT: subs.w r4, r4, r10 ; CHECK-NEXT: sbcs r3, r3, #0 ; CHECK-NEXT: mov.w r3, #0 ; CHECK-NEXT: it lt @@ -116,7 +116,7 @@ define arm_aapcs_vfpcc void @ssatmul_s_q31(i32* nocapture readonly %pSrcA, i32* ; CHECK-NEXT: .LBB0_7: @ %for.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ldr r3, [r12], #4 -; CHECK-NEXT: ldr r4, [r9], #4 +; CHECK-NEXT: ldr r4, [r6], #4 ; CHECK-NEXT: smull r4, r3, r4, r3 ; CHECK-NEXT: asrl r4, r3, #31 ; CHECK-NEXT: subs r5, r1, r4 @@ -908,36 +908,41 @@ for.body: ; preds = %for.body.preheader, define arm_aapcs_vfpcc void @usatmul_4_q31(i32* nocapture readonly %pSrcA, i32* nocapture readonly %pSrcB, i32* noalias nocapture %pDst, i32 %N) { ; CHECK-LABEL: usatmul_4_q31: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr} -; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, lr} +; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; CHECK-NEXT: .pad #4 +; CHECK-NEXT: sub sp, #4 ; CHECK-NEXT: .vsave {d8, d9} ; CHECK-NEXT: vpush {d8, d9} ; CHECK-NEXT: cmp r3, #0 ; CHECK-NEXT: beq.w .LBB4_8 ; CHECK-NEXT: @ %bb.1: @ %for.body.preheader -; CHECK-NEXT: mov.w r8, #0 +; CHECK-NEXT: mov.w r11, #0 ; CHECK-NEXT: cmp r3, #3 ; CHECK-NEXT: bhi .LBB4_3 ; CHECK-NEXT: @ %bb.2: ; CHECK-NEXT: mov r12, r0 -; CHECK-NEXT: mov r9, r1 -; CHECK-NEXT: mov r10, r2 +; CHECK-NEXT: mov r8, r1 +; CHECK-NEXT: mov r9, r2 ; CHECK-NEXT: b .LBB4_6 ; CHECK-NEXT: .LBB4_3: @ %vector.ph -; CHECK-NEXT: bic r8, r3, #3 +; CHECK-NEXT: bic r11, r3, #3 ; CHECK-NEXT: movs r6, #1 -; CHECK-NEXT: sub.w r7, r8, #4 -; CHECK-NEXT: add.w 
r10, r2, r8, lsl #2 -; CHECK-NEXT: add.w r9, r1, r8, lsl #2 -; CHECK-NEXT: add.w lr, r6, r7, lsr #2 -; CHECK-NEXT: add.w r12, r0, r8, lsl #2 -; CHECK-NEXT: dls lr, lr +; CHECK-NEXT: sub.w r7, r11, #4 +; CHECK-NEXT: add.w r9, r2, r11, lsl #2 +; CHECK-NEXT: add.w r8, r1, r11, lsl #2 +; CHECK-NEXT: add.w r7, r6, r7, lsr #2 +; CHECK-NEXT: add.w r12, r0, r11, lsl #2 +; CHECK-NEXT: mov r10, r7 ; CHECK-NEXT: .LBB4_4: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrw.u32 q0, [r0], #16 ; CHECK-NEXT: vldrw.u32 q2, [r1], #16 +; CHECK-NEXT: mov lr, r10 ; CHECK-NEXT: vmov.f32 s4, s2 +; CHECK-NEXT: sub.w lr, lr, #1 ; CHECK-NEXT: vmov.f32 s12, s10 +; CHECK-NEXT: mov r10, lr ; CHECK-NEXT: vmov.f32 s6, s3 ; CHECK-NEXT: vmov.f32 s14, s11 ; CHECK-NEXT: vmullb.u32 q4, q3, q1 @@ -1002,28 +1007,31 @@ define arm_aapcs_vfpcc void @usatmul_4_q31(i32* nocapture readonly %pSrcA, i32* ; CHECK-NEXT: vmov.f32 s2, s4 ; CHECK-NEXT: vmov.f32 s3, s6 ; CHECK-NEXT: vstrb.8 q0, [r2], #16 -; CHECK-NEXT: le lr, .LBB4_4 -; CHECK-NEXT: @ %bb.5: @ %middle.block -; CHECK-NEXT: cmp r8, r3 +; CHECK-NEXT: cmp.w lr, #0 +; CHECK-NEXT: bne.w .LBB4_4 +; CHECK-NEXT: b .LBB4_5 +; CHECK-NEXT: .LBB4_5: @ %middle.block +; CHECK-NEXT: cmp r11, r3 ; CHECK-NEXT: beq .LBB4_8 ; CHECK-NEXT: .LBB4_6: @ %for.body.preheader21 -; CHECK-NEXT: sub.w lr, r3, r8 +; CHECK-NEXT: sub.w lr, r3, r11 ; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB4_7: @ %for.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ldr r0, [r12], #4 -; CHECK-NEXT: ldr r1, [r9], #4 +; CHECK-NEXT: ldr r1, [r8], #4 ; CHECK-NEXT: umull r0, r1, r1, r0 ; CHECK-NEXT: lsrl r0, r1, #31 ; CHECK-NEXT: subs.w r2, r0, #-1 ; CHECK-NEXT: sbcs r1, r1, #0 ; CHECK-NEXT: it hs ; CHECK-NEXT: movhs.w r0, #-1 -; CHECK-NEXT: str r0, [r10], #4 +; CHECK-NEXT: str r0, [r9], #4 ; CHECK-NEXT: le lr, .LBB4_7 ; CHECK-NEXT: .LBB4_8: @ %for.cond.cleanup ; CHECK-NEXT: vpop {d8, d9} -; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, pc} +; 
CHECK-NEXT: add sp, #4 +; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} entry: %cmp8 = icmp eq i32 %N, 0 br i1 %cmp8, label %for.cond.cleanup, label %for.body.preheader @@ -1136,8 +1144,7 @@ define arm_aapcs_vfpcc void @ssatmul_4_q15(i16* nocapture readonly %pSrcA, i16* ; CHECK-NEXT: le lr, .LBB5_4 ; CHECK-NEXT: @ %bb.5: @ %middle.block ; CHECK-NEXT: cmp r5, r3 -; CHECK-NEXT: it eq -; CHECK-NEXT: popeq {r4, r5, r6, pc} +; CHECK-NEXT: beq .LBB5_8 ; CHECK-NEXT: .LBB5_6: @ %for.body.preheader21 ; CHECK-NEXT: movw r0, #32768 ; CHECK-NEXT: sub.w lr, r3, r5 @@ -1277,8 +1284,7 @@ define arm_aapcs_vfpcc void @ssatmul_8_q15(i16* nocapture readonly %pSrcA, i16* ; CHECK-NEXT: le lr, .LBB6_4 ; CHECK-NEXT: @ %bb.5: @ %middle.block ; CHECK-NEXT: cmp r5, r3 -; CHECK-NEXT: it eq -; CHECK-NEXT: popeq {r4, r5, r6, pc} +; CHECK-NEXT: beq .LBB6_8 ; CHECK-NEXT: .LBB6_6: @ %for.body.preheader21 ; CHECK-NEXT: movw r0, #32768 ; CHECK-NEXT: sub.w lr, r3, r5 @@ -1415,8 +1421,7 @@ define arm_aapcs_vfpcc void @ssatmul_8i_q15(i16* nocapture readonly %pSrcA, i16* ; CHECK-NEXT: le lr, .LBB7_4 ; CHECK-NEXT: @ %bb.5: @ %middle.block ; CHECK-NEXT: cmp r5, r3 -; CHECK-NEXT: it eq -; CHECK-NEXT: popeq {r4, r5, r6, pc} +; CHECK-NEXT: beq .LBB7_8 ; CHECK-NEXT: .LBB7_6: @ %for.body.preheader21 ; CHECK-NEXT: movw r0, #32768 ; CHECK-NEXT: sub.w lr, r3, r5 @@ -1959,8 +1964,7 @@ define arm_aapcs_vfpcc void @usatmul_4_q15(i16* nocapture readonly %pSrcA, i16* ; CHECK-NEXT: le lr, .LBB11_4 ; CHECK-NEXT: @ %bb.5: @ %middle.block ; CHECK-NEXT: cmp r5, r3 -; CHECK-NEXT: it eq -; CHECK-NEXT: popeq {r4, r5, r6, pc} +; CHECK-NEXT: beq .LBB11_8 ; CHECK-NEXT: .LBB11_6: @ %for.body.preheader21 ; CHECK-NEXT: sub.w lr, r3, r5 ; CHECK-NEXT: movw r0, #65535 @@ -2093,8 +2097,7 @@ define arm_aapcs_vfpcc void @usatmul_8_q15(i16* nocapture readonly %pSrcA, i16* ; CHECK-NEXT: le lr, .LBB12_4 ; CHECK-NEXT: @ %bb.5: @ %middle.block ; CHECK-NEXT: cmp r5, r3 -; CHECK-NEXT: it eq -; CHECK-NEXT: popeq {r4, r5, r6, pc} +; 
CHECK-NEXT: beq .LBB12_8 ; CHECK-NEXT: .LBB12_6: @ %for.body.preheader21 ; CHECK-NEXT: sub.w lr, r3, r5 ; CHECK-NEXT: movw r0, #65535 @@ -2229,8 +2232,7 @@ define arm_aapcs_vfpcc void @ssatmul_4_q7(i8* nocapture readonly %pSrcA, i8* noc ; CHECK-NEXT: le lr, .LBB13_4 ; CHECK-NEXT: @ %bb.5: @ %middle.block ; CHECK-NEXT: cmp r5, r3 -; CHECK-NEXT: it eq -; CHECK-NEXT: popeq {r4, r5, r6, pc} +; CHECK-NEXT: beq .LBB13_8 ; CHECK-NEXT: .LBB13_6: @ %for.body.preheader21 ; CHECK-NEXT: sub.w lr, r3, r5 ; CHECK-NEXT: dls lr, lr @@ -2364,8 +2366,7 @@ define arm_aapcs_vfpcc void @ssatmul_8_q7(i8* nocapture readonly %pSrcA, i8* noc ; CHECK-NEXT: le lr, .LBB14_4 ; CHECK-NEXT: @ %bb.5: @ %middle.block ; CHECK-NEXT: cmp r5, r3 -; CHECK-NEXT: it eq -; CHECK-NEXT: popeq {r4, r5, r6, pc} +; CHECK-NEXT: beq .LBB14_8 ; CHECK-NEXT: .LBB14_6: @ %for.body.preheader23 ; CHECK-NEXT: sub.w lr, r3, r5 ; CHECK-NEXT: mvn r0, #127 @@ -2504,8 +2505,7 @@ define arm_aapcs_vfpcc void @ssatmul_16_q7(i8* nocapture readonly %pSrcA, i8* no ; CHECK-NEXT: le lr, .LBB15_4 ; CHECK-NEXT: @ %bb.5: @ %middle.block ; CHECK-NEXT: cmp r5, r3 -; CHECK-NEXT: it eq -; CHECK-NEXT: popeq {r4, r5, r6, pc} +; CHECK-NEXT: beq .LBB15_8 ; CHECK-NEXT: .LBB15_6: @ %for.body.preheader23 ; CHECK-NEXT: sub.w lr, r3, r5 ; CHECK-NEXT: mvn r0, #127 @@ -2641,8 +2641,7 @@ define arm_aapcs_vfpcc void @ssatmul_16i_q7(i8* nocapture readonly %pSrcA, i8* n ; CHECK-NEXT: le lr, .LBB16_4 ; CHECK-NEXT: @ %bb.5: @ %middle.block ; CHECK-NEXT: cmp r5, r3 -; CHECK-NEXT: it eq -; CHECK-NEXT: popeq {r4, r5, r6, pc} +; CHECK-NEXT: beq .LBB16_8 ; CHECK-NEXT: .LBB16_6: @ %for.body.preheader23 ; CHECK-NEXT: sub.w lr, r3, r5 ; CHECK-NEXT: mvn r0, #127 @@ -3422,8 +3421,7 @@ define arm_aapcs_vfpcc void @usatmul_8_q7(i8* nocapture readonly %pSrcA, i8* noc ; CHECK-NEXT: le lr, .LBB20_4 ; CHECK-NEXT: @ %bb.5: @ %middle.block ; CHECK-NEXT: cmp r5, r3 -; CHECK-NEXT: it eq -; CHECK-NEXT: popeq {r4, r5, r6, pc} +; CHECK-NEXT: beq .LBB20_8 ; CHECK-NEXT: .LBB20_6: 
@ %for.body.preheader23 ; CHECK-NEXT: sub.w lr, r3, r5 ; CHECK-NEXT: dls lr, lr @@ -3557,8 +3555,7 @@ define arm_aapcs_vfpcc void @usatmul_16_q7(i8* nocapture readonly %pSrcA, i8* no ; CHECK-NEXT: le lr, .LBB21_4 ; CHECK-NEXT: @ %bb.5: @ %middle.block ; CHECK-NEXT: cmp r5, r3 -; CHECK-NEXT: it eq -; CHECK-NEXT: popeq {r4, r5, r6, pc} +; CHECK-NEXT: beq .LBB21_8 ; CHECK-NEXT: .LBB21_6: @ %for.body.preheader23 ; CHECK-NEXT: sub.w lr, r3, r5 ; CHECK-NEXT: dls lr, lr diff --git a/llvm/test/CodeGen/Thumb2/thumb2-branch.ll b/llvm/test/CodeGen/Thumb2/thumb2-branch.ll index 332ed50ede6f2a..e52bab2b11056e 100644 --- a/llvm/test/CodeGen/Thumb2/thumb2-branch.ll +++ b/llvm/test/CodeGen/Thumb2/thumb2-branch.ll @@ -8,7 +8,7 @@ declare void @foo() define i32 @f1(i32 %a, i32 %b, i32* %v) { entry: ; CHECK-LABEL: f1: -; CHECK: bne LBB +; CHECK: beq LBB %tmp = icmp eq i32 %a, %b ; [#uses=1] br i1 %tmp, label %cond_true, label %return @@ -59,7 +59,7 @@ return: ; preds = %entry define i32 @f4(i32 %a, i32 %b, i32* %v) { entry: ; CHECK-LABEL: f4: -; CHECK: blo LBB +; CHECK: bhs LBB %tmp = icmp uge i32 %a, %b ; [#uses=1] br i1 %tmp, label %cond_true, label %return diff --git a/llvm/test/CodeGen/X86/3addr-16bit.ll b/llvm/test/CodeGen/X86/3addr-16bit.ll index 882150a6e39ab9..e8171e2767df12 100644 --- a/llvm/test/CodeGen/X86/3addr-16bit.ll +++ b/llvm/test/CodeGen/X86/3addr-16bit.ll @@ -12,8 +12,11 @@ define zeroext i16 @test1(i16 zeroext %c, i16 zeroext %k) nounwind ssp { ; X64-NEXT: movl %esi, %eax ; X64-NEXT: incl %eax ; X64-NEXT: cmpw %di, %si -; X64-NEXT: jne LBB0_2 -; X64-NEXT: ## %bb.1: ## %bb +; X64-NEXT: je LBB0_1 +; X64-NEXT: ## %bb.2: ## %bb1 +; X64-NEXT: movzwl %ax, %eax +; X64-NEXT: retq +; X64-NEXT: LBB0_1: ## %bb ; X64-NEXT: pushq %rbx ; X64-NEXT: movzwl %ax, %ebx ; X64-NEXT: movl %ebx, %edi @@ -21,9 +24,6 @@ define zeroext i16 @test1(i16 zeroext %c, i16 zeroext %k) nounwind ssp { ; X64-NEXT: movl %ebx, %eax ; X64-NEXT: popq %rbx ; X64-NEXT: retq -; X64-NEXT: LBB0_2: ## %bb1 -; 
X64-NEXT: movzwl %ax, %eax -; X64-NEXT: retq ; ; X32-LABEL: test1: ; X32: ## %bb.0: ## %entry @@ -33,15 +33,15 @@ define zeroext i16 @test1(i16 zeroext %c, i16 zeroext %k) nounwind ssp { ; X32-NEXT: movl %ecx, %eax ; X32-NEXT: incl %eax ; X32-NEXT: cmpw {{[0-9]+}}(%esp), %cx -; X32-NEXT: jne LBB0_2 -; X32-NEXT: ## %bb.1: ## %bb +; X32-NEXT: je LBB0_1 +; X32-NEXT: ## %bb.2: ## %bb1 +; X32-NEXT: movzwl %ax, %eax +; X32-NEXT: jmp LBB0_3 +; X32-NEXT: LBB0_1: ## %bb ; X32-NEXT: movzwl %ax, %esi ; X32-NEXT: movl %esi, (%esp) ; X32-NEXT: calll _foo ; X32-NEXT: movl %esi, %eax -; X32-NEXT: jmp LBB0_3 -; X32-NEXT: LBB0_2: ## %bb1 -; X32-NEXT: movzwl %ax, %eax ; X32-NEXT: LBB0_3: ## %bb1 ; X32-NEXT: addl $8, %esp ; X32-NEXT: popl %esi @@ -65,8 +65,11 @@ define zeroext i16 @test2(i16 zeroext %c, i16 zeroext %k) nounwind ssp { ; X64-NEXT: movl %esi, %eax ; X64-NEXT: decl %eax ; X64-NEXT: cmpw %di, %si -; X64-NEXT: jne LBB1_2 -; X64-NEXT: ## %bb.1: ## %bb +; X64-NEXT: je LBB1_1 +; X64-NEXT: ## %bb.2: ## %bb1 +; X64-NEXT: movzwl %ax, %eax +; X64-NEXT: retq +; X64-NEXT: LBB1_1: ## %bb ; X64-NEXT: pushq %rbx ; X64-NEXT: movzwl %ax, %ebx ; X64-NEXT: movl %ebx, %edi @@ -74,9 +77,6 @@ define zeroext i16 @test2(i16 zeroext %c, i16 zeroext %k) nounwind ssp { ; X64-NEXT: movl %ebx, %eax ; X64-NEXT: popq %rbx ; X64-NEXT: retq -; X64-NEXT: LBB1_2: ## %bb1 -; X64-NEXT: movzwl %ax, %eax -; X64-NEXT: retq ; ; X32-LABEL: test2: ; X32: ## %bb.0: ## %entry @@ -86,15 +86,15 @@ define zeroext i16 @test2(i16 zeroext %c, i16 zeroext %k) nounwind ssp { ; X32-NEXT: movl %ecx, %eax ; X32-NEXT: decl %eax ; X32-NEXT: cmpw {{[0-9]+}}(%esp), %cx -; X32-NEXT: jne LBB1_2 -; X32-NEXT: ## %bb.1: ## %bb +; X32-NEXT: je LBB1_1 +; X32-NEXT: ## %bb.2: ## %bb1 +; X32-NEXT: movzwl %ax, %eax +; X32-NEXT: jmp LBB1_3 +; X32-NEXT: LBB1_1: ## %bb ; X32-NEXT: movzwl %ax, %esi ; X32-NEXT: movl %esi, (%esp) ; X32-NEXT: calll _foo ; X32-NEXT: movl %esi, %eax -; X32-NEXT: jmp LBB1_3 -; X32-NEXT: LBB1_2: ## %bb1 -; X32-NEXT: 
movzwl %ax, %eax ; X32-NEXT: LBB1_3: ## %bb1 ; X32-NEXT: addl $8, %esp ; X32-NEXT: popl %esi @@ -120,8 +120,11 @@ define zeroext i16 @test3(i16 zeroext %c, i16 zeroext %k) nounwind ssp { ; X64-NEXT: movl %esi, %eax ; X64-NEXT: addl $2, %eax ; X64-NEXT: cmpw %di, %si -; X64-NEXT: jne LBB2_2 -; X64-NEXT: ## %bb.1: ## %bb +; X64-NEXT: je LBB2_1 +; X64-NEXT: ## %bb.2: ## %bb1 +; X64-NEXT: movzwl %ax, %eax +; X64-NEXT: retq +; X64-NEXT: LBB2_1: ## %bb ; X64-NEXT: pushq %rbx ; X64-NEXT: movzwl %ax, %ebx ; X64-NEXT: movl %ebx, %edi @@ -129,9 +132,6 @@ define zeroext i16 @test3(i16 zeroext %c, i16 zeroext %k) nounwind ssp { ; X64-NEXT: movl %ebx, %eax ; X64-NEXT: popq %rbx ; X64-NEXT: retq -; X64-NEXT: LBB2_2: ## %bb1 -; X64-NEXT: movzwl %ax, %eax -; X64-NEXT: retq ; ; X32-LABEL: test3: ; X32: ## %bb.0: ## %entry @@ -141,15 +141,15 @@ define zeroext i16 @test3(i16 zeroext %c, i16 zeroext %k) nounwind ssp { ; X32-NEXT: movl %ecx, %eax ; X32-NEXT: addl $2, %eax ; X32-NEXT: cmpw {{[0-9]+}}(%esp), %cx -; X32-NEXT: jne LBB2_2 -; X32-NEXT: ## %bb.1: ## %bb +; X32-NEXT: je LBB2_1 +; X32-NEXT: ## %bb.2: ## %bb1 +; X32-NEXT: movzwl %ax, %eax +; X32-NEXT: jmp LBB2_3 +; X32-NEXT: LBB2_1: ## %bb ; X32-NEXT: movzwl %ax, %esi ; X32-NEXT: movl %esi, (%esp) ; X32-NEXT: calll _foo ; X32-NEXT: movl %esi, %eax -; X32-NEXT: jmp LBB2_3 -; X32-NEXT: LBB2_2: ## %bb1 -; X32-NEXT: movzwl %ax, %eax ; X32-NEXT: LBB2_3: ## %bb1 ; X32-NEXT: addl $8, %esp ; X32-NEXT: popl %esi @@ -173,8 +173,11 @@ define zeroext i16 @test4(i16 zeroext %c, i16 zeroext %k) nounwind ssp { ; X64-NEXT: movl %esi, %eax ; X64-NEXT: addl %edi, %eax ; X64-NEXT: cmpw %di, %si -; X64-NEXT: jne LBB3_2 -; X64-NEXT: ## %bb.1: ## %bb +; X64-NEXT: je LBB3_1 +; X64-NEXT: ## %bb.2: ## %bb1 +; X64-NEXT: movzwl %ax, %eax +; X64-NEXT: retq +; X64-NEXT: LBB3_1: ## %bb ; X64-NEXT: pushq %rbx ; X64-NEXT: movzwl %ax, %ebx ; X64-NEXT: movl %ebx, %edi @@ -182,9 +185,6 @@ define zeroext i16 @test4(i16 zeroext %c, i16 zeroext %k) nounwind ssp { ; 
X64-NEXT: movl %ebx, %eax ; X64-NEXT: popq %rbx ; X64-NEXT: retq -; X64-NEXT: LBB3_2: ## %bb1 -; X64-NEXT: movzwl %ax, %eax -; X64-NEXT: retq ; ; X32-LABEL: test4: ; X32: ## %bb.0: ## %entry @@ -195,15 +195,15 @@ define zeroext i16 @test4(i16 zeroext %c, i16 zeroext %k) nounwind ssp { ; X32-NEXT: movl %edx, %eax ; X32-NEXT: addl %ecx, %eax ; X32-NEXT: cmpw %cx, %dx -; X32-NEXT: jne LBB3_2 -; X32-NEXT: ## %bb.1: ## %bb +; X32-NEXT: je LBB3_1 +; X32-NEXT: ## %bb.2: ## %bb1 +; X32-NEXT: movzwl %ax, %eax +; X32-NEXT: jmp LBB3_3 +; X32-NEXT: LBB3_1: ## %bb ; X32-NEXT: movzwl %ax, %esi ; X32-NEXT: movl %esi, (%esp) ; X32-NEXT: calll _foo ; X32-NEXT: movl %esi, %eax -; X32-NEXT: jmp LBB3_3 -; X32-NEXT: LBB3_2: ## %bb1 -; X32-NEXT: movzwl %ax, %eax ; X32-NEXT: LBB3_3: ## %bb1 ; X32-NEXT: addl $8, %esp ; X32-NEXT: popl %esi diff --git a/llvm/test/CodeGen/X86/absolute-cmp.ll b/llvm/test/CodeGen/X86/absolute-cmp.ll index b4f158aa7c91aa..b5fd426039f22e 100644 --- a/llvm/test/CodeGen/X86/absolute-cmp.ll +++ b/llvm/test/CodeGen/X86/absolute-cmp.ll @@ -17,12 +17,16 @@ define void @foo8(i64 %val) { ; NOPIC-NEXT: .cfi_def_cfa_offset 16 ; NOPIC-NEXT: cmpq $cmp8@ABS8, %rdi # encoding: [0x48,0x83,0xff,A] ; NOPIC-NEXT: # fixup A - offset: 3, value: cmp8@ABS8, kind: FK_Data_1 -; NOPIC-NEXT: ja .LBB0_2 # encoding: [0x77,A] -; NOPIC-NEXT: # fixup A - offset: 1, value: .LBB0_2-1, kind: FK_PCRel_1 -; NOPIC-NEXT: # %bb.1: # %t +; NOPIC-NEXT: jbe .LBB0_1 # encoding: [0x76,A] +; NOPIC-NEXT: # fixup A - offset: 1, value: .LBB0_1-1, kind: FK_PCRel_1 +; NOPIC-NEXT: # %bb.2: # %f +; NOPIC-NEXT: popq %rax # encoding: [0x58] +; NOPIC-NEXT: .cfi_def_cfa_offset 8 +; NOPIC-NEXT: retq # encoding: [0xc3] +; NOPIC-NEXT: .LBB0_1: # %t +; NOPIC-NEXT: .cfi_def_cfa_offset 16 ; NOPIC-NEXT: callq f # encoding: [0xe8,A,A,A,A] ; NOPIC-NEXT: # fixup A - offset: 1, value: f-4, kind: reloc_branch_4byte_pcrel -; NOPIC-NEXT: .LBB0_2: # %f ; NOPIC-NEXT: popq %rax # encoding: [0x58] ; NOPIC-NEXT: .cfi_def_cfa_offset 8 ; 
NOPIC-NEXT: retq # encoding: [0xc3] @@ -33,12 +37,16 @@ define void @foo8(i64 %val) { ; PIC-NEXT: .cfi_def_cfa_offset 16 ; PIC-NEXT: cmpq $cmp8@ABS8, %rdi # encoding: [0x48,0x83,0xff,A] ; PIC-NEXT: # fixup A - offset: 3, value: cmp8@ABS8, kind: FK_Data_1 -; PIC-NEXT: ja .LBB0_2 # encoding: [0x77,A] -; PIC-NEXT: # fixup A - offset: 1, value: .LBB0_2-1, kind: FK_PCRel_1 -; PIC-NEXT: # %bb.1: # %t +; PIC-NEXT: jbe .LBB0_1 # encoding: [0x76,A] +; PIC-NEXT: # fixup A - offset: 1, value: .LBB0_1-1, kind: FK_PCRel_1 +; PIC-NEXT: # %bb.2: # %f +; PIC-NEXT: popq %rax # encoding: [0x58] +; PIC-NEXT: .cfi_def_cfa_offset 8 +; PIC-NEXT: retq # encoding: [0xc3] +; PIC-NEXT: .LBB0_1: # %t +; PIC-NEXT: .cfi_def_cfa_offset 16 ; PIC-NEXT: callq f@PLT # encoding: [0xe8,A,A,A,A] ; PIC-NEXT: # fixup A - offset: 1, value: f@PLT-4, kind: FK_PCRel_4 -; PIC-NEXT: .LBB0_2: # %f ; PIC-NEXT: popq %rax # encoding: [0x58] ; PIC-NEXT: .cfi_def_cfa_offset 8 ; PIC-NEXT: retq # encoding: [0xc3] @@ -60,12 +68,16 @@ define void @foo32(i64 %val) { ; NOPIC-NEXT: .cfi_def_cfa_offset 16 ; NOPIC-NEXT: cmpq $cmp32, %rdi # encoding: [0x48,0x81,0xff,A,A,A,A] ; NOPIC-NEXT: # fixup A - offset: 3, value: cmp32, kind: reloc_signed_4byte -; NOPIC-NEXT: ja .LBB1_2 # encoding: [0x77,A] -; NOPIC-NEXT: # fixup A - offset: 1, value: .LBB1_2-1, kind: FK_PCRel_1 -; NOPIC-NEXT: # %bb.1: # %t +; NOPIC-NEXT: jbe .LBB1_1 # encoding: [0x76,A] +; NOPIC-NEXT: # fixup A - offset: 1, value: .LBB1_1-1, kind: FK_PCRel_1 +; NOPIC-NEXT: # %bb.2: # %f +; NOPIC-NEXT: popq %rax # encoding: [0x58] +; NOPIC-NEXT: .cfi_def_cfa_offset 8 +; NOPIC-NEXT: retq # encoding: [0xc3] +; NOPIC-NEXT: .LBB1_1: # %t +; NOPIC-NEXT: .cfi_def_cfa_offset 16 ; NOPIC-NEXT: callq f # encoding: [0xe8,A,A,A,A] ; NOPIC-NEXT: # fixup A - offset: 1, value: f-4, kind: reloc_branch_4byte_pcrel -; NOPIC-NEXT: .LBB1_2: # %f ; NOPIC-NEXT: popq %rax # encoding: [0x58] ; NOPIC-NEXT: .cfi_def_cfa_offset 8 ; NOPIC-NEXT: retq # encoding: [0xc3] @@ -76,12 +88,16 @@ define 
void @foo32(i64 %val) { ; PIC-NEXT: .cfi_def_cfa_offset 16 ; PIC-NEXT: cmpq $cmp32, %rdi # encoding: [0x48,0x81,0xff,A,A,A,A] ; PIC-NEXT: # fixup A - offset: 3, value: cmp32, kind: reloc_signed_4byte -; PIC-NEXT: ja .LBB1_2 # encoding: [0x77,A] -; PIC-NEXT: # fixup A - offset: 1, value: .LBB1_2-1, kind: FK_PCRel_1 -; PIC-NEXT: # %bb.1: # %t +; PIC-NEXT: jbe .LBB1_1 # encoding: [0x76,A] +; PIC-NEXT: # fixup A - offset: 1, value: .LBB1_1-1, kind: FK_PCRel_1 +; PIC-NEXT: # %bb.2: # %f +; PIC-NEXT: popq %rax # encoding: [0x58] +; PIC-NEXT: .cfi_def_cfa_offset 8 +; PIC-NEXT: retq # encoding: [0xc3] +; PIC-NEXT: .LBB1_1: # %t +; PIC-NEXT: .cfi_def_cfa_offset 16 ; PIC-NEXT: callq f@PLT # encoding: [0xe8,A,A,A,A] ; PIC-NEXT: # fixup A - offset: 1, value: f@PLT-4, kind: FK_PCRel_4 -; PIC-NEXT: .LBB1_2: # %f ; PIC-NEXT: popq %rax # encoding: [0x58] ; PIC-NEXT: .cfi_def_cfa_offset 8 ; PIC-NEXT: retq # encoding: [0xc3] diff --git a/llvm/test/CodeGen/X86/atomic-flags.ll b/llvm/test/CodeGen/X86/atomic-flags.ll index e0c4a915965c8b..6565c107f7a974 100644 --- a/llvm/test/CodeGen/X86/atomic-flags.ll +++ b/llvm/test/CodeGen/X86/atomic-flags.ll @@ -1,20 +1,55 @@ -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -verify-machineinstrs | FileCheck %s -; RUN: llc < %s -mtriple=i686-unknown-unknown -verify-machineinstrs | FileCheck %s +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,X86-64 +; RUN: llc < %s -mtriple=i686-unknown-unknown -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,X86-32 ; Make sure that flags are properly preserved despite atomic optimizations. define i32 @atomic_and_flags_1(i8* %p, i32 %a, i32 %b) { -; CHECK-LABEL: atomic_and_flags_1: - ; Generate flags value, and use it. 
- ; CHECK: cmpl - ; CHECK-NEXT: jne +; X86-64-LABEL: atomic_and_flags_1: +; X86-64: # %bb.0: +; X86-64-NEXT: cmpl %edx, %esi +; X86-64-NEXT: je .LBB0_1 +; X86-64-NEXT: # %bb.3: # %L2 +; X86-64-NEXT: movl $2, %eax +; X86-64-NEXT: retq +; X86-64-NEXT: .LBB0_1: # %L1 +; X86-64-NEXT: incb (%rdi) +; X86-64-NEXT: cmpl %edx, %esi +; X86-64-NEXT: je .LBB0_4 +; X86-64-NEXT: # %bb.2: # %L4 +; X86-64-NEXT: movl $4, %eax +; X86-64-NEXT: retq +; X86-64-NEXT: .LBB0_4: # %L3 +; X86-64-NEXT: movl $3, %eax +; X86-64-NEXT: retq +; +; X86-32-LABEL: atomic_and_flags_1: +; X86-32: # %bb.0: +; X86-32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-32-NEXT: cmpl %eax, %ecx +; X86-32-NEXT: je .LBB0_1 +; X86-32-NEXT: # %bb.3: # %L2 +; X86-32-NEXT: movl $2, %eax +; X86-32-NEXT: retl +; X86-32-NEXT: .LBB0_1: # %L1 +; X86-32-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-32-NEXT: incb (%edx) +; X86-32-NEXT: cmpl %eax, %ecx +; X86-32-NEXT: je .LBB0_4 +; X86-32-NEXT: # %bb.2: # %L4 +; X86-32-NEXT: movl $4, %eax +; X86-32-NEXT: retl +; X86-32-NEXT: .LBB0_4: # %L3 +; X86-32-NEXT: movl $3, %eax +; X86-32-NEXT: retl %cmp = icmp eq i32 %a, %b br i1 %cmp, label %L1, label %L2 L1: ; The following pattern will get folded. - ; CHECK: incb + %1 = load atomic i8, i8* %p seq_cst, align 1 %2 = add i8 %1, 1 ; This forces the INC instruction to be generated. store atomic i8 %2, i8* %p release, align 1 @@ -23,8 +58,7 @@ L1: ; somehow. This test checks that cmpl gets emitted again, but any ; rematerialization would work (the optimizer used to clobber the flags with ; the add). - ; CHECK-NEXT: cmpl - ; CHECK-NEXT: jne + br i1 %cmp, label %L3, label %L4 L2: @@ -39,18 +73,51 @@ L4: ; Same as above, but using 2 as immediate to avoid the INC instruction. 
define i32 @atomic_and_flags_2(i8* %p, i32 %a, i32 %b) { -; CHECK-LABEL: atomic_and_flags_2: - ; CHECK: cmpl - ; CHECK-NEXT: jne +; X86-64-LABEL: atomic_and_flags_2: +; X86-64: # %bb.0: +; X86-64-NEXT: cmpl %edx, %esi +; X86-64-NEXT: je .LBB1_1 +; X86-64-NEXT: # %bb.3: # %L2 +; X86-64-NEXT: movl $2, %eax +; X86-64-NEXT: retq +; X86-64-NEXT: .LBB1_1: # %L1 +; X86-64-NEXT: addb $2, (%rdi) +; X86-64-NEXT: cmpl %edx, %esi +; X86-64-NEXT: je .LBB1_4 +; X86-64-NEXT: # %bb.2: # %L4 +; X86-64-NEXT: movl $4, %eax +; X86-64-NEXT: retq +; X86-64-NEXT: .LBB1_4: # %L3 +; X86-64-NEXT: movl $3, %eax +; X86-64-NEXT: retq +; +; X86-32-LABEL: atomic_and_flags_2: +; X86-32: # %bb.0: +; X86-32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-32-NEXT: cmpl %eax, %ecx +; X86-32-NEXT: je .LBB1_1 +; X86-32-NEXT: # %bb.3: # %L2 +; X86-32-NEXT: movl $2, %eax +; X86-32-NEXT: retl +; X86-32-NEXT: .LBB1_1: # %L1 +; X86-32-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-32-NEXT: addb $2, (%edx) +; X86-32-NEXT: cmpl %eax, %ecx +; X86-32-NEXT: je .LBB1_4 +; X86-32-NEXT: # %bb.2: # %L4 +; X86-32-NEXT: movl $4, %eax +; X86-32-NEXT: retl +; X86-32-NEXT: .LBB1_4: # %L3 +; X86-32-NEXT: movl $3, %eax +; X86-32-NEXT: retl %cmp = icmp eq i32 %a, %b br i1 %cmp, label %L1, label %L2 L1: - ; CHECK: addb %1 = load atomic i8, i8* %p seq_cst, align 1 %2 = add i8 %1, 2 store atomic i8 %2, i8* %p release, align 1 - ; CHECK-NEXT: cmpl - ; CHECK-NEXT: jne + br i1 %cmp, label %L3, label %L4 L2: ret i32 2 diff --git a/llvm/test/CodeGen/X86/atomic-unordered.ll b/llvm/test/CodeGen/X86/atomic-unordered.ll index 7a1f34c65c183d..3e5abc97b89a35 100644 --- a/llvm/test/CodeGen/X86/atomic-unordered.ll +++ b/llvm/test/CodeGen/X86/atomic-unordered.ll @@ -2716,24 +2716,24 @@ define i1 @fold_cmp_over_fence(i32* %p, i32 %v1) { ; CHECK-O3-CUR-NEXT: movl (%rdi), %eax ; CHECK-O3-CUR-NEXT: mfence ; CHECK-O3-CUR-NEXT: cmpl %eax, %esi -; CHECK-O3-CUR-NEXT: jne .LBB116_2 -; CHECK-O3-CUR-NEXT: # %bb.1: # 
%taken -; CHECK-O3-CUR-NEXT: movb $1, %al -; CHECK-O3-CUR-NEXT: retq -; CHECK-O3-CUR-NEXT: .LBB116_2: # %untaken +; CHECK-O3-CUR-NEXT: je .LBB116_1 +; CHECK-O3-CUR-NEXT: # %bb.2: # %untaken ; CHECK-O3-CUR-NEXT: xorl %eax, %eax ; CHECK-O3-CUR-NEXT: retq +; CHECK-O3-CUR-NEXT: .LBB116_1: # %taken +; CHECK-O3-CUR-NEXT: movb $1, %al +; CHECK-O3-CUR-NEXT: retq ; ; CHECK-O3-EX-LABEL: fold_cmp_over_fence: ; CHECK-O3-EX: # %bb.0: ; CHECK-O3-EX-NEXT: cmpl (%rdi), %esi ; CHECK-O3-EX-NEXT: mfence -; CHECK-O3-EX-NEXT: jne .LBB116_2 -; CHECK-O3-EX-NEXT: # %bb.1: # %taken -; CHECK-O3-EX-NEXT: movb $1, %al -; CHECK-O3-EX-NEXT: retq -; CHECK-O3-EX-NEXT: .LBB116_2: # %untaken +; CHECK-O3-EX-NEXT: je .LBB116_1 +; CHECK-O3-EX-NEXT: # %bb.2: # %untaken ; CHECK-O3-EX-NEXT: xorl %eax, %eax +; CHECK-O3-EX-NEXT: retq +; CHECK-O3-EX-NEXT: .LBB116_1: # %taken +; CHECK-O3-EX-NEXT: movb $1, %al ; CHECK-O3-EX-NEXT: retq %v2 = load atomic i32, i32* %p unordered, align 4 fence seq_cst diff --git a/llvm/test/CodeGen/X86/bmi.ll b/llvm/test/CodeGen/X86/bmi.ll index 9c3a74fccbe633..f8c7300186df20 100644 --- a/llvm/test/CodeGen/X86/bmi.ll +++ b/llvm/test/CodeGen/X86/bmi.ll @@ -1249,20 +1249,20 @@ define void @pr42118_i32(i32 %x) { ; X86-LABEL: pr42118_i32: ; X86: # %bb.0: ; X86-NEXT: blsrl {{[0-9]+}}(%esp), %eax -; X86-NEXT: jne .LBB50_1 -; X86-NEXT: # %bb.2: -; X86-NEXT: jmp bar # TAILCALL -; X86-NEXT: .LBB50_1: +; X86-NEXT: je .LBB50_2 +; X86-NEXT: # %bb.1: ; X86-NEXT: retl +; X86-NEXT: .LBB50_2: +; X86-NEXT: jmp bar # TAILCALL ; ; X64-LABEL: pr42118_i32: ; X64: # %bb.0: ; X64-NEXT: blsrl %edi, %eax -; X64-NEXT: jne .LBB50_1 -; X64-NEXT: # %bb.2: -; X64-NEXT: jmp bar # TAILCALL -; X64-NEXT: .LBB50_1: +; X64-NEXT: je .LBB50_2 +; X64-NEXT: # %bb.1: ; X64-NEXT: retq +; X64-NEXT: .LBB50_2: +; X64-NEXT: jmp bar # TAILCALL %tmp = sub i32 0, %x %tmp1 = and i32 %tmp, %x %cmp = icmp eq i32 %tmp1, %x @@ -1289,25 +1289,25 @@ define void @pr42118_i64(i64 %x) { ; X86-NEXT: andl %eax, %edx ; X86-NEXT: andl %ecx, 
%esi ; X86-NEXT: orl %edx, %esi -; X86-NEXT: jne .LBB51_1 -; X86-NEXT: # %bb.2: +; X86-NEXT: je .LBB51_2 +; X86-NEXT: # %bb.1: ; X86-NEXT: popl %esi ; X86-NEXT: .cfi_def_cfa_offset 4 -; X86-NEXT: jmp bar # TAILCALL -; X86-NEXT: .LBB51_1: +; X86-NEXT: retl +; X86-NEXT: .LBB51_2: ; X86-NEXT: .cfi_def_cfa_offset 8 ; X86-NEXT: popl %esi ; X86-NEXT: .cfi_def_cfa_offset 4 -; X86-NEXT: retl +; X86-NEXT: jmp bar # TAILCALL ; ; X64-LABEL: pr42118_i64: ; X64: # %bb.0: ; X64-NEXT: blsrq %rdi, %rax -; X64-NEXT: jne .LBB51_1 -; X64-NEXT: # %bb.2: -; X64-NEXT: jmp bar # TAILCALL -; X64-NEXT: .LBB51_1: +; X64-NEXT: je .LBB51_2 +; X64-NEXT: # %bb.1: ; X64-NEXT: retq +; X64-NEXT: .LBB51_2: +; X64-NEXT: jmp bar # TAILCALL %tmp = sub i64 0, %x %tmp1 = and i64 %tmp, %x %cmp = icmp eq i64 %tmp1, %x diff --git a/llvm/test/CodeGen/X86/bt.ll b/llvm/test/CodeGen/X86/bt.ll index 59df366c6e4a23..48fef1e8270ca7 100644 --- a/llvm/test/CodeGen/X86/bt.ll +++ b/llvm/test/CodeGen/X86/bt.ll @@ -611,21 +611,23 @@ define void @query3(i32 %x, i32 %n) nounwind { ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: btl %ecx, %eax -; X86-NEXT: jae .LBB16_2 -; X86-NEXT: # %bb.1: # %bb +; X86-NEXT: jb .LBB16_1 +; X86-NEXT: # %bb.2: # %UnifiedReturnBlock +; X86-NEXT: retl +; X86-NEXT: .LBB16_1: # %bb ; X86-NEXT: calll foo -; X86-NEXT: .LBB16_2: # %UnifiedReturnBlock ; X86-NEXT: retl ; ; X64-LABEL: query3: ; X64: # %bb.0: # %entry ; X64-NEXT: btl %esi, %edi -; X64-NEXT: jae .LBB16_2 -; X64-NEXT: # %bb.1: # %bb +; X64-NEXT: jb .LBB16_1 +; X64-NEXT: # %bb.2: # %UnifiedReturnBlock +; X64-NEXT: retq +; X64-NEXT: .LBB16_1: # %bb ; X64-NEXT: pushq %rax ; X64-NEXT: callq foo ; X64-NEXT: popq %rax -; X64-NEXT: .LBB16_2: # %UnifiedReturnBlock ; X64-NEXT: retq entry: %tmp29 = shl i32 1, %n @@ -647,21 +649,23 @@ define void @query3b(i32 %x, i32 %n) nounwind { ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: btl %ecx, %eax -; X86-NEXT: jae 
.LBB17_2 -; X86-NEXT: # %bb.1: # %bb +; X86-NEXT: jb .LBB17_1 +; X86-NEXT: # %bb.2: # %UnifiedReturnBlock +; X86-NEXT: retl +; X86-NEXT: .LBB17_1: # %bb ; X86-NEXT: calll foo -; X86-NEXT: .LBB17_2: # %UnifiedReturnBlock ; X86-NEXT: retl ; ; X64-LABEL: query3b: ; X64: # %bb.0: # %entry ; X64-NEXT: btl %esi, %edi -; X64-NEXT: jae .LBB17_2 -; X64-NEXT: # %bb.1: # %bb +; X64-NEXT: jb .LBB17_1 +; X64-NEXT: # %bb.2: # %UnifiedReturnBlock +; X64-NEXT: retq +; X64-NEXT: .LBB17_1: # %bb ; X64-NEXT: pushq %rax ; X64-NEXT: callq foo ; X64-NEXT: popq %rax -; X64-NEXT: .LBB17_2: # %UnifiedReturnBlock ; X64-NEXT: retq entry: %tmp29 = shl i32 1, %n @@ -683,21 +687,23 @@ define void @query3x(i32 %x, i32 %n) nounwind { ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: btl %ecx, %eax -; X86-NEXT: jae .LBB18_2 -; X86-NEXT: # %bb.1: # %bb +; X86-NEXT: jb .LBB18_1 +; X86-NEXT: # %bb.2: # %UnifiedReturnBlock +; X86-NEXT: retl +; X86-NEXT: .LBB18_1: # %bb ; X86-NEXT: calll foo -; X86-NEXT: .LBB18_2: # %UnifiedReturnBlock ; X86-NEXT: retl ; ; X64-LABEL: query3x: ; X64: # %bb.0: # %entry ; X64-NEXT: btl %esi, %edi -; X64-NEXT: jae .LBB18_2 -; X64-NEXT: # %bb.1: # %bb +; X64-NEXT: jb .LBB18_1 +; X64-NEXT: # %bb.2: # %UnifiedReturnBlock +; X64-NEXT: retq +; X64-NEXT: .LBB18_1: # %bb ; X64-NEXT: pushq %rax ; X64-NEXT: callq foo ; X64-NEXT: popq %rax -; X64-NEXT: .LBB18_2: # %UnifiedReturnBlock ; X64-NEXT: retq entry: %tmp29 = shl i32 1, %n @@ -719,21 +725,23 @@ define void @query3bx(i32 %x, i32 %n) nounwind { ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: btl %ecx, %eax -; X86-NEXT: jae .LBB19_2 -; X86-NEXT: # %bb.1: # %bb +; X86-NEXT: jb .LBB19_1 +; X86-NEXT: # %bb.2: # %UnifiedReturnBlock +; X86-NEXT: retl +; X86-NEXT: .LBB19_1: # %bb ; X86-NEXT: calll foo -; X86-NEXT: .LBB19_2: # %UnifiedReturnBlock ; X86-NEXT: retl ; ; X64-LABEL: query3bx: ; X64: # %bb.0: # %entry ; X64-NEXT: btl %esi, %edi -; X64-NEXT: 
jae .LBB19_2 -; X64-NEXT: # %bb.1: # %bb +; X64-NEXT: jb .LBB19_1 +; X64-NEXT: # %bb.2: # %UnifiedReturnBlock +; X64-NEXT: retq +; X64-NEXT: .LBB19_1: # %bb ; X64-NEXT: pushq %rax ; X64-NEXT: callq foo ; X64-NEXT: popq %rax -; X64-NEXT: .LBB19_2: # %UnifiedReturnBlock ; X64-NEXT: retq entry: %tmp29 = shl i32 1, %n diff --git a/llvm/test/CodeGen/X86/conditional-tailcall-pgso.ll b/llvm/test/CodeGen/X86/conditional-tailcall-pgso.ll index 65bd1dad21a822..d3cb6daecf9c9a 100644 --- a/llvm/test/CodeGen/X86/conditional-tailcall-pgso.ll +++ b/llvm/test/CodeGen/X86/conditional-tailcall-pgso.ll @@ -63,21 +63,21 @@ define void @f_non_leaf(i32 %x, i32 %y) !prof !14 { ; CHECK32-NEXT: #APP ; CHECK32-NEXT: #NO_APP ; CHECK32-NEXT: cmpl {{[0-9]+}}(%esp), %eax # encoding: [0x3b,0x44,0x24,0x0c] -; CHECK32-NEXT: jne .LBB1_2 # encoding: [0x75,A] -; CHECK32-NEXT: # fixup A - offset: 1, value: .LBB1_2-1, kind: FK_PCRel_1 -; CHECK32-NEXT: # %bb.1: # %bb1 +; CHECK32-NEXT: je .LBB1_1 # encoding: [0x74,A] +; CHECK32-NEXT: # fixup A - offset: 1, value: .LBB1_1-1, kind: FK_PCRel_1 +; CHECK32-NEXT: # %bb.2: # %bb2 ; CHECK32-NEXT: popl %ebx # encoding: [0x5b] ; CHECK32-NEXT: .cfi_def_cfa_offset 4 -; CHECK32-NEXT: jmp foo # TAILCALL +; CHECK32-NEXT: jmp bar # TAILCALL ; CHECK32-NEXT: # encoding: [0xeb,A] -; CHECK32-NEXT: # fixup A - offset: 1, value: foo-1, kind: FK_PCRel_1 -; CHECK32-NEXT: .LBB1_2: # %bb2 +; CHECK32-NEXT: # fixup A - offset: 1, value: bar-1, kind: FK_PCRel_1 +; CHECK32-NEXT: .LBB1_1: # %bb1 ; CHECK32-NEXT: .cfi_def_cfa_offset 8 ; CHECK32-NEXT: popl %ebx # encoding: [0x5b] ; CHECK32-NEXT: .cfi_def_cfa_offset 4 -; CHECK32-NEXT: jmp bar # TAILCALL +; CHECK32-NEXT: jmp foo # TAILCALL ; CHECK32-NEXT: # encoding: [0xeb,A] -; CHECK32-NEXT: # fixup A - offset: 1, value: bar-1, kind: FK_PCRel_1 +; CHECK32-NEXT: # fixup A - offset: 1, value: foo-1, kind: FK_PCRel_1 ; ; CHECK64-LABEL: f_non_leaf: ; CHECK64: # %bb.0: # %entry @@ -87,21 +87,21 @@ define void @f_non_leaf(i32 %x, i32 %y) !prof 
!14 { ; CHECK64-NEXT: #APP ; CHECK64-NEXT: #NO_APP ; CHECK64-NEXT: cmpl %esi, %edi # encoding: [0x39,0xf7] -; CHECK64-NEXT: jne .LBB1_2 # encoding: [0x75,A] -; CHECK64-NEXT: # fixup A - offset: 1, value: .LBB1_2-1, kind: FK_PCRel_1 -; CHECK64-NEXT: # %bb.1: # %bb1 +; CHECK64-NEXT: je .LBB1_1 # encoding: [0x74,A] +; CHECK64-NEXT: # fixup A - offset: 1, value: .LBB1_1-1, kind: FK_PCRel_1 +; CHECK64-NEXT: # %bb.2: # %bb2 ; CHECK64-NEXT: popq %rbx # encoding: [0x5b] ; CHECK64-NEXT: .cfi_def_cfa_offset 8 -; CHECK64-NEXT: jmp foo # TAILCALL +; CHECK64-NEXT: jmp bar # TAILCALL ; CHECK64-NEXT: # encoding: [0xeb,A] -; CHECK64-NEXT: # fixup A - offset: 1, value: foo-1, kind: FK_PCRel_1 -; CHECK64-NEXT: .LBB1_2: # %bb2 +; CHECK64-NEXT: # fixup A - offset: 1, value: bar-1, kind: FK_PCRel_1 +; CHECK64-NEXT: .LBB1_1: # %bb1 ; CHECK64-NEXT: .cfi_def_cfa_offset 16 ; CHECK64-NEXT: popq %rbx # encoding: [0x5b] ; CHECK64-NEXT: .cfi_def_cfa_offset 8 -; CHECK64-NEXT: jmp bar # TAILCALL +; CHECK64-NEXT: jmp foo # TAILCALL ; CHECK64-NEXT: # encoding: [0xeb,A] -; CHECK64-NEXT: # fixup A - offset: 1, value: bar-1, kind: FK_PCRel_1 +; CHECK64-NEXT: # fixup A - offset: 1, value: foo-1, kind: FK_PCRel_1 ; ; WIN64-LABEL: f_non_leaf: ; WIN64: # %bb.0: # %entry @@ -111,19 +111,19 @@ define void @f_non_leaf(i32 %x, i32 %y) !prof !14 { ; WIN64-NEXT: #APP ; WIN64-NEXT: #NO_APP ; WIN64-NEXT: cmpl %edx, %ecx # encoding: [0x39,0xd1] -; WIN64-NEXT: jne .LBB1_2 # encoding: [0x75,A] -; WIN64-NEXT: # fixup A - offset: 1, value: .LBB1_2-1, kind: FK_PCRel_1 -; WIN64-NEXT: # %bb.1: # %bb1 +; WIN64-NEXT: je .LBB1_1 # encoding: [0x74,A] +; WIN64-NEXT: # fixup A - offset: 1, value: .LBB1_1-1, kind: FK_PCRel_1 +; WIN64-NEXT: # %bb.2: # %bb2 ; WIN64-NEXT: popq %rbx # encoding: [0x5b] -; WIN64-NEXT: jmp foo # TAILCALL +; WIN64-NEXT: jmp bar # TAILCALL ; WIN64-NEXT: # encoding: [0xeb,A] -; WIN64-NEXT: # fixup A - offset: 1, value: foo-1, kind: FK_PCRel_1 -; WIN64-NEXT: .LBB1_2: # %bb2 +; WIN64-NEXT: # fixup A - 
offset: 1, value: bar-1, kind: FK_PCRel_1 +; WIN64-NEXT: .LBB1_1: # %bb1 ; WIN64-NEXT: nop # encoding: [0x90] ; WIN64-NEXT: popq %rbx # encoding: [0x5b] -; WIN64-NEXT: jmp bar # TAILCALL +; WIN64-NEXT: jmp foo # TAILCALL ; WIN64-NEXT: # encoding: [0xeb,A] -; WIN64-NEXT: # fixup A - offset: 1, value: bar-1, kind: FK_PCRel_1 +; WIN64-NEXT: # fixup A - offset: 1, value: foo-1, kind: FK_PCRel_1 ; WIN64-NEXT: .seh_handlerdata ; WIN64-NEXT: .text ; WIN64-NEXT: .seh_endproc diff --git a/llvm/test/CodeGen/X86/conditional-tailcall.ll b/llvm/test/CodeGen/X86/conditional-tailcall.ll index 17078413a82423..66c57697aa7242 100644 --- a/llvm/test/CodeGen/X86/conditional-tailcall.ll +++ b/llvm/test/CodeGen/X86/conditional-tailcall.ll @@ -63,21 +63,21 @@ define void @f_non_leaf(i32 %x, i32 %y) optsize { ; CHECK32-NEXT: #APP ; CHECK32-NEXT: #NO_APP ; CHECK32-NEXT: cmpl {{[0-9]+}}(%esp), %eax # encoding: [0x3b,0x44,0x24,0x0c] -; CHECK32-NEXT: jne .LBB1_2 # encoding: [0x75,A] -; CHECK32-NEXT: # fixup A - offset: 1, value: .LBB1_2-1, kind: FK_PCRel_1 -; CHECK32-NEXT: # %bb.1: # %bb1 +; CHECK32-NEXT: je .LBB1_1 # encoding: [0x74,A] +; CHECK32-NEXT: # fixup A - offset: 1, value: .LBB1_1-1, kind: FK_PCRel_1 +; CHECK32-NEXT: # %bb.2: # %bb2 ; CHECK32-NEXT: popl %ebx # encoding: [0x5b] ; CHECK32-NEXT: .cfi_def_cfa_offset 4 -; CHECK32-NEXT: jmp foo # TAILCALL +; CHECK32-NEXT: jmp bar # TAILCALL ; CHECK32-NEXT: # encoding: [0xeb,A] -; CHECK32-NEXT: # fixup A - offset: 1, value: foo-1, kind: FK_PCRel_1 -; CHECK32-NEXT: .LBB1_2: # %bb2 +; CHECK32-NEXT: # fixup A - offset: 1, value: bar-1, kind: FK_PCRel_1 +; CHECK32-NEXT: .LBB1_1: # %bb1 ; CHECK32-NEXT: .cfi_def_cfa_offset 8 ; CHECK32-NEXT: popl %ebx # encoding: [0x5b] ; CHECK32-NEXT: .cfi_def_cfa_offset 4 -; CHECK32-NEXT: jmp bar # TAILCALL +; CHECK32-NEXT: jmp foo # TAILCALL ; CHECK32-NEXT: # encoding: [0xeb,A] -; CHECK32-NEXT: # fixup A - offset: 1, value: bar-1, kind: FK_PCRel_1 +; CHECK32-NEXT: # fixup A - offset: 1, value: foo-1, kind: 
FK_PCRel_1 ; ; CHECK64-LABEL: f_non_leaf: ; CHECK64: # %bb.0: # %entry @@ -87,21 +87,21 @@ define void @f_non_leaf(i32 %x, i32 %y) optsize { ; CHECK64-NEXT: #APP ; CHECK64-NEXT: #NO_APP ; CHECK64-NEXT: cmpl %esi, %edi # encoding: [0x39,0xf7] -; CHECK64-NEXT: jne .LBB1_2 # encoding: [0x75,A] -; CHECK64-NEXT: # fixup A - offset: 1, value: .LBB1_2-1, kind: FK_PCRel_1 -; CHECK64-NEXT: # %bb.1: # %bb1 +; CHECK64-NEXT: je .LBB1_1 # encoding: [0x74,A] +; CHECK64-NEXT: # fixup A - offset: 1, value: .LBB1_1-1, kind: FK_PCRel_1 +; CHECK64-NEXT: # %bb.2: # %bb2 ; CHECK64-NEXT: popq %rbx # encoding: [0x5b] ; CHECK64-NEXT: .cfi_def_cfa_offset 8 -; CHECK64-NEXT: jmp foo # TAILCALL +; CHECK64-NEXT: jmp bar # TAILCALL ; CHECK64-NEXT: # encoding: [0xeb,A] -; CHECK64-NEXT: # fixup A - offset: 1, value: foo-1, kind: FK_PCRel_1 -; CHECK64-NEXT: .LBB1_2: # %bb2 +; CHECK64-NEXT: # fixup A - offset: 1, value: bar-1, kind: FK_PCRel_1 +; CHECK64-NEXT: .LBB1_1: # %bb1 ; CHECK64-NEXT: .cfi_def_cfa_offset 16 ; CHECK64-NEXT: popq %rbx # encoding: [0x5b] ; CHECK64-NEXT: .cfi_def_cfa_offset 8 -; CHECK64-NEXT: jmp bar # TAILCALL +; CHECK64-NEXT: jmp foo # TAILCALL ; CHECK64-NEXT: # encoding: [0xeb,A] -; CHECK64-NEXT: # fixup A - offset: 1, value: bar-1, kind: FK_PCRel_1 +; CHECK64-NEXT: # fixup A - offset: 1, value: foo-1, kind: FK_PCRel_1 ; ; WIN64-LABEL: f_non_leaf: ; WIN64: # %bb.0: # %entry @@ -111,19 +111,19 @@ define void @f_non_leaf(i32 %x, i32 %y) optsize { ; WIN64-NEXT: #APP ; WIN64-NEXT: #NO_APP ; WIN64-NEXT: cmpl %edx, %ecx # encoding: [0x39,0xd1] -; WIN64-NEXT: jne .LBB1_2 # encoding: [0x75,A] -; WIN64-NEXT: # fixup A - offset: 1, value: .LBB1_2-1, kind: FK_PCRel_1 -; WIN64-NEXT: # %bb.1: # %bb1 +; WIN64-NEXT: je .LBB1_1 # encoding: [0x74,A] +; WIN64-NEXT: # fixup A - offset: 1, value: .LBB1_1-1, kind: FK_PCRel_1 +; WIN64-NEXT: # %bb.2: # %bb2 ; WIN64-NEXT: popq %rbx # encoding: [0x5b] -; WIN64-NEXT: jmp foo # TAILCALL +; WIN64-NEXT: jmp bar # TAILCALL ; WIN64-NEXT: # encoding: 
[0xeb,A] -; WIN64-NEXT: # fixup A - offset: 1, value: foo-1, kind: FK_PCRel_1 -; WIN64-NEXT: .LBB1_2: # %bb2 +; WIN64-NEXT: # fixup A - offset: 1, value: bar-1, kind: FK_PCRel_1 +; WIN64-NEXT: .LBB1_1: # %bb1 ; WIN64-NEXT: nop # encoding: [0x90] ; WIN64-NEXT: popq %rbx # encoding: [0x5b] -; WIN64-NEXT: jmp bar # TAILCALL +; WIN64-NEXT: jmp foo # TAILCALL ; WIN64-NEXT: # encoding: [0xeb,A] -; WIN64-NEXT: # fixup A - offset: 1, value: bar-1, kind: FK_PCRel_1 +; WIN64-NEXT: # fixup A - offset: 1, value: foo-1, kind: FK_PCRel_1 ; WIN64-NEXT: .seh_handlerdata ; WIN64-NEXT: .text ; WIN64-NEXT: .seh_endproc diff --git a/llvm/test/CodeGen/X86/fast-isel-cmp-branch2.ll b/llvm/test/CodeGen/X86/fast-isel-cmp-branch2.ll index 475d8fcf7f35a7..1ba8b00646efc6 100644 --- a/llvm/test/CodeGen/X86/fast-isel-cmp-branch2.ll +++ b/llvm/test/CodeGen/X86/fast-isel-cmp-branch2.ll @@ -1,11 +1,19 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=x86_64-apple-darwin10 | FileCheck %s ; RUN: llc < %s -fast-isel -fast-isel-abort=1 -mtriple=x86_64-apple-darwin10 | FileCheck %s define i32 @fcmp_oeq(float %x, float %y) { -; CHECK-LABEL: fcmp_oeq -; CHECK: ucomiss %xmm1, %xmm0 -; CHECK-NEXT: jne {{LBB.+_1}} -; CHECK-NEXT: jp {{LBB.+_1}} +; CHECK-LABEL: fcmp_oeq: +; CHECK: ## %bb.0: +; CHECK-NEXT: ucomiss %xmm1, %xmm0 +; CHECK-NEXT: jne LBB0_1 +; CHECK-NEXT: jp LBB0_1 +; CHECK-NEXT: ## %bb.2: ## %bb1 +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: retq +; CHECK-NEXT: LBB0_1: ## %bb2 +; CHECK-NEXT: movl $1, %eax +; CHECK-NEXT: retq %1 = fcmp oeq float %x, %y br i1 %1, label %bb1, label %bb2 bb2: @@ -15,9 +23,16 @@ bb1: } define i32 @fcmp_ogt(float %x, float %y) { -; CHECK-LABEL: fcmp_ogt -; CHECK: ucomiss %xmm1, %xmm0 -; CHECK-NEXT: jbe {{LBB.+_1}} +; CHECK-LABEL: fcmp_ogt: +; CHECK: ## %bb.0: +; CHECK-NEXT: ucomiss %xmm1, %xmm0 +; CHECK-NEXT: jbe LBB1_1 +; CHECK-NEXT: ## %bb.2: ## %bb1 +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: retq +; 
CHECK-NEXT: LBB1_1: ## %bb2 +; CHECK-NEXT: movl $1, %eax +; CHECK-NEXT: retq %1 = fcmp ogt float %x, %y br i1 %1, label %bb1, label %bb2 bb2: @@ -27,9 +42,16 @@ bb1: } define i32 @fcmp_oge(float %x, float %y) { -; CHECK-LABEL: fcmp_oge -; CHECK: ucomiss %xmm1, %xmm0 -; CHECK-NEXT: jb {{LBB.+_1}} +; CHECK-LABEL: fcmp_oge: +; CHECK: ## %bb.0: +; CHECK-NEXT: ucomiss %xmm1, %xmm0 +; CHECK-NEXT: jb LBB2_1 +; CHECK-NEXT: ## %bb.2: ## %bb1 +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: retq +; CHECK-NEXT: LBB2_1: ## %bb2 +; CHECK-NEXT: movl $1, %eax +; CHECK-NEXT: retq %1 = fcmp oge float %x, %y br i1 %1, label %bb1, label %bb2 bb2: @@ -39,9 +61,16 @@ bb1: } define i32 @fcmp_olt(float %x, float %y) { -; CHECK-LABEL: fcmp_olt -; CHECK: ucomiss %xmm0, %xmm1 -; CHECK-NEXT: jbe {{LBB.+_1}} +; CHECK-LABEL: fcmp_olt: +; CHECK: ## %bb.0: +; CHECK-NEXT: ucomiss %xmm0, %xmm1 +; CHECK-NEXT: jbe LBB3_1 +; CHECK-NEXT: ## %bb.2: ## %bb1 +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: retq +; CHECK-NEXT: LBB3_1: ## %bb2 +; CHECK-NEXT: movl $1, %eax +; CHECK-NEXT: retq %1 = fcmp olt float %x, %y br i1 %1, label %bb1, label %bb2 bb2: @@ -51,9 +80,16 @@ bb1: } define i32 @fcmp_ole(float %x, float %y) { -; CHECK-LABEL: fcmp_ole -; CHECK: ucomiss %xmm0, %xmm1 -; CHECK-NEXT: jb {{LBB.+_1}} +; CHECK-LABEL: fcmp_ole: +; CHECK: ## %bb.0: +; CHECK-NEXT: ucomiss %xmm0, %xmm1 +; CHECK-NEXT: jb LBB4_1 +; CHECK-NEXT: ## %bb.2: ## %bb1 +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: retq +; CHECK-NEXT: LBB4_1: ## %bb2 +; CHECK-NEXT: movl $1, %eax +; CHECK-NEXT: retq %1 = fcmp ole float %x, %y br i1 %1, label %bb1, label %bb2 bb2: @@ -63,9 +99,16 @@ bb1: } define i32 @fcmp_one(float %x, float %y) { -; CHECK-LABEL: fcmp_one -; CHECK: ucomiss %xmm1, %xmm0 -; CHECK-NEXT: je {{LBB.+_1}} +; CHECK-LABEL: fcmp_one: +; CHECK: ## %bb.0: +; CHECK-NEXT: ucomiss %xmm1, %xmm0 +; CHECK-NEXT: je LBB5_1 +; CHECK-NEXT: ## %bb.2: ## %bb1 +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: retq +; CHECK-NEXT: LBB5_1: ## %bb2 +; 
CHECK-NEXT: movl $1, %eax +; CHECK-NEXT: retq %1 = fcmp one float %x, %y br i1 %1, label %bb1, label %bb2 bb2: @@ -75,9 +118,16 @@ bb1: } define i32 @fcmp_ord(float %x, float %y) { -; CHECK-LABEL: fcmp_ord -; CHECK: ucomiss %xmm1, %xmm0 -; CHECK-NEXT: jp {{LBB.+_1}} +; CHECK-LABEL: fcmp_ord: +; CHECK: ## %bb.0: +; CHECK-NEXT: ucomiss %xmm1, %xmm0 +; CHECK-NEXT: jp LBB6_1 +; CHECK-NEXT: ## %bb.2: ## %bb1 +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: retq +; CHECK-NEXT: LBB6_1: ## %bb2 +; CHECK-NEXT: movl $1, %eax +; CHECK-NEXT: retq %1 = fcmp ord float %x, %y br i1 %1, label %bb1, label %bb2 bb2: @@ -87,9 +137,16 @@ bb1: } define i32 @fcmp_uno(float %x, float %y) { -; CHECK-LABEL: fcmp_uno -; CHECK: ucomiss %xmm1, %xmm0 -; CHECK-NEXT: jp {{LBB.+_2}} +; CHECK-LABEL: fcmp_uno: +; CHECK: ## %bb.0: +; CHECK-NEXT: ucomiss %xmm1, %xmm0 +; CHECK-NEXT: jp LBB7_2 +; CHECK-NEXT: ## %bb.1: ## %bb2 +; CHECK-NEXT: movl $1, %eax +; CHECK-NEXT: retq +; CHECK-NEXT: LBB7_2: ## %bb1 +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: retq %1 = fcmp uno float %x, %y br i1 %1, label %bb1, label %bb2 bb2: @@ -99,9 +156,16 @@ bb1: } define i32 @fcmp_ueq(float %x, float %y) { -; CHECK-LABEL: fcmp_ueq -; CHECK: ucomiss %xmm1, %xmm0 -; CHECK-NEXT: je {{LBB.+_2}} +; CHECK-LABEL: fcmp_ueq: +; CHECK: ## %bb.0: +; CHECK-NEXT: ucomiss %xmm1, %xmm0 +; CHECK-NEXT: je LBB8_2 +; CHECK-NEXT: ## %bb.1: ## %bb2 +; CHECK-NEXT: movl $1, %eax +; CHECK-NEXT: retq +; CHECK-NEXT: LBB8_2: ## %bb1 +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: retq %1 = fcmp ueq float %x, %y br i1 %1, label %bb1, label %bb2 bb2: @@ -111,9 +175,16 @@ bb1: } define i32 @fcmp_ugt(float %x, float %y) { -; CHECK-LABEL: fcmp_ugt -; CHECK: ucomiss %xmm0, %xmm1 -; CHECK-NEXT: jae {{LBB.+_1}} +; CHECK-LABEL: fcmp_ugt: +; CHECK: ## %bb.0: +; CHECK-NEXT: ucomiss %xmm0, %xmm1 +; CHECK-NEXT: jae LBB9_1 +; CHECK-NEXT: ## %bb.2: ## %bb1 +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: retq +; CHECK-NEXT: LBB9_1: ## %bb2 +; CHECK-NEXT: movl $1, %eax +; 
CHECK-NEXT: retq %1 = fcmp ugt float %x, %y br i1 %1, label %bb1, label %bb2 bb2: @@ -123,9 +194,16 @@ bb1: } define i32 @fcmp_uge(float %x, float %y) { -; CHECK-LABEL: fcmp_uge -; CHECK: ucomiss %xmm0, %xmm1 -; CHECK-NEXT: ja {{LBB.+_1}} +; CHECK-LABEL: fcmp_uge: +; CHECK: ## %bb.0: +; CHECK-NEXT: ucomiss %xmm0, %xmm1 +; CHECK-NEXT: ja LBB10_1 +; CHECK-NEXT: ## %bb.2: ## %bb1 +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: retq +; CHECK-NEXT: LBB10_1: ## %bb2 +; CHECK-NEXT: movl $1, %eax +; CHECK-NEXT: retq %1 = fcmp uge float %x, %y br i1 %1, label %bb1, label %bb2 bb2: @@ -135,9 +213,16 @@ bb1: } define i32 @fcmp_ult(float %x, float %y) { -; CHECK-LABEL: fcmp_ult -; CHECK: ucomiss %xmm1, %xmm0 -; CHECK-NEXT: jae {{LBB.+_1}} +; CHECK-LABEL: fcmp_ult: +; CHECK: ## %bb.0: +; CHECK-NEXT: ucomiss %xmm1, %xmm0 +; CHECK-NEXT: jae LBB11_1 +; CHECK-NEXT: ## %bb.2: ## %bb1 +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: retq +; CHECK-NEXT: LBB11_1: ## %bb2 +; CHECK-NEXT: movl $1, %eax +; CHECK-NEXT: retq %1 = fcmp ult float %x, %y br i1 %1, label %bb1, label %bb2 bb2: @@ -147,9 +232,16 @@ bb1: } define i32 @fcmp_ule(float %x, float %y) { -; CHECK-LABEL: fcmp_ule -; CHECK: ucomiss %xmm1, %xmm0 -; CHECK-NEXT: ja {{LBB.+_1}} +; CHECK-LABEL: fcmp_ule: +; CHECK: ## %bb.0: +; CHECK-NEXT: ucomiss %xmm1, %xmm0 +; CHECK-NEXT: ja LBB12_1 +; CHECK-NEXT: ## %bb.2: ## %bb1 +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: retq +; CHECK-NEXT: LBB12_1: ## %bb2 +; CHECK-NEXT: movl $1, %eax +; CHECK-NEXT: retq %1 = fcmp ule float %x, %y br i1 %1, label %bb1, label %bb2 bb2: @@ -159,10 +251,17 @@ bb1: } define i32 @fcmp_une(float %x, float %y) { -; CHECK-LABEL: fcmp_une -; CHECK: ucomiss %xmm1, %xmm0 -; CHECK-NEXT: jne {{LBB.+_2}} -; CHECK-NEXT: jnp {{LBB.+_1}} +; CHECK-LABEL: fcmp_une: +; CHECK: ## %bb.0: +; CHECK-NEXT: ucomiss %xmm1, %xmm0 +; CHECK-NEXT: jne LBB13_2 +; CHECK-NEXT: jnp LBB13_1 +; CHECK-NEXT: LBB13_2: ## %bb1 +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: retq +; CHECK-NEXT: 
LBB13_1: ## %bb2 +; CHECK-NEXT: movl $1, %eax +; CHECK-NEXT: retq %1 = fcmp une float %x, %y br i1 %1, label %bb1, label %bb2 bb2: @@ -172,9 +271,16 @@ bb1: } define i32 @icmp_eq(i32 %x, i32 %y) { -; CHECK-LABEL: icmp_eq -; CHECK: cmpl %esi, %edi -; CHECK-NEXT: jne {{LBB.+_1}} +; CHECK-LABEL: icmp_eq: +; CHECK: ## %bb.0: +; CHECK-NEXT: cmpl %esi, %edi +; CHECK-NEXT: je LBB14_2 +; CHECK-NEXT: ## %bb.1: ## %bb2 +; CHECK-NEXT: movl $1, %eax +; CHECK-NEXT: retq +; CHECK-NEXT: LBB14_2: ## %bb1 +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: retq %1 = icmp eq i32 %x, %y br i1 %1, label %bb1, label %bb2 bb2: @@ -184,9 +290,16 @@ bb1: } define i32 @icmp_ne(i32 %x, i32 %y) { -; CHECK-LABEL: icmp_ne -; CHECK: cmpl %esi, %edi -; CHECK-NEXT: je {{LBB.+_1}} +; CHECK-LABEL: icmp_ne: +; CHECK: ## %bb.0: +; CHECK-NEXT: cmpl %esi, %edi +; CHECK-NEXT: je LBB15_1 +; CHECK-NEXT: ## %bb.2: ## %bb1 +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: retq +; CHECK-NEXT: LBB15_1: ## %bb2 +; CHECK-NEXT: movl $1, %eax +; CHECK-NEXT: retq %1 = icmp ne i32 %x, %y br i1 %1, label %bb1, label %bb2 bb2: @@ -196,9 +309,16 @@ bb1: } define i32 @icmp_ugt(i32 %x, i32 %y) { -; CHECK-LABEL: icmp_ugt -; CHECK: cmpl %esi, %edi -; CHECK-NEXT: jbe {{LBB.+_1}} +; CHECK-LABEL: icmp_ugt: +; CHECK: ## %bb.0: +; CHECK-NEXT: cmpl %esi, %edi +; CHECK-NEXT: jbe LBB16_1 +; CHECK-NEXT: ## %bb.2: ## %bb1 +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: retq +; CHECK-NEXT: LBB16_1: ## %bb2 +; CHECK-NEXT: movl $1, %eax +; CHECK-NEXT: retq %1 = icmp ugt i32 %x, %y br i1 %1, label %bb1, label %bb2 bb2: @@ -208,9 +328,16 @@ bb1: } define i32 @icmp_uge(i32 %x, i32 %y) { -; CHECK-LABEL: icmp_uge -; CHECK: cmpl %esi, %edi -; CHECK-NEXT: jb {{LBB.+_1}} +; CHECK-LABEL: icmp_uge: +; CHECK: ## %bb.0: +; CHECK-NEXT: cmpl %esi, %edi +; CHECK-NEXT: jae LBB17_2 +; CHECK-NEXT: ## %bb.1: ## %bb2 +; CHECK-NEXT: movl $1, %eax +; CHECK-NEXT: retq +; CHECK-NEXT: LBB17_2: ## %bb1 +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: retq %1 = icmp uge i32 
%x, %y br i1 %1, label %bb1, label %bb2 bb2: @@ -220,9 +347,16 @@ bb1: } define i32 @icmp_ult(i32 %x, i32 %y) { -; CHECK-LABEL: icmp_ult -; CHECK: cmpl %esi, %edi -; CHECK-NEXT: jae {{LBB.+_1}} +; CHECK-LABEL: icmp_ult: +; CHECK: ## %bb.0: +; CHECK-NEXT: cmpl %esi, %edi +; CHECK-NEXT: jae LBB18_1 +; CHECK-NEXT: ## %bb.2: ## %bb1 +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: retq +; CHECK-NEXT: LBB18_1: ## %bb2 +; CHECK-NEXT: movl $1, %eax +; CHECK-NEXT: retq %1 = icmp ult i32 %x, %y br i1 %1, label %bb1, label %bb2 bb2: @@ -232,9 +366,16 @@ bb1: } define i32 @icmp_ule(i32 %x, i32 %y) { -; CHECK-LABEL: icmp_ule -; CHECK: cmpl %esi, %edi -; CHECK-NEXT: ja {{LBB.+_1}} +; CHECK-LABEL: icmp_ule: +; CHECK: ## %bb.0: +; CHECK-NEXT: cmpl %esi, %edi +; CHECK-NEXT: jbe LBB19_2 +; CHECK-NEXT: ## %bb.1: ## %bb2 +; CHECK-NEXT: movl $1, %eax +; CHECK-NEXT: retq +; CHECK-NEXT: LBB19_2: ## %bb1 +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: retq %1 = icmp ule i32 %x, %y br i1 %1, label %bb1, label %bb2 bb2: @@ -244,9 +385,16 @@ bb1: } define i32 @icmp_sgt(i32 %x, i32 %y) { -; CHECK-LABEL: icmp_sgt -; CHECK: cmpl %esi, %edi -; CHECK-NEXT: jle {{LBB.+_1}} +; CHECK-LABEL: icmp_sgt: +; CHECK: ## %bb.0: +; CHECK-NEXT: cmpl %esi, %edi +; CHECK-NEXT: jle LBB20_1 +; CHECK-NEXT: ## %bb.2: ## %bb1 +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: retq +; CHECK-NEXT: LBB20_1: ## %bb2 +; CHECK-NEXT: movl $1, %eax +; CHECK-NEXT: retq %1 = icmp sgt i32 %x, %y br i1 %1, label %bb1, label %bb2 bb2: @@ -256,9 +404,16 @@ bb1: } define i32 @icmp_sge(i32 %x, i32 %y) { -; CHECK-LABEL: icmp_sge -; CHECK: cmpl %esi, %edi -; CHECK-NEXT: jl {{LBB.+_1}} +; CHECK-LABEL: icmp_sge: +; CHECK: ## %bb.0: +; CHECK-NEXT: cmpl %esi, %edi +; CHECK-NEXT: jge LBB21_2 +; CHECK-NEXT: ## %bb.1: ## %bb2 +; CHECK-NEXT: movl $1, %eax +; CHECK-NEXT: retq +; CHECK-NEXT: LBB21_2: ## %bb1 +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: retq %1 = icmp sge i32 %x, %y br i1 %1, label %bb1, label %bb2 bb2: @@ -268,9 +423,16 @@ bb1: } 
define i32 @icmp_slt(i32 %x, i32 %y) { -; CHECK-LABEL: icmp_slt -; CHECK: cmpl %esi, %edi -; CHECK-NEXT: jge {{LBB.+_1}} +; CHECK-LABEL: icmp_slt: +; CHECK: ## %bb.0: +; CHECK-NEXT: cmpl %esi, %edi +; CHECK-NEXT: jge LBB22_1 +; CHECK-NEXT: ## %bb.2: ## %bb1 +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: retq +; CHECK-NEXT: LBB22_1: ## %bb2 +; CHECK-NEXT: movl $1, %eax +; CHECK-NEXT: retq %1 = icmp slt i32 %x, %y br i1 %1, label %bb1, label %bb2 bb2: @@ -280,9 +442,16 @@ bb1: } define i32 @icmp_sle(i32 %x, i32 %y) { -; CHECK-LABEL: icmp_sle -; CHECK: cmpl %esi, %edi -; CHECK-NEXT: jg {{LBB.+_1}} +; CHECK-LABEL: icmp_sle: +; CHECK: ## %bb.0: +; CHECK-NEXT: cmpl %esi, %edi +; CHECK-NEXT: jle LBB23_2 +; CHECK-NEXT: ## %bb.1: ## %bb2 +; CHECK-NEXT: movl $1, %eax +; CHECK-NEXT: retq +; CHECK-NEXT: LBB23_2: ## %bb1 +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: retq %1 = icmp sle i32 %x, %y br i1 %1, label %bb1, label %bb2 bb2: diff --git a/llvm/test/CodeGen/X86/funnel-shift.ll b/llvm/test/CodeGen/X86/funnel-shift.ll index 76e45f43342f16..bb778fb61c04e5 100644 --- a/llvm/test/CodeGen/X86/funnel-shift.ll +++ b/llvm/test/CodeGen/X86/funnel-shift.ll @@ -941,17 +941,17 @@ define void @PR45265(i32 %0, %struct.S* nocapture readonly %1) nounwind { ; X32-SSE2-NEXT: shldl $24, %ebx, %edi ; X32-SSE2-NEXT: xorl %eax, %edi ; X32-SSE2-NEXT: orl %edi, %ecx -; X32-SSE2-NEXT: jne .LBB44_1 -; X32-SSE2-NEXT: # %bb.2: +; X32-SSE2-NEXT: je .LBB44_2 +; X32-SSE2-NEXT: # %bb.1: ; X32-SSE2-NEXT: popl %esi ; X32-SSE2-NEXT: popl %edi ; X32-SSE2-NEXT: popl %ebx -; X32-SSE2-NEXT: jmp _Z3foov # TAILCALL -; X32-SSE2-NEXT: .LBB44_1: +; X32-SSE2-NEXT: retl +; X32-SSE2-NEXT: .LBB44_2: ; X32-SSE2-NEXT: popl %esi ; X32-SSE2-NEXT: popl %edi ; X32-SSE2-NEXT: popl %ebx -; X32-SSE2-NEXT: retl +; X32-SSE2-NEXT: jmp _Z3foov # TAILCALL ; ; X64-AVX2-LABEL: PR45265: ; X64-AVX2: # %bb.0: @@ -964,11 +964,11 @@ define void @PR45265(i32 %0, %struct.S* nocapture readonly %1) nounwind { ; X64-AVX2-NEXT: movq (%rsi,%rcx,4), 
%rcx ; X64-AVX2-NEXT: shrdq $40, %rdi, %rcx ; X64-AVX2-NEXT: cmpq %rax, %rcx -; X64-AVX2-NEXT: jne .LBB44_1 -; X64-AVX2-NEXT: # %bb.2: -; X64-AVX2-NEXT: jmp _Z3foov # TAILCALL -; X64-AVX2-NEXT: .LBB44_1: +; X64-AVX2-NEXT: je .LBB44_2 +; X64-AVX2-NEXT: # %bb.1: ; X64-AVX2-NEXT: retq +; X64-AVX2-NEXT: .LBB44_2: +; X64-AVX2-NEXT: jmp _Z3foov # TAILCALL %3 = sext i32 %0 to i64 %4 = getelementptr inbounds %struct.S, %struct.S* %1, i64 %3 %5 = bitcast %struct.S* %4 to i88* diff --git a/llvm/test/CodeGen/X86/indirect-branch-tracking-eh2.ll b/llvm/test/CodeGen/X86/indirect-branch-tracking-eh2.ll index 6e41c94e979a1b..8223b15ccafae8 100644 --- a/llvm/test/CodeGen/X86/indirect-branch-tracking-eh2.ll +++ b/llvm/test/CodeGen/X86/indirect-branch-tracking-eh2.ll @@ -1,59 +1,183 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple x86_64-unknown-unknown -exception-model sjlj -verify-machineinstrs=0 < %s | FileCheck %s --check-prefix=NUM ; RUN: llc -mtriple x86_64-unknown-unknown -exception-model sjlj -verify-machineinstrs=0 < %s | FileCheck %s --check-prefix=SJLJ -; NUM-COUNT-3: endbr64 - -;SJLJ: main: # @main -;SJLJ-NEXT: .Lfunc_begin0: -;SJLJ-NEXT: # %bb.0: # %entry -;SJLJ-NEXT: endbr64 -;SJLJ-NEXT: pushq %rbp -;SJLJ: callq _Unwind_SjLj_Register -;SJLJ-NEXT: .Ltmp0: -;SJLJ-NEXT: callq _Z3foov -;SJLJ-NEXT: .Ltmp1: -;SJLJ-NEXT: # %bb.1: # %invoke.cont -;SJLJ-NEXT: movl -;SJLJ-NEXT: .LBB0_7: # %return -;SJLJ: callq _Unwind_SjLj_Unregister -;SJLJ: retq -;SJLJ-NEXT: .LBB0_9: -;SJLJ-NEXT: endbr64 -;SJLJ-NEXT: movl -;SJLJ-NEXT: cmpl -;SJLJ-NEXT: jb .LBB0_10 -;SJLJ-NEXT: # %bb.11: -;SJLJ-NEXT: ud2 -;SJLJ-NEXT: .LBB0_10: -;SJLJ-NEXT: leaq .LJTI0_0(%rip), %rcx -;SJLJ-NEXT: jmpq *(%rcx,%rax,8) -;SJLJ-NEXT: .LBB0_2: # %lpad -;SJLJ-NEXT: .Ltmp2: -;SJLJ-NEXT: endbr64 -;SJLJ: jne .LBB0_4 -;SJLJ-NEXT: # %bb.3: # %catch3 -;SJLJ: callq __cxa_begin_catch -;SJLJ: jmp .LBB0_6 -;SJLJ-NEXT: .LBB0_4: # %catch.fallthrough -;SJLJ-NEXT: cmpl -;SJLJ-NEXT: 
jne .LBB0_8 -;SJLJ-NEXT: # %bb.5: # %catch -;SJLJ: callq __cxa_begin_catch -;SJLJ: cmpb -;SJLJ-NEXT: .LBB0_6: # %return -;SJLJ: callq __cxa_end_catch -;SJLJ-NEXT: jmp .LBB0_7 -;SJLJ-NEXT: .LBB0_8: # %eh.resume -;SJLJ-NEXT: movl -;SJLJ-NEXT: .Lfunc_end0: -;SJLJ: .LJTI0_0: -;SJLJ-NEXT: .quad .LBB0_2 - @_ZTIi = external dso_local constant i8* @_ZTIc = external dso_local constant i8* ; Function Attrs: noinline norecurse optnone uwtable define dso_local i32 @main() #0 personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*) { +; NUM-LABEL: main: +; NUM: # %bb.0: # %entry +; NUM-NEXT: endbr64 +; NUM-NEXT: pushq %rbp +; NUM-NEXT: movq %rsp, %rbp +; NUM-NEXT: pushq %r15 +; NUM-NEXT: pushq %r14 +; NUM-NEXT: pushq %r13 +; NUM-NEXT: pushq %r12 +; NUM-NEXT: pushq %rbx +; NUM-NEXT: subq $120, %rsp +; NUM-NEXT: movl $0, -44(%rbp) +; NUM-NEXT: movq $__gxx_personality_sj0, -120(%rbp) +; NUM-NEXT: movq $GCC_except_table0, -112(%rbp) +; NUM-NEXT: movq %rbp, -104(%rbp) +; NUM-NEXT: movq %rsp, -88(%rbp) +; NUM-NEXT: movq $.LBB0_9, -96(%rbp) +; NUM-NEXT: movl $1, -144(%rbp) +; NUM-NEXT: leaq -152(%rbp), %rdi +; NUM-NEXT: callq _Unwind_SjLj_Register +; NUM-NEXT: .Ltmp0: +; NUM-NEXT: callq _Z3foov +; NUM-NEXT: .Ltmp1: +; NUM-NEXT: # %bb.1: # %invoke.cont +; NUM-NEXT: movl $1, -44(%rbp) +; NUM-NEXT: .LBB0_7: # %return +; NUM-NEXT: movl -44(%rbp), %ebx +; NUM-NEXT: leaq -152(%rbp), %rdi +; NUM-NEXT: callq _Unwind_SjLj_Unregister +; NUM-NEXT: movl %ebx, %eax +; NUM-NEXT: addq $120, %rsp +; NUM-NEXT: popq %rbx +; NUM-NEXT: popq %r12 +; NUM-NEXT: popq %r13 +; NUM-NEXT: popq %r14 +; NUM-NEXT: popq %r15 +; NUM-NEXT: popq %rbp +; NUM-NEXT: retq +; NUM-NEXT: .LBB0_9: +; NUM-NEXT: endbr64 +; NUM-NEXT: movl -144(%rbp), %eax +; NUM-NEXT: cmpl $1, %eax +; NUM-NEXT: jb .LBB0_10 +; NUM-NEXT: # %bb.11: +; NUM-NEXT: ud2 +; NUM-NEXT: .LBB0_10: +; NUM-NEXT: leaq {{.*}}(%rip), %rcx +; NUM-NEXT: jmpq *(%rcx,%rax,8) +; NUM-NEXT: .LBB0_2: # %lpad +; NUM-NEXT: .Ltmp2: +; NUM-NEXT: endbr64 +; NUM-NEXT: 
movl -140(%rbp), %ecx +; NUM-NEXT: movl -136(%rbp), %eax +; NUM-NEXT: movq %rcx, -56(%rbp) +; NUM-NEXT: movl %eax, -64(%rbp) +; NUM-NEXT: cmpl $2, %eax +; NUM-NEXT: je .LBB0_3 +; NUM-NEXT: # %bb.4: # %catch.fallthrough +; NUM-NEXT: cmpl $1, %eax +; NUM-NEXT: jne .LBB0_8 +; NUM-NEXT: # %bb.5: # %catch +; NUM-NEXT: movq -56(%rbp), %rdi +; NUM-NEXT: movl $-1, -144(%rbp) +; NUM-NEXT: callq __cxa_begin_catch +; NUM-NEXT: movb (%rax), %al +; NUM-NEXT: movb %al, -45(%rbp) +; NUM-NEXT: xorl %ecx, %ecx +; NUM-NEXT: cmpb $3, %al +; NUM-NEXT: jmp .LBB0_6 +; NUM-NEXT: .LBB0_3: # %catch3 +; NUM-NEXT: movq -56(%rbp), %rdi +; NUM-NEXT: movl $-1, -144(%rbp) +; NUM-NEXT: callq __cxa_begin_catch +; NUM-NEXT: movl (%rax), %eax +; NUM-NEXT: movl %eax, -60(%rbp) +; NUM-NEXT: xorl %ecx, %ecx +; NUM-NEXT: cmpl $5, %eax +; NUM-NEXT: .LBB0_6: # %return +; NUM-NEXT: setne %cl +; NUM-NEXT: movl %ecx, -44(%rbp) +; NUM-NEXT: movl $-1, -144(%rbp) +; NUM-NEXT: callq __cxa_end_catch +; NUM-NEXT: jmp .LBB0_7 +; NUM-NEXT: .LBB0_8: # %eh.resume +; NUM-NEXT: movl $-1, -144(%rbp) +; +; SJLJ-LABEL: main: +; SJLJ: # %bb.0: # %entry +; SJLJ-NEXT: endbr64 +; SJLJ-NEXT: pushq %rbp +; SJLJ-NEXT: movq %rsp, %rbp +; SJLJ-NEXT: pushq %r15 +; SJLJ-NEXT: pushq %r14 +; SJLJ-NEXT: pushq %r13 +; SJLJ-NEXT: pushq %r12 +; SJLJ-NEXT: pushq %rbx +; SJLJ-NEXT: subq $120, %rsp +; SJLJ-NEXT: movl $0, -44(%rbp) +; SJLJ-NEXT: movq $__gxx_personality_sj0, -120(%rbp) +; SJLJ-NEXT: movq $GCC_except_table0, -112(%rbp) +; SJLJ-NEXT: movq %rbp, -104(%rbp) +; SJLJ-NEXT: movq %rsp, -88(%rbp) +; SJLJ-NEXT: movq $.LBB0_9, -96(%rbp) +; SJLJ-NEXT: movl $1, -144(%rbp) +; SJLJ-NEXT: leaq -152(%rbp), %rdi +; SJLJ-NEXT: callq _Unwind_SjLj_Register +; SJLJ-NEXT: .Ltmp0: +; SJLJ-NEXT: callq _Z3foov +; SJLJ-NEXT: .Ltmp1: +; SJLJ-NEXT: # %bb.1: # %invoke.cont +; SJLJ-NEXT: movl $1, -44(%rbp) +; SJLJ-NEXT: .LBB0_7: # %return +; SJLJ-NEXT: movl -44(%rbp), %ebx +; SJLJ-NEXT: leaq -152(%rbp), %rdi +; SJLJ-NEXT: callq _Unwind_SjLj_Unregister +; 
SJLJ-NEXT: movl %ebx, %eax +; SJLJ-NEXT: addq $120, %rsp +; SJLJ-NEXT: popq %rbx +; SJLJ-NEXT: popq %r12 +; SJLJ-NEXT: popq %r13 +; SJLJ-NEXT: popq %r14 +; SJLJ-NEXT: popq %r15 +; SJLJ-NEXT: popq %rbp +; SJLJ-NEXT: retq +; SJLJ-NEXT: .LBB0_9: +; SJLJ-NEXT: endbr64 +; SJLJ-NEXT: movl -144(%rbp), %eax +; SJLJ-NEXT: cmpl $1, %eax +; SJLJ-NEXT: jb .LBB0_10 +; SJLJ-NEXT: # %bb.11: +; SJLJ-NEXT: ud2 +; SJLJ-NEXT: .LBB0_10: +; SJLJ-NEXT: leaq {{.*}}(%rip), %rcx +; SJLJ-NEXT: jmpq *(%rcx,%rax,8) +; SJLJ-NEXT: .LBB0_2: # %lpad +; SJLJ-NEXT: .Ltmp2: +; SJLJ-NEXT: endbr64 +; SJLJ-NEXT: movl -140(%rbp), %ecx +; SJLJ-NEXT: movl -136(%rbp), %eax +; SJLJ-NEXT: movq %rcx, -56(%rbp) +; SJLJ-NEXT: movl %eax, -64(%rbp) +; SJLJ-NEXT: cmpl $2, %eax +; SJLJ-NEXT: je .LBB0_3 +; SJLJ-NEXT: # %bb.4: # %catch.fallthrough +; SJLJ-NEXT: cmpl $1, %eax +; SJLJ-NEXT: jne .LBB0_8 +; SJLJ-NEXT: # %bb.5: # %catch +; SJLJ-NEXT: movq -56(%rbp), %rdi +; SJLJ-NEXT: movl $-1, -144(%rbp) +; SJLJ-NEXT: callq __cxa_begin_catch +; SJLJ-NEXT: movb (%rax), %al +; SJLJ-NEXT: movb %al, -45(%rbp) +; SJLJ-NEXT: xorl %ecx, %ecx +; SJLJ-NEXT: cmpb $3, %al +; SJLJ-NEXT: jmp .LBB0_6 +; SJLJ-NEXT: .LBB0_3: # %catch3 +; SJLJ-NEXT: movq -56(%rbp), %rdi +; SJLJ-NEXT: movl $-1, -144(%rbp) +; SJLJ-NEXT: callq __cxa_begin_catch +; SJLJ-NEXT: movl (%rax), %eax +; SJLJ-NEXT: movl %eax, -60(%rbp) +; SJLJ-NEXT: xorl %ecx, %ecx +; SJLJ-NEXT: cmpl $5, %eax +; SJLJ-NEXT: .LBB0_6: # %return +; SJLJ-NEXT: setne %cl +; SJLJ-NEXT: movl %ecx, -44(%rbp) +; SJLJ-NEXT: movl $-1, -144(%rbp) +; SJLJ-NEXT: callq __cxa_end_catch +; SJLJ-NEXT: jmp .LBB0_7 +; SJLJ-NEXT: .LBB0_8: # %eh.resume +; SJLJ-NEXT: movl $-1, -144(%rbp) entry: %retval = alloca i32, align 4 %exn.slot = alloca i8* diff --git a/llvm/test/CodeGen/X86/jump_sign.ll b/llvm/test/CodeGen/X86/jump_sign.ll index 78a32200438040..6f09506f666673 100644 --- a/llvm/test/CodeGen/X86/jump_sign.ll +++ b/llvm/test/CodeGen/X86/jump_sign.ll @@ -139,11 +139,12 @@ define i32 @func_l2(i32 %a, i32 
%b) nounwind { ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx ; CHECK-NEXT: movl %ecx, %eax ; CHECK-NEXT: subl %edx, %eax -; CHECK-NEXT: jne .LBB8_2 -; CHECK-NEXT: # %bb.1: # %if.then +; CHECK-NEXT: je .LBB8_1 +; CHECK-NEXT: # %bb.2: # %if.else +; CHECK-NEXT: retl +; CHECK-NEXT: .LBB8_1: # %if.then ; CHECK-NEXT: cmpl %ecx, %edx ; CHECK-NEXT: cmovlel %ecx, %eax -; CHECK-NEXT: .LBB8_2: # %if.else ; CHECK-NEXT: retl %cmp = icmp eq i32 %b, %a %sub = sub nsw i32 %a, %b @@ -329,12 +330,13 @@ define i8* @func_r(i8* %base, i32* nocapture %offset, i32 %size) nounwind { ; CHECK-NEXT: movl (%edx), %ecx ; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: subl {{[0-9]+}}(%esp), %ecx -; CHECK-NEXT: jl .LBB15_2 -; CHECK-NEXT: # %bb.1: # %if.end +; CHECK-NEXT: jge .LBB15_1 +; CHECK-NEXT: # %bb.2: # %return +; CHECK-NEXT: retl +; CHECK-NEXT: .LBB15_1: # %if.end ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax ; CHECK-NEXT: movl %ecx, (%edx) ; CHECK-NEXT: addl %ecx, %eax -; CHECK-NEXT: .LBB15_2: # %return ; CHECK-NEXT: retl entry: %0 = load i32, i32* %offset, align 8 diff --git a/llvm/test/CodeGen/X86/lsr-negative-stride.ll b/llvm/test/CodeGen/X86/lsr-negative-stride.ll index 26c6128ab48db5..0d25e141439640 100644 --- a/llvm/test/CodeGen/X86/lsr-negative-stride.ll +++ b/llvm/test/CodeGen/X86/lsr-negative-stride.ll @@ -19,11 +19,7 @@ define i32 @t(i32 %a, i32 %b) nounwind { ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx ; CHECK-NEXT: cmpl %ecx, %edx -; CHECK-NEXT: jne .LBB0_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: movl %edx, %eax -; CHECK-NEXT: retl -; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: je .LBB0_1 ; CHECK-NEXT: .LBB0_2: # %bb.outer ; CHECK-NEXT: # =>This Loop Header: Depth=1 ; CHECK-NEXT: # Child Loop BB0_3 Depth 2 @@ -49,6 +45,9 @@ define i32 @t(i32 %a, i32 %b) nounwind { ; CHECK-NEXT: jne .LBB0_2 ; CHECK-NEXT: .LBB0_6: # %bb17 ; CHECK-NEXT: retl +; CHECK-NEXT: .LBB0_1: +; CHECK-NEXT: movl %edx, %eax +; CHECK-NEXT: retl entry: %tmp1434 = icmp eq i32 %a, %b ; 
[#uses=1] br i1 %tmp1434, label %bb17, label %bb.outer diff --git a/llvm/test/CodeGen/X86/machine-cse.ll b/llvm/test/CodeGen/X86/machine-cse.ll index 281aaca7c8d1fc..7ec9cafb5465e6 100644 --- a/llvm/test/CodeGen/X86/machine-cse.ll +++ b/llvm/test/CodeGen/X86/machine-cse.ll @@ -110,10 +110,11 @@ define i32 @cross_mbb_phys_cse(i32 %a, i32 %b) nounwind ssp { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: movl $1, %eax ; CHECK-NEXT: cmpl %esi, %edi -; CHECK-NEXT: ja .LBB2_2 -; CHECK-NEXT: # %bb.1: # %if.end +; CHECK-NEXT: jbe .LBB2_1 +; CHECK-NEXT: # %bb.2: # %return +; CHECK-NEXT: retq +; CHECK-NEXT: .LBB2_1: # %if.end ; CHECK-NEXT: sbbl %eax, %eax -; CHECK-NEXT: .LBB2_2: # %return ; CHECK-NEXT: retq entry: %cmp = icmp ugt i32 %a, %b diff --git a/llvm/test/CodeGen/X86/memcmp-more-load-pairs.ll b/llvm/test/CodeGen/X86/memcmp-more-load-pairs.ll index 374f573eed7b26..6c7076757d0e9c 100644 --- a/llvm/test/CodeGen/X86/memcmp-more-load-pairs.ll +++ b/llvm/test/CodeGen/X86/memcmp-more-load-pairs.ll @@ -239,19 +239,19 @@ define i32 @length3(i8* %X, i8* %Y) nounwind { ; X86-NEXT: rolw $8, %dx ; X86-NEXT: rolw $8, %si ; X86-NEXT: cmpw %si, %dx -; X86-NEXT: jne .LBB9_3 -; X86-NEXT: # %bb.1: # %loadbb1 -; X86-NEXT: movzbl 2(%eax), %eax -; X86-NEXT: movzbl 2(%ecx), %ecx -; X86-NEXT: subl %ecx, %eax -; X86-NEXT: popl %esi -; X86-NEXT: retl -; X86-NEXT: .LBB9_3: # %res_block +; X86-NEXT: je .LBB9_1 +; X86-NEXT: # %bb.3: # %res_block ; X86-NEXT: setae %al ; X86-NEXT: movzbl %al, %eax ; X86-NEXT: leal -1(%eax,%eax), %eax ; X86-NEXT: popl %esi ; X86-NEXT: retl +; X86-NEXT: .LBB9_1: # %loadbb1 +; X86-NEXT: movzbl 2(%eax), %eax +; X86-NEXT: movzbl 2(%ecx), %ecx +; X86-NEXT: subl %ecx, %eax +; X86-NEXT: popl %esi +; X86-NEXT: retl ; ; X64-LABEL: length3: ; X64: # %bb.0: @@ -260,16 +260,16 @@ define i32 @length3(i8* %X, i8* %Y) nounwind { ; X64-NEXT: rolw $8, %ax ; X64-NEXT: rolw $8, %cx ; X64-NEXT: cmpw %cx, %ax -; X64-NEXT: jne .LBB9_3 -; X64-NEXT: # %bb.1: # %loadbb1 -; X64-NEXT: movzbl 
2(%rdi), %eax -; X64-NEXT: movzbl 2(%rsi), %ecx -; X64-NEXT: subl %ecx, %eax -; X64-NEXT: retq -; X64-NEXT: .LBB9_3: # %res_block +; X64-NEXT: je .LBB9_1 +; X64-NEXT: # %bb.3: # %res_block ; X64-NEXT: setae %al ; X64-NEXT: movzbl %al, %eax ; X64-NEXT: leal -1(%rax,%rax), %eax +; X64-NEXT: retq +; X64-NEXT: .LBB9_1: # %loadbb1 +; X64-NEXT: movzbl 2(%rdi), %eax +; X64-NEXT: movzbl 2(%rsi), %ecx +; X64-NEXT: subl %ecx, %eax ; X64-NEXT: retq %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 3) nounwind ret i32 %m @@ -454,19 +454,19 @@ define i32 @length5(i8* %X, i8* %Y) nounwind { ; X86-NEXT: bswapl %edx ; X86-NEXT: bswapl %esi ; X86-NEXT: cmpl %esi, %edx -; X86-NEXT: jne .LBB16_3 -; X86-NEXT: # %bb.1: # %loadbb1 -; X86-NEXT: movzbl 4(%eax), %eax -; X86-NEXT: movzbl 4(%ecx), %ecx -; X86-NEXT: subl %ecx, %eax -; X86-NEXT: popl %esi -; X86-NEXT: retl -; X86-NEXT: .LBB16_3: # %res_block +; X86-NEXT: je .LBB16_1 +; X86-NEXT: # %bb.3: # %res_block ; X86-NEXT: setae %al ; X86-NEXT: movzbl %al, %eax ; X86-NEXT: leal -1(%eax,%eax), %eax ; X86-NEXT: popl %esi ; X86-NEXT: retl +; X86-NEXT: .LBB16_1: # %loadbb1 +; X86-NEXT: movzbl 4(%eax), %eax +; X86-NEXT: movzbl 4(%ecx), %ecx +; X86-NEXT: subl %ecx, %eax +; X86-NEXT: popl %esi +; X86-NEXT: retl ; ; X64-LABEL: length5: ; X64: # %bb.0: @@ -475,16 +475,16 @@ define i32 @length5(i8* %X, i8* %Y) nounwind { ; X64-NEXT: bswapl %eax ; X64-NEXT: bswapl %ecx ; X64-NEXT: cmpl %ecx, %eax -; X64-NEXT: jne .LBB16_3 -; X64-NEXT: # %bb.1: # %loadbb1 -; X64-NEXT: movzbl 4(%rdi), %eax -; X64-NEXT: movzbl 4(%rsi), %ecx -; X64-NEXT: subl %ecx, %eax -; X64-NEXT: retq -; X64-NEXT: .LBB16_3: # %res_block +; X64-NEXT: je .LBB16_1 +; X64-NEXT: # %bb.3: # %res_block ; X64-NEXT: setae %al ; X64-NEXT: movzbl %al, %eax ; X64-NEXT: leal -1(%rax,%rax), %eax +; X64-NEXT: retq +; X64-NEXT: .LBB16_1: # %loadbb1 +; X64-NEXT: movzbl 4(%rdi), %eax +; X64-NEXT: movzbl 4(%rsi), %ecx +; X64-NEXT: subl %ecx, %eax ; X64-NEXT: retq %m = tail call i32 @memcmp(i8* %X, i8* 
%Y, i64 5) nounwind ret i32 %m @@ -530,16 +530,16 @@ define i1 @length5_lt(i8* %X, i8* %Y) nounwind { ; X86-NEXT: bswapl %edx ; X86-NEXT: bswapl %esi ; X86-NEXT: cmpl %esi, %edx -; X86-NEXT: jne .LBB18_3 -; X86-NEXT: # %bb.1: # %loadbb1 -; X86-NEXT: movzbl 4(%eax), %eax -; X86-NEXT: movzbl 4(%ecx), %ecx -; X86-NEXT: subl %ecx, %eax -; X86-NEXT: jmp .LBB18_2 -; X86-NEXT: .LBB18_3: # %res_block +; X86-NEXT: je .LBB18_1 +; X86-NEXT: # %bb.3: # %res_block ; X86-NEXT: setae %al ; X86-NEXT: movzbl %al, %eax ; X86-NEXT: leal -1(%eax,%eax), %eax +; X86-NEXT: jmp .LBB18_2 +; X86-NEXT: .LBB18_1: # %loadbb1 +; X86-NEXT: movzbl 4(%eax), %eax +; X86-NEXT: movzbl 4(%ecx), %ecx +; X86-NEXT: subl %ecx, %eax ; X86-NEXT: .LBB18_2: # %endblock ; X86-NEXT: shrl $31, %eax ; X86-NEXT: # kill: def $al killed $al killed $eax @@ -553,20 +553,20 @@ define i1 @length5_lt(i8* %X, i8* %Y) nounwind { ; X64-NEXT: bswapl %eax ; X64-NEXT: bswapl %ecx ; X64-NEXT: cmpl %ecx, %eax -; X64-NEXT: jne .LBB18_3 -; X64-NEXT: # %bb.1: # %loadbb1 -; X64-NEXT: movzbl 4(%rdi), %eax -; X64-NEXT: movzbl 4(%rsi), %ecx -; X64-NEXT: subl %ecx, %eax -; X64-NEXT: shrl $31, %eax -; X64-NEXT: # kill: def $al killed $al killed $eax -; X64-NEXT: retq -; X64-NEXT: .LBB18_3: # %res_block +; X64-NEXT: je .LBB18_1 +; X64-NEXT: # %bb.3: # %res_block ; X64-NEXT: setae %al ; X64-NEXT: movzbl %al, %eax ; X64-NEXT: leal -1(%rax,%rax), %eax ; X64-NEXT: shrl $31, %eax ; X64-NEXT: # kill: def $al killed $al killed $eax +; X64-NEXT: retq +; X64-NEXT: .LBB18_1: # %loadbb1 +; X64-NEXT: movzbl 4(%rdi), %eax +; X64-NEXT: movzbl 4(%rsi), %ecx +; X64-NEXT: subl %ecx, %eax +; X64-NEXT: shrl $31, %eax +; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 5) nounwind %c = icmp slt i32 %m, 0 @@ -577,53 +577,56 @@ define i32 @length7(i8* %X, i8* %Y) nounwind { ; X86-LABEL: length7: ; X86: # %bb.0: ; X86-NEXT: pushl %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl 
{{[0-9]+}}(%esp), %edx ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: movl (%esi), %ecx -; X86-NEXT: movl (%eax), %edx +; X86-NEXT: movl (%esi), %eax +; X86-NEXT: movl (%edx), %ecx +; X86-NEXT: bswapl %eax ; X86-NEXT: bswapl %ecx -; X86-NEXT: bswapl %edx -; X86-NEXT: cmpl %edx, %ecx -; X86-NEXT: jne .LBB19_2 +; X86-NEXT: cmpl %ecx, %eax +; X86-NEXT: jne .LBB19_4 ; X86-NEXT: # %bb.1: # %loadbb1 -; X86-NEXT: movl 3(%esi), %ecx -; X86-NEXT: movl 3(%eax), %edx +; X86-NEXT: movl 3(%esi), %eax +; X86-NEXT: movl 3(%edx), %ecx +; X86-NEXT: bswapl %eax ; X86-NEXT: bswapl %ecx -; X86-NEXT: bswapl %edx -; X86-NEXT: xorl %eax, %eax -; X86-NEXT: cmpl %edx, %ecx -; X86-NEXT: je .LBB19_3 -; X86-NEXT: .LBB19_2: # %res_block +; X86-NEXT: cmpl %ecx, %eax +; X86-NEXT: je .LBB19_2 +; X86-NEXT: .LBB19_4: # %res_block +; X86-NEXT: xorl %edx, %edx +; X86-NEXT: cmpl %ecx, %eax +; X86-NEXT: setae %dl +; X86-NEXT: leal -1(%edx,%edx), %eax +; X86-NEXT: popl %esi +; X86-NEXT: retl +; X86-NEXT: .LBB19_2: ; X86-NEXT: xorl %eax, %eax -; X86-NEXT: cmpl %edx, %ecx -; X86-NEXT: setae %al -; X86-NEXT: leal -1(%eax,%eax), %eax -; X86-NEXT: .LBB19_3: # %endblock ; X86-NEXT: popl %esi ; X86-NEXT: retl ; ; X64-LABEL: length7: ; X64: # %bb.0: -; X64-NEXT: movl (%rdi), %ecx -; X64-NEXT: movl (%rsi), %edx +; X64-NEXT: movl (%rdi), %eax +; X64-NEXT: movl (%rsi), %ecx +; X64-NEXT: bswapl %eax ; X64-NEXT: bswapl %ecx -; X64-NEXT: bswapl %edx -; X64-NEXT: cmpl %edx, %ecx -; X64-NEXT: jne .LBB19_2 +; X64-NEXT: cmpl %ecx, %eax +; X64-NEXT: jne .LBB19_4 ; X64-NEXT: # %bb.1: # %loadbb1 -; X64-NEXT: movl 3(%rdi), %ecx -; X64-NEXT: movl 3(%rsi), %edx +; X64-NEXT: movl 3(%rdi), %eax +; X64-NEXT: movl 3(%rsi), %ecx +; X64-NEXT: bswapl %eax ; X64-NEXT: bswapl %ecx -; X64-NEXT: bswapl %edx -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpl %edx, %ecx -; X64-NEXT: je .LBB19_3 -; X64-NEXT: .LBB19_2: # %res_block +; X64-NEXT: cmpl %ecx, %eax +; X64-NEXT: je .LBB19_2 +; X64-NEXT: .LBB19_4: # %res_block +; X64-NEXT: xorl 
%edx, %edx +; X64-NEXT: cmpl %ecx, %eax +; X64-NEXT: setae %dl +; X64-NEXT: leal -1(%rdx,%rdx), %eax +; X64-NEXT: retq +; X64-NEXT: .LBB19_2: ; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpl %edx, %ecx -; X64-NEXT: setae %al -; X64-NEXT: leal -1(%rax,%rax), %eax -; X64-NEXT: .LBB19_3: # %endblock ; X64-NEXT: retq %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 7) nounwind ret i32 %m @@ -660,55 +663,60 @@ define i1 @length7_lt(i8* %X, i8* %Y) nounwind { ; X86-LABEL: length7_lt: ; X86: # %bb.0: ; X86-NEXT: pushl %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: movl (%esi), %ecx -; X86-NEXT: movl (%eax), %edx +; X86-NEXT: movl (%esi), %eax +; X86-NEXT: movl (%edx), %ecx +; X86-NEXT: bswapl %eax ; X86-NEXT: bswapl %ecx -; X86-NEXT: bswapl %edx -; X86-NEXT: cmpl %edx, %ecx -; X86-NEXT: jne .LBB21_2 +; X86-NEXT: cmpl %ecx, %eax +; X86-NEXT: jne .LBB21_4 ; X86-NEXT: # %bb.1: # %loadbb1 -; X86-NEXT: movl 3(%esi), %ecx -; X86-NEXT: movl 3(%eax), %edx +; X86-NEXT: movl 3(%esi), %eax +; X86-NEXT: movl 3(%edx), %ecx +; X86-NEXT: bswapl %eax ; X86-NEXT: bswapl %ecx -; X86-NEXT: bswapl %edx -; X86-NEXT: xorl %eax, %eax -; X86-NEXT: cmpl %edx, %ecx -; X86-NEXT: je .LBB21_3 -; X86-NEXT: .LBB21_2: # %res_block -; X86-NEXT: xorl %eax, %eax -; X86-NEXT: cmpl %edx, %ecx -; X86-NEXT: setae %al -; X86-NEXT: leal -1(%eax,%eax), %eax +; X86-NEXT: cmpl %ecx, %eax +; X86-NEXT: je .LBB21_2 +; X86-NEXT: .LBB21_4: # %res_block +; X86-NEXT: xorl %edx, %edx +; X86-NEXT: cmpl %ecx, %eax +; X86-NEXT: setae %dl +; X86-NEXT: leal -1(%edx,%edx), %eax ; X86-NEXT: .LBB21_3: # %endblock ; X86-NEXT: shrl $31, %eax ; X86-NEXT: # kill: def $al killed $al killed $eax ; X86-NEXT: popl %esi ; X86-NEXT: retl +; X86-NEXT: .LBB21_2: +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: jmp .LBB21_3 ; ; X64-LABEL: length7_lt: ; X64: # %bb.0: -; X64-NEXT: movl (%rdi), %ecx -; X64-NEXT: movl (%rsi), %edx +; X64-NEXT: movl (%rdi), %eax +; 
X64-NEXT: movl (%rsi), %ecx +; X64-NEXT: bswapl %eax ; X64-NEXT: bswapl %ecx -; X64-NEXT: bswapl %edx -; X64-NEXT: cmpl %edx, %ecx +; X64-NEXT: cmpl %ecx, %eax ; X64-NEXT: jne .LBB21_2 ; X64-NEXT: # %bb.1: # %loadbb1 -; X64-NEXT: movl 3(%rdi), %ecx -; X64-NEXT: movl 3(%rsi), %edx +; X64-NEXT: movl 3(%rdi), %eax +; X64-NEXT: movl 3(%rsi), %ecx +; X64-NEXT: bswapl %eax ; X64-NEXT: bswapl %ecx -; X64-NEXT: bswapl %edx -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpl %edx, %ecx +; X64-NEXT: cmpl %ecx, %eax ; X64-NEXT: je .LBB21_3 ; X64-NEXT: .LBB21_2: # %res_block +; X64-NEXT: xorl %edx, %edx +; X64-NEXT: cmpl %ecx, %eax +; X64-NEXT: setae %dl +; X64-NEXT: leal -1(%rdx,%rdx), %eax +; X64-NEXT: shrl $31, %eax +; X64-NEXT: # kill: def $al killed $al killed $eax +; X64-NEXT: retq +; X64-NEXT: .LBB21_3: ; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpl %edx, %ecx -; X64-NEXT: setae %al -; X64-NEXT: leal -1(%rax,%rax), %eax -; X64-NEXT: .LBB21_3: # %endblock ; X64-NEXT: shrl $31, %eax ; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq @@ -721,28 +729,30 @@ define i32 @length8(i8* %X, i8* %Y) nounwind { ; X86-LABEL: length8: ; X86: # %bb.0: ; X86-NEXT: pushl %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: movl (%esi), %ecx -; X86-NEXT: movl (%eax), %edx +; X86-NEXT: movl (%esi), %eax +; X86-NEXT: movl (%edx), %ecx +; X86-NEXT: bswapl %eax ; X86-NEXT: bswapl %ecx -; X86-NEXT: bswapl %edx -; X86-NEXT: cmpl %edx, %ecx -; X86-NEXT: jne .LBB22_2 +; X86-NEXT: cmpl %ecx, %eax +; X86-NEXT: jne .LBB22_4 ; X86-NEXT: # %bb.1: # %loadbb1 -; X86-NEXT: movl 4(%esi), %ecx -; X86-NEXT: movl 4(%eax), %edx +; X86-NEXT: movl 4(%esi), %eax +; X86-NEXT: movl 4(%edx), %ecx +; X86-NEXT: bswapl %eax ; X86-NEXT: bswapl %ecx -; X86-NEXT: bswapl %edx -; X86-NEXT: xorl %eax, %eax -; X86-NEXT: cmpl %edx, %ecx -; X86-NEXT: je .LBB22_3 -; X86-NEXT: .LBB22_2: # %res_block +; X86-NEXT: cmpl %ecx, %eax +; 
X86-NEXT: je .LBB22_2 +; X86-NEXT: .LBB22_4: # %res_block +; X86-NEXT: xorl %edx, %edx +; X86-NEXT: cmpl %ecx, %eax +; X86-NEXT: setae %dl +; X86-NEXT: leal -1(%edx,%edx), %eax +; X86-NEXT: popl %esi +; X86-NEXT: retl +; X86-NEXT: .LBB22_2: ; X86-NEXT: xorl %eax, %eax -; X86-NEXT: cmpl %edx, %ecx -; X86-NEXT: setae %al -; X86-NEXT: leal -1(%eax,%eax), %eax -; X86-NEXT: .LBB22_3: # %endblock ; X86-NEXT: popl %esi ; X86-NEXT: retl ; @@ -944,60 +954,63 @@ define i32 @length12(i8* %X, i8* %Y) nounwind { ; X86-LABEL: length12: ; X86: # %bb.0: ; X86-NEXT: pushl %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: movl (%esi), %ecx -; X86-NEXT: movl (%eax), %edx +; X86-NEXT: movl (%esi), %eax +; X86-NEXT: movl (%edx), %ecx +; X86-NEXT: bswapl %eax ; X86-NEXT: bswapl %ecx -; X86-NEXT: bswapl %edx -; X86-NEXT: cmpl %edx, %ecx -; X86-NEXT: jne .LBB29_3 +; X86-NEXT: cmpl %ecx, %eax +; X86-NEXT: jne .LBB29_5 ; X86-NEXT: # %bb.1: # %loadbb1 -; X86-NEXT: movl 4(%esi), %ecx -; X86-NEXT: movl 4(%eax), %edx +; X86-NEXT: movl 4(%esi), %eax +; X86-NEXT: movl 4(%edx), %ecx +; X86-NEXT: bswapl %eax ; X86-NEXT: bswapl %ecx -; X86-NEXT: bswapl %edx -; X86-NEXT: cmpl %edx, %ecx -; X86-NEXT: jne .LBB29_3 +; X86-NEXT: cmpl %ecx, %eax +; X86-NEXT: jne .LBB29_5 ; X86-NEXT: # %bb.2: # %loadbb2 -; X86-NEXT: movl 8(%esi), %ecx -; X86-NEXT: movl 8(%eax), %edx +; X86-NEXT: movl 8(%esi), %eax +; X86-NEXT: movl 8(%edx), %ecx +; X86-NEXT: bswapl %eax ; X86-NEXT: bswapl %ecx -; X86-NEXT: bswapl %edx -; X86-NEXT: xorl %eax, %eax -; X86-NEXT: cmpl %edx, %ecx -; X86-NEXT: je .LBB29_4 -; X86-NEXT: .LBB29_3: # %res_block +; X86-NEXT: cmpl %ecx, %eax +; X86-NEXT: je .LBB29_3 +; X86-NEXT: .LBB29_5: # %res_block +; X86-NEXT: xorl %edx, %edx +; X86-NEXT: cmpl %ecx, %eax +; X86-NEXT: setae %dl +; X86-NEXT: leal -1(%edx,%edx), %eax +; X86-NEXT: popl %esi +; X86-NEXT: retl +; X86-NEXT: .LBB29_3: ; X86-NEXT: xorl %eax, %eax -; 
X86-NEXT: cmpl %edx, %ecx -; X86-NEXT: setae %al -; X86-NEXT: leal -1(%eax,%eax), %eax -; X86-NEXT: .LBB29_4: # %endblock ; X86-NEXT: popl %esi ; X86-NEXT: retl ; ; X64-LABEL: length12: ; X64: # %bb.0: -; X64-NEXT: movq (%rdi), %rcx -; X64-NEXT: movq (%rsi), %rdx +; X64-NEXT: movq (%rdi), %rax +; X64-NEXT: movq (%rsi), %rcx +; X64-NEXT: bswapq %rax ; X64-NEXT: bswapq %rcx -; X64-NEXT: bswapq %rdx -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: jne .LBB29_2 +; X64-NEXT: cmpq %rcx, %rax +; X64-NEXT: jne .LBB29_4 ; X64-NEXT: # %bb.1: # %loadbb1 -; X64-NEXT: movl 8(%rdi), %ecx -; X64-NEXT: movl 8(%rsi), %edx +; X64-NEXT: movl 8(%rdi), %eax +; X64-NEXT: movl 8(%rsi), %ecx +; X64-NEXT: bswapl %eax ; X64-NEXT: bswapl %ecx -; X64-NEXT: bswapl %edx -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: je .LBB29_3 -; X64-NEXT: .LBB29_2: # %res_block +; X64-NEXT: cmpq %rcx, %rax +; X64-NEXT: je .LBB29_2 +; X64-NEXT: .LBB29_4: # %res_block +; X64-NEXT: xorl %edx, %edx +; X64-NEXT: cmpq %rcx, %rax +; X64-NEXT: setae %dl +; X64-NEXT: leal -1(%rdx,%rdx), %eax +; X64-NEXT: retq +; X64-NEXT: .LBB29_2: ; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: setae %al -; X64-NEXT: leal -1(%rax,%rax), %eax -; X64-NEXT: .LBB29_3: # %endblock ; X64-NEXT: retq %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 12) nounwind ret i32 %m @@ -1116,67 +1129,70 @@ define i32 @length16(i8* %X, i8* %Y) nounwind { ; X86-LABEL: length16: ; X86: # %bb.0: ; X86-NEXT: pushl %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: movl (%esi), %ecx -; X86-NEXT: movl (%eax), %edx +; X86-NEXT: movl (%esi), %eax +; X86-NEXT: movl (%edx), %ecx +; X86-NEXT: bswapl %eax ; X86-NEXT: bswapl %ecx -; X86-NEXT: bswapl %edx -; X86-NEXT: cmpl %edx, %ecx -; X86-NEXT: jne .LBB33_4 +; X86-NEXT: cmpl %ecx, %eax +; X86-NEXT: jne .LBB33_6 ; X86-NEXT: # %bb.1: # %loadbb1 -; X86-NEXT: movl 4(%esi), %ecx -; X86-NEXT: movl 
4(%eax), %edx +; X86-NEXT: movl 4(%esi), %eax +; X86-NEXT: movl 4(%edx), %ecx +; X86-NEXT: bswapl %eax ; X86-NEXT: bswapl %ecx -; X86-NEXT: bswapl %edx -; X86-NEXT: cmpl %edx, %ecx -; X86-NEXT: jne .LBB33_4 +; X86-NEXT: cmpl %ecx, %eax +; X86-NEXT: jne .LBB33_6 ; X86-NEXT: # %bb.2: # %loadbb2 -; X86-NEXT: movl 8(%esi), %ecx -; X86-NEXT: movl 8(%eax), %edx +; X86-NEXT: movl 8(%esi), %eax +; X86-NEXT: movl 8(%edx), %ecx +; X86-NEXT: bswapl %eax ; X86-NEXT: bswapl %ecx -; X86-NEXT: bswapl %edx -; X86-NEXT: cmpl %edx, %ecx -; X86-NEXT: jne .LBB33_4 +; X86-NEXT: cmpl %ecx, %eax +; X86-NEXT: jne .LBB33_6 ; X86-NEXT: # %bb.3: # %loadbb3 -; X86-NEXT: movl 12(%esi), %ecx -; X86-NEXT: movl 12(%eax), %edx +; X86-NEXT: movl 12(%esi), %eax +; X86-NEXT: movl 12(%edx), %ecx +; X86-NEXT: bswapl %eax ; X86-NEXT: bswapl %ecx -; X86-NEXT: bswapl %edx -; X86-NEXT: xorl %eax, %eax -; X86-NEXT: cmpl %edx, %ecx -; X86-NEXT: je .LBB33_5 -; X86-NEXT: .LBB33_4: # %res_block +; X86-NEXT: cmpl %ecx, %eax +; X86-NEXT: je .LBB33_4 +; X86-NEXT: .LBB33_6: # %res_block +; X86-NEXT: xorl %edx, %edx +; X86-NEXT: cmpl %ecx, %eax +; X86-NEXT: setae %dl +; X86-NEXT: leal -1(%edx,%edx), %eax +; X86-NEXT: popl %esi +; X86-NEXT: retl +; X86-NEXT: .LBB33_4: ; X86-NEXT: xorl %eax, %eax -; X86-NEXT: cmpl %edx, %ecx -; X86-NEXT: setae %al -; X86-NEXT: leal -1(%eax,%eax), %eax -; X86-NEXT: .LBB33_5: # %endblock ; X86-NEXT: popl %esi ; X86-NEXT: retl ; ; X64-LABEL: length16: ; X64: # %bb.0: -; X64-NEXT: movq (%rdi), %rcx -; X64-NEXT: movq (%rsi), %rdx +; X64-NEXT: movq (%rdi), %rax +; X64-NEXT: movq (%rsi), %rcx +; X64-NEXT: bswapq %rax ; X64-NEXT: bswapq %rcx -; X64-NEXT: bswapq %rdx -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: jne .LBB33_2 +; X64-NEXT: cmpq %rcx, %rax +; X64-NEXT: jne .LBB33_4 ; X64-NEXT: # %bb.1: # %loadbb1 -; X64-NEXT: movq 8(%rdi), %rcx -; X64-NEXT: movq 8(%rsi), %rdx +; X64-NEXT: movq 8(%rdi), %rax +; X64-NEXT: movq 8(%rsi), %rcx +; X64-NEXT: bswapq %rax ; X64-NEXT: bswapq %rcx -; X64-NEXT: 
bswapq %rdx -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: je .LBB33_3 -; X64-NEXT: .LBB33_2: # %res_block +; X64-NEXT: cmpq %rcx, %rax +; X64-NEXT: je .LBB33_2 +; X64-NEXT: .LBB33_4: # %res_block +; X64-NEXT: xorl %edx, %edx +; X64-NEXT: cmpq %rcx, %rax +; X64-NEXT: setae %dl +; X64-NEXT: leal -1(%rdx,%rdx), %eax +; X64-NEXT: retq +; X64-NEXT: .LBB33_2: ; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: setae %al -; X64-NEXT: leal -1(%rax,%rax), %eax -; X64-NEXT: .LBB33_3: # %endblock ; X64-NEXT: retq %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 16) nounwind ret i32 %m @@ -1291,69 +1307,74 @@ define i1 @length16_lt(i8* %x, i8* %y) nounwind { ; X86-LABEL: length16_lt: ; X86: # %bb.0: ; X86-NEXT: pushl %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: movl (%esi), %ecx -; X86-NEXT: movl (%eax), %edx +; X86-NEXT: movl (%esi), %eax +; X86-NEXT: movl (%edx), %ecx +; X86-NEXT: bswapl %eax ; X86-NEXT: bswapl %ecx -; X86-NEXT: bswapl %edx -; X86-NEXT: cmpl %edx, %ecx -; X86-NEXT: jne .LBB35_4 +; X86-NEXT: cmpl %ecx, %eax +; X86-NEXT: jne .LBB35_6 ; X86-NEXT: # %bb.1: # %loadbb1 -; X86-NEXT: movl 4(%esi), %ecx -; X86-NEXT: movl 4(%eax), %edx +; X86-NEXT: movl 4(%esi), %eax +; X86-NEXT: movl 4(%edx), %ecx +; X86-NEXT: bswapl %eax ; X86-NEXT: bswapl %ecx -; X86-NEXT: bswapl %edx -; X86-NEXT: cmpl %edx, %ecx -; X86-NEXT: jne .LBB35_4 +; X86-NEXT: cmpl %ecx, %eax +; X86-NEXT: jne .LBB35_6 ; X86-NEXT: # %bb.2: # %loadbb2 -; X86-NEXT: movl 8(%esi), %ecx -; X86-NEXT: movl 8(%eax), %edx +; X86-NEXT: movl 8(%esi), %eax +; X86-NEXT: movl 8(%edx), %ecx +; X86-NEXT: bswapl %eax ; X86-NEXT: bswapl %ecx -; X86-NEXT: bswapl %edx -; X86-NEXT: cmpl %edx, %ecx -; X86-NEXT: jne .LBB35_4 +; X86-NEXT: cmpl %ecx, %eax +; X86-NEXT: jne .LBB35_6 ; X86-NEXT: # %bb.3: # %loadbb3 -; X86-NEXT: movl 12(%esi), %ecx -; X86-NEXT: movl 12(%eax), %edx +; X86-NEXT: movl 
12(%esi), %eax +; X86-NEXT: movl 12(%edx), %ecx +; X86-NEXT: bswapl %eax ; X86-NEXT: bswapl %ecx -; X86-NEXT: bswapl %edx -; X86-NEXT: xorl %eax, %eax -; X86-NEXT: cmpl %edx, %ecx -; X86-NEXT: je .LBB35_5 -; X86-NEXT: .LBB35_4: # %res_block -; X86-NEXT: xorl %eax, %eax -; X86-NEXT: cmpl %edx, %ecx -; X86-NEXT: setae %al -; X86-NEXT: leal -1(%eax,%eax), %eax +; X86-NEXT: cmpl %ecx, %eax +; X86-NEXT: je .LBB35_4 +; X86-NEXT: .LBB35_6: # %res_block +; X86-NEXT: xorl %edx, %edx +; X86-NEXT: cmpl %ecx, %eax +; X86-NEXT: setae %dl +; X86-NEXT: leal -1(%edx,%edx), %eax ; X86-NEXT: .LBB35_5: # %endblock ; X86-NEXT: shrl $31, %eax ; X86-NEXT: # kill: def $al killed $al killed $eax ; X86-NEXT: popl %esi ; X86-NEXT: retl +; X86-NEXT: .LBB35_4: +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: jmp .LBB35_5 ; ; X64-LABEL: length16_lt: ; X64: # %bb.0: -; X64-NEXT: movq (%rdi), %rcx -; X64-NEXT: movq (%rsi), %rdx +; X64-NEXT: movq (%rdi), %rax +; X64-NEXT: movq (%rsi), %rcx +; X64-NEXT: bswapq %rax ; X64-NEXT: bswapq %rcx -; X64-NEXT: bswapq %rdx -; X64-NEXT: cmpq %rdx, %rcx +; X64-NEXT: cmpq %rcx, %rax ; X64-NEXT: jne .LBB35_2 ; X64-NEXT: # %bb.1: # %loadbb1 -; X64-NEXT: movq 8(%rdi), %rcx -; X64-NEXT: movq 8(%rsi), %rdx +; X64-NEXT: movq 8(%rdi), %rax +; X64-NEXT: movq 8(%rsi), %rcx +; X64-NEXT: bswapq %rax ; X64-NEXT: bswapq %rcx -; X64-NEXT: bswapq %rdx -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpq %rdx, %rcx +; X64-NEXT: cmpq %rcx, %rax ; X64-NEXT: je .LBB35_3 ; X64-NEXT: .LBB35_2: # %res_block +; X64-NEXT: xorl %edx, %edx +; X64-NEXT: cmpq %rcx, %rax +; X64-NEXT: setae %dl +; X64-NEXT: leal -1(%rdx,%rdx), %eax +; X64-NEXT: shrl $31, %eax +; X64-NEXT: # kill: def $al killed $al killed $eax +; X64-NEXT: retq +; X64-NEXT: .LBB35_3: ; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: setae %al -; X64-NEXT: leal -1(%rax,%rax), %eax -; X64-NEXT: .LBB35_3: # %endblock ; X64-NEXT: shrl $31, %eax ; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq @@ 
-1373,39 +1394,41 @@ define i1 @length16_gt(i8* %x, i8* %y) nounwind { ; X86-NEXT: bswapl %eax ; X86-NEXT: bswapl %ecx ; X86-NEXT: cmpl %ecx, %eax -; X86-NEXT: jne .LBB36_4 +; X86-NEXT: jne .LBB36_6 ; X86-NEXT: # %bb.1: # %loadbb1 ; X86-NEXT: movl 4(%esi), %eax ; X86-NEXT: movl 4(%edx), %ecx ; X86-NEXT: bswapl %eax ; X86-NEXT: bswapl %ecx ; X86-NEXT: cmpl %ecx, %eax -; X86-NEXT: jne .LBB36_4 +; X86-NEXT: jne .LBB36_6 ; X86-NEXT: # %bb.2: # %loadbb2 ; X86-NEXT: movl 8(%esi), %eax ; X86-NEXT: movl 8(%edx), %ecx ; X86-NEXT: bswapl %eax ; X86-NEXT: bswapl %ecx ; X86-NEXT: cmpl %ecx, %eax -; X86-NEXT: jne .LBB36_4 +; X86-NEXT: jne .LBB36_6 ; X86-NEXT: # %bb.3: # %loadbb3 ; X86-NEXT: movl 12(%esi), %eax ; X86-NEXT: movl 12(%edx), %ecx ; X86-NEXT: bswapl %eax ; X86-NEXT: bswapl %ecx -; X86-NEXT: xorl %edx, %edx ; X86-NEXT: cmpl %ecx, %eax -; X86-NEXT: je .LBB36_5 -; X86-NEXT: .LBB36_4: # %res_block +; X86-NEXT: je .LBB36_4 +; X86-NEXT: .LBB36_6: # %res_block ; X86-NEXT: xorl %edx, %edx ; X86-NEXT: cmpl %ecx, %eax ; X86-NEXT: setae %dl -; X86-NEXT: leal -1(%edx,%edx), %edx +; X86-NEXT: leal -1(%edx,%edx), %eax ; X86-NEXT: .LBB36_5: # %endblock -; X86-NEXT: testl %edx, %edx +; X86-NEXT: testl %eax, %eax ; X86-NEXT: setg %al ; X86-NEXT: popl %esi ; X86-NEXT: retl +; X86-NEXT: .LBB36_4: +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: jmp .LBB36_5 ; ; X64-LABEL: length16_gt: ; X64: # %bb.0: @@ -1414,24 +1437,26 @@ define i1 @length16_gt(i8* %x, i8* %y) nounwind { ; X64-NEXT: bswapq %rax ; X64-NEXT: bswapq %rcx ; X64-NEXT: cmpq %rcx, %rax -; X64-NEXT: jne .LBB36_2 +; X64-NEXT: jne .LBB36_4 ; X64-NEXT: # %bb.1: # %loadbb1 ; X64-NEXT: movq 8(%rdi), %rax ; X64-NEXT: movq 8(%rsi), %rcx ; X64-NEXT: bswapq %rax ; X64-NEXT: bswapq %rcx -; X64-NEXT: xorl %edx, %edx ; X64-NEXT: cmpq %rcx, %rax -; X64-NEXT: je .LBB36_3 -; X64-NEXT: .LBB36_2: # %res_block +; X64-NEXT: je .LBB36_2 +; X64-NEXT: .LBB36_4: # %res_block ; X64-NEXT: xorl %edx, %edx ; X64-NEXT: cmpq %rcx, %rax ; X64-NEXT: setae %dl -; 
X64-NEXT: leal -1(%rdx,%rdx), %edx +; X64-NEXT: leal -1(%rdx,%rdx), %eax ; X64-NEXT: .LBB36_3: # %endblock -; X64-NEXT: testl %edx, %edx +; X64-NEXT: testl %eax, %eax ; X64-NEXT: setg %al ; X64-NEXT: retq +; X64-NEXT: .LBB36_2: +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: jmp .LBB36_3 %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 16) nounwind %cmp = icmp sgt i32 %call, 0 ret i1 %cmp @@ -1549,33 +1574,34 @@ define i32 @length24(i8* %X, i8* %Y) nounwind { ; ; X64-LABEL: length24: ; X64: # %bb.0: -; X64-NEXT: movq (%rdi), %rcx -; X64-NEXT: movq (%rsi), %rdx +; X64-NEXT: movq (%rdi), %rax +; X64-NEXT: movq (%rsi), %rcx +; X64-NEXT: bswapq %rax ; X64-NEXT: bswapq %rcx -; X64-NEXT: bswapq %rdx -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: jne .LBB38_3 +; X64-NEXT: cmpq %rcx, %rax +; X64-NEXT: jne .LBB38_5 ; X64-NEXT: # %bb.1: # %loadbb1 -; X64-NEXT: movq 8(%rdi), %rcx -; X64-NEXT: movq 8(%rsi), %rdx +; X64-NEXT: movq 8(%rdi), %rax +; X64-NEXT: movq 8(%rsi), %rcx +; X64-NEXT: bswapq %rax ; X64-NEXT: bswapq %rcx -; X64-NEXT: bswapq %rdx -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: jne .LBB38_3 +; X64-NEXT: cmpq %rcx, %rax +; X64-NEXT: jne .LBB38_5 ; X64-NEXT: # %bb.2: # %loadbb2 -; X64-NEXT: movq 16(%rdi), %rcx -; X64-NEXT: movq 16(%rsi), %rdx +; X64-NEXT: movq 16(%rdi), %rax +; X64-NEXT: movq 16(%rsi), %rcx +; X64-NEXT: bswapq %rax ; X64-NEXT: bswapq %rcx -; X64-NEXT: bswapq %rdx -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: je .LBB38_4 -; X64-NEXT: .LBB38_3: # %res_block +; X64-NEXT: cmpq %rcx, %rax +; X64-NEXT: je .LBB38_3 +; X64-NEXT: .LBB38_5: # %res_block +; X64-NEXT: xorl %edx, %edx +; X64-NEXT: cmpq %rcx, %rax +; X64-NEXT: setae %dl +; X64-NEXT: leal -1(%rdx,%rdx), %eax +; X64-NEXT: retq +; X64-NEXT: .LBB38_3: ; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: setae %al -; X64-NEXT: leal -1(%rax,%rax), %eax -; X64-NEXT: .LBB38_4: # %endblock ; X64-NEXT: retq %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 24) nounwind ret i32 %m 
@@ -1708,33 +1734,36 @@ define i1 @length24_lt(i8* %x, i8* %y) nounwind { ; ; X64-LABEL: length24_lt: ; X64: # %bb.0: -; X64-NEXT: movq (%rdi), %rcx -; X64-NEXT: movq (%rsi), %rdx +; X64-NEXT: movq (%rdi), %rax +; X64-NEXT: movq (%rsi), %rcx +; X64-NEXT: bswapq %rax ; X64-NEXT: bswapq %rcx -; X64-NEXT: bswapq %rdx -; X64-NEXT: cmpq %rdx, %rcx +; X64-NEXT: cmpq %rcx, %rax ; X64-NEXT: jne .LBB40_3 ; X64-NEXT: # %bb.1: # %loadbb1 -; X64-NEXT: movq 8(%rdi), %rcx -; X64-NEXT: movq 8(%rsi), %rdx +; X64-NEXT: movq 8(%rdi), %rax +; X64-NEXT: movq 8(%rsi), %rcx +; X64-NEXT: bswapq %rax ; X64-NEXT: bswapq %rcx -; X64-NEXT: bswapq %rdx -; X64-NEXT: cmpq %rdx, %rcx +; X64-NEXT: cmpq %rcx, %rax ; X64-NEXT: jne .LBB40_3 ; X64-NEXT: # %bb.2: # %loadbb2 -; X64-NEXT: movq 16(%rdi), %rcx -; X64-NEXT: movq 16(%rsi), %rdx +; X64-NEXT: movq 16(%rdi), %rax +; X64-NEXT: movq 16(%rsi), %rcx +; X64-NEXT: bswapq %rax ; X64-NEXT: bswapq %rcx -; X64-NEXT: bswapq %rdx -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpq %rdx, %rcx +; X64-NEXT: cmpq %rcx, %rax ; X64-NEXT: je .LBB40_4 ; X64-NEXT: .LBB40_3: # %res_block +; X64-NEXT: xorl %edx, %edx +; X64-NEXT: cmpq %rcx, %rax +; X64-NEXT: setae %dl +; X64-NEXT: leal -1(%rdx,%rdx), %eax +; X64-NEXT: shrl $31, %eax +; X64-NEXT: # kill: def $al killed $al killed $eax +; X64-NEXT: retq +; X64-NEXT: .LBB40_4: ; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: setae %al -; X64-NEXT: leal -1(%rax,%rax), %eax -; X64-NEXT: .LBB40_4: # %endblock ; X64-NEXT: shrl $31, %eax ; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq @@ -1763,31 +1792,33 @@ define i1 @length24_gt(i8* %x, i8* %y) nounwind { ; X64-NEXT: bswapq %rax ; X64-NEXT: bswapq %rcx ; X64-NEXT: cmpq %rcx, %rax -; X64-NEXT: jne .LBB41_3 +; X64-NEXT: jne .LBB41_5 ; X64-NEXT: # %bb.1: # %loadbb1 ; X64-NEXT: movq 8(%rdi), %rax ; X64-NEXT: movq 8(%rsi), %rcx ; X64-NEXT: bswapq %rax ; X64-NEXT: bswapq %rcx ; X64-NEXT: cmpq %rcx, %rax -; X64-NEXT: jne .LBB41_3 +; X64-NEXT: 
jne .LBB41_5 ; X64-NEXT: # %bb.2: # %loadbb2 ; X64-NEXT: movq 16(%rdi), %rax ; X64-NEXT: movq 16(%rsi), %rcx ; X64-NEXT: bswapq %rax ; X64-NEXT: bswapq %rcx -; X64-NEXT: xorl %edx, %edx ; X64-NEXT: cmpq %rcx, %rax -; X64-NEXT: je .LBB41_4 -; X64-NEXT: .LBB41_3: # %res_block +; X64-NEXT: je .LBB41_3 +; X64-NEXT: .LBB41_5: # %res_block ; X64-NEXT: xorl %edx, %edx ; X64-NEXT: cmpq %rcx, %rax ; X64-NEXT: setae %dl -; X64-NEXT: leal -1(%rdx,%rdx), %edx +; X64-NEXT: leal -1(%rdx,%rdx), %eax ; X64-NEXT: .LBB41_4: # %endblock -; X64-NEXT: testl %edx, %edx +; X64-NEXT: testl %eax, %eax ; X64-NEXT: setg %al ; X64-NEXT: retq +; X64-NEXT: .LBB41_3: +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: jmp .LBB41_4 %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 24) nounwind %cmp = icmp sgt i32 %call, 0 ret i1 %cmp @@ -1907,40 +1938,41 @@ define i32 @length31(i8* %X, i8* %Y) nounwind { ; ; X64-LABEL: length31: ; X64: # %bb.0: -; X64-NEXT: movq (%rdi), %rcx -; X64-NEXT: movq (%rsi), %rdx +; X64-NEXT: movq (%rdi), %rax +; X64-NEXT: movq (%rsi), %rcx +; X64-NEXT: bswapq %rax ; X64-NEXT: bswapq %rcx -; X64-NEXT: bswapq %rdx -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: jne .LBB43_4 +; X64-NEXT: cmpq %rcx, %rax +; X64-NEXT: jne .LBB43_6 ; X64-NEXT: # %bb.1: # %loadbb1 -; X64-NEXT: movq 8(%rdi), %rcx -; X64-NEXT: movq 8(%rsi), %rdx +; X64-NEXT: movq 8(%rdi), %rax +; X64-NEXT: movq 8(%rsi), %rcx +; X64-NEXT: bswapq %rax ; X64-NEXT: bswapq %rcx -; X64-NEXT: bswapq %rdx -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: jne .LBB43_4 +; X64-NEXT: cmpq %rcx, %rax +; X64-NEXT: jne .LBB43_6 ; X64-NEXT: # %bb.2: # %loadbb2 -; X64-NEXT: movq 16(%rdi), %rcx -; X64-NEXT: movq 16(%rsi), %rdx +; X64-NEXT: movq 16(%rdi), %rax +; X64-NEXT: movq 16(%rsi), %rcx +; X64-NEXT: bswapq %rax ; X64-NEXT: bswapq %rcx -; X64-NEXT: bswapq %rdx -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: jne .LBB43_4 +; X64-NEXT: cmpq %rcx, %rax +; X64-NEXT: jne .LBB43_6 ; X64-NEXT: # %bb.3: # %loadbb3 -; X64-NEXT: movq 23(%rdi), %rcx -; X64-NEXT: movq 
23(%rsi), %rdx +; X64-NEXT: movq 23(%rdi), %rax +; X64-NEXT: movq 23(%rsi), %rcx +; X64-NEXT: bswapq %rax ; X64-NEXT: bswapq %rcx -; X64-NEXT: bswapq %rdx -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: je .LBB43_5 -; X64-NEXT: .LBB43_4: # %res_block +; X64-NEXT: cmpq %rcx, %rax +; X64-NEXT: je .LBB43_4 +; X64-NEXT: .LBB43_6: # %res_block +; X64-NEXT: xorl %edx, %edx +; X64-NEXT: cmpq %rcx, %rax +; X64-NEXT: setae %dl +; X64-NEXT: leal -1(%rdx,%rdx), %eax +; X64-NEXT: retq +; X64-NEXT: .LBB43_4: ; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: setae %al -; X64-NEXT: leal -1(%rax,%rax), %eax -; X64-NEXT: .LBB43_5: # %endblock ; X64-NEXT: retq %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 31) nounwind ret i32 %m @@ -2072,40 +2104,43 @@ define i1 @length31_lt(i8* %x, i8* %y) nounwind { ; ; X64-LABEL: length31_lt: ; X64: # %bb.0: -; X64-NEXT: movq (%rdi), %rcx -; X64-NEXT: movq (%rsi), %rdx +; X64-NEXT: movq (%rdi), %rax +; X64-NEXT: movq (%rsi), %rcx +; X64-NEXT: bswapq %rax ; X64-NEXT: bswapq %rcx -; X64-NEXT: bswapq %rdx -; X64-NEXT: cmpq %rdx, %rcx +; X64-NEXT: cmpq %rcx, %rax ; X64-NEXT: jne .LBB45_4 ; X64-NEXT: # %bb.1: # %loadbb1 -; X64-NEXT: movq 8(%rdi), %rcx -; X64-NEXT: movq 8(%rsi), %rdx +; X64-NEXT: movq 8(%rdi), %rax +; X64-NEXT: movq 8(%rsi), %rcx +; X64-NEXT: bswapq %rax ; X64-NEXT: bswapq %rcx -; X64-NEXT: bswapq %rdx -; X64-NEXT: cmpq %rdx, %rcx +; X64-NEXT: cmpq %rcx, %rax ; X64-NEXT: jne .LBB45_4 ; X64-NEXT: # %bb.2: # %loadbb2 -; X64-NEXT: movq 16(%rdi), %rcx -; X64-NEXT: movq 16(%rsi), %rdx +; X64-NEXT: movq 16(%rdi), %rax +; X64-NEXT: movq 16(%rsi), %rcx +; X64-NEXT: bswapq %rax ; X64-NEXT: bswapq %rcx -; X64-NEXT: bswapq %rdx -; X64-NEXT: cmpq %rdx, %rcx +; X64-NEXT: cmpq %rcx, %rax ; X64-NEXT: jne .LBB45_4 ; X64-NEXT: # %bb.3: # %loadbb3 -; X64-NEXT: movq 23(%rdi), %rcx -; X64-NEXT: movq 23(%rsi), %rdx +; X64-NEXT: movq 23(%rdi), %rax +; X64-NEXT: movq 23(%rsi), %rcx +; X64-NEXT: bswapq %rax ; 
X64-NEXT: bswapq %rcx -; X64-NEXT: bswapq %rdx -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpq %rdx, %rcx +; X64-NEXT: cmpq %rcx, %rax ; X64-NEXT: je .LBB45_5 ; X64-NEXT: .LBB45_4: # %res_block +; X64-NEXT: xorl %edx, %edx +; X64-NEXT: cmpq %rcx, %rax +; X64-NEXT: setae %dl +; X64-NEXT: leal -1(%rdx,%rdx), %eax +; X64-NEXT: shrl $31, %eax +; X64-NEXT: # kill: def $al killed $al killed $eax +; X64-NEXT: retq +; X64-NEXT: .LBB45_5: ; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: setae %al -; X64-NEXT: leal -1(%rax,%rax), %eax -; X64-NEXT: .LBB45_5: # %endblock ; X64-NEXT: shrl $31, %eax ; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq @@ -2134,38 +2169,40 @@ define i1 @length31_gt(i8* %x, i8* %y) nounwind { ; X64-NEXT: bswapq %rax ; X64-NEXT: bswapq %rcx ; X64-NEXT: cmpq %rcx, %rax -; X64-NEXT: jne .LBB46_4 +; X64-NEXT: jne .LBB46_6 ; X64-NEXT: # %bb.1: # %loadbb1 ; X64-NEXT: movq 8(%rdi), %rax ; X64-NEXT: movq 8(%rsi), %rcx ; X64-NEXT: bswapq %rax ; X64-NEXT: bswapq %rcx ; X64-NEXT: cmpq %rcx, %rax -; X64-NEXT: jne .LBB46_4 +; X64-NEXT: jne .LBB46_6 ; X64-NEXT: # %bb.2: # %loadbb2 ; X64-NEXT: movq 16(%rdi), %rax ; X64-NEXT: movq 16(%rsi), %rcx ; X64-NEXT: bswapq %rax ; X64-NEXT: bswapq %rcx ; X64-NEXT: cmpq %rcx, %rax -; X64-NEXT: jne .LBB46_4 +; X64-NEXT: jne .LBB46_6 ; X64-NEXT: # %bb.3: # %loadbb3 ; X64-NEXT: movq 23(%rdi), %rax ; X64-NEXT: movq 23(%rsi), %rcx ; X64-NEXT: bswapq %rax ; X64-NEXT: bswapq %rcx -; X64-NEXT: xorl %edx, %edx ; X64-NEXT: cmpq %rcx, %rax -; X64-NEXT: je .LBB46_5 -; X64-NEXT: .LBB46_4: # %res_block +; X64-NEXT: je .LBB46_4 +; X64-NEXT: .LBB46_6: # %res_block ; X64-NEXT: xorl %edx, %edx ; X64-NEXT: cmpq %rcx, %rax ; X64-NEXT: setae %dl -; X64-NEXT: leal -1(%rdx,%rdx), %edx +; X64-NEXT: leal -1(%rdx,%rdx), %eax ; X64-NEXT: .LBB46_5: # %endblock -; X64-NEXT: testl %edx, %edx +; X64-NEXT: testl %eax, %eax ; X64-NEXT: setg %al ; X64-NEXT: retq +; X64-NEXT: .LBB46_4: +; X64-NEXT: xorl %eax, %eax +; 
X64-NEXT: jmp .LBB46_5 %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 31) nounwind %cmp = icmp sgt i32 %call, 0 ret i1 %cmp @@ -2396,40 +2433,41 @@ define i32 @length32(i8* %X, i8* %Y) nounwind { ; ; X64-LABEL: length32: ; X64: # %bb.0: -; X64-NEXT: movq (%rdi), %rcx -; X64-NEXT: movq (%rsi), %rdx +; X64-NEXT: movq (%rdi), %rax +; X64-NEXT: movq (%rsi), %rcx +; X64-NEXT: bswapq %rax ; X64-NEXT: bswapq %rcx -; X64-NEXT: bswapq %rdx -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: jne .LBB49_4 +; X64-NEXT: cmpq %rcx, %rax +; X64-NEXT: jne .LBB49_6 ; X64-NEXT: # %bb.1: # %loadbb1 -; X64-NEXT: movq 8(%rdi), %rcx -; X64-NEXT: movq 8(%rsi), %rdx +; X64-NEXT: movq 8(%rdi), %rax +; X64-NEXT: movq 8(%rsi), %rcx +; X64-NEXT: bswapq %rax ; X64-NEXT: bswapq %rcx -; X64-NEXT: bswapq %rdx -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: jne .LBB49_4 +; X64-NEXT: cmpq %rcx, %rax +; X64-NEXT: jne .LBB49_6 ; X64-NEXT: # %bb.2: # %loadbb2 -; X64-NEXT: movq 16(%rdi), %rcx -; X64-NEXT: movq 16(%rsi), %rdx +; X64-NEXT: movq 16(%rdi), %rax +; X64-NEXT: movq 16(%rsi), %rcx +; X64-NEXT: bswapq %rax ; X64-NEXT: bswapq %rcx -; X64-NEXT: bswapq %rdx -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: jne .LBB49_4 +; X64-NEXT: cmpq %rcx, %rax +; X64-NEXT: jne .LBB49_6 ; X64-NEXT: # %bb.3: # %loadbb3 -; X64-NEXT: movq 24(%rdi), %rcx -; X64-NEXT: movq 24(%rsi), %rdx +; X64-NEXT: movq 24(%rdi), %rax +; X64-NEXT: movq 24(%rsi), %rcx +; X64-NEXT: bswapq %rax ; X64-NEXT: bswapq %rcx -; X64-NEXT: bswapq %rdx -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: je .LBB49_5 -; X64-NEXT: .LBB49_4: # %res_block +; X64-NEXT: cmpq %rcx, %rax +; X64-NEXT: je .LBB49_4 +; X64-NEXT: .LBB49_6: # %res_block +; X64-NEXT: xorl %edx, %edx +; X64-NEXT: cmpq %rcx, %rax +; X64-NEXT: setae %dl +; X64-NEXT: leal -1(%rdx,%rdx), %eax +; X64-NEXT: retq +; X64-NEXT: .LBB49_4: ; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: setae %al -; X64-NEXT: leal -1(%rax,%rax), %eax -; X64-NEXT: .LBB49_5: # %endblock 
; X64-NEXT: retq %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 32) nounwind ret i32 %m @@ -2576,40 +2614,43 @@ define i1 @length32_lt(i8* %x, i8* %y) nounwind { ; ; X64-LABEL: length32_lt: ; X64: # %bb.0: -; X64-NEXT: movq (%rdi), %rcx -; X64-NEXT: movq (%rsi), %rdx +; X64-NEXT: movq (%rdi), %rax +; X64-NEXT: movq (%rsi), %rcx +; X64-NEXT: bswapq %rax ; X64-NEXT: bswapq %rcx -; X64-NEXT: bswapq %rdx -; X64-NEXT: cmpq %rdx, %rcx +; X64-NEXT: cmpq %rcx, %rax ; X64-NEXT: jne .LBB51_4 ; X64-NEXT: # %bb.1: # %loadbb1 -; X64-NEXT: movq 8(%rdi), %rcx -; X64-NEXT: movq 8(%rsi), %rdx +; X64-NEXT: movq 8(%rdi), %rax +; X64-NEXT: movq 8(%rsi), %rcx +; X64-NEXT: bswapq %rax ; X64-NEXT: bswapq %rcx -; X64-NEXT: bswapq %rdx -; X64-NEXT: cmpq %rdx, %rcx +; X64-NEXT: cmpq %rcx, %rax ; X64-NEXT: jne .LBB51_4 ; X64-NEXT: # %bb.2: # %loadbb2 -; X64-NEXT: movq 16(%rdi), %rcx -; X64-NEXT: movq 16(%rsi), %rdx +; X64-NEXT: movq 16(%rdi), %rax +; X64-NEXT: movq 16(%rsi), %rcx +; X64-NEXT: bswapq %rax ; X64-NEXT: bswapq %rcx -; X64-NEXT: bswapq %rdx -; X64-NEXT: cmpq %rdx, %rcx +; X64-NEXT: cmpq %rcx, %rax ; X64-NEXT: jne .LBB51_4 ; X64-NEXT: # %bb.3: # %loadbb3 -; X64-NEXT: movq 24(%rdi), %rcx -; X64-NEXT: movq 24(%rsi), %rdx +; X64-NEXT: movq 24(%rdi), %rax +; X64-NEXT: movq 24(%rsi), %rcx +; X64-NEXT: bswapq %rax ; X64-NEXT: bswapq %rcx -; X64-NEXT: bswapq %rdx -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpq %rdx, %rcx +; X64-NEXT: cmpq %rcx, %rax ; X64-NEXT: je .LBB51_5 ; X64-NEXT: .LBB51_4: # %res_block +; X64-NEXT: xorl %edx, %edx +; X64-NEXT: cmpq %rcx, %rax +; X64-NEXT: setae %dl +; X64-NEXT: leal -1(%rdx,%rdx), %eax +; X64-NEXT: shrl $31, %eax +; X64-NEXT: # kill: def $al killed $al killed $eax +; X64-NEXT: retq +; X64-NEXT: .LBB51_5: ; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: setae %al -; X64-NEXT: leal -1(%rax,%rax), %eax -; X64-NEXT: .LBB51_5: # %endblock ; X64-NEXT: shrl $31, %eax ; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq @@ 
-2638,38 +2679,40 @@ define i1 @length32_gt(i8* %x, i8* %y) nounwind { ; X64-NEXT: bswapq %rax ; X64-NEXT: bswapq %rcx ; X64-NEXT: cmpq %rcx, %rax -; X64-NEXT: jne .LBB52_4 +; X64-NEXT: jne .LBB52_6 ; X64-NEXT: # %bb.1: # %loadbb1 ; X64-NEXT: movq 8(%rdi), %rax ; X64-NEXT: movq 8(%rsi), %rcx ; X64-NEXT: bswapq %rax ; X64-NEXT: bswapq %rcx ; X64-NEXT: cmpq %rcx, %rax -; X64-NEXT: jne .LBB52_4 +; X64-NEXT: jne .LBB52_6 ; X64-NEXT: # %bb.2: # %loadbb2 ; X64-NEXT: movq 16(%rdi), %rax ; X64-NEXT: movq 16(%rsi), %rcx ; X64-NEXT: bswapq %rax ; X64-NEXT: bswapq %rcx ; X64-NEXT: cmpq %rcx, %rax -; X64-NEXT: jne .LBB52_4 +; X64-NEXT: jne .LBB52_6 ; X64-NEXT: # %bb.3: # %loadbb3 ; X64-NEXT: movq 24(%rdi), %rax ; X64-NEXT: movq 24(%rsi), %rcx ; X64-NEXT: bswapq %rax ; X64-NEXT: bswapq %rcx -; X64-NEXT: xorl %edx, %edx ; X64-NEXT: cmpq %rcx, %rax -; X64-NEXT: je .LBB52_5 -; X64-NEXT: .LBB52_4: # %res_block +; X64-NEXT: je .LBB52_4 +; X64-NEXT: .LBB52_6: # %res_block ; X64-NEXT: xorl %edx, %edx ; X64-NEXT: cmpq %rcx, %rax ; X64-NEXT: setae %dl -; X64-NEXT: leal -1(%rdx,%rdx), %edx +; X64-NEXT: leal -1(%rdx,%rdx), %eax ; X64-NEXT: .LBB52_5: # %endblock -; X64-NEXT: testl %edx, %edx +; X64-NEXT: testl %eax, %eax ; X64-NEXT: setg %al ; X64-NEXT: retq +; X64-NEXT: .LBB52_4: +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: jmp .LBB52_5 %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 32) nounwind %cmp = icmp sgt i32 %call, 0 ret i1 %cmp diff --git a/llvm/test/CodeGen/X86/memcmp-optsize.ll b/llvm/test/CodeGen/X86/memcmp-optsize.ll index 594a4a68dac00f..03e70e6720972f 100644 --- a/llvm/test/CodeGen/X86/memcmp-optsize.ll +++ b/llvm/test/CodeGen/X86/memcmp-optsize.ll @@ -120,16 +120,16 @@ define i32 @length3(i8* %X, i8* %Y) nounwind optsize { ; X86-NEXT: rolw $8, %dx ; X86-NEXT: rolw $8, %si ; X86-NEXT: cmpw %si, %dx -; X86-NEXT: jne .LBB4_3 -; X86-NEXT: # %bb.1: # %loadbb1 -; X86-NEXT: movzbl 2(%eax), %eax -; X86-NEXT: movzbl 2(%ecx), %ecx -; X86-NEXT: subl %ecx, %eax -; X86-NEXT: jmp .LBB4_2 
-; X86-NEXT: .LBB4_3: # %res_block +; X86-NEXT: je .LBB4_1 +; X86-NEXT: # %bb.3: # %res_block ; X86-NEXT: setae %al ; X86-NEXT: movzbl %al, %eax ; X86-NEXT: leal -1(%eax,%eax), %eax +; X86-NEXT: jmp .LBB4_2 +; X86-NEXT: .LBB4_1: # %loadbb1 +; X86-NEXT: movzbl 2(%eax), %eax +; X86-NEXT: movzbl 2(%ecx), %ecx +; X86-NEXT: subl %ecx, %eax ; X86-NEXT: .LBB4_2: # %endblock ; X86-NEXT: popl %esi ; X86-NEXT: retl @@ -141,16 +141,16 @@ define i32 @length3(i8* %X, i8* %Y) nounwind optsize { ; X64-NEXT: rolw $8, %ax ; X64-NEXT: rolw $8, %cx ; X64-NEXT: cmpw %cx, %ax -; X64-NEXT: jne .LBB4_3 -; X64-NEXT: # %bb.1: # %loadbb1 -; X64-NEXT: movzbl 2(%rdi), %eax -; X64-NEXT: movzbl 2(%rsi), %ecx -; X64-NEXT: subl %ecx, %eax -; X64-NEXT: retq -; X64-NEXT: .LBB4_3: # %res_block +; X64-NEXT: je .LBB4_1 +; X64-NEXT: # %bb.3: # %res_block ; X64-NEXT: setae %al ; X64-NEXT: movzbl %al, %eax ; X64-NEXT: leal -1(%rax,%rax), %eax +; X64-NEXT: retq +; X64-NEXT: .LBB4_1: # %loadbb1 +; X64-NEXT: movzbl 2(%rdi), %eax +; X64-NEXT: movzbl 2(%rsi), %ecx +; X64-NEXT: subl %ecx, %eax ; X64-NEXT: retq %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 3) nounwind ret i32 %m @@ -265,16 +265,16 @@ define i32 @length5(i8* %X, i8* %Y) nounwind optsize { ; X86-NEXT: bswapl %edx ; X86-NEXT: bswapl %esi ; X86-NEXT: cmpl %esi, %edx -; X86-NEXT: jne .LBB9_3 -; X86-NEXT: # %bb.1: # %loadbb1 -; X86-NEXT: movzbl 4(%eax), %eax -; X86-NEXT: movzbl 4(%ecx), %ecx -; X86-NEXT: subl %ecx, %eax -; X86-NEXT: jmp .LBB9_2 -; X86-NEXT: .LBB9_3: # %res_block +; X86-NEXT: je .LBB9_1 +; X86-NEXT: # %bb.3: # %res_block ; X86-NEXT: setae %al ; X86-NEXT: movzbl %al, %eax ; X86-NEXT: leal -1(%eax,%eax), %eax +; X86-NEXT: jmp .LBB9_2 +; X86-NEXT: .LBB9_1: # %loadbb1 +; X86-NEXT: movzbl 4(%eax), %eax +; X86-NEXT: movzbl 4(%ecx), %ecx +; X86-NEXT: subl %ecx, %eax ; X86-NEXT: .LBB9_2: # %endblock ; X86-NEXT: popl %esi ; X86-NEXT: retl @@ -286,16 +286,16 @@ define i32 @length5(i8* %X, i8* %Y) nounwind optsize { ; X64-NEXT: bswapl %eax ; 
X64-NEXT: bswapl %ecx ; X64-NEXT: cmpl %ecx, %eax -; X64-NEXT: jne .LBB9_3 -; X64-NEXT: # %bb.1: # %loadbb1 -; X64-NEXT: movzbl 4(%rdi), %eax -; X64-NEXT: movzbl 4(%rsi), %ecx -; X64-NEXT: subl %ecx, %eax -; X64-NEXT: retq -; X64-NEXT: .LBB9_3: # %res_block +; X64-NEXT: je .LBB9_1 +; X64-NEXT: # %bb.3: # %res_block ; X64-NEXT: setae %al ; X64-NEXT: movzbl %al, %eax ; X64-NEXT: leal -1(%rax,%rax), %eax +; X64-NEXT: retq +; X64-NEXT: .LBB9_1: # %loadbb1 +; X64-NEXT: movzbl 4(%rdi), %eax +; X64-NEXT: movzbl 4(%rsi), %ecx +; X64-NEXT: subl %ecx, %eax ; X64-NEXT: retq %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 5) nounwind ret i32 %m @@ -334,30 +334,32 @@ define i32 @length8(i8* %X, i8* %Y) nounwind optsize { ; X86-LABEL: length8: ; X86: # %bb.0: ; X86-NEXT: pushl %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: movl (%esi), %ecx -; X86-NEXT: movl (%eax), %edx +; X86-NEXT: movl (%esi), %eax +; X86-NEXT: movl (%edx), %ecx +; X86-NEXT: bswapl %eax ; X86-NEXT: bswapl %ecx -; X86-NEXT: bswapl %edx -; X86-NEXT: cmpl %edx, %ecx -; X86-NEXT: jne .LBB11_2 +; X86-NEXT: cmpl %ecx, %eax +; X86-NEXT: jne .LBB11_4 ; X86-NEXT: # %bb.1: # %loadbb1 -; X86-NEXT: movl 4(%esi), %ecx -; X86-NEXT: movl 4(%eax), %edx +; X86-NEXT: movl 4(%esi), %eax +; X86-NEXT: movl 4(%edx), %ecx +; X86-NEXT: bswapl %eax ; X86-NEXT: bswapl %ecx -; X86-NEXT: bswapl %edx -; X86-NEXT: xorl %eax, %eax -; X86-NEXT: cmpl %edx, %ecx -; X86-NEXT: je .LBB11_3 -; X86-NEXT: .LBB11_2: # %res_block -; X86-NEXT: xorl %eax, %eax -; X86-NEXT: cmpl %edx, %ecx -; X86-NEXT: setae %al -; X86-NEXT: leal -1(%eax,%eax), %eax +; X86-NEXT: cmpl %ecx, %eax +; X86-NEXT: je .LBB11_2 +; X86-NEXT: .LBB11_4: # %res_block +; X86-NEXT: xorl %edx, %edx +; X86-NEXT: cmpl %ecx, %eax +; X86-NEXT: setae %dl +; X86-NEXT: leal -1(%edx,%edx), %eax ; X86-NEXT: .LBB11_3: # %endblock ; X86-NEXT: popl %esi ; X86-NEXT: retl +; X86-NEXT: .LBB11_2: +; X86-NEXT: 
xorl %eax, %eax +; X86-NEXT: jmp .LBB11_3 ; ; X64-LABEL: length8: ; X64: # %bb.0: @@ -461,26 +463,27 @@ define i32 @length12(i8* %X, i8* %Y) nounwind optsize { ; ; X64-LABEL: length12: ; X64: # %bb.0: -; X64-NEXT: movq (%rdi), %rcx -; X64-NEXT: movq (%rsi), %rdx +; X64-NEXT: movq (%rdi), %rax +; X64-NEXT: movq (%rsi), %rcx +; X64-NEXT: bswapq %rax ; X64-NEXT: bswapq %rcx -; X64-NEXT: bswapq %rdx -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: jne .LBB15_2 +; X64-NEXT: cmpq %rcx, %rax +; X64-NEXT: jne .LBB15_4 ; X64-NEXT: # %bb.1: # %loadbb1 -; X64-NEXT: movl 8(%rdi), %ecx -; X64-NEXT: movl 8(%rsi), %edx +; X64-NEXT: movl 8(%rdi), %eax +; X64-NEXT: movl 8(%rsi), %ecx +; X64-NEXT: bswapl %eax ; X64-NEXT: bswapl %ecx -; X64-NEXT: bswapl %edx -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: je .LBB15_3 -; X64-NEXT: .LBB15_2: # %res_block +; X64-NEXT: cmpq %rcx, %rax +; X64-NEXT: je .LBB15_2 +; X64-NEXT: .LBB15_4: # %res_block +; X64-NEXT: xorl %edx, %edx +; X64-NEXT: cmpq %rcx, %rax +; X64-NEXT: setae %dl +; X64-NEXT: leal -1(%rdx,%rdx), %eax +; X64-NEXT: retq +; X64-NEXT: .LBB15_2: ; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: setae %al -; X64-NEXT: leal -1(%rax,%rax), %eax -; X64-NEXT: .LBB15_3: # %endblock ; X64-NEXT: retq %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 12) nounwind ret i32 %m @@ -501,26 +504,27 @@ define i32 @length16(i8* %X, i8* %Y) nounwind optsize { ; ; X64-LABEL: length16: ; X64: # %bb.0: -; X64-NEXT: movq (%rdi), %rcx -; X64-NEXT: movq (%rsi), %rdx +; X64-NEXT: movq (%rdi), %rax +; X64-NEXT: movq (%rsi), %rcx +; X64-NEXT: bswapq %rax ; X64-NEXT: bswapq %rcx -; X64-NEXT: bswapq %rdx -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: jne .LBB16_2 +; X64-NEXT: cmpq %rcx, %rax +; X64-NEXT: jne .LBB16_4 ; X64-NEXT: # %bb.1: # %loadbb1 -; X64-NEXT: movq 8(%rdi), %rcx -; X64-NEXT: movq 8(%rsi), %rdx +; X64-NEXT: movq 8(%rdi), %rax +; X64-NEXT: movq 8(%rsi), %rcx +; X64-NEXT: bswapq %rax ; X64-NEXT: bswapq %rcx -; 
X64-NEXT: bswapq %rdx -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: je .LBB16_3 -; X64-NEXT: .LBB16_2: # %res_block +; X64-NEXT: cmpq %rcx, %rax +; X64-NEXT: je .LBB16_2 +; X64-NEXT: .LBB16_4: # %res_block +; X64-NEXT: xorl %edx, %edx +; X64-NEXT: cmpq %rcx, %rax +; X64-NEXT: setae %dl +; X64-NEXT: leal -1(%rdx,%rdx), %eax +; X64-NEXT: retq +; X64-NEXT: .LBB16_2: ; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: setae %al -; X64-NEXT: leal -1(%rax,%rax), %eax -; X64-NEXT: .LBB16_3: # %endblock ; X64-NEXT: retq %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 16) nounwind ret i32 %m diff --git a/llvm/test/CodeGen/X86/memcmp-pgso.ll b/llvm/test/CodeGen/X86/memcmp-pgso.ll index 75e9f5975d95c5..d9273a52c523a4 100644 --- a/llvm/test/CodeGen/X86/memcmp-pgso.ll +++ b/llvm/test/CodeGen/X86/memcmp-pgso.ll @@ -120,19 +120,19 @@ define i32 @length3(i8* %X, i8* %Y) nounwind !prof !14 { ; X86-NEXT: rolw $8, %dx ; X86-NEXT: rolw $8, %si ; X86-NEXT: cmpw %si, %dx -; X86-NEXT: jne .LBB4_3 -; X86-NEXT: # %bb.1: # %loadbb1 -; X86-NEXT: movzbl 2(%eax), %eax -; X86-NEXT: movzbl 2(%ecx), %ecx -; X86-NEXT: subl %ecx, %eax -; X86-NEXT: jmp .LBB4_2 -; X86-NEXT: .LBB4_3: # %res_block +; X86-NEXT: je .LBB4_1 +; X86-NEXT: # %bb.3: # %res_block ; X86-NEXT: setae %al ; X86-NEXT: movzbl %al, %eax ; X86-NEXT: leal -1(%eax,%eax), %eax ; X86-NEXT: .LBB4_2: # %endblock ; X86-NEXT: popl %esi ; X86-NEXT: retl +; X86-NEXT: .LBB4_1: # %loadbb1 +; X86-NEXT: movzbl 2(%eax), %eax +; X86-NEXT: movzbl 2(%ecx), %ecx +; X86-NEXT: subl %ecx, %eax +; X86-NEXT: jmp .LBB4_2 ; ; X64-LABEL: length3: ; X64: # %bb.0: @@ -141,16 +141,16 @@ define i32 @length3(i8* %X, i8* %Y) nounwind !prof !14 { ; X64-NEXT: rolw $8, %ax ; X64-NEXT: rolw $8, %cx ; X64-NEXT: cmpw %cx, %ax -; X64-NEXT: jne .LBB4_3 -; X64-NEXT: # %bb.1: # %loadbb1 -; X64-NEXT: movzbl 2(%rdi), %eax -; X64-NEXT: movzbl 2(%rsi), %ecx -; X64-NEXT: subl %ecx, %eax -; X64-NEXT: retq -; X64-NEXT: .LBB4_3: # %res_block 
+; X64-NEXT: je .LBB4_1 +; X64-NEXT: # %bb.3: # %res_block ; X64-NEXT: setae %al ; X64-NEXT: movzbl %al, %eax ; X64-NEXT: leal -1(%rax,%rax), %eax +; X64-NEXT: retq +; X64-NEXT: .LBB4_1: # %loadbb1 +; X64-NEXT: movzbl 2(%rdi), %eax +; X64-NEXT: movzbl 2(%rsi), %ecx +; X64-NEXT: subl %ecx, %eax ; X64-NEXT: retq %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 3) nounwind ret i32 %m @@ -265,19 +265,19 @@ define i32 @length5(i8* %X, i8* %Y) nounwind !prof !14 { ; X86-NEXT: bswapl %edx ; X86-NEXT: bswapl %esi ; X86-NEXT: cmpl %esi, %edx -; X86-NEXT: jne .LBB9_3 -; X86-NEXT: # %bb.1: # %loadbb1 -; X86-NEXT: movzbl 4(%eax), %eax -; X86-NEXT: movzbl 4(%ecx), %ecx -; X86-NEXT: subl %ecx, %eax -; X86-NEXT: jmp .LBB9_2 -; X86-NEXT: .LBB9_3: # %res_block +; X86-NEXT: je .LBB9_1 +; X86-NEXT: # %bb.3: # %res_block ; X86-NEXT: setae %al ; X86-NEXT: movzbl %al, %eax ; X86-NEXT: leal -1(%eax,%eax), %eax ; X86-NEXT: .LBB9_2: # %endblock ; X86-NEXT: popl %esi ; X86-NEXT: retl +; X86-NEXT: .LBB9_1: # %loadbb1 +; X86-NEXT: movzbl 4(%eax), %eax +; X86-NEXT: movzbl 4(%ecx), %ecx +; X86-NEXT: subl %ecx, %eax +; X86-NEXT: jmp .LBB9_2 ; ; X64-LABEL: length5: ; X64: # %bb.0: @@ -286,16 +286,16 @@ define i32 @length5(i8* %X, i8* %Y) nounwind !prof !14 { ; X64-NEXT: bswapl %eax ; X64-NEXT: bswapl %ecx ; X64-NEXT: cmpl %ecx, %eax -; X64-NEXT: jne .LBB9_3 -; X64-NEXT: # %bb.1: # %loadbb1 -; X64-NEXT: movzbl 4(%rdi), %eax -; X64-NEXT: movzbl 4(%rsi), %ecx -; X64-NEXT: subl %ecx, %eax -; X64-NEXT: retq -; X64-NEXT: .LBB9_3: # %res_block +; X64-NEXT: je .LBB9_1 +; X64-NEXT: # %bb.3: # %res_block ; X64-NEXT: setae %al ; X64-NEXT: movzbl %al, %eax ; X64-NEXT: leal -1(%rax,%rax), %eax +; X64-NEXT: retq +; X64-NEXT: .LBB9_1: # %loadbb1 +; X64-NEXT: movzbl 4(%rdi), %eax +; X64-NEXT: movzbl 4(%rsi), %ecx +; X64-NEXT: subl %ecx, %eax ; X64-NEXT: retq %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 5) nounwind ret i32 %m @@ -334,30 +334,32 @@ define i32 @length8(i8* %X, i8* %Y) nounwind !prof !14 { ; 
X86-LABEL: length8: ; X86: # %bb.0: ; X86-NEXT: pushl %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: movl (%esi), %ecx -; X86-NEXT: movl (%eax), %edx -; X86-NEXT: bswapl %ecx -; X86-NEXT: bswapl %edx -; X86-NEXT: cmpl %edx, %ecx -; X86-NEXT: jne .LBB11_2 -; X86-NEXT: # %bb.1: # %loadbb1 -; X86-NEXT: movl 4(%esi), %ecx -; X86-NEXT: movl 4(%eax), %edx +; X86-NEXT: movl (%esi), %eax +; X86-NEXT: movl (%edx), %ecx +; X86-NEXT: bswapl %eax ; X86-NEXT: bswapl %ecx -; X86-NEXT: bswapl %edx -; X86-NEXT: xorl %eax, %eax -; X86-NEXT: cmpl %edx, %ecx -; X86-NEXT: je .LBB11_3 -; X86-NEXT: .LBB11_2: # %res_block -; X86-NEXT: xorl %eax, %eax -; X86-NEXT: cmpl %edx, %ecx -; X86-NEXT: setae %al -; X86-NEXT: leal -1(%eax,%eax), %eax +; X86-NEXT: cmpl %ecx, %eax +; X86-NEXT: je .LBB11_1 +; X86-NEXT: .LBB11_4: # %res_block +; X86-NEXT: xorl %edx, %edx +; X86-NEXT: cmpl %ecx, %eax +; X86-NEXT: setae %dl +; X86-NEXT: leal -1(%edx,%edx), %eax ; X86-NEXT: .LBB11_3: # %endblock ; X86-NEXT: popl %esi ; X86-NEXT: retl +; X86-NEXT: .LBB11_1: # %loadbb1 +; X86-NEXT: movl 4(%esi), %eax +; X86-NEXT: movl 4(%edx), %ecx +; X86-NEXT: bswapl %eax +; X86-NEXT: bswapl %ecx +; X86-NEXT: cmpl %ecx, %eax +; X86-NEXT: jne .LBB11_4 +; X86-NEXT: # %bb.2: +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: jmp .LBB11_3 ; ; X64-LABEL: length8: ; X64: # %bb.0: @@ -461,26 +463,27 @@ define i32 @length12(i8* %X, i8* %Y) nounwind !prof !14 { ; ; X64-LABEL: length12: ; X64: # %bb.0: -; X64-NEXT: movq (%rdi), %rcx -; X64-NEXT: movq (%rsi), %rdx +; X64-NEXT: movq (%rdi), %rax +; X64-NEXT: movq (%rsi), %rcx +; X64-NEXT: bswapq %rax ; X64-NEXT: bswapq %rcx -; X64-NEXT: bswapq %rdx -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: jne .LBB15_2 -; X64-NEXT: # %bb.1: # %loadbb1 -; X64-NEXT: movl 8(%rdi), %ecx -; X64-NEXT: movl 8(%rsi), %edx +; X64-NEXT: cmpq %rcx, %rax +; X64-NEXT: je .LBB15_1 +; X64-NEXT: .LBB15_4: # %res_block +; X64-NEXT: xorl 
%edx, %edx +; X64-NEXT: cmpq %rcx, %rax +; X64-NEXT: setae %dl +; X64-NEXT: leal -1(%rdx,%rdx), %eax +; X64-NEXT: retq +; X64-NEXT: .LBB15_1: # %loadbb1 +; X64-NEXT: movl 8(%rdi), %eax +; X64-NEXT: movl 8(%rsi), %ecx +; X64-NEXT: bswapl %eax ; X64-NEXT: bswapl %ecx -; X64-NEXT: bswapl %edx -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: je .LBB15_3 -; X64-NEXT: .LBB15_2: # %res_block +; X64-NEXT: cmpq %rcx, %rax +; X64-NEXT: jne .LBB15_4 +; X64-NEXT: # %bb.2: ; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: setae %al -; X64-NEXT: leal -1(%rax,%rax), %eax -; X64-NEXT: .LBB15_3: # %endblock ; X64-NEXT: retq %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 12) nounwind ret i32 %m @@ -501,26 +504,27 @@ define i32 @length16(i8* %X, i8* %Y) nounwind !prof !14 { ; ; X64-LABEL: length16: ; X64: # %bb.0: -; X64-NEXT: movq (%rdi), %rcx -; X64-NEXT: movq (%rsi), %rdx +; X64-NEXT: movq (%rdi), %rax +; X64-NEXT: movq (%rsi), %rcx +; X64-NEXT: bswapq %rax ; X64-NEXT: bswapq %rcx -; X64-NEXT: bswapq %rdx -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: jne .LBB16_2 -; X64-NEXT: # %bb.1: # %loadbb1 -; X64-NEXT: movq 8(%rdi), %rcx -; X64-NEXT: movq 8(%rsi), %rdx +; X64-NEXT: cmpq %rcx, %rax +; X64-NEXT: je .LBB16_1 +; X64-NEXT: .LBB16_4: # %res_block +; X64-NEXT: xorl %edx, %edx +; X64-NEXT: cmpq %rcx, %rax +; X64-NEXT: setae %dl +; X64-NEXT: leal -1(%rdx,%rdx), %eax +; X64-NEXT: retq +; X64-NEXT: .LBB16_1: # %loadbb1 +; X64-NEXT: movq 8(%rdi), %rax +; X64-NEXT: movq 8(%rsi), %rcx +; X64-NEXT: bswapq %rax ; X64-NEXT: bswapq %rcx -; X64-NEXT: bswapq %rdx +; X64-NEXT: cmpq %rcx, %rax +; X64-NEXT: jne .LBB16_4 +; X64-NEXT: # %bb.2: ; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: je .LBB16_3 -; X64-NEXT: .LBB16_2: # %res_block -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: setae %al -; X64-NEXT: leal -1(%rax,%rax), %eax -; X64-NEXT: .LBB16_3: # %endblock ; X64-NEXT: retq %m = tail call i32 @memcmp(i8* %X, 
i8* %Y, i64 16) nounwind ret i32 %m diff --git a/llvm/test/CodeGen/X86/memcmp.ll b/llvm/test/CodeGen/X86/memcmp.ll index de604ded08d1aa..233183b7f9ad59 100644 --- a/llvm/test/CodeGen/X86/memcmp.ll +++ b/llvm/test/CodeGen/X86/memcmp.ll @@ -285,19 +285,19 @@ define i32 @length3(i8* %X, i8* %Y) nounwind { ; X86-NEXT: rolw $8, %dx ; X86-NEXT: rolw $8, %si ; X86-NEXT: cmpw %si, %dx -; X86-NEXT: jne .LBB11_3 -; X86-NEXT: # %bb.1: # %loadbb1 -; X86-NEXT: movzbl 2(%eax), %eax -; X86-NEXT: movzbl 2(%ecx), %ecx -; X86-NEXT: subl %ecx, %eax -; X86-NEXT: popl %esi -; X86-NEXT: retl -; X86-NEXT: .LBB11_3: # %res_block +; X86-NEXT: je .LBB11_1 +; X86-NEXT: # %bb.3: # %res_block ; X86-NEXT: setae %al ; X86-NEXT: movzbl %al, %eax ; X86-NEXT: leal -1(%eax,%eax), %eax ; X86-NEXT: popl %esi ; X86-NEXT: retl +; X86-NEXT: .LBB11_1: # %loadbb1 +; X86-NEXT: movzbl 2(%eax), %eax +; X86-NEXT: movzbl 2(%ecx), %ecx +; X86-NEXT: subl %ecx, %eax +; X86-NEXT: popl %esi +; X86-NEXT: retl ; ; X64-LABEL: length3: ; X64: # %bb.0: @@ -306,16 +306,16 @@ define i32 @length3(i8* %X, i8* %Y) nounwind { ; X64-NEXT: rolw $8, %ax ; X64-NEXT: rolw $8, %cx ; X64-NEXT: cmpw %cx, %ax -; X64-NEXT: jne .LBB11_3 -; X64-NEXT: # %bb.1: # %loadbb1 -; X64-NEXT: movzbl 2(%rdi), %eax -; X64-NEXT: movzbl 2(%rsi), %ecx -; X64-NEXT: subl %ecx, %eax -; X64-NEXT: retq -; X64-NEXT: .LBB11_3: # %res_block +; X64-NEXT: je .LBB11_1 +; X64-NEXT: # %bb.3: # %res_block ; X64-NEXT: setae %al ; X64-NEXT: movzbl %al, %eax ; X64-NEXT: leal -1(%rax,%rax), %eax +; X64-NEXT: retq +; X64-NEXT: .LBB11_1: # %loadbb1 +; X64-NEXT: movzbl 2(%rdi), %eax +; X64-NEXT: movzbl 2(%rsi), %ecx +; X64-NEXT: subl %ecx, %eax ; X64-NEXT: retq %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 3) nounwind ret i32 %m @@ -500,19 +500,19 @@ define i32 @length5(i8* %X, i8* %Y) nounwind { ; X86-NEXT: bswapl %edx ; X86-NEXT: bswapl %esi ; X86-NEXT: cmpl %esi, %edx -; X86-NEXT: jne .LBB18_3 -; X86-NEXT: # %bb.1: # %loadbb1 -; X86-NEXT: movzbl 4(%eax), %eax -; 
X86-NEXT: movzbl 4(%ecx), %ecx -; X86-NEXT: subl %ecx, %eax -; X86-NEXT: popl %esi -; X86-NEXT: retl -; X86-NEXT: .LBB18_3: # %res_block +; X86-NEXT: je .LBB18_1 +; X86-NEXT: # %bb.3: # %res_block ; X86-NEXT: setae %al ; X86-NEXT: movzbl %al, %eax ; X86-NEXT: leal -1(%eax,%eax), %eax ; X86-NEXT: popl %esi ; X86-NEXT: retl +; X86-NEXT: .LBB18_1: # %loadbb1 +; X86-NEXT: movzbl 4(%eax), %eax +; X86-NEXT: movzbl 4(%ecx), %ecx +; X86-NEXT: subl %ecx, %eax +; X86-NEXT: popl %esi +; X86-NEXT: retl ; ; X64-LABEL: length5: ; X64: # %bb.0: @@ -521,16 +521,16 @@ define i32 @length5(i8* %X, i8* %Y) nounwind { ; X64-NEXT: bswapl %eax ; X64-NEXT: bswapl %ecx ; X64-NEXT: cmpl %ecx, %eax -; X64-NEXT: jne .LBB18_3 -; X64-NEXT: # %bb.1: # %loadbb1 -; X64-NEXT: movzbl 4(%rdi), %eax -; X64-NEXT: movzbl 4(%rsi), %ecx -; X64-NEXT: subl %ecx, %eax -; X64-NEXT: retq -; X64-NEXT: .LBB18_3: # %res_block +; X64-NEXT: je .LBB18_1 +; X64-NEXT: # %bb.3: # %res_block ; X64-NEXT: setae %al ; X64-NEXT: movzbl %al, %eax ; X64-NEXT: leal -1(%rax,%rax), %eax +; X64-NEXT: retq +; X64-NEXT: .LBB18_1: # %loadbb1 +; X64-NEXT: movzbl 4(%rdi), %eax +; X64-NEXT: movzbl 4(%rsi), %ecx +; X64-NEXT: subl %ecx, %eax ; X64-NEXT: retq %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 5) nounwind ret i32 %m @@ -576,16 +576,16 @@ define i1 @length5_lt(i8* %X, i8* %Y) nounwind { ; X86-NEXT: bswapl %edx ; X86-NEXT: bswapl %esi ; X86-NEXT: cmpl %esi, %edx -; X86-NEXT: jne .LBB20_3 -; X86-NEXT: # %bb.1: # %loadbb1 -; X86-NEXT: movzbl 4(%eax), %eax -; X86-NEXT: movzbl 4(%ecx), %ecx -; X86-NEXT: subl %ecx, %eax -; X86-NEXT: jmp .LBB20_2 -; X86-NEXT: .LBB20_3: # %res_block +; X86-NEXT: je .LBB20_1 +; X86-NEXT: # %bb.3: # %res_block ; X86-NEXT: setae %al ; X86-NEXT: movzbl %al, %eax ; X86-NEXT: leal -1(%eax,%eax), %eax +; X86-NEXT: jmp .LBB20_2 +; X86-NEXT: .LBB20_1: # %loadbb1 +; X86-NEXT: movzbl 4(%eax), %eax +; X86-NEXT: movzbl 4(%ecx), %ecx +; X86-NEXT: subl %ecx, %eax ; X86-NEXT: .LBB20_2: # %endblock ; X86-NEXT: shrl 
$31, %eax ; X86-NEXT: # kill: def $al killed $al killed $eax @@ -599,20 +599,20 @@ define i1 @length5_lt(i8* %X, i8* %Y) nounwind { ; X64-NEXT: bswapl %eax ; X64-NEXT: bswapl %ecx ; X64-NEXT: cmpl %ecx, %eax -; X64-NEXT: jne .LBB20_3 -; X64-NEXT: # %bb.1: # %loadbb1 -; X64-NEXT: movzbl 4(%rdi), %eax -; X64-NEXT: movzbl 4(%rsi), %ecx -; X64-NEXT: subl %ecx, %eax -; X64-NEXT: shrl $31, %eax -; X64-NEXT: # kill: def $al killed $al killed $eax -; X64-NEXT: retq -; X64-NEXT: .LBB20_3: # %res_block +; X64-NEXT: je .LBB20_1 +; X64-NEXT: # %bb.3: # %res_block ; X64-NEXT: setae %al ; X64-NEXT: movzbl %al, %eax ; X64-NEXT: leal -1(%rax,%rax), %eax ; X64-NEXT: shrl $31, %eax ; X64-NEXT: # kill: def $al killed $al killed $eax +; X64-NEXT: retq +; X64-NEXT: .LBB20_1: # %loadbb1 +; X64-NEXT: movzbl 4(%rdi), %eax +; X64-NEXT: movzbl 4(%rsi), %ecx +; X64-NEXT: subl %ecx, %eax +; X64-NEXT: shrl $31, %eax +; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 5) nounwind %c = icmp slt i32 %m, 0 @@ -623,53 +623,56 @@ define i32 @length7(i8* %X, i8* %Y) nounwind { ; X86-LABEL: length7: ; X86: # %bb.0: ; X86-NEXT: pushl %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: movl (%esi), %ecx -; X86-NEXT: movl (%eax), %edx +; X86-NEXT: movl (%esi), %eax +; X86-NEXT: movl (%edx), %ecx +; X86-NEXT: bswapl %eax ; X86-NEXT: bswapl %ecx -; X86-NEXT: bswapl %edx -; X86-NEXT: cmpl %edx, %ecx -; X86-NEXT: jne .LBB21_2 +; X86-NEXT: cmpl %ecx, %eax +; X86-NEXT: jne .LBB21_4 ; X86-NEXT: # %bb.1: # %loadbb1 -; X86-NEXT: movl 3(%esi), %ecx -; X86-NEXT: movl 3(%eax), %edx +; X86-NEXT: movl 3(%esi), %eax +; X86-NEXT: movl 3(%edx), %ecx +; X86-NEXT: bswapl %eax ; X86-NEXT: bswapl %ecx -; X86-NEXT: bswapl %edx -; X86-NEXT: xorl %eax, %eax -; X86-NEXT: cmpl %edx, %ecx -; X86-NEXT: je .LBB21_3 -; X86-NEXT: .LBB21_2: # %res_block +; X86-NEXT: cmpl %ecx, %eax +; 
X86-NEXT: je .LBB21_2 +; X86-NEXT: .LBB21_4: # %res_block +; X86-NEXT: xorl %edx, %edx +; X86-NEXT: cmpl %ecx, %eax +; X86-NEXT: setae %dl +; X86-NEXT: leal -1(%edx,%edx), %eax +; X86-NEXT: popl %esi +; X86-NEXT: retl +; X86-NEXT: .LBB21_2: ; X86-NEXT: xorl %eax, %eax -; X86-NEXT: cmpl %edx, %ecx -; X86-NEXT: setae %al -; X86-NEXT: leal -1(%eax,%eax), %eax -; X86-NEXT: .LBB21_3: # %endblock ; X86-NEXT: popl %esi ; X86-NEXT: retl ; ; X64-LABEL: length7: ; X64: # %bb.0: -; X64-NEXT: movl (%rdi), %ecx -; X64-NEXT: movl (%rsi), %edx +; X64-NEXT: movl (%rdi), %eax +; X64-NEXT: movl (%rsi), %ecx +; X64-NEXT: bswapl %eax ; X64-NEXT: bswapl %ecx -; X64-NEXT: bswapl %edx -; X64-NEXT: cmpl %edx, %ecx -; X64-NEXT: jne .LBB21_2 +; X64-NEXT: cmpl %ecx, %eax +; X64-NEXT: jne .LBB21_4 ; X64-NEXT: # %bb.1: # %loadbb1 -; X64-NEXT: movl 3(%rdi), %ecx -; X64-NEXT: movl 3(%rsi), %edx +; X64-NEXT: movl 3(%rdi), %eax +; X64-NEXT: movl 3(%rsi), %ecx +; X64-NEXT: bswapl %eax ; X64-NEXT: bswapl %ecx -; X64-NEXT: bswapl %edx -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpl %edx, %ecx -; X64-NEXT: je .LBB21_3 -; X64-NEXT: .LBB21_2: # %res_block +; X64-NEXT: cmpl %ecx, %eax +; X64-NEXT: je .LBB21_2 +; X64-NEXT: .LBB21_4: # %res_block +; X64-NEXT: xorl %edx, %edx +; X64-NEXT: cmpl %ecx, %eax +; X64-NEXT: setae %dl +; X64-NEXT: leal -1(%rdx,%rdx), %eax +; X64-NEXT: retq +; X64-NEXT: .LBB21_2: ; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpl %edx, %ecx -; X64-NEXT: setae %al -; X64-NEXT: leal -1(%rax,%rax), %eax -; X64-NEXT: .LBB21_3: # %endblock ; X64-NEXT: retq %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 7) nounwind ret i32 %m @@ -679,55 +682,60 @@ define i1 @length7_lt(i8* %X, i8* %Y) nounwind { ; X86-LABEL: length7_lt: ; X86: # %bb.0: ; X86-NEXT: pushl %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: movl (%esi), %ecx -; X86-NEXT: movl (%eax), %edx +; X86-NEXT: movl (%esi), %eax +; X86-NEXT: movl (%edx), 
%ecx +; X86-NEXT: bswapl %eax ; X86-NEXT: bswapl %ecx -; X86-NEXT: bswapl %edx -; X86-NEXT: cmpl %edx, %ecx -; X86-NEXT: jne .LBB22_2 +; X86-NEXT: cmpl %ecx, %eax +; X86-NEXT: jne .LBB22_4 ; X86-NEXT: # %bb.1: # %loadbb1 -; X86-NEXT: movl 3(%esi), %ecx -; X86-NEXT: movl 3(%eax), %edx +; X86-NEXT: movl 3(%esi), %eax +; X86-NEXT: movl 3(%edx), %ecx +; X86-NEXT: bswapl %eax ; X86-NEXT: bswapl %ecx -; X86-NEXT: bswapl %edx -; X86-NEXT: xorl %eax, %eax -; X86-NEXT: cmpl %edx, %ecx -; X86-NEXT: je .LBB22_3 -; X86-NEXT: .LBB22_2: # %res_block -; X86-NEXT: xorl %eax, %eax -; X86-NEXT: cmpl %edx, %ecx -; X86-NEXT: setae %al -; X86-NEXT: leal -1(%eax,%eax), %eax +; X86-NEXT: cmpl %ecx, %eax +; X86-NEXT: je .LBB22_2 +; X86-NEXT: .LBB22_4: # %res_block +; X86-NEXT: xorl %edx, %edx +; X86-NEXT: cmpl %ecx, %eax +; X86-NEXT: setae %dl +; X86-NEXT: leal -1(%edx,%edx), %eax ; X86-NEXT: .LBB22_3: # %endblock ; X86-NEXT: shrl $31, %eax ; X86-NEXT: # kill: def $al killed $al killed $eax ; X86-NEXT: popl %esi ; X86-NEXT: retl +; X86-NEXT: .LBB22_2: +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: jmp .LBB22_3 ; ; X64-LABEL: length7_lt: ; X64: # %bb.0: -; X64-NEXT: movl (%rdi), %ecx -; X64-NEXT: movl (%rsi), %edx +; X64-NEXT: movl (%rdi), %eax +; X64-NEXT: movl (%rsi), %ecx +; X64-NEXT: bswapl %eax ; X64-NEXT: bswapl %ecx -; X64-NEXT: bswapl %edx -; X64-NEXT: cmpl %edx, %ecx +; X64-NEXT: cmpl %ecx, %eax ; X64-NEXT: jne .LBB22_2 ; X64-NEXT: # %bb.1: # %loadbb1 -; X64-NEXT: movl 3(%rdi), %ecx -; X64-NEXT: movl 3(%rsi), %edx +; X64-NEXT: movl 3(%rdi), %eax +; X64-NEXT: movl 3(%rsi), %ecx +; X64-NEXT: bswapl %eax ; X64-NEXT: bswapl %ecx -; X64-NEXT: bswapl %edx -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpl %edx, %ecx +; X64-NEXT: cmpl %ecx, %eax ; X64-NEXT: je .LBB22_3 ; X64-NEXT: .LBB22_2: # %res_block +; X64-NEXT: xorl %edx, %edx +; X64-NEXT: cmpl %ecx, %eax +; X64-NEXT: setae %dl +; X64-NEXT: leal -1(%rdx,%rdx), %eax +; X64-NEXT: shrl $31, %eax +; X64-NEXT: # kill: def $al killed $al killed 
$eax +; X64-NEXT: retq +; X64-NEXT: .LBB22_3: ; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpl %edx, %ecx -; X64-NEXT: setae %al -; X64-NEXT: leal -1(%rax,%rax), %eax -; X64-NEXT: .LBB22_3: # %endblock ; X64-NEXT: shrl $31, %eax ; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq @@ -767,28 +775,30 @@ define i32 @length8(i8* %X, i8* %Y) nounwind { ; X86-LABEL: length8: ; X86: # %bb.0: ; X86-NEXT: pushl %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: movl (%esi), %ecx -; X86-NEXT: movl (%eax), %edx +; X86-NEXT: movl (%esi), %eax +; X86-NEXT: movl (%edx), %ecx +; X86-NEXT: bswapl %eax ; X86-NEXT: bswapl %ecx -; X86-NEXT: bswapl %edx -; X86-NEXT: cmpl %edx, %ecx -; X86-NEXT: jne .LBB24_2 +; X86-NEXT: cmpl %ecx, %eax +; X86-NEXT: jne .LBB24_4 ; X86-NEXT: # %bb.1: # %loadbb1 -; X86-NEXT: movl 4(%esi), %ecx -; X86-NEXT: movl 4(%eax), %edx +; X86-NEXT: movl 4(%esi), %eax +; X86-NEXT: movl 4(%edx), %ecx +; X86-NEXT: bswapl %eax ; X86-NEXT: bswapl %ecx -; X86-NEXT: bswapl %edx -; X86-NEXT: xorl %eax, %eax -; X86-NEXT: cmpl %edx, %ecx -; X86-NEXT: je .LBB24_3 -; X86-NEXT: .LBB24_2: # %res_block +; X86-NEXT: cmpl %ecx, %eax +; X86-NEXT: je .LBB24_2 +; X86-NEXT: .LBB24_4: # %res_block +; X86-NEXT: xorl %edx, %edx +; X86-NEXT: cmpl %ecx, %eax +; X86-NEXT: setae %dl +; X86-NEXT: leal -1(%edx,%edx), %eax +; X86-NEXT: popl %esi +; X86-NEXT: retl +; X86-NEXT: .LBB24_2: ; X86-NEXT: xorl %eax, %eax -; X86-NEXT: cmpl %edx, %ecx -; X86-NEXT: setae %al -; X86-NEXT: leal -1(%eax,%eax), %eax -; X86-NEXT: .LBB24_3: # %endblock ; X86-NEXT: popl %esi ; X86-NEXT: retl ; @@ -977,26 +987,27 @@ define i32 @length12(i8* %X, i8* %Y) nounwind { ; ; X64-LABEL: length12: ; X64: # %bb.0: -; X64-NEXT: movq (%rdi), %rcx -; X64-NEXT: movq (%rsi), %rdx +; X64-NEXT: movq (%rdi), %rax +; X64-NEXT: movq (%rsi), %rcx +; X64-NEXT: bswapq %rax ; X64-NEXT: bswapq %rcx -; X64-NEXT: bswapq %rdx -; X64-NEXT: 
cmpq %rdx, %rcx -; X64-NEXT: jne .LBB31_2 +; X64-NEXT: cmpq %rcx, %rax +; X64-NEXT: jne .LBB31_4 ; X64-NEXT: # %bb.1: # %loadbb1 -; X64-NEXT: movl 8(%rdi), %ecx -; X64-NEXT: movl 8(%rsi), %edx +; X64-NEXT: movl 8(%rdi), %eax +; X64-NEXT: movl 8(%rsi), %ecx +; X64-NEXT: bswapl %eax ; X64-NEXT: bswapl %ecx -; X64-NEXT: bswapl %edx -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: je .LBB31_3 -; X64-NEXT: .LBB31_2: # %res_block +; X64-NEXT: cmpq %rcx, %rax +; X64-NEXT: je .LBB31_2 +; X64-NEXT: .LBB31_4: # %res_block +; X64-NEXT: xorl %edx, %edx +; X64-NEXT: cmpq %rcx, %rax +; X64-NEXT: setae %dl +; X64-NEXT: leal -1(%rdx,%rdx), %eax +; X64-NEXT: retq +; X64-NEXT: .LBB31_2: ; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: setae %al -; X64-NEXT: leal -1(%rax,%rax), %eax -; X64-NEXT: .LBB31_3: # %endblock ; X64-NEXT: retq %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 12) nounwind ret i32 %m @@ -1069,26 +1080,27 @@ define i32 @length15(i8* %X, i8* %Y) nounwind { ; ; X64-LABEL: length15: ; X64: # %bb.0: -; X64-NEXT: movq (%rdi), %rcx -; X64-NEXT: movq (%rsi), %rdx +; X64-NEXT: movq (%rdi), %rax +; X64-NEXT: movq (%rsi), %rcx +; X64-NEXT: bswapq %rax ; X64-NEXT: bswapq %rcx -; X64-NEXT: bswapq %rdx -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: jne .LBB34_2 +; X64-NEXT: cmpq %rcx, %rax +; X64-NEXT: jne .LBB34_4 ; X64-NEXT: # %bb.1: # %loadbb1 -; X64-NEXT: movq 7(%rdi), %rcx -; X64-NEXT: movq 7(%rsi), %rdx +; X64-NEXT: movq 7(%rdi), %rax +; X64-NEXT: movq 7(%rsi), %rcx +; X64-NEXT: bswapq %rax ; X64-NEXT: bswapq %rcx -; X64-NEXT: bswapq %rdx -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: je .LBB34_3 -; X64-NEXT: .LBB34_2: # %res_block +; X64-NEXT: cmpq %rcx, %rax +; X64-NEXT: je .LBB34_2 +; X64-NEXT: .LBB34_4: # %res_block +; X64-NEXT: xorl %edx, %edx +; X64-NEXT: cmpq %rcx, %rax +; X64-NEXT: setae %dl +; X64-NEXT: leal -1(%rdx,%rdx), %eax +; X64-NEXT: retq +; X64-NEXT: .LBB34_2: ; X64-NEXT: xorl %eax, %eax -; 
X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: setae %al -; X64-NEXT: leal -1(%rax,%rax), %eax -; X64-NEXT: .LBB34_3: # %endblock ; X64-NEXT: retq %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 15) nounwind ret i32 %m @@ -1109,26 +1121,29 @@ define i1 @length15_lt(i8* %X, i8* %Y) nounwind { ; ; X64-LABEL: length15_lt: ; X64: # %bb.0: -; X64-NEXT: movq (%rdi), %rcx -; X64-NEXT: movq (%rsi), %rdx +; X64-NEXT: movq (%rdi), %rax +; X64-NEXT: movq (%rsi), %rcx +; X64-NEXT: bswapq %rax ; X64-NEXT: bswapq %rcx -; X64-NEXT: bswapq %rdx -; X64-NEXT: cmpq %rdx, %rcx +; X64-NEXT: cmpq %rcx, %rax ; X64-NEXT: jne .LBB35_2 ; X64-NEXT: # %bb.1: # %loadbb1 -; X64-NEXT: movq 7(%rdi), %rcx -; X64-NEXT: movq 7(%rsi), %rdx +; X64-NEXT: movq 7(%rdi), %rax +; X64-NEXT: movq 7(%rsi), %rcx +; X64-NEXT: bswapq %rax ; X64-NEXT: bswapq %rcx -; X64-NEXT: bswapq %rdx -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpq %rdx, %rcx +; X64-NEXT: cmpq %rcx, %rax ; X64-NEXT: je .LBB35_3 ; X64-NEXT: .LBB35_2: # %res_block +; X64-NEXT: xorl %edx, %edx +; X64-NEXT: cmpq %rcx, %rax +; X64-NEXT: setae %dl +; X64-NEXT: leal -1(%rdx,%rdx), %eax +; X64-NEXT: shrl $31, %eax +; X64-NEXT: # kill: def $al killed $al killed $eax +; X64-NEXT: retq +; X64-NEXT: .LBB35_3: ; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: setae %al -; X64-NEXT: leal -1(%rax,%rax), %eax -; X64-NEXT: .LBB35_3: # %endblock ; X64-NEXT: shrl $31, %eax ; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq @@ -1256,26 +1271,27 @@ define i32 @length16(i8* %X, i8* %Y) nounwind { ; ; X64-LABEL: length16: ; X64: # %bb.0: -; X64-NEXT: movq (%rdi), %rcx -; X64-NEXT: movq (%rsi), %rdx +; X64-NEXT: movq (%rdi), %rax +; X64-NEXT: movq (%rsi), %rcx +; X64-NEXT: bswapq %rax ; X64-NEXT: bswapq %rcx -; X64-NEXT: bswapq %rdx -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: jne .LBB39_2 +; X64-NEXT: cmpq %rcx, %rax +; X64-NEXT: jne .LBB39_4 ; X64-NEXT: # %bb.1: # %loadbb1 -; X64-NEXT: movq 8(%rdi), %rcx -; X64-NEXT: movq 8(%rsi), %rdx +; 
X64-NEXT: movq 8(%rdi), %rax +; X64-NEXT: movq 8(%rsi), %rcx +; X64-NEXT: bswapq %rax ; X64-NEXT: bswapq %rcx -; X64-NEXT: bswapq %rdx -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: je .LBB39_3 -; X64-NEXT: .LBB39_2: # %res_block +; X64-NEXT: cmpq %rcx, %rax +; X64-NEXT: je .LBB39_2 +; X64-NEXT: .LBB39_4: # %res_block +; X64-NEXT: xorl %edx, %edx +; X64-NEXT: cmpq %rcx, %rax +; X64-NEXT: setae %dl +; X64-NEXT: leal -1(%rdx,%rdx), %eax +; X64-NEXT: retq +; X64-NEXT: .LBB39_2: ; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: setae %al -; X64-NEXT: leal -1(%rax,%rax), %eax -; X64-NEXT: .LBB39_3: # %endblock ; X64-NEXT: retq %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 16) nounwind ret i32 %m @@ -1385,26 +1401,29 @@ define i1 @length16_lt(i8* %x, i8* %y) nounwind { ; ; X64-LABEL: length16_lt: ; X64: # %bb.0: -; X64-NEXT: movq (%rdi), %rcx -; X64-NEXT: movq (%rsi), %rdx +; X64-NEXT: movq (%rdi), %rax +; X64-NEXT: movq (%rsi), %rcx +; X64-NEXT: bswapq %rax ; X64-NEXT: bswapq %rcx -; X64-NEXT: bswapq %rdx -; X64-NEXT: cmpq %rdx, %rcx +; X64-NEXT: cmpq %rcx, %rax ; X64-NEXT: jne .LBB41_2 ; X64-NEXT: # %bb.1: # %loadbb1 -; X64-NEXT: movq 8(%rdi), %rcx -; X64-NEXT: movq 8(%rsi), %rdx +; X64-NEXT: movq 8(%rdi), %rax +; X64-NEXT: movq 8(%rsi), %rcx +; X64-NEXT: bswapq %rax ; X64-NEXT: bswapq %rcx -; X64-NEXT: bswapq %rdx -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpq %rdx, %rcx +; X64-NEXT: cmpq %rcx, %rax ; X64-NEXT: je .LBB41_3 ; X64-NEXT: .LBB41_2: # %res_block +; X64-NEXT: xorl %edx, %edx +; X64-NEXT: cmpq %rcx, %rax +; X64-NEXT: setae %dl +; X64-NEXT: leal -1(%rdx,%rdx), %eax +; X64-NEXT: shrl $31, %eax +; X64-NEXT: # kill: def $al killed $al killed $eax +; X64-NEXT: retq +; X64-NEXT: .LBB41_3: ; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: setae %al -; X64-NEXT: leal -1(%rax,%rax), %eax -; X64-NEXT: .LBB41_3: # %endblock ; X64-NEXT: shrl $31, %eax ; X64-NEXT: # kill: def $al killed $al killed $eax ; 
X64-NEXT: retq @@ -1433,24 +1452,26 @@ define i1 @length16_gt(i8* %x, i8* %y) nounwind { ; X64-NEXT: bswapq %rax ; X64-NEXT: bswapq %rcx ; X64-NEXT: cmpq %rcx, %rax -; X64-NEXT: jne .LBB42_2 +; X64-NEXT: jne .LBB42_4 ; X64-NEXT: # %bb.1: # %loadbb1 ; X64-NEXT: movq 8(%rdi), %rax ; X64-NEXT: movq 8(%rsi), %rcx ; X64-NEXT: bswapq %rax ; X64-NEXT: bswapq %rcx -; X64-NEXT: xorl %edx, %edx ; X64-NEXT: cmpq %rcx, %rax -; X64-NEXT: je .LBB42_3 -; X64-NEXT: .LBB42_2: # %res_block +; X64-NEXT: je .LBB42_2 +; X64-NEXT: .LBB42_4: # %res_block ; X64-NEXT: xorl %edx, %edx ; X64-NEXT: cmpq %rcx, %rax ; X64-NEXT: setae %dl -; X64-NEXT: leal -1(%rdx,%rdx), %edx +; X64-NEXT: leal -1(%rdx,%rdx), %eax ; X64-NEXT: .LBB42_3: # %endblock -; X64-NEXT: testl %edx, %edx +; X64-NEXT: testl %eax, %eax ; X64-NEXT: setg %al ; X64-NEXT: retq +; X64-NEXT: .LBB42_2: +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: jmp .LBB42_3 %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 16) nounwind %cmp = icmp sgt i32 %call, 0 ret i1 %cmp diff --git a/llvm/test/CodeGen/X86/neg_cmp.ll b/llvm/test/CodeGen/X86/neg_cmp.ll index 47fa7fbb88f0d1..8cc1ed7ff1231e 100644 --- a/llvm/test/CodeGen/X86/neg_cmp.ll +++ b/llvm/test/CodeGen/X86/neg_cmp.ll @@ -10,11 +10,11 @@ define void @neg_cmp(i32 %x, i32 %y) nounwind { ; CHECK-LABEL: neg_cmp: ; CHECK: # %bb.0: ; CHECK-NEXT: addl %esi, %edi -; CHECK-NEXT: jne .LBB0_1 -; CHECK-NEXT: # %bb.2: # %if.then -; CHECK-NEXT: jmp g # TAILCALL -; CHECK-NEXT: .LBB0_1: # %if.end +; CHECK-NEXT: je .LBB0_2 +; CHECK-NEXT: # %bb.1: # %if.end ; CHECK-NEXT: retq +; CHECK-NEXT: .LBB0_2: # %if.then +; CHECK-NEXT: jmp g # TAILCALL %sub = sub i32 0, %y %cmp = icmp eq i32 %x, %sub br i1 %cmp, label %if.then, label %if.end @@ -31,11 +31,11 @@ define void @neg_cmp_commuted(i32 %x, i32 %y) nounwind { ; CHECK-LABEL: neg_cmp_commuted: ; CHECK: # %bb.0: ; CHECK-NEXT: addl %esi, %edi -; CHECK-NEXT: jne .LBB1_1 -; CHECK-NEXT: # %bb.2: # %if.then -; CHECK-NEXT: jmp g # TAILCALL -; CHECK-NEXT: .LBB1_1: # %if.end 
+; CHECK-NEXT: je .LBB1_2 +; CHECK-NEXT: # %bb.1: # %if.end ; CHECK-NEXT: retq +; CHECK-NEXT: .LBB1_2: # %if.then +; CHECK-NEXT: jmp g # TAILCALL %sub = sub i32 0, %y %cmp = icmp eq i32 %sub, %x br i1 %cmp, label %if.then, label %if.end diff --git a/llvm/test/CodeGen/X86/nobt.ll b/llvm/test/CodeGen/X86/nobt.ll index b994b7f950fd2c..5e3926dd07a806 100644 --- a/llvm/test/CodeGen/X86/nobt.ll +++ b/llvm/test/CodeGen/X86/nobt.ll @@ -9,10 +9,11 @@ define void @test2(i32 %x, i32 %n) nounwind { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: testb %al, %al -; CHECK-NEXT: jne .LBB0_2 -; CHECK-NEXT: # %bb.1: # %bb +; CHECK-NEXT: je .LBB0_1 +; CHECK-NEXT: # %bb.2: # %UnifiedReturnBlock +; CHECK-NEXT: retl +; CHECK-NEXT: .LBB0_1: # %bb ; CHECK-NEXT: calll foo -; CHECK-NEXT: .LBB0_2: # %UnifiedReturnBlock ; CHECK-NEXT: retl entry: %tmp1 = and i32 %x, 1 @@ -34,10 +35,11 @@ define void @test3(i32 %x, i32 %n) nounwind { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: testb %al, %al -; CHECK-NEXT: jne .LBB1_2 -; CHECK-NEXT: # %bb.1: # %bb +; CHECK-NEXT: je .LBB1_1 +; CHECK-NEXT: # %bb.2: # %UnifiedReturnBlock +; CHECK-NEXT: retl +; CHECK-NEXT: .LBB1_1: # %bb ; CHECK-NEXT: calll foo -; CHECK-NEXT: .LBB1_2: # %UnifiedReturnBlock ; CHECK-NEXT: retl entry: %tmp1 = and i32 %x, 1 diff --git a/llvm/test/CodeGen/X86/pr29170.ll b/llvm/test/CodeGen/X86/pr29170.ll index dfbad021d2871f..d5cfc51bf41650 100644 --- a/llvm/test/CodeGen/X86/pr29170.ll +++ b/llvm/test/CodeGen/X86/pr29170.ll @@ -11,8 +11,11 @@ define i32 @main() { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: testb %al, %al -; CHECK-NEXT: jne .LBB0_3 -; CHECK-NEXT: # %bb.1: # %go +; CHECK-NEXT: je .LBB0_1 +; CHECK-NEXT: .LBB0_3: # %if.else +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: retl +; CHECK-NEXT: .LBB0_1: # %go ; CHECK-NEXT: movl $-1, %ecx ; CHECK-NEXT: movsbl b, %edx ; CHECK-NEXT: notl %ecx @@ -23,9 +26,6 @@ define i32 @main() { ; CHECK-NEXT: # %bb.2: # 
%if.then ; CHECK-NEXT: movl $42, %eax ; CHECK-NEXT: retl -; CHECK-NEXT: .LBB0_3: # %if.else -; CHECK-NEXT: xorl %eax, %eax -; CHECK-NEXT: retl entry: %true = icmp eq i32 0, 0 %const = bitcast i64 -4294967296 to i64 diff --git a/llvm/test/CodeGen/X86/wide-integer-cmp.ll b/llvm/test/CodeGen/X86/wide-integer-cmp.ll index a15d633d85381d..cc4857a16c5806 100644 --- a/llvm/test/CodeGen/X86/wide-integer-cmp.ll +++ b/llvm/test/CodeGen/X86/wide-integer-cmp.ll @@ -9,13 +9,13 @@ define i32 @branch_eq(i64 %a, i64 %b) { ; CHECK-NEXT: xorl {{[0-9]+}}(%esp), %ecx ; CHECK-NEXT: xorl {{[0-9]+}}(%esp), %eax ; CHECK-NEXT: orl %ecx, %eax -; CHECK-NEXT: jne .LBB0_2 -; CHECK-NEXT: # %bb.1: # %bb1 -; CHECK-NEXT: movl $1, %eax -; CHECK-NEXT: retl -; CHECK-NEXT: .LBB0_2: # %bb2 +; CHECK-NEXT: je .LBB0_1 +; CHECK-NEXT: # %bb.2: # %bb2 ; CHECK-NEXT: movl $2, %eax ; CHECK-NEXT: retl +; CHECK-NEXT: .LBB0_1: # %bb1 +; CHECK-NEXT: movl $1, %eax +; CHECK-NEXT: retl entry: %cmp = icmp eq i64 %a, %b br i1 %cmp, label %bb1, label %bb2 @@ -55,13 +55,13 @@ define i32 @branch_ule(i64 %a, i64 %b) { ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx ; CHECK-NEXT: cmpl {{[0-9]+}}(%esp), %eax ; CHECK-NEXT: sbbl {{[0-9]+}}(%esp), %ecx -; CHECK-NEXT: jb .LBB2_2 -; CHECK-NEXT: # %bb.1: # %bb1 -; CHECK-NEXT: movl $1, %eax -; CHECK-NEXT: retl -; CHECK-NEXT: .LBB2_2: # %bb2 +; CHECK-NEXT: jae .LBB2_1 +; CHECK-NEXT: # %bb.2: # %bb2 ; CHECK-NEXT: movl $2, %eax ; CHECK-NEXT: retl +; CHECK-NEXT: .LBB2_1: # %bb1 +; CHECK-NEXT: movl $1, %eax +; CHECK-NEXT: retl entry: %cmp = icmp ule i64 %a, %b br i1 %cmp, label %bb1, label %bb2 diff --git a/llvm/test/DebugInfo/COFF/fpo-shrink-wrap.ll b/llvm/test/DebugInfo/COFF/fpo-shrink-wrap.ll index 719d69d16a625d..ea89c76c4d93e0 100644 --- a/llvm/test/DebugInfo/COFF/fpo-shrink-wrap.ll +++ b/llvm/test/DebugInfo/COFF/fpo-shrink-wrap.ll @@ -16,8 +16,11 @@ ; ASM: .cv_loc 0 1 3 9 # t.c:3:9 ; ASM: movl %ecx, %eax ; ASM: cmpl %edx, %ecx -; ASM: jl [[EPILOGUE:LBB0_[0-9]+]] +; ASM: jge LBB0_1 +; 
ASM: retl $8 + +; ASM: LBB0_1: ; ASM: pushl %ebx ; ASM: .cv_fpo_pushreg %ebx ; ASM: pushl %edi @@ -31,9 +34,7 @@ ; ASM: popl %esi ; ASM: popl %edi ; ASM: popl %ebx -; ASM: [[EPILOGUE]]: # %return -; ASM: retl $8 -; ASM: Ltmp10: +; ASM: Ltmp11: ; ASM: .cv_fpo_endproc ; Note how RvaStart advances 7 bytes to skip the shrink-wrapped portion. @@ -41,7 +42,7 @@ ; OBJ: FrameData { ; OBJ: RvaStart: 0x0 ; OBJ: CodeSize: 0x36 -; OBJ: PrologSize: 0x9 +; OBJ: PrologSize: 0xC ; OBJ: FrameFunc [ ; OBJ-NEXT: $T0 .raSearch = ; OBJ-NEXT: $eip $T0 ^ = @@ -49,8 +50,8 @@ ; OBJ-NEXT: ] ; OBJ: } ; OBJ: FrameData { -; OBJ: RvaStart: 0x7 -; OBJ: CodeSize: 0x2F +; OBJ: RvaStart: 0xA +; OBJ: CodeSize: 0x2C ; OBJ: PrologSize: 0x2 ; OBJ: FrameFunc [ ; OBJ-NEXT: $T0 .raSearch = @@ -60,8 +61,8 @@ ; OBJ-NEXT: ] ; OBJ: } ; OBJ: FrameData { -; OBJ: RvaStart: 0x8 -; OBJ: CodeSize: 0x2E +; OBJ: RvaStart: 0xB +; OBJ: CodeSize: 0x2B ; OBJ: PrologSize: 0x1 ; OBJ: FrameFunc [ ; OBJ-NEXT: $T0 .raSearch = @@ -72,8 +73,8 @@ ; OBJ-NEXT: ] ; OBJ: } ; OBJ: FrameData { -; OBJ: RvaStart: 0x9 -; OBJ: CodeSize: 0x2D +; OBJ: RvaStart: 0xC +; OBJ: CodeSize: 0x2A ; OBJ: PrologSize: 0x0 ; OBJ: FrameFunc [ ; OBJ-NEXT: $T0 .raSearch = diff --git a/llvm/test/Transforms/PGOProfile/counter_promo_mexits.ll b/llvm/test/Transforms/PGOProfile/counter_promo_mexits.ll index bb799757a47cc2..f462eba5a3ab9b 100644 --- a/llvm/test/Transforms/PGOProfile/counter_promo_mexits.ll +++ b/llvm/test/Transforms/PGOProfile/counter_promo_mexits.ll @@ -1,27 +1,95 @@ -; RUN: opt < %s -pgo-instr-gen -instrprof -do-counter-promotion=true -speculative-counter-promotion-max-exiting=3 -S | FileCheck --check-prefix=PROMO %s -; RUN: opt < %s --passes=pgo-instr-gen,instrprof -do-counter-promotion=true -speculative-counter-promotion-max-exiting=3 -S | FileCheck --check-prefix=PROMO %s +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -pgo-instr-gen -instrprof -do-counter-promotion=true 
-speculative-counter-promotion-max-exiting=3 -S | FileCheck %s --check-prefix=PROMO +; RUN: opt < %s --passes=pgo-instr-gen,instrprof -do-counter-promotion=true -speculative-counter-promotion-max-exiting=3 -S | FileCheck %s --check-prefix=PROMO @g = common local_unnamed_addr global i32 0, align 4 define void @foo(i32 %arg) local_unnamed_addr { -; PROMO-LABEL: @foo +; PROMO-LABEL: @foo( +; PROMO-NEXT: bb: +; PROMO-NEXT: [[T:%.*]] = add nsw i32 [[ARG:%.*]], -1 +; PROMO-NEXT: br label [[BB1:%.*]] +; PROMO: bb1: +; PROMO-NEXT: [[PGOCOUNT213:%.*]] = phi i64 [ 0, [[BB:%.*]] ], [ [[PGOCOUNT212:%.*]], [[BB11:%.*]] ] +; PROMO-NEXT: [[PGOCOUNT19:%.*]] = phi i64 [ 0, [[BB]] ], [ [[PGOCOUNT18:%.*]], [[BB11]] ] +; PROMO-NEXT: [[PGOCOUNT6:%.*]] = phi i64 [ 0, [[BB]] ], [ [[PGOCOUNT5:%.*]], [[BB11]] ] +; PROMO-NEXT: [[T2:%.*]] = phi i32 [ 0, [[BB]] ], [ [[T12:%.*]], [[BB11]] ] +; PROMO-NEXT: [[T3:%.*]] = icmp sgt i32 [[T2]], [[ARG]] +; PROMO-NEXT: br i1 [[T3]], label [[BB7:%.*]], label [[BB4:%.*]] +; PROMO: bb4: +; PROMO-NEXT: [[TMP0:%.*]] = add i64 [[PGOCOUNT6]], 1 +; PROMO-NEXT: tail call void @bar(i32 1) +; PROMO-NEXT: [[T5:%.*]] = load i32, i32* @g, align 4 +; PROMO-NEXT: [[T6:%.*]] = icmp sgt i32 [[T5]], 100 +; PROMO-NEXT: br i1 [[T6]], label [[BB15_0:%.*]], label [[BB11]] +; PROMO: bb7: +; PROMO-NEXT: [[T8:%.*]] = icmp slt i32 [[T2]], [[T]] +; PROMO-NEXT: br i1 [[T8]], label [[BB9:%.*]], label [[BB10:%.*]] +; PROMO: bb9: +; PROMO-NEXT: [[TMP1:%.*]] = add i64 [[PGOCOUNT19]], 1 +; PROMO-NEXT: tail call void @bar(i32 2) +; PROMO-NEXT: br label [[BB11]] +; PROMO: bb10: +; PROMO-NEXT: [[TMP2:%.*]] = add i64 [[PGOCOUNT213]], 1 +; PROMO-NEXT: tail call void @bar(i32 3) +; PROMO-NEXT: br label [[BB11]] +; PROMO: bb11: +; PROMO-NEXT: [[PGOCOUNT212]] = phi i64 [ [[TMP2]], [[BB10]] ], [ [[PGOCOUNT213]], [[BB9]] ], [ [[PGOCOUNT213]], [[BB4]] ] +; PROMO-NEXT: [[PGOCOUNT18]] = phi i64 [ [[PGOCOUNT19]], [[BB10]] ], [ [[TMP1]], [[BB9]] ], [ [[PGOCOUNT19]], [[BB4]] ] +; PROMO-NEXT: 
[[PGOCOUNT5]] = phi i64 [ [[PGOCOUNT6]], [[BB10]] ], [ [[PGOCOUNT6]], [[BB9]] ], [ [[TMP0]], [[BB4]] ] +; PROMO-NEXT: [[T12]] = add nuw nsw i32 [[T2]], 1 +; PROMO-NEXT: [[T13:%.*]] = icmp slt i32 [[T2]], 99 +; PROMO-NEXT: br i1 [[T13]], label [[BB1]], label [[BB14:%.*]] +; PROMO: bb14: +; PROMO-NEXT: [[PGOCOUNT_PROMOTED7:%.*]] = load i64, i64* getelementptr inbounds ([5 x i64], [5 x i64]* @__profc_foo, i64 0, i64 1), align 4 +; PROMO-NEXT: [[TMP3:%.*]] = add i64 [[PGOCOUNT_PROMOTED7]], [[PGOCOUNT5]] +; PROMO-NEXT: store i64 [[TMP3]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @__profc_foo, i64 0, i64 1), align 4 +; PROMO-NEXT: [[PGOCOUNT_PROMOTED11:%.*]] = load i64, i64* getelementptr inbounds ([5 x i64], [5 x i64]* @__profc_foo, i64 0, i64 0), align 4 +; PROMO-NEXT: [[TMP4:%.*]] = add i64 [[PGOCOUNT_PROMOTED11]], [[PGOCOUNT18]] +; PROMO-NEXT: store i64 [[TMP4]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @__profc_foo, i64 0, i64 0), align 4 +; PROMO-NEXT: [[PGOCOUNT_PROMOTED15:%.*]] = load i64, i64* getelementptr inbounds ([5 x i64], [5 x i64]* @__profc_foo, i64 0, i64 2), align 4 +; PROMO-NEXT: [[TMP5:%.*]] = add i64 [[PGOCOUNT_PROMOTED15]], [[PGOCOUNT212]] +; PROMO-NEXT: store i64 [[TMP5]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @__profc_foo, i64 0, i64 2), align 4 +; PROMO-NEXT: [[PGOCOUNT3:%.*]] = load i64, i64* getelementptr inbounds ([5 x i64], [5 x i64]* @__profc_foo, i64 0, i64 3), align 4 +; PROMO-NEXT: [[TMP6:%.*]] = add i64 [[PGOCOUNT3]], 1 +; PROMO-NEXT: store i64 [[TMP6]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @__profc_foo, i64 0, i64 3), align 4 +; PROMO-NEXT: tail call void @bar(i32 0) +; PROMO-NEXT: br label [[BB15:%.*]] +; PROMO: bb15_0: +; PROMO-NEXT: [[PGOCOUNT_PROMOTED:%.*]] = load i64, i64* getelementptr inbounds ([5 x i64], [5 x i64]* @__profc_foo, i64 0, i64 1), align 4 +; PROMO-NEXT: [[TMP7:%.*]] = add i64 [[PGOCOUNT_PROMOTED]], [[TMP0]] +; PROMO-NEXT: store i64 [[TMP7]], i64* getelementptr inbounds 
([5 x i64], [5 x i64]* @__profc_foo, i64 0, i64 1), align 4 +; PROMO-NEXT: [[PGOCOUNT_PROMOTED10:%.*]] = load i64, i64* getelementptr inbounds ([5 x i64], [5 x i64]* @__profc_foo, i64 0, i64 0), align 4 +; PROMO-NEXT: [[TMP8:%.*]] = add i64 [[PGOCOUNT_PROMOTED10]], [[PGOCOUNT19]] +; PROMO-NEXT: store i64 [[TMP8]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @__profc_foo, i64 0, i64 0), align 4 +; PROMO-NEXT: [[PGOCOUNT_PROMOTED14:%.*]] = load i64, i64* getelementptr inbounds ([5 x i64], [5 x i64]* @__profc_foo, i64 0, i64 2), align 4 +; PROMO-NEXT: [[TMP9:%.*]] = add i64 [[PGOCOUNT_PROMOTED14]], [[PGOCOUNT213]] +; PROMO-NEXT: store i64 [[TMP9]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @__profc_foo, i64 0, i64 2), align 4 +; PROMO-NEXT: [[PGOCOUNT4:%.*]] = load i64, i64* getelementptr inbounds ([5 x i64], [5 x i64]* @__profc_foo, i64 0, i64 4), align 4 +; PROMO-NEXT: [[TMP10:%.*]] = add i64 [[PGOCOUNT4]], 1 +; PROMO-NEXT: store i64 [[TMP10]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @__profc_foo, i64 0, i64 4), align 4 +; PROMO-NEXT: br label [[BB15]] +; PROMO: bb15: +; PROMO-NEXT: tail call void @bar(i32 1) +; PROMO-NEXT: ret void +; bb: - %tmp = add nsw i32 %arg, -1 + %t = add nsw i32 %arg, -1 br label %bb1 bb1: ; preds = %bb11, %bb - %tmp2 = phi i32 [ 0, %bb ], [ %tmp12, %bb11 ] - %tmp3 = icmp sgt i32 %tmp2, %arg - br i1 %tmp3, label %bb7, label %bb4 + %t2 = phi i32 [ 0, %bb ], [ %t12, %bb11 ] + %t3 = icmp sgt i32 %t2, %arg + br i1 %t3, label %bb7, label %bb4 bb4: ; preds = %bb1 tail call void @bar(i32 1) - %tmp5 = load i32, i32* @g, align 4 - %tmp6 = icmp sgt i32 %tmp5, 100 - br i1 %tmp6, label %bb15_0, label %bb11 + %t5 = load i32, i32* @g, align 4 + %t6 = icmp sgt i32 %t5, 100 + br i1 %t6, label %bb15_0, label %bb11 bb7: ; preds = %bb1 - %tmp8 = icmp slt i32 %tmp2, %tmp - br i1 %tmp8, label %bb9, label %bb10 + %t8 = icmp slt i32 %t2, %t + br i1 %t8, label %bb9, label %bb10 bb9: ; preds = %bb7 tail call void @bar(i32 2) @@ -32,45 
+100,18 @@ bb10: ; preds = %bb7 br label %bb11 bb11: ; preds = %bb10, %bb9, %bb4 - %tmp12 = add nuw nsw i32 %tmp2, 1 - %tmp13 = icmp slt i32 %tmp2, 99 - br i1 %tmp13, label %bb1, label %bb14 + %t12 = add nuw nsw i32 %t2, 1 + %t13 = icmp slt i32 %t2, 99 + br i1 %t13, label %bb1, label %bb14 bb14: ; preds = %bb11 -; PROMO-LABEL: bb14: tail call void @bar(i32 0) br label %bb15 -; PROMO: %pgocount.promoted{{.*}} = load {{.*}} @__profc_foo{{.*}} 0) -; PROMO-NEXT: add -; PROMO-NEXT: store {{.*}}@__profc_foo{{.*}}0) -; PROMO-NEXT: %pgocount.promoted{{.*}} = load {{.*}} @__profc_foo{{.*}} 1) -; PROMO-NEXT: add -; PROMO-NEXT: store {{.*}}@__profc_foo{{.*}}1) -; PROMO-NEXT: %pgocount.promoted{{.*}} = load {{.*}} @__profc_foo{{.*}} 2) -; PROMO-NEXT: add -; PROMO-NEXT: store {{.*}}@__profc_foo{{.*}}2) -; PROMO-NEXT: %pgocount{{.*}} = load {{.*}} @__profc_foo{{.*}} 3) -; PROMO-NEXT: add -; PROMO-NEXT: store {{.*}}@__profc_foo{{.*}}3) + bb15_0: ; preds = %bb11 -; PROMO-LABEL: bb15_0: br label %bb15 -; PROMO: %pgocount.promoted{{.*}} = load {{.*}} @__profc_foo{{.*}} 0) -; PROMO-NEXT: add -; PROMO-NEXT: store {{.*}}@__profc_foo{{.*}}0) -; PROMO-NEXT: %pgocount.promoted{{.*}} = load {{.*}} @__profc_foo{{.*}} 1) -; PROMO-NEXT: add -; PROMO-NEXT: store {{.*}}@__profc_foo{{.*}}1) -; PROMO-NEXT: %pgocount.promoted{{.*}} = load {{.*}} @__profc_foo{{.*}} 2) -; PROMO-NEXT: add -; PROMO-NEXT: store {{.*}}@__profc_foo{{.*}}2) -; PROMO-NEXT: %pgocount{{.*}} = load {{.*}} @__profc_foo{{.*}} 4) -; PROMO-NEXT: add -; PROMO-NEXT: store {{.*}}@__profc_foo{{.*}}4) -; PROMO-NOT: @__profc_foo - bb15: ; preds = %bb14, %bb4 tail call void @bar(i32 1) diff --git a/llvm/test/Transforms/PGOProfile/landingpad.ll b/llvm/test/Transforms/PGOProfile/landingpad.ll index a0ca799fa8a43b..5191e118210fec 100644 --- a/llvm/test/Transforms/PGOProfile/landingpad.ll +++ b/llvm/test/Transforms/PGOProfile/landingpad.ll @@ -85,7 +85,8 @@ catch.dispatch: ; GEN: catch.dispatch: ; GEN-NOT: call void 
@llvm.instrprof.increment %tmp3 = call i32 @llvm.eh.typeid.for(i8* bitcast (i8** @_ZTIi to i8*)) - %matches = icmp eq i32 %tmp2, %tmp3 + %c = icmp ne i32 %tmp2, %tmp3 + %matches = xor i1 %c, -1 br i1 %matches, label %catch, label %eh.resume ; USE: br i1 %matches, label %catch, label %eh.resume ; USE-SAME: !prof ![[BW_CATCH_DISPATCH:[0-9]+]] From 8d58eb11f9dabacc37f1f5e2cc83149b24868180 Mon Sep 17 00:00:00 2001 From: Sameer Arora Date: Wed, 29 Jul 2020 07:40:11 -0700 Subject: [PATCH 02/23] [llvm-libtool-darwin] Refactor ArchiveWriter Refactoring function `writeArchive` in ArchiveWriter. Added a new function `writeArchiveBuffer` that returns the archive in a memory buffer instead of writing it out to the disk. This refactor is necessary so as to allow `llvm-libtool-darwin` to write universal files containing archives. Reviewed by jhenderson, MaskRay, smeenai Differential Revision: https://reviews.llvm.org/D84858 --- llvm/include/llvm/Object/ArchiveWriter.h | 6 +++ llvm/lib/Object/ArchiveWriter.cpp | 49 +++++++++++++++++++----- 2 files changed, 45 insertions(+), 10 deletions(-) diff --git a/llvm/include/llvm/Object/ArchiveWriter.h b/llvm/include/llvm/Object/ArchiveWriter.h index 274ffd90c05aad..7eaf13e8fb2294 100644 --- a/llvm/include/llvm/Object/ArchiveWriter.h +++ b/llvm/include/llvm/Object/ArchiveWriter.h @@ -39,6 +39,12 @@ Error writeArchive(StringRef ArcName, ArrayRef NewMembers, bool WriteSymtab, object::Archive::Kind Kind, bool Deterministic, bool Thin, std::unique_ptr OldArchiveBuf = nullptr); + +// writeArchiveToBuffer is similar to writeArchive but returns the Archive in a +// buffer instead of writing it out to a file. 
+Expected> +writeArchiveToBuffer(ArrayRef NewMembers, bool WriteSymtab, + object::Archive::Kind Kind, bool Deterministic, bool Thin); } #endif diff --git a/llvm/lib/Object/ArchiveWriter.cpp b/llvm/lib/Object/ArchiveWriter.cpp index 6f92c547164ba1..ca8ffa7706b0ea 100644 --- a/llvm/lib/Object/ArchiveWriter.cpp +++ b/llvm/lib/Object/ArchiveWriter.cpp @@ -26,6 +26,7 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Format.h" #include "llvm/Support/Path.h" +#include "llvm/Support/SmallVectorMemoryBuffer.h" #include "llvm/Support/ToolOutputFile.h" #include "llvm/Support/raw_ostream.h" @@ -552,10 +553,10 @@ Expected computeArchiveRelativePath(StringRef From, StringRef To) { return std::string(Relative.str()); } -Error writeArchive(StringRef ArcName, ArrayRef NewMembers, - bool WriteSymtab, object::Archive::Kind Kind, - bool Deterministic, bool Thin, - std::unique_ptr OldArchiveBuf) { +static Error writeArchiveToStream(raw_ostream &Out, + ArrayRef NewMembers, + bool WriteSymtab, object::Archive::Kind Kind, + bool Deterministic, bool Thin) { assert((!Thin || !isBSDLike(Kind)) && "Only the gnu format has a thin mode"); SmallString<0> SymNamesBuf; @@ -608,12 +609,6 @@ Error writeArchive(StringRef ArcName, ArrayRef NewMembers, } } - Expected Temp = - sys::fs::TempFile::create(ArcName + ".temp-archive-%%%%%%%.a"); - if (!Temp) - return Temp.takeError(); - - raw_fd_ostream Out(Temp->FD, false); if (Thin) Out << "!\n"; else @@ -626,6 +621,25 @@ Error writeArchive(StringRef ArcName, ArrayRef NewMembers, Out << M.Header << M.Data << M.Padding; Out.flush(); + return Error::success(); +} + +Error writeArchive(StringRef ArcName, ArrayRef NewMembers, + bool WriteSymtab, object::Archive::Kind Kind, + bool Deterministic, bool Thin, + std::unique_ptr OldArchiveBuf) { + Expected Temp = + sys::fs::TempFile::create(ArcName + ".temp-archive-%%%%%%%.a"); + if (!Temp) + return Temp.takeError(); + raw_fd_ostream Out(Temp->FD, false); + + if (Error E = writeArchiveToStream(Out, 
NewMembers, WriteSymtab, Kind, + Deterministic, Thin)) { + if (Error DiscardError = Temp->discard()) + return joinErrors(std::move(E), std::move(DiscardError)); + return E; + } // At this point, we no longer need whatever backing memory // was used to generate the NewMembers. On Windows, this buffer @@ -642,4 +656,19 @@ Error writeArchive(StringRef ArcName, ArrayRef NewMembers, return Temp->keep(ArcName); } +Expected> +writeArchiveToBuffer(ArrayRef NewMembers, bool WriteSymtab, + object::Archive::Kind Kind, bool Deterministic, + bool Thin) { + SmallVector ArchiveBufferVector; + raw_svector_ostream ArchiveStream(ArchiveBufferVector); + + if (Error E = writeArchiveToStream(ArchiveStream, NewMembers, WriteSymtab, + Kind, Deterministic, Thin)) + return std::move(E); + + return std::make_unique( + std::move(ArchiveBufferVector)); +} + } // namespace llvm From 05169af5cea2c3b9aa0f38354d0e81ddf6b7a3d9 Mon Sep 17 00:00:00 2001 From: Valentin Clement Date: Thu, 13 Aug 2020 14:04:43 -0400 Subject: [PATCH 03/23] [flang][openacc] Handle optional end directive in combined construct OpenACC combined construct can have an optional end directive. This patch handle this case in the parsing/unparsing with a canonicalization step. Unlike OmpEndLoopDirective, this doesn't need a special treatment in the pre-fir tree as there is no clause attached to a AccEndCombinedDirective. 
Reviewed By: klausler Differential Revision: https://reviews.llvm.org/D84481 --- flang/include/flang/Parser/parse-tree.h | 7 +- flang/lib/Parser/executable-parsers.cpp | 3 +- flang/lib/Parser/openacc-parsers.cpp | 14 +-- flang/lib/Parser/program-parsers.cpp | 8 +- flang/lib/Parser/type-parsers.h | 1 + flang/lib/Parser/unparse.cpp | 5 +- flang/lib/Semantics/canonicalize-acc.cpp | 62 ++++++++++++ flang/lib/Semantics/check-acc-structure.cpp | 12 ++- flang/test/Lower/pre-fir-tree05.f90 | 16 ++++ .../acc-canonicalization-validity.f90 | 95 +++++++++++++++++++ flang/test/Semantics/acc-clause-validity.f90 | 25 +++++ 11 files changed, 230 insertions(+), 18 deletions(-) create mode 100644 flang/test/Semantics/acc-canonicalization-validity.f90 diff --git a/flang/include/flang/Parser/parse-tree.h b/flang/include/flang/Parser/parse-tree.h index 2fecac5118d844..695121f8395950 100644 --- a/flang/include/flang/Parser/parse-tree.h +++ b/flang/include/flang/Parser/parse-tree.h @@ -258,6 +258,7 @@ struct AssignStmt; struct AssignedGotoStmt; struct PauseStmt; struct OpenACCConstruct; +struct AccEndCombinedDirective; struct OpenACCDeclarativeConstruct; struct OpenMPConstruct; struct OpenMPDeclarativeConstruct; @@ -517,6 +518,7 @@ struct ExecutableConstruct { common::Indirection, common::Indirection, common::Indirection, common::Indirection, + common::Indirection, common::Indirection, common::Indirection> u; @@ -3970,6 +3972,7 @@ struct OpenACCStandaloneDeclarativeConstruct { struct AccBeginCombinedDirective { TUPLE_CLASS_BOILERPLATE(AccBeginCombinedDirective); + CharBlock source; std::tuple t; }; @@ -3981,7 +3984,9 @@ struct AccEndCombinedDirective { struct OpenACCCombinedConstruct { TUPLE_CLASS_BOILERPLATE(OpenACCCombinedConstruct); CharBlock source; - std::tuple, std::optional> t; }; diff --git a/flang/lib/Parser/executable-parsers.cpp b/flang/lib/Parser/executable-parsers.cpp index d6dd4688dbac1f..a0b5cf232abf7f 100644 --- a/flang/lib/Parser/executable-parsers.cpp +++ 
b/flang/lib/Parser/executable-parsers.cpp @@ -50,8 +50,9 @@ constexpr auto executableConstruct{ construct(indirect(whereConstruct)), construct(indirect(forallConstruct)), construct(indirect(ompEndLoopDirective)), - construct(indirect(openaccConstruct)), construct(indirect(openmpConstruct)), + construct(indirect(accEndCombinedDirective)), + construct(indirect(openaccConstruct)), construct(indirect(compilerDirective)))}; // R510 execution-part-construct -> diff --git a/flang/lib/Parser/openacc-parsers.cpp b/flang/lib/Parser/openacc-parsers.cpp index 0a61921c90874c..823fbaec0acef1 100644 --- a/flang/lib/Parser/openacc-parsers.cpp +++ b/flang/lib/Parser/openacc-parsers.cpp @@ -199,16 +199,9 @@ TYPE_PARSER(sourced( parenthesized(Parser{})))) // 2.11 Combined constructs -TYPE_PARSER(startAccLine >> construct(sourced( - "END"_tok >> Parser{}))) - TYPE_PARSER(construct( sourced(Parser{}), Parser{})) -TYPE_PARSER(construct( - Parser{} / endAccLine, block, - maybe(Parser{} / endAccLine))) - // 2.12 Atomic constructs TYPE_PARSER(construct(startAccLine >> "END ATOMIC"_tok)) @@ -281,4 +274,11 @@ TYPE_CONTEXT_PARSER("OpenACC construct"_en_US, construct(Parser{}), construct(Parser{}), construct(Parser{}))) + +TYPE_PARSER(startAccLine >> sourced(construct(sourced( + "END"_tok >> Parser{})))) + +TYPE_PARSER(construct( + sourced(Parser{} / endAccLine))) + } // namespace Fortran::parser diff --git a/flang/lib/Parser/program-parsers.cpp b/flang/lib/Parser/program-parsers.cpp index 1be1207c8626a8..278cc6fdb51a5c 100644 --- a/flang/lib/Parser/program-parsers.cpp +++ b/flang/lib/Parser/program-parsers.cpp @@ -76,10 +76,10 @@ TYPE_CONTEXT_PARSER("specification part"_en_US, // are in contexts that impose constraints on the kinds of statements that // are allowed, and so we have a variant production for declaration-construct // that implements those constraints. 
-constexpr auto execPartLookAhead{first(actionStmt >> ok, - ompEndLoopDirective >> ok, openaccConstruct >> ok, openmpConstruct >> ok, - "ASSOCIATE ("_tok, "BLOCK"_tok, "SELECT"_tok, "CHANGE TEAM"_sptok, - "CRITICAL"_tok, "DO"_tok, "IF ("_tok, "WHERE ("_tok, "FORALL ("_tok)}; +constexpr auto execPartLookAhead{ + first(actionStmt >> ok, openaccConstruct >> ok, openmpConstruct >> ok, + "ASSOCIATE ("_tok, "BLOCK"_tok, "SELECT"_tok, "CHANGE TEAM"_sptok, + "CRITICAL"_tok, "DO"_tok, "IF ("_tok, "WHERE ("_tok, "FORALL ("_tok)}; constexpr auto declErrorRecovery{ stmtErrorRecoveryStart >> !execPartLookAhead >> skipStmtErrorRecovery}; constexpr auto misplacedSpecificationStmt{Parser{} >> diff --git a/flang/lib/Parser/type-parsers.h b/flang/lib/Parser/type-parsers.h index a2f38e90db2120..d6269cbdc7151d 100644 --- a/flang/lib/Parser/type-parsers.h +++ b/flang/lib/Parser/type-parsers.h @@ -131,6 +131,7 @@ constexpr Parser entryStmt; // R1541 constexpr Parser containsStmt; // R1543 constexpr Parser compilerDirective; constexpr Parser openaccConstruct; +constexpr Parser accEndCombinedDirective; constexpr Parser openaccDeclarativeConstruct; constexpr Parser openmpConstruct; constexpr Parser openmpDeclarativeConstruct; diff --git a/flang/lib/Parser/unparse.cpp b/flang/lib/Parser/unparse.cpp index 1093cb21709c91..85ed1a2bd60b9d 100644 --- a/flang/lib/Parser/unparse.cpp +++ b/flang/lib/Parser/unparse.cpp @@ -2104,10 +2104,9 @@ class UnparseVisitor { Walk(std::get(x.t)); Put("\n"); EndOpenACC(); - Walk(std::get(x.t), ""); + Walk(std::get>(x.t)); BeginOpenACC(); - Word("!$ACC END "); - Walk(std::get>(x.t)); + Walk("!$ACC END ", std::get>(x.t)); Put("\n"); EndOpenACC(); } diff --git a/flang/lib/Semantics/canonicalize-acc.cpp b/flang/lib/Semantics/canonicalize-acc.cpp index 4c4d716fe7defe..8cf04910ba6e13 100644 --- a/flang/lib/Semantics/canonicalize-acc.cpp +++ b/flang/lib/Semantics/canonicalize-acc.cpp @@ -16,6 +16,9 @@ // 1. move structured DoConstruct into // OpenACCLoopConstruct. 
Compilation will not proceed in case of errors // after this pass. +// 2. move structured DoConstruct into OpenACCCombinedConstruct. Move +// AccEndCombinedConstruct into OpenACCCombinedConstruct if present. +// Compilation will not proceed in case of errors after this pass. namespace Fortran::semantics { using namespace parser::literals; @@ -30,6 +33,16 @@ class CanonicalizationOfAcc { for (auto it{block.begin()}; it != block.end(); ++it) { if (auto *accLoop{parser::Unwrap(*it)}) { RewriteOpenACCLoopConstruct(*accLoop, block, it); + } else if (auto *accCombined{ + parser::Unwrap(*it)}) { + RewriteOpenACCCombinedConstruct(*accCombined, block, it); + } else if (auto *endDir{ + parser::Unwrap(*it)}) { + // Unmatched AccEndCombinedDirective + messages_.Say(endDir->v.source, + "The %s directive must follow the DO loop associated with the " + "loop construct"_err_en_US, + parser::ToUpperCaseLetters(endDir->v.source.ToString())); } } // Block list } @@ -73,6 +86,55 @@ class CanonicalizationOfAcc { parser::ToUpperCaseLetters(dir.source.ToString())); } + void RewriteOpenACCCombinedConstruct(parser::OpenACCCombinedConstruct &x, + parser::Block &block, parser::Block::iterator it) { + // Check the sequence of DoConstruct in the same iteration + // + // Original: + // ExecutableConstruct -> OpenACCConstruct -> OpenACCCombinedConstruct + // ACCBeginCombinedDirective + // ExecutableConstruct -> DoConstruct + // ExecutableConstruct -> AccEndCombinedDirective (if available) + // + // After rewriting: + // ExecutableConstruct -> OpenACCConstruct -> OpenACCCombinedConstruct + // ACCBeginCombinedDirective + // DoConstruct + // AccEndCombinedDirective (if available) + parser::Block::iterator nextIt; + auto &beginDir{std::get(x.t)}; + auto &dir{std::get(beginDir.t)}; + + nextIt = it; + if (++nextIt != block.end()) { + if (auto *doCons{parser::Unwrap(*nextIt)}) { + if (doCons->GetLoopControl()) { + // move DoConstruct + std::get>(x.t) = + std::move(*doCons); + nextIt = 
block.erase(nextIt); + // try to match AccEndCombinedDirective + if (nextIt != block.end()) { + if (auto *endDir{ + parser::Unwrap(*nextIt)}) { + std::get>(x.t) = + std::move(*endDir); + block.erase(nextIt); + } + } + } else { + messages_.Say(dir.source, + "DO loop after the %s directive must have loop control"_err_en_US, + parser::ToUpperCaseLetters(dir.source.ToString())); + } + return; // found do-loop + } + } + messages_.Say(dir.source, + "A DO loop must follow the %s directive"_err_en_US, + parser::ToUpperCaseLetters(dir.source.ToString())); + } + parser::Messages &messages_; }; diff --git a/flang/lib/Semantics/check-acc-structure.cpp b/flang/lib/Semantics/check-acc-structure.cpp index 311a7c4d3328e8..4dcf5ed27f7058 100644 --- a/flang/lib/Semantics/check-acc-structure.cpp +++ b/flang/lib/Semantics/check-acc-structure.cpp @@ -156,9 +156,17 @@ void AccStructureChecker::Leave( } void AccStructureChecker::Enter(const parser::OpenACCCombinedConstruct &x) { - const auto &beginBlockDir{std::get(x.t)}; + const auto &beginCombinedDir{ + std::get(x.t)}; const auto &combinedDir{ - std::get(beginBlockDir.t)}; + std::get(beginCombinedDir.t)}; + + // check matching, End directive is optional + if (const auto &endCombinedDir{ + std::get>(x.t)}) { + CheckMatching(combinedDir, endCombinedDir->v); + } + PushContextAndClauseSets(combinedDir.source, combinedDir.v); } diff --git a/flang/test/Lower/pre-fir-tree05.f90 b/flang/test/Lower/pre-fir-tree05.f90 index f635785e3274b4..98af5c2de94431 100644 --- a/flang/test/Lower/pre-fir-tree05.f90 +++ b/flang/test/Lower/pre-fir-tree05.f90 @@ -31,3 +31,19 @@ subroutine foo() end subroutine ! CHECK-NEXT: EndSubroutine foo +! CHECK: Subroutine foo +subroutine foo2() + ! CHECK-NEXT: <> + !$acc parallel loop + ! CHECK-NEXT: <> + ! CHECK-NEXT: NonLabelDoStmt + do i=1,5 + ! CHECK-NEXT: EndDoStmt + ! CHECK-NEXT: <> + end do + !$acc end parallel loop + ! CHECK-NEXT: <> + ! CHECK-NEXT: ContinueStmt +end subroutine +! 
CHECK-NEXT: EndSubroutine foo2 + diff --git a/flang/test/Semantics/acc-canonicalization-validity.f90 b/flang/test/Semantics/acc-canonicalization-validity.f90 new file mode 100644 index 00000000000000..06c63ed25ddbb7 --- /dev/null +++ b/flang/test/Semantics/acc-canonicalization-validity.f90 @@ -0,0 +1,95 @@ +! RUN: %S/test_errors.sh %s %t %f18 -fopenacc + +! Check OpenACC canonalization validity for the construct defined below: +! 2.9 Loop +! 2.11 Parallel Loop +! 2.11 Kernels Loop +! 2.11 Serial Loop + +program openacc_clause_validity + + implicit none + + integer :: i, j + integer :: N = 256 + real(8) :: a(256) + + !ERROR: A DO loop must follow the LOOP directive + !$acc loop + i = 1 + + !ERROR: DO loop after the LOOP directive must have loop control + !$acc loop + do + end do + + !ERROR: A DO loop must follow the PARALLEL LOOP directive + !$acc parallel loop + i = 1 + + !ERROR: A DO loop must follow the KERNELS LOOP directive + !$acc kernels loop + i = 1 + + !ERROR: A DO loop must follow the SERIAL LOOP directive + !$acc serial loop + i = 1 + + !ERROR: The END PARALLEL LOOP directive must follow the DO loop associated with the loop construct + !$acc end parallel loop + + !ERROR: The END KERNELS LOOP directive must follow the DO loop associated with the loop construct + !$acc end kernels loop + + !ERROR: The END SERIAL LOOP directive must follow the DO loop associated with the loop construct + !$acc end serial loop + + !$acc parallel loop + do i = 1, N + a(i) = 3.14 + end do + + !$acc kernels loop + do i = 1, N + a(i) = 3.14 + end do + + !$acc serial loop + do i = 1, N + a(i) = 3.14 + end do + + !$acc parallel loop + do i = 1, N + a(i) = 3.14 + end do + !$acc end parallel loop + + !$acc kernels loop + do i = 1, N + a(i) = 3.14 + end do + !$acc end kernels loop + + !$acc serial loop + do i = 1, N + a(i) = 3.14 + end do + !$acc end serial loop + + !ERROR: DO loop after the PARALLEL LOOP directive must have loop control + !$acc parallel loop + do + end do + + !ERROR: 
DO loop after the KERNELS LOOP directive must have loop control + !$acc kernels loop + do + end do + + !ERROR: DO loop after the SERIAL LOOP directive must have loop control + !$acc serial loop + do + end do + +end program openacc_clause_validity diff --git a/flang/test/Semantics/acc-clause-validity.f90 b/flang/test/Semantics/acc-clause-validity.f90 index 75a0efa87d3529..207ca2ec72cdd1 100644 --- a/flang/test/Semantics/acc-clause-validity.f90 +++ b/flang/test/Semantics/acc-clause-validity.f90 @@ -5,6 +5,10 @@ ! 2.5.1 Parallel ! 2.5.2 Kernels ! 2.5.3 Serial +! 2.9 Loop +! 2.13 Declare +! 2.14.3 Set +! 2.14.4 Update ! 2.15.1 Routine ! 2.11 Parallel Loop ! 2.11 Kernels Loop @@ -162,6 +166,27 @@ program openacc_clause_validity end do !$acc end serial loop + !$acc parallel loop + do i = 1, N + a(i) = 3.14 + end do + !ERROR: Unmatched END KERNELS LOOP directive + !$acc end kernels loop + + !$acc kernels loop + do i = 1, N + a(i) = 3.14 + end do + !ERROR: Unmatched END SERIAL LOOP directive + !$acc end serial loop + + !$acc serial loop + do i = 1, N + a(i) = 3.14 + end do + !ERROR: Unmatched END PARALLEL LOOP directive + !$acc end parallel loop + contains subroutine sub1(a) From bd2853f7998d41e0d16c00e2b043e35b688eaf00 Mon Sep 17 00:00:00 2001 From: Sameer Arora Date: Tue, 28 Jul 2020 09:50:51 -0700 Subject: [PATCH 04/23] [llvm-libtool-darwin] Add support for -arch_only Add support for -arch_only option for llvm-libtool-darwin. This diff also adds support for accepting universal files as input and flattening them to create the required static library. Supports input universal files contaning both Mach-O object files or archives. 
Differences from cctools' libtool: - `-arch_only` can be specified multiple times - archives containing universal files are considered invalid (libtool allows such archives) Reviewed by jhenderson, smeenai Differential Revision: https://reviews.llvm.org/D84770 --- .../docs/CommandGuide/llvm-libtool-darwin.rst | 43 +-- .../cpu-subtype-matching.test | 269 ++++++++++++++++++ .../universal-file-flattening.test | 240 ++++++++++++++++ llvm/tools/llvm-libtool-darwin/CMakeLists.txt | 1 + llvm/tools/llvm-libtool-darwin/LLVMBuild.txt | 2 +- .../llvm-libtool-darwin.cpp | 201 ++++++++++--- 6 files changed, 703 insertions(+), 53 deletions(-) create mode 100644 llvm/test/tools/llvm-libtool-darwin/cpu-subtype-matching.test create mode 100644 llvm/test/tools/llvm-libtool-darwin/universal-file-flattening.test diff --git a/llvm/docs/CommandGuide/llvm-libtool-darwin.rst b/llvm/docs/CommandGuide/llvm-libtool-darwin.rst index 2944aa6ee37f0c..a5383c03da5330 100644 --- a/llvm/docs/CommandGuide/llvm-libtool-darwin.rst +++ b/llvm/docs/CommandGuide/llvm-libtool-darwin.rst @@ -21,30 +21,34 @@ OPTIONS -------- :program:`llvm-libtool-darwin` supports the following options: -.. option:: -h, -help - - Show help and usage for this command. +.. option:: -arch_only -.. option:: -help-list - - Show help and usage for this command without grouping the options - into categories. + Build a static library only for the specified `` and ignore all + other architectures in the files. .. option:: -color Use colors in output. -.. option:: -version +.. option:: -D - Display the version of this program. + Use zero for timestamps and UIDs/GIDs. This is set by default. -.. option:: -D +.. option:: -filelist - Use zero for timestamps and UIDs/GIDs. This is set by default. + Read input file names from ``. File names are specified in `` + one per line, separated only by newlines. Whitespace on a line is assumed + to be part of the filename. 
If the directory name, `dirname`, is also + specified then it is prepended to each file name in the ``. -.. option:: -U +.. option:: -h, -help + + Show help and usage for this command. - Use actual timestamps and UIDs/GIDs. +.. option:: -help-list + + Show help and usage for this command without grouping the options + into categories. .. option:: -o @@ -52,14 +56,15 @@ OPTIONS .. option:: -static - Produces a static library from the input files. + Produces a static library from the input files. -.. option:: -filelist +.. option:: -U + + Use actual timestamps and UIDs/GIDs. - Read input file names from ``. File names are specified in `` - one per line, separated only by newlines. Whitespace on a line is assumed - to be part of the filename. If the directory name, `dirname`, is also - specified then it is prepended to each file name in the ``. +.. option:: -version + + Display the version of this program. EXIT STATUS ----------- diff --git a/llvm/test/tools/llvm-libtool-darwin/cpu-subtype-matching.test b/llvm/test/tools/llvm-libtool-darwin/cpu-subtype-matching.test new file mode 100644 index 00000000000000..4789361ba69315 --- /dev/null +++ b/llvm/test/tools/llvm-libtool-darwin/cpu-subtype-matching.test @@ -0,0 +1,269 @@ +## This test checks that the CPU subtype matching logic is handled correctly. 
+ +# RUN: yaml2obj %s --docnum=1 -o %t.armv6 +# RUN: yaml2obj %s --docnum=2 -o %t.armv7 + +# RUN: llvm-libtool-darwin -static -o %t.lib %t.armv6 %t.armv7 -arch_only armv7 + +## Check that only armv7 binary is present: +# RUN: llvm-ar t %t.lib | \ +# RUN: FileCheck %s --check-prefix=ARM-NAMES --implicit-check-not={{.}} -DPREFIX=%basename_t.tmp + +# ARM-NAMES: [[PREFIX]].armv7 + +## Check that only armv7 symbol is present: +# RUN: llvm-nm --print-armap %t.lib | \ +# RUN: FileCheck %s --check-prefix=ARM-SYMBOLS -DPREFIX=%basename_t.tmp --match-full-lines + +# ARM-SYMBOLS: Archive map +# ARM-SYMBOLS-NEXT: _armv7 in [[PREFIX]].armv7 +# ARM-SYMBOLS-EMPTY: + +## armv6.yaml +## CPUTYPE: CPU_TYPE_ARM +## CPUSUBTYPE: CPU_SUBTYPE_ARM_V6 +--- !mach-o +FileHeader: + magic: 0xFEEDFACE + cputype: 0x0000000C + cpusubtype: 0x00000006 + filetype: 0x00000001 + ncmds: 2 + sizeofcmds: 148 + flags: 0x00002000 +LoadCommands: + - cmd: LC_SEGMENT + cmdsize: 124 + segname: '' + vmaddr: 0 + vmsize: 24 + fileoff: 296 + filesize: 24 + maxprot: 7 + initprot: 7 + nsects: 1 + flags: 0 + Sections: + - sectname: __text + segname: __TEXT + addr: 0x0000000000000000 + size: 24 + offset: 0x00000128 + align: 2 + reloff: 0x00000000 + nreloc: 0 + flags: 0x80000400 + reserved1: 0x00000000 + reserved2: 0x00000000 + reserved3: 0x00000000 + content: 04D04DE208009FE500008DE504D08DE21EFF2FE100000000 + - cmd: LC_SYMTAB + cmdsize: 24 + symoff: 328 + nsyms: 1 + stroff: 340 + strsize: 8 +LinkEditData: + NameList: + - n_strx: 1 + n_type: 0x0F + n_sect: 1 + n_desc: 0 + n_value: 0 + StringTable: + - '' + - _armv6 + - '' +... 
+ +## armv7.yaml +## CPUTYPE: CPU_TYPE_ARM +## CPUSUBTYPE: CPU_SUBTYPE_ARM_V7 +--- !mach-o +FileHeader: + magic: 0xFEEDFACE + cputype: 0x0000000C + cpusubtype: 0x00000009 + filetype: 0x00000001 + ncmds: 2 + sizeofcmds: 148 + flags: 0x00002000 +LoadCommands: + - cmd: LC_SEGMENT + cmdsize: 124 + segname: '' + vmaddr: 0 + vmsize: 10 + fileoff: 280 + filesize: 10 + maxprot: 7 + initprot: 7 + nsects: 1 + flags: 0 + Sections: + - sectname: __text + segname: __TEXT + addr: 0x0000000000000000 + size: 10 + offset: 0x00000118 + align: 1 + reloff: 0x00000000 + nreloc: 0 + flags: 0x80000400 + reserved1: 0x00000000 + reserved2: 0x00000000 + reserved3: 0x00000000 + content: 81B00020009001B07047 + - cmd: LC_SYMTAB + cmdsize: 24 + symoff: 292 + nsyms: 1 + stroff: 304 + strsize: 8 +LinkEditData: + NameList: + - n_strx: 1 + n_type: 0x0F + n_sect: 1 + n_desc: 8 + n_value: 0 + StringTable: + - '' + - _armv7 + - '' +... + +# RUN: yaml2obj %s --docnum=3 -o %t.x86_64 +# RUN: yaml2obj %s --docnum=4 -o %t.x86_64_h + +# RUN: llvm-libtool-darwin -static -o %t.lib %t.x86_64 %t.x86_64_h -arch_only x86_64 + +## Check that only x86_64 binary is present: +# RUN: llvm-ar t %t.lib | \ +# RUN: FileCheck %s --check-prefix=X86-NAMES --implicit-check-not={{.}} -DPREFIX=%basename_t.tmp + +# X86-NAMES: [[PREFIX]].x86_64 + +## Check that only x86_64 symbol is present: +# RUN: llvm-nm --print-armap %t.lib | \ +# RUN: FileCheck %s --check-prefix=X86-SYMBOLS -DPREFIX=%basename_t.tmp --match-full-lines + +# X86-SYMBOLS: Archive map +# X86-SYMBOLS-NEXT: _x86_64 in [[PREFIX]].x86_64 +# X86-SYMBOLS-EMPTY: + +## x86_64.yaml +## CPUTYPE: CPU_TYPE_X86_64 +## CPUSUBTYPE: CPU_SUBTYPE_X86_64_ALL +--- !mach-o +FileHeader: + magic: 0xFEEDFACF + cputype: 0x01000007 + cpusubtype: 0x00000003 + filetype: 0x00000001 + ncmds: 2 + sizeofcmds: 176 + flags: 0x00002000 + reserved: 0x00000000 +LoadCommands: + - cmd: LC_SEGMENT_64 + cmdsize: 152 + segname: '' + vmaddr: 0 + vmsize: 15 + fileoff: 312 + filesize: 15 + maxprot: 7 + 
initprot: 7 + nsects: 1 + flags: 0 + Sections: + - sectname: __text + segname: __TEXT + addr: 0x0000000000000000 + size: 15 + offset: 0x00000138 + align: 4 + reloff: 0x00000000 + nreloc: 0 + flags: 0x80000400 + reserved1: 0x00000000 + reserved2: 0x00000000 + reserved3: 0x00000000 + content: 554889E531C0C745FC000000005DC3 + - cmd: LC_SYMTAB + cmdsize: 24 + symoff: 328 + nsyms: 1 + stroff: 344 + strsize: 8 +LinkEditData: + NameList: + - n_strx: 1 + n_type: 0x0F + n_sect: 1 + n_desc: 0 + n_value: 0 + StringTable: + - '' + - _x86_64 + - '' +... + +## x86_64h.yaml +## CPUTYPE: CPU_TYPE_X86_64 +## CPUSUBTYPE: CPU_SUBTYPE_X86_64_H +--- !mach-o +FileHeader: + magic: 0xFEEDFACF + cputype: 0x01000007 + cpusubtype: 0x00000008 + filetype: 0x00000001 + ncmds: 2 + sizeofcmds: 176 + flags: 0x00002000 + reserved: 0x00000000 +LoadCommands: + - cmd: LC_SEGMENT_64 + cmdsize: 152 + segname: '' + vmaddr: 0 + vmsize: 15 + fileoff: 312 + filesize: 15 + maxprot: 7 + initprot: 7 + nsects: 1 + flags: 0 + Sections: + - sectname: __text + segname: __TEXT + addr: 0x0000000000000000 + size: 15 + offset: 0x00000138 + align: 4 + reloff: 0x00000000 + nreloc: 0 + flags: 0x80000400 + reserved1: 0x00000000 + reserved2: 0x00000000 + reserved3: 0x00000000 + content: 554889E531C0C745FC000000005DC3 + - cmd: LC_SYMTAB + cmdsize: 24 + symoff: 328 + nsyms: 1 + stroff: 344 + strsize: 8 +LinkEditData: + NameList: + - n_strx: 1 + n_type: 0x0F + n_sect: 1 + n_desc: 0 + n_value: 0 + StringTable: + - '' + - _x86_64_h + - '' +... diff --git a/llvm/test/tools/llvm-libtool-darwin/universal-file-flattening.test b/llvm/test/tools/llvm-libtool-darwin/universal-file-flattening.test new file mode 100644 index 00000000000000..cd1dfc99858f21 --- /dev/null +++ b/llvm/test/tools/llvm-libtool-darwin/universal-file-flattening.test @@ -0,0 +1,240 @@ +## This test checks that a universal file is flattened correctly. 
+ +# RUN: yaml2obj %s -o %t-universal.o +# RUN: yaml2obj %S/Inputs/input1.yaml -o %t-input1.o +# RUN: yaml2obj %S/Inputs/input2.yaml -o %t-input2.o + +# RUN: llvm-libtool-darwin -static -o %t.lib %t-universal.o -arch_only arm64 + +## Check that the binary is present: +# RUN: llvm-ar t %t.lib | \ +# RUN: FileCheck %s --check-prefix=CHECK-NAMES --implicit-check-not={{.}} -DPREFIX=%basename_t.tmp + +# CHECK-NAMES: [[PREFIX]]-universal.o + +## Check that symbols are present: +# RUN: llvm-nm --print-armap %t.lib | \ +# RUN: FileCheck %s --check-prefix=CHECK-SYMBOLS -DPREFIX=%basename_t.tmp --match-full-lines + +# CHECK-SYMBOLS: Archive map +# CHECK-SYMBOLS-NEXT: _arm64 in [[PREFIX]]-universal.o +# CHECK-SYMBOLS-EMPTY: + +## Check that the output archive is in Darwin format: +# RUN: llvm-objdump --macho --archive-headers %t.lib | \ +# RUN: FileCheck %s --check-prefix=FORMAT -DPREFIX=%basename_t.tmp -DARCHIVE=%t.lib + +# FORMAT: Archive : [[ARCHIVE]] +# FORMAT-NEXT: __.SYMDEF +# FORMAT-NEXT: [[PREFIX]]-universal.o +# FORMAT-NOT: {{.}} + +## Passing both a universal file and an object file: +# RUN: llvm-libtool-darwin -static -o %t.lib %t-universal.o %t-input1.o -arch_only x86_64 +# RUN: llvm-ar t %t.lib | \ +# RUN: FileCheck %s --check-prefix=BOTH-NAMES --implicit-check-not={{.}} -DPREFIX=%basename_t.tmp +# RUN: llvm-nm --print-armap %t.lib | \ +# RUN: FileCheck %s --check-prefix=BOTH-SYMBOLS -DPREFIX=%basename_t.tmp --match-full-lines + +# BOTH-NAMES: [[PREFIX]]-universal.o +# BOTH-NAMES-NEXT: [[PREFIX]]-input1.o + +# BOTH-SYMBOLS: Archive map +# BOTH-SYMBOLS-NEXT: _x86_64 in [[PREFIX]]-universal.o +# BOTH-SYMBOLS-NEXT: _symbol1 in [[PREFIX]]-input1.o +# BOTH-SYMBOLS-EMPTY: + +## Passing both a universal file and an object file but filtering out the object file: +# RUN: llvm-libtool-darwin -static -o %t.lib %t-universal.o %t-input1.o -arch_only arm64 +# RUN: llvm-ar t %t.lib | \ +# RUN: FileCheck %s --check-prefix=CHECK-NAMES --implicit-check-not={{.}} 
-DPREFIX=%basename_t.tmp +# RUN: llvm-nm --print-armap %t.lib | \ +# RUN: FileCheck %s --check-prefix=CHECK-SYMBOLS -DPREFIX=%basename_t.tmp --match-full-lines + +## Universal file containing an archive: +# RUN: rm -f %t.ar +# RUN: llvm-ar cr %t.ar %t-input1.o %t-input2.o +# RUN: llvm-lipo %t.ar -create -output %t-fat-with-archive.o +# RUN: llvm-libtool-darwin -static -o %t.lib %t-fat-with-archive.o -arch_only x86_64 +# RUN: llvm-ar t %t.lib | \ +# RUN: FileCheck %s --check-prefix=ARCHIVE-NAMES --implicit-check-not={{.}} -DPREFIX=%basename_t.tmp +# RUN: llvm-nm --print-armap %t.lib | \ +# RUN: FileCheck %s --check-prefix=ARCHIVE-SYMBOLS -DPREFIX=%basename_t.tmp --match-full-lines + +# ARCHIVE-NAMES: [[PREFIX]]-input1.o +# ARCHIVE-NAMES-NEXT: [[PREFIX]]-input2.o + +# ARCHIVE-SYMBOLS: Archive map +# ARCHIVE-SYMBOLS-NEXT: _symbol1 in [[PREFIX]]-input1.o +# ARCHIVE-SYMBOLS-NEXT: _symbol2 in [[PREFIX]]-input2.o +# ARCHIVE-SYMBOLS-EMPTY: + +## Allow arch_only to be specified more than once (pick the last one): +# RUN: llvm-libtool-darwin -static -o %t.lib %t-universal.o -arch_only arm64 -arch_only x86_64 +# RUN: llvm-ar t %t.lib | \ +# RUN: FileCheck %s --check-prefix=DOUBLE-NAMES --implicit-check-not={{.}} -DPREFIX=%basename_t.tmp +# RUN: llvm-nm --print-armap %t.lib | \ +# RUN: FileCheck %s --check-prefix=DOUBLE-SYMBOLS -DPREFIX=%basename_t.tmp --match-full-lines + +# DOUBLE-NAMES: [[PREFIX]]-universal.o + +# DOUBLE-SYMBOLS: Archive map +# DOUBLE-SYMBOLS-NEXT: _x86_64 in [[PREFIX]]-universal.o +# DOUBLE-SYMBOLS-EMPTY: + +## Invalid architecture: +# RUN: not llvm-libtool-darwin -static -o %t.lib %t-universal.o -arch_only arch101 2>&1 | \ +# RUN: FileCheck %s --check-prefix=INVALID-ARCH + +# INVALID-ARCH: invalid architecture 'arch101': valid architecture names are + +## Empty architecture: +# RUN: not llvm-libtool-darwin -static -o %t.lib %t-universal.o -arch_only "" 2>&1 | \ +# RUN: FileCheck %s --check-prefix=EMPTY-ARCH + +# EMPTY-ARCH: invalid architecture '': valid 
architecture names are + +## Missing architecture: +# RUN: not llvm-libtool-darwin -static -o %t.lib %t-universal.o -arch_only ppc 2>&1 | \ +# RUN: FileCheck %s --check-prefix=MISSING-ARCH + +# MISSING-ARCH: error: no library created (no object files in input files matching -arch_only ppc) + +## arch_only missing argument: +# RUN: not llvm-libtool-darwin -static -o %t.lib %t-universal.o -arch_only 2>&1 | \ +# RUN: FileCheck %s --check-prefix=REQUIRE-ARCH + +# REQUIRE-ARCH: for the --arch_only option: requires a value! + +## x86_64-arm64-universal.yaml +--- !fat-mach-o +FatHeader: + magic: 0xCAFEBABE + nfat_arch: 2 +FatArchs: + - cputype: 0x01000007 + cpusubtype: 0x00000003 + offset: 0x0000000000001000 + size: 352 + align: 12 + - cputype: 0x0100000C + cpusubtype: 0x00000000 + offset: 0x0000000000004000 + size: 384 + align: 14 +Slices: + - !mach-o + FileHeader: + magic: 0xFEEDFACF + cputype: 0x01000007 + cpusubtype: 0x00000003 + filetype: 0x00000001 + ncmds: 2 + sizeofcmds: 176 + flags: 0x00002000 + reserved: 0x00000000 + LoadCommands: + - cmd: LC_SEGMENT_64 + cmdsize: 152 + segname: '' + vmaddr: 0 + vmsize: 15 + fileoff: 312 + filesize: 15 + maxprot: 7 + initprot: 7 + nsects: 1 + flags: 0 + Sections: + - sectname: __text + segname: __TEXT + addr: 0x0000000000000000 + size: 15 + offset: 0x00000138 + align: 4 + reloff: 0x00000000 + nreloc: 0 + flags: 0x80000400 + reserved1: 0x00000000 + reserved2: 0x00000000 + reserved3: 0x00000000 + content: 554889E531C0C745FC000000005DC3 + - cmd: LC_SYMTAB + cmdsize: 24 + symoff: 328 + nsyms: 1 + stroff: 344 + strsize: 8 + LinkEditData: + NameList: + - n_strx: 1 + n_type: 0x0F + n_sect: 1 + n_desc: 0 + n_value: 0 + StringTable: + - '' + - _x86_64 + - '' + - !mach-o + FileHeader: + magic: 0xFEEDFACF + cputype: 0x0100000C + cpusubtype: 0x00000000 + filetype: 0x00000001 + ncmds: 2 + sizeofcmds: 176 + flags: 0x00002000 + reserved: 0x00000000 + LoadCommands: + - cmd: LC_SEGMENT_64 + cmdsize: 152 + segname: '' + vmaddr: 0 + vmsize: 24 + 
fileoff: 312 + filesize: 24 + maxprot: 7 + initprot: 7 + nsects: 1 + flags: 0 + Sections: + - sectname: __text + segname: __TEXT + addr: 0x0000000000000000 + size: 24 + offset: 0x00000138 + align: 2 + reloff: 0x00000000 + nreloc: 0 + flags: 0x80000400 + reserved1: 0x00000000 + reserved2: 0x00000000 + reserved3: 0x00000000 + content: FF4300D1FF0F00B908008052E00308AAFF430091C0035FD6 + - cmd: LC_SYMTAB + cmdsize: 24 + symoff: 336 + nsyms: 2 + stroff: 368 + strsize: 16 + LinkEditData: + NameList: + - n_strx: 7 + n_type: 0x0E + n_sect: 1 + n_desc: 0 + n_value: 0 + - n_strx: 1 + n_type: 0x0F + n_sect: 1 + n_desc: 0 + n_value: 0 + StringTable: + - '' + - _arm64 + - ltmp0 + - '' + - '' + - '' +... diff --git a/llvm/tools/llvm-libtool-darwin/CMakeLists.txt b/llvm/tools/llvm-libtool-darwin/CMakeLists.txt index eb83fa1a3ee935..8e2421f1f3bf74 100644 --- a/llvm/tools/llvm-libtool-darwin/CMakeLists.txt +++ b/llvm/tools/llvm-libtool-darwin/CMakeLists.txt @@ -2,6 +2,7 @@ set(LLVM_LINK_COMPONENTS BinaryFormat Object Support + TextAPI ) add_llvm_tool(llvm-libtool-darwin diff --git a/llvm/tools/llvm-libtool-darwin/LLVMBuild.txt b/llvm/tools/llvm-libtool-darwin/LLVMBuild.txt index 3c557a3aaf61fd..a31a3a1400f5d3 100644 --- a/llvm/tools/llvm-libtool-darwin/LLVMBuild.txt +++ b/llvm/tools/llvm-libtool-darwin/LLVMBuild.txt @@ -17,4 +17,4 @@ type = Tool name = llvm-libtool-darwin parent = Tools -required_libraries = BinaryFormat Object Support +required_libraries = BinaryFormat Object Support TextAPI diff --git a/llvm/tools/llvm-libtool-darwin/llvm-libtool-darwin.cpp b/llvm/tools/llvm-libtool-darwin/llvm-libtool-darwin.cpp index 871a8036dab07b..5e0c356985db15 100644 --- a/llvm/tools/llvm-libtool-darwin/llvm-libtool-darwin.cpp +++ b/llvm/tools/llvm-libtool-darwin/llvm-libtool-darwin.cpp @@ -13,11 +13,13 @@ #include "llvm/BinaryFormat/Magic.h" #include "llvm/Object/ArchiveWriter.h" #include "llvm/Object/MachO.h" +#include "llvm/Object/MachOUniversal.h" #include "llvm/Object/ObjectFile.h" 
#include "llvm/Support/CommandLine.h" #include "llvm/Support/InitLLVM.h" #include "llvm/Support/LineIterator.h" #include "llvm/Support/WithColor.h" +#include "llvm/TextAPI/MachO/Architecture.h" using namespace llvm; using namespace llvm::object; @@ -33,6 +35,10 @@ static cl::list InputFiles(cl::Positional, cl::ZeroOrMore, cl::cat(LibtoolCategory)); +static cl::opt ArchType( + "arch_only", cl::desc("Specify architecture type for output library"), + cl::value_desc("arch_type"), cl::ZeroOrMore, cl::cat(LibtoolCategory)); + enum class Operation { Static }; static cl::opt LibraryOperation( @@ -92,7 +98,51 @@ static Error processFileList() { return Error::success(); } -static Error verifyMachOObject(const NewArchiveMember &Member) { +static Error validateArchitectureName(StringRef ArchitectureName) { + if (!MachOObjectFile::isValidArch(ArchitectureName)) { + std::string Buf; + raw_string_ostream OS(Buf); + for (StringRef Arch : MachOObjectFile::getValidArchs()) + OS << Arch << " "; + + return createStringError( + std::errc::invalid_argument, + "invalid architecture '%s': valid architecture names are %s", + ArchitectureName.str().c_str(), OS.str().c_str()); + } + return Error::success(); +} + +// Check that a file's architecture [FileCPUType, FileCPUSubtype] +// matches the architecture specified under -arch_only flag. 
+static bool acceptFileArch(uint32_t FileCPUType, uint32_t FileCPUSubtype) { + uint32_t ArchCPUType, ArchCPUSubtype; + std::tie(ArchCPUType, ArchCPUSubtype) = MachO::getCPUTypeFromArchitecture( + MachO::getArchitectureFromName(ArchType)); + + if (ArchCPUType != FileCPUType) + return false; + + switch (ArchCPUType) { + case MachO::CPU_TYPE_ARM: + case MachO::CPU_TYPE_ARM64_32: + case MachO::CPU_TYPE_X86_64: + return ArchCPUSubtype == FileCPUSubtype; + + case MachO::CPU_TYPE_ARM64: + if (ArchCPUSubtype == MachO::CPU_SUBTYPE_ARM64_ALL) + return FileCPUSubtype == MachO::CPU_SUBTYPE_ARM64_ALL || + FileCPUSubtype == MachO::CPU_SUBTYPE_ARM64_V8; + else + return ArchCPUSubtype == FileCPUSubtype; + + default: + return true; + } +} + +static Error verifyAndAddMachOObject(std::vector &Members, + NewArchiveMember Member) { auto MBRef = Member.Buf->getMemBufferRef(); Expected> ObjOrErr = object::ObjectFile::createObjectFile(MBRef); @@ -107,6 +157,18 @@ static Error verifyMachOObject(const NewArchiveMember &Member) { "'%s': format not supported", Member.MemberName.data()); + auto *O = dyn_cast(ObjOrErr->get()); + uint32_t FileCPUType, FileCPUSubtype; + std::tie(FileCPUType, FileCPUSubtype) = MachO::getCPUTypeFromArchitecture( + MachO::getArchitectureFromName(O->getArchTriple().getArchName())); + + // If -arch_only is specified then skip this file if it doesn't match + // the architecture specified. + if (!ArchType.empty() && !acceptFileArch(FileCPUType, FileCPUSubtype)) { + return Error::success(); + } + + Members.push_back(std::move(Member)); return Error::success(); } @@ -117,18 +179,94 @@ static Error addChildMember(std::vector &Members, if (!NMOrErr) return NMOrErr.takeError(); - // Verify that Member is a Mach-O object file. 
- if (Error E = verifyMachOObject(*NMOrErr)) + if (Error E = verifyAndAddMachOObject(Members, std::move(*NMOrErr))) return E; - Members.push_back(std::move(*NMOrErr)); + return Error::success(); +} + +static Error processArchive(std::vector &Members, + object::Archive &Lib, StringRef FileName, + const Config &C) { + Error Err = Error::success(); + for (const object::Archive::Child &Child : Lib.children(Err)) + if (Error E = addChildMember(Members, Child, C)) + return createFileError(FileName, std::move(E)); + if (Err) + return createFileError(FileName, std::move(Err)); + return Error::success(); } static Error -addMember(std::vector &Members, StringRef FileName, - std::vector> &ArchiveBuffers, - const Config &C) { +addArchiveMembers(std::vector &Members, + std::vector> &ArchiveBuffers, + NewArchiveMember NM, StringRef FileName, const Config &C) { + Expected> LibOrErr = + object::Archive::create(NM.Buf->getMemBufferRef()); + if (!LibOrErr) + return createFileError(FileName, LibOrErr.takeError()); + + if (Error E = processArchive(Members, **LibOrErr, FileName, C)) + return E; + + // Update vector ArchiveBuffers with the MemoryBuffers to transfer + // ownership. 
+ ArchiveBuffers.push_back(std::move(NM.Buf)); + return Error::success(); +} + +static Error addUniversalMembers( + std::vector &Members, + std::vector> &UniversalBuffers, + NewArchiveMember NM, StringRef FileName, const Config &C) { + Expected> BinaryOrErr = + MachOUniversalBinary::create(NM.Buf->getMemBufferRef()); + if (!BinaryOrErr) + return createFileError(FileName, BinaryOrErr.takeError()); + + auto *UO = BinaryOrErr->get(); + for (const MachOUniversalBinary::ObjectForArch &O : UO->objects()) { + + Expected> MachOObjOrErr = + O.getAsObjectFile(); + if (MachOObjOrErr) { + NewArchiveMember NewMember = + NewArchiveMember(MachOObjOrErr->get()->getMemoryBufferRef()); + NewMember.MemberName = sys::path::filename(NewMember.MemberName); + + if (Error E = verifyAndAddMachOObject(Members, std::move(NewMember))) + return E; + continue; + } + + Expected> ArchiveOrError = O.getAsArchive(); + if (ArchiveOrError) { + // A universal file member can either be a MachOObjectFile or an Archive. + // In case we can successfully cast the member as an Archive, it is safe + // to throw away the error generated due to casting the object as a + // MachOObjectFile. + consumeError(MachOObjOrErr.takeError()); + + if (Error E = processArchive(Members, **ArchiveOrError, FileName, C)) + return E; + continue; + } + + Error CombinedError = + joinErrors(ArchiveOrError.takeError(), MachOObjOrErr.takeError()); + return createFileError(FileName, std::move(CombinedError)); + } + + // Update vector UniversalBuffers with the MemoryBuffers to transfer + // ownership. 
+ UniversalBuffers.push_back(std::move(NM.Buf)); + return Error::success(); +} + +static Error addMember(std::vector &Members, + std::vector> &FileBuffers, + StringRef FileName, const Config &C) { Expected NMOrErr = NewArchiveMember::getFile(FileName, C.Deterministic); if (!NMOrErr) @@ -137,43 +275,36 @@ addMember(std::vector &Members, StringRef FileName, // For regular archives, use the basename of the object path for the member // name. NMOrErr->MemberName = sys::path::filename(NMOrErr->MemberName); + file_magic Magic = identify_magic(NMOrErr->Buf->getBuffer()); // Flatten archives. - if (identify_magic(NMOrErr->Buf->getBuffer()) == file_magic::archive) { - Expected> LibOrErr = - object::Archive::create(NMOrErr->Buf->getMemBufferRef()); - if (!LibOrErr) - return createFileError(FileName, LibOrErr.takeError()); - object::Archive &Lib = **LibOrErr; - - Error Err = Error::success(); - for (const object::Archive::Child &Child : Lib.children(Err)) - if (Error E = addChildMember(Members, Child, C)) - return createFileError(FileName, std::move(E)); - if (Err) - return createFileError(FileName, std::move(Err)); - - // Update vector ArchiveBuffers with the MemoryBuffers to transfer - // ownership. - ArchiveBuffers.push_back(std::move(NMOrErr->Buf)); - return Error::success(); - } + if (Magic == file_magic::archive) + return addArchiveMembers(Members, FileBuffers, std::move(*NMOrErr), + FileName, C); - // Verify that Member is a Mach-O object file. - if (Error E = verifyMachOObject(*NMOrErr)) - return E; + // Flatten universal files. 
+ if (Magic == file_magic::macho_universal_binary) + return addUniversalMembers(Members, FileBuffers, std::move(*NMOrErr), + FileName, C); - Members.push_back(std::move(*NMOrErr)); + if (Error E = verifyAndAddMachOObject(Members, std::move(*NMOrErr))) + return E; return Error::success(); } static Error createStaticLibrary(const Config &C) { std::vector NewMembers; - std::vector> ArchiveBuffers; - for (StringRef Member : InputFiles) - if (Error E = addMember(NewMembers, Member, ArchiveBuffers, C)) + std::vector> FileBuffers; + for (StringRef FileName : InputFiles) + if (Error E = addMember(NewMembers, FileBuffers, FileName, C)) return E; + if (NewMembers.empty() && !ArchType.empty()) + return createStringError(std::errc::invalid_argument, + "no library created (no object files in input " + "files matching -arch_only %s)", + ArchType.c_str()); + if (Error E = writeArchive(OutputFile, NewMembers, /*WriteSymtab=*/true, @@ -201,6 +332,10 @@ static Expected parseCommandLine(int Argc, char **Argv) { return createStringError(std::errc::invalid_argument, "no input files specified"); + if (ArchType.getNumOccurrences()) + if (Error E = validateArchitectureName(ArchType)) + return std::move(E); + return C; } From adaadbfeac98ab9d5ce34b8bb2ceedddc5dc1fd4 Mon Sep 17 00:00:00 2001 From: Lang Hames Date: Wed, 12 Aug 2020 17:43:28 -0700 Subject: [PATCH 05/23] [JITLink][MachO] Return an error when MachO TLV relocations are encountered. MachO TLV relocations aren't supported yet. Error out rather than falling through to llvm_unreachable. 
--- llvm/lib/ExecutionEngine/JITLink/MachO_x86_64.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/llvm/lib/ExecutionEngine/JITLink/MachO_x86_64.cpp b/llvm/lib/ExecutionEngine/JITLink/MachO_x86_64.cpp index 134b01f0f6560c..a70b0dcd8f8574 100644 --- a/llvm/lib/ExecutionEngine/JITLink/MachO_x86_64.cpp +++ b/llvm/lib/ExecutionEngine/JITLink/MachO_x86_64.cpp @@ -339,6 +339,9 @@ class MachOLinkGraphBuilder_x86_64 : public MachOLinkGraphBuilder { assert(TargetSymbol && "No target symbol from parsePairRelocation?"); break; } + case PCRel32TLV: + return make_error( + "MachO TLV relocations not yet supported"); default: llvm_unreachable("Special relocation kind should not appear in " "mach-o file"); From e137b550587a85b0d9c9c539edc79de0122b6946 Mon Sep 17 00:00:00 2001 From: Lang Hames Date: Wed, 12 Aug 2020 20:44:05 -0700 Subject: [PATCH 06/23] [llvm-jitlink] Don't demote unreferenced definitions in -harness mode. Demoting unreferenced externals is unsafe if multiple interdependent test objects are used, including objects loaded from archives. --- llvm/tools/llvm-jitlink/llvm-jitlink.cpp | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/llvm/tools/llvm-jitlink/llvm-jitlink.cpp b/llvm/tools/llvm-jitlink/llvm-jitlink.cpp index 8e651d903a3b72..49424bf9774607 100644 --- a/llvm/tools/llvm-jitlink/llvm-jitlink.cpp +++ b/llvm/tools/llvm-jitlink/llvm-jitlink.cpp @@ -187,7 +187,7 @@ static Error applyHarnessPromotions(Session &S, LinkGraph &G) { // If this graph is part of the test then promote any symbols referenced by // the harness to default scope, remove all symbols that clash with harness - // definitions, demote all other definitions. + // definitions. 
std::vector DefinitionsToRemove; for (auto *Sym : G.defined_symbols()) { @@ -219,10 +219,6 @@ static Error applyHarnessPromotions(Session &S, LinkGraph &G) { } else if (S.HarnessDefinitions.count(Sym->getName())) { LLVM_DEBUG(dbgs() << " Externalizing " << Sym->getName() << "\n"); DefinitionsToRemove.push_back(Sym); - } else { - LLVM_DEBUG(dbgs() << " Demoting " << Sym->getName() << "\n"); - Sym->setScope(Scope::Local); - Sym->setLive(false); } } @@ -521,7 +517,8 @@ Error LLVMJITLinkObjectLinkingLayer::add(JITDylib &JD, return SymFlagsOrErr.takeError(); // Skip symbols not defined in this object file. - if (*SymFlagsOrErr & object::BasicSymbolRef::SF_Undefined) + if ((*SymFlagsOrErr & object::BasicSymbolRef::SF_Undefined) || + !(*SymFlagsOrErr & object::BasicSymbolRef::SF_Global)) continue; auto Name = Sym.getName(); @@ -551,10 +548,8 @@ Error LLVMJITLinkObjectLinkingLayer::add(JITDylib &JD, *SymFlags &= ~JITSymbolFlags::Exported; } else if (S.HarnessExternals.count(*Name)) { *SymFlags |= JITSymbolFlags::Exported; - } else { - // Skip symbols that aren't in the HarnessExternals set. 
+ } else if (S.HarnessDefinitions.count(*Name)) continue; - } auto InternedName = S.ES.intern(*Name); SymbolFlags[InternedName] = std::move(*SymFlags); From 2f7adf5ee37934ee5769276644fcafbc9d4dcda3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?D=C3=A1vid=20Bolvansk=C3=BD?= Date: Thu, 13 Aug 2020 20:19:21 +0200 Subject: [PATCH 07/23] [Diagnostics] Skip var decl of structs for -Wstring-concatenation --- clang/lib/Sema/SemaDecl.cpp | 5 +++-- clang/test/Sema/string-concat.c | 6 ++++++ 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp index fee748bf9f9da3..ab14963372109e 100644 --- a/clang/lib/Sema/SemaDecl.cpp +++ b/clang/lib/Sema/SemaDecl.cpp @@ -12886,7 +12886,8 @@ void Sema::CheckCompleteVariableDeclaration(VarDecl *var) { AttributeCommonInfo::AS_Pragma)); } - if (var->hasInit() && isa(var->getInit())) { + if (!var->getType()->isStructureType() && var->hasInit() && + isa(var->getInit())) { const auto *ILE = cast(var->getInit()); unsigned NumInits = ILE->getNumInits(); if (NumInits > 2) @@ -12927,7 +12928,7 @@ void Sema::CheckCompleteVariableDeclaration(VarDecl *var) { Diag(SL->getBeginLoc(), diag::note_concatenated_string_literal_silence); } - // Warn just once. + // In any case, stop now. break; } } diff --git a/clang/test/Sema/string-concat.c b/clang/test/Sema/string-concat.c index b6bae9c95b0b09..63abf100c020f0 100644 --- a/clang/test/Sema/string-concat.c +++ b/clang/test/Sema/string-concat.c @@ -148,6 +148,12 @@ const A not_warn6 = A{"", ""}; #endif +static A not_warn7 = {"", + + "" + "", + ""}; + // Do not warn when all the elements in the initializer are concatenated together. 
const char *all_elems_in_init_concatenated[] = {"a" "b" "c"}; From 3944d3df4f062db1e1fb1deab24e4c40bd5c8095 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?D=C3=A1vid=20Bolvansk=C3=BD?= Date: Thu, 13 Aug 2020 20:21:19 +0200 Subject: [PATCH 08/23] [Tests] Removed debug copy command --- compiler-rt/test/profile/Linux/counter_promo_for.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/compiler-rt/test/profile/Linux/counter_promo_for.c b/compiler-rt/test/profile/Linux/counter_promo_for.c index 7cab70b08773bc..464c97cb7dd3d8 100644 --- a/compiler-rt/test/profile/Linux/counter_promo_for.c +++ b/compiler-rt/test/profile/Linux/counter_promo_for.c @@ -2,7 +2,7 @@ // RUN: rm -fr %t.nopromo.prof // RUN: %clang_pgogen=%t.promo.prof/ -o %t.promo.gen -O2 %s // RUN: %clang_pgogen=%t.promo.prof/ -o %t.promo.gen.ll -emit-llvm -S -O2 %s -// RUN: cp %t.promo.gen.ll /tmp/d.txt ; cat %t.promo.gen.ll | FileCheck --check-prefix=PROMO %s +// RUN: cat %t.promo.gen.ll | FileCheck --check-prefix=PROMO %s // RUN: %run %t.promo.gen // RUN: llvm-profdata merge -o %t.promo.profdata %t.promo.prof/ // RUN: llvm-profdata show --counts --all-functions %t.promo.profdata > %t.promo.dump From 1a8c9cd1d96e680a3c519e9b3295ba9c4a34736d Mon Sep 17 00:00:00 2001 From: Aditya Kumar <1894981+hiraditya@users.noreply.github.com> Date: Thu, 13 Aug 2020 00:06:22 -0700 Subject: [PATCH 09/23] Fix PR45442: Bail out when MemorySSA information is not available Reviewers: sebpop, uabelho, fhahn Reviewed by: fhahn Differential Revision: https://reviews.llvm.org/D85881 --- llvm/lib/Transforms/Scalar/GVNHoist.cpp | 11 ++++------- llvm/test/Transforms/GVNHoist/pr45442.ll | 14 +++++++------- 2 files changed, 11 insertions(+), 14 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/GVNHoist.cpp b/llvm/lib/Transforms/Scalar/GVNHoist.cpp index 07f40de6a1f2f5..74c4a480098a31 100644 --- a/llvm/lib/Transforms/Scalar/GVNHoist.cpp +++ b/llvm/lib/Transforms/Scalar/GVNHoist.cpp @@ -521,10 +521,6 @@ class GVNHoist { if 
(NewPt == OldPt) return true; - // MemoryUseDef information is not available, bail out. - if (!U) - return false; - const BasicBlock *NewBB = NewPt->getParent(); const BasicBlock *OldBB = OldPt->getParent(); const BasicBlock *UBB = U->getBlock(); @@ -609,9 +605,10 @@ class GVNHoist { if (safeToHoistScalar(BB, Insn->getParent(), NumBBsOnAllPaths)) Safe.push_back(CHI); } else { - MemoryUseOrDef *UD = MSSA->getMemoryAccess(Insn); - if (safeToHoistLdSt(BB->getTerminator(), Insn, UD, K, NumBBsOnAllPaths)) - Safe.push_back(CHI); + auto *T = BB->getTerminator(); + if (MemoryUseOrDef *UD = MSSA->getMemoryAccess(Insn)) + if (safeToHoistLdSt(T, Insn, UD, K, NumBBsOnAllPaths)) + Safe.push_back(CHI); } } } diff --git a/llvm/test/Transforms/GVNHoist/pr45442.ll b/llvm/test/Transforms/GVNHoist/pr45442.ll index db3ecb2a8d878c..fe6a8be01507df 100644 --- a/llvm/test/Transforms/GVNHoist/pr45442.ll +++ b/llvm/test/Transforms/GVNHoist/pr45442.ll @@ -1,32 +1,32 @@ ; RUN: opt < %s -gvn-hoist -S | FileCheck %s ; gvn-hoist shouldn't crash in this case. -; CHECK-LABEL: @func() +; CHECK-LABEL: @func(i1 %b) ; CHECK: entry: ; CHECK-NEXT: br i1 ; CHECK: bb1: -; CHECK-NEXT: unreachable +; CHECK-NEXT: ret void ; CHECK: bb2: ; CHECK-NEXT: call ; CHECK-NEXT: call -; CHECK-NEXT: unreachable +; CHECK-NEXT: ret void define void @v_1_0() #0 { entry: ret void } -define void @func() { +define void @func(i1 %b) { entry: - br i1 undef, label %bb1, label %bb2 + br i1 %b, label %bb1, label %bb2 bb1: - unreachable + ret void bb2: call void @v_1_0() call void @v_1_0() - unreachable + ret void } attributes #0 = { nounwind readonly } From d25cb5a8a23ec9192e32a318eb565e956b87f553 Mon Sep 17 00:00:00 2001 From: Stanislav Mekhanoshin Date: Thu, 13 Aug 2020 11:22:01 -0700 Subject: [PATCH 10/23] [AMDGPU] Fix misleading SDWA verifier error. NFC. The old error from GFX9 shall be updated to GFX9+. 
--- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index 0cd7acb7a789b9..1221b927b58379 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -3695,7 +3695,8 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI, } else { // No immediates on GFX9 if (!MO.isReg()) { - ErrInfo = "Only reg allowed as operands in SDWA instructions on GFX9"; + ErrInfo = + "Only reg allowed as operands in SDWA instructions on GFX9+"; return false; } } From 0462aef5f359497b29bc811f94e8d6f7c1f2923e Mon Sep 17 00:00:00 2001 From: Stanislav Mekhanoshin Date: Thu, 13 Aug 2020 11:15:44 -0700 Subject: [PATCH 11/23] [AMDGPU] Inhibit SDWA if target instruction has FI Differential Revision: https://reviews.llvm.org/D85918 --- llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp | 10 +++++++ llvm/test/CodeGen/AMDGPU/sdwa-stack.mir | 32 +++++++++++++++++++++++ 2 files changed, 42 insertions(+) create mode 100644 llvm/test/CodeGen/AMDGPU/sdwa-stack.mir diff --git a/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp b/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp index 87bacc5880ac88..4774041f2b820e 100644 --- a/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp +++ b/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp @@ -992,6 +992,16 @@ bool SIPeepholeSDWA::isConvertibleToSDWA(MachineInstr &MI, if (Opc == AMDGPU::V_CNDMASK_B32_e32) return false; + if (MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0)) { + if (!Src0->isReg() && !Src0->isImm()) + return false; + } + + if (MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1)) { + if (!Src1->isReg() && !Src1->isImm()) + return false; + } + return true; } diff --git a/llvm/test/CodeGen/AMDGPU/sdwa-stack.mir b/llvm/test/CodeGen/AMDGPU/sdwa-stack.mir new file mode 100644 index 00000000000000..d804605c5d2a04 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/sdwa-stack.mir @@ 
-0,0 +1,32 @@ +# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=si-peephole-sdwa -verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN %s + +# Do not fold stack objects into SDWA. + +--- +# GCN-LABEL: name: sdwa_stack_object_src0 +# GCN: V_ADD_U32_e64 %stack.0, killed %1 +name: sdwa_stack_object_src0 +stack: + - { id: 0, type: default, offset: 0, size: 32, alignment: 4, stack-id: default } +body: | + bb.0: + %0:vgpr_32 = IMPLICIT_DEF + %1:vgpr_32 = V_AND_B32_e32 255, %0, implicit $exec + %2:vgpr_32 = V_ADD_U32_e64 %stack.0, killed %1, 0, implicit $exec + S_ENDPGM 0, implicit %2 + +... +--- +name: sdwa_stack_object_src1 +# GCN-LABEL: name: sdwa_stack_object_src1 +# GCN: V_ADD_U32_e64 killed %1, %stack.0 +stack: + - { id: 0, type: default, offset: 0, size: 32, alignment: 4, stack-id: default } +body: | + bb.0: + %0:vgpr_32 = IMPLICIT_DEF + %1:vgpr_32 = V_AND_B32_e32 255, %0, implicit $exec + %2:vgpr_32 = V_ADD_U32_e64 killed %1, %stack.0, 0, implicit $exec + S_ENDPGM 0, implicit %2 + +... From 759f9a7acdfc33afd5d8bd1c33446c1b4d721388 Mon Sep 17 00:00:00 2001 From: Lang Hames Date: Thu, 13 Aug 2020 11:44:42 -0700 Subject: [PATCH 12/23] Revert "[llvm-jitlink] Don't demote unreferenced definitions in -harness mode." This reverts commit e137b550587a85b0d9c9c539edc79de0122b6946. This commit broke a test case. Reverting while I investigate. --- llvm/tools/llvm-jitlink/llvm-jitlink.cpp | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/llvm/tools/llvm-jitlink/llvm-jitlink.cpp b/llvm/tools/llvm-jitlink/llvm-jitlink.cpp index 49424bf9774607..8e651d903a3b72 100644 --- a/llvm/tools/llvm-jitlink/llvm-jitlink.cpp +++ b/llvm/tools/llvm-jitlink/llvm-jitlink.cpp @@ -187,7 +187,7 @@ static Error applyHarnessPromotions(Session &S, LinkGraph &G) { // If this graph is part of the test then promote any symbols referenced by // the harness to default scope, remove all symbols that clash with harness - // definitions. 
+ // definitions, demote all other definitions. std::vector DefinitionsToRemove; for (auto *Sym : G.defined_symbols()) { @@ -219,6 +219,10 @@ static Error applyHarnessPromotions(Session &S, LinkGraph &G) { } else if (S.HarnessDefinitions.count(Sym->getName())) { LLVM_DEBUG(dbgs() << " Externalizing " << Sym->getName() << "\n"); DefinitionsToRemove.push_back(Sym); + } else { + LLVM_DEBUG(dbgs() << " Demoting " << Sym->getName() << "\n"); + Sym->setScope(Scope::Local); + Sym->setLive(false); } } @@ -517,8 +521,7 @@ Error LLVMJITLinkObjectLinkingLayer::add(JITDylib &JD, return SymFlagsOrErr.takeError(); // Skip symbols not defined in this object file. - if ((*SymFlagsOrErr & object::BasicSymbolRef::SF_Undefined) || - !(*SymFlagsOrErr & object::BasicSymbolRef::SF_Global)) + if (*SymFlagsOrErr & object::BasicSymbolRef::SF_Undefined) continue; auto Name = Sym.getName(); @@ -548,8 +551,10 @@ Error LLVMJITLinkObjectLinkingLayer::add(JITDylib &JD, *SymFlags &= ~JITSymbolFlags::Exported; } else if (S.HarnessExternals.count(*Name)) { *SymFlags |= JITSymbolFlags::Exported; - } else if (S.HarnessDefinitions.count(*Name)) + } else { + // Skip symbols that aren't in the HarnessExternals set. continue; + } auto InternedName = S.ES.intern(*Name); SymbolFlags[InternedName] = std::move(*SymFlags); From d650cbc349ccc4f477568c2827f1bce650020058 Mon Sep 17 00:00:00 2001 From: Haowei Wu Date: Tue, 11 Aug 2020 11:44:22 -0700 Subject: [PATCH 13/23] [elfabi] Move llvm-elfabi related code to InterfaceStub library This change moves elfabi related code to llvm/InterfaceStub library so it can be shared by multiple llvm tools without causing cyclic dependencies. 
Differential Revision: https://reviews.llvm.org/D85678 --- .../llvm/InterfaceStub}/ELFObjHandler.h | 2 +- .../{TextAPI/ELF => InterfaceStub}/ELFStub.h | 8 ++- .../ELF => InterfaceStub}/TBEHandler.h | 2 +- llvm/lib/CMakeLists.txt | 1 + llvm/lib/InterfaceStub/CMakeLists.txt | 8 +++ .../InterfaceStub}/ELFObjHandler.cpp | 53 +++++++++---------- .../ELF => InterfaceStub}/ELFStub.cpp | 2 +- llvm/lib/InterfaceStub/LLVMBuild.txt | 21 ++++++++ .../ELF => InterfaceStub}/TBEHandler.cpp | 6 +-- llvm/lib/LLVMBuild.txt | 1 + llvm/lib/TextAPI/CMakeLists.txt | 2 - llvm/tools/llvm-elfabi/CMakeLists.txt | 2 +- llvm/tools/llvm-elfabi/LLVMBuild.txt | 2 +- llvm/tools/llvm-elfabi/llvm-elfabi.cpp | 6 +-- llvm/unittests/CMakeLists.txt | 1 + llvm/unittests/InterfaceStub/CMakeLists.txt | 9 ++++ .../ELFYAMLTest.cpp | 4 +- llvm/unittests/TextAPI/CMakeLists.txt | 1 - 18 files changed, 82 insertions(+), 49 deletions(-) rename llvm/{tools/llvm-elfabi => include/llvm/InterfaceStub}/ELFObjHandler.h (96%) rename llvm/include/llvm/{TextAPI/ELF => InterfaceStub}/ELFStub.h (93%) rename llvm/include/llvm/{TextAPI/ELF => InterfaceStub}/TBEHandler.h (100%) create mode 100644 llvm/lib/InterfaceStub/CMakeLists.txt rename llvm/{tools/llvm-elfabi => lib/InterfaceStub}/ELFObjHandler.cpp (91%) rename llvm/lib/{TextAPI/ELF => InterfaceStub}/ELFStub.cpp (95%) create mode 100644 llvm/lib/InterfaceStub/LLVMBuild.txt rename llvm/lib/{TextAPI/ELF => InterfaceStub}/TBEHandler.cpp (98%) create mode 100644 llvm/unittests/InterfaceStub/CMakeLists.txt rename llvm/unittests/{TextAPI => InterfaceStub}/ELFYAMLTest.cpp (98%) diff --git a/llvm/tools/llvm-elfabi/ELFObjHandler.h b/llvm/include/llvm/InterfaceStub/ELFObjHandler.h similarity index 96% rename from llvm/tools/llvm-elfabi/ELFObjHandler.h rename to llvm/include/llvm/InterfaceStub/ELFObjHandler.h index 97f0d68f4d4fbc..1ffd9a614eecd3 100644 --- a/llvm/tools/llvm-elfabi/ELFObjHandler.h +++ b/llvm/include/llvm/InterfaceStub/ELFObjHandler.h @@ -13,9 +13,9 @@ #ifndef 
LLVM_TOOLS_ELFABI_ELFOBJHANDLER_H #define LLVM_TOOLS_ELFABI_ELFOBJHANDLER_H +#include "llvm/InterfaceStub/ELFStub.h" #include "llvm/Object/ELFObjectFile.h" #include "llvm/Object/ELFTypes.h" -#include "llvm/TextAPI/ELF/ELFStub.h" namespace llvm { diff --git a/llvm/include/llvm/TextAPI/ELF/ELFStub.h b/llvm/include/llvm/InterfaceStub/ELFStub.h similarity index 93% rename from llvm/include/llvm/TextAPI/ELF/ELFStub.h rename to llvm/include/llvm/InterfaceStub/ELFStub.h index 76b2af12166289..7832c1c7413b39 100644 --- a/llvm/include/llvm/TextAPI/ELF/ELFStub.h +++ b/llvm/include/llvm/InterfaceStub/ELFStub.h @@ -16,8 +16,8 @@ #include "llvm/BinaryFormat/ELF.h" #include "llvm/Support/VersionTuple.h" -#include #include +#include namespace llvm { namespace elfabi { @@ -42,15 +42,13 @@ struct ELFSymbol { bool Undefined; bool Weak; Optional Warning; - bool operator<(const ELFSymbol &RHS) const { - return Name < RHS.Name; - } + bool operator<(const ELFSymbol &RHS) const { return Name < RHS.Name; } }; // A cumulative representation of ELF stubs. // Both textual and binary stubs will read into and write from this object. class ELFStub { -// TODO: Add support for symbol versioning. + // TODO: Add support for symbol versioning. 
public: VersionTuple TbeVersion; Optional SoName; diff --git a/llvm/include/llvm/TextAPI/ELF/TBEHandler.h b/llvm/include/llvm/InterfaceStub/TBEHandler.h similarity index 100% rename from llvm/include/llvm/TextAPI/ELF/TBEHandler.h rename to llvm/include/llvm/InterfaceStub/TBEHandler.h index 76484410987fa5..5c523eba037e7d 100644 --- a/llvm/include/llvm/TextAPI/ELF/TBEHandler.h +++ b/llvm/include/llvm/InterfaceStub/TBEHandler.h @@ -15,8 +15,8 @@ #ifndef LLVM_TEXTAPI_ELF_TBEHANDLER_H #define LLVM_TEXTAPI_ELF_TBEHANDLER_H -#include "llvm/Support/VersionTuple.h" #include "llvm/Support/Error.h" +#include "llvm/Support/VersionTuple.h" #include namespace llvm { diff --git a/llvm/lib/CMakeLists.txt b/llvm/lib/CMakeLists.txt index abe3ec59aec1aa..35d204d7d63e27 100644 --- a/llvm/lib/CMakeLists.txt +++ b/llvm/lib/CMakeLists.txt @@ -3,6 +3,7 @@ add_subdirectory(IR) add_subdirectory(FuzzMutate) +add_subdirectory(InterfaceStub) add_subdirectory(IRReader) add_subdirectory(CodeGen) add_subdirectory(BinaryFormat) diff --git a/llvm/lib/InterfaceStub/CMakeLists.txt b/llvm/lib/InterfaceStub/CMakeLists.txt new file mode 100644 index 00000000000000..be2529f6d60566 --- /dev/null +++ b/llvm/lib/InterfaceStub/CMakeLists.txt @@ -0,0 +1,8 @@ +add_llvm_component_library(LLVMInterfaceStub + ELFObjHandler.cpp + ELFStub.cpp + TBEHandler.cpp + + ADDITIONAL_HEADER_DIRS + "${LLVM_MAIN_INCLUDE_DIR}/llvm/InterfaceStub" +) diff --git a/llvm/tools/llvm-elfabi/ELFObjHandler.cpp b/llvm/lib/InterfaceStub/ELFObjHandler.cpp similarity index 91% rename from llvm/tools/llvm-elfabi/ELFObjHandler.cpp rename to llvm/lib/InterfaceStub/ELFObjHandler.cpp index 124fffbb9cf6a7..82e7a3c8b1baab 100644 --- a/llvm/tools/llvm-elfabi/ELFObjHandler.cpp +++ b/llvm/lib/InterfaceStub/ELFObjHandler.cpp @@ -6,14 +6,14 @@ // //===-----------------------------------------------------------------------===/ -#include "ELFObjHandler.h" +#include "llvm/InterfaceStub/ELFObjHandler.h" +#include "llvm/InterfaceStub/ELFStub.h" #include 
"llvm/Object/Binary.h" #include "llvm/Object/ELFObjectFile.h" #include "llvm/Object/ELFTypes.h" #include "llvm/Support/Errc.h" #include "llvm/Support/Error.h" #include "llvm/Support/MemoryBuffer.h" -#include "llvm/TextAPI/ELF/ELFStub.h" using llvm::MemoryBufferRef; using llvm::object::ELFObjectFile; @@ -128,19 +128,17 @@ static Error populateDynamic(DynamicEntries &Dyn, "Couldn't locate dynamic symbol table (no DT_SYMTAB entry)"); } if (Dyn.SONameOffset.hasValue() && *Dyn.SONameOffset >= Dyn.StrSize) { - return createStringError( - object_error::parse_failed, - "DT_SONAME string offset (0x%016" PRIx64 - ") outside of dynamic string table", - *Dyn.SONameOffset); + return createStringError(object_error::parse_failed, + "DT_SONAME string offset (0x%016" PRIx64 + ") outside of dynamic string table", + *Dyn.SONameOffset); } for (uint64_t Offset : Dyn.NeededLibNames) { if (Offset >= Dyn.StrSize) { - return createStringError( - object_error::parse_failed, - "DT_NEEDED string offset (0x%016" PRIx64 - ") outside of dynamic string table", - Offset); + return createStringError(object_error::parse_failed, + "DT_NEEDED string offset (0x%016" PRIx64 + ") outside of dynamic string table", + Offset); } } @@ -212,16 +210,16 @@ static Expected getNumSyms(DynamicEntries &Dyn, static ELFSymbolType convertInfoToType(uint8_t Info) { Info = Info & 0xf; switch (Info) { - case ELF::STT_NOTYPE: - return ELFSymbolType::NoType; - case ELF::STT_OBJECT: - return ELFSymbolType::Object; - case ELF::STT_FUNC: - return ELFSymbolType::Func; - case ELF::STT_TLS: - return ELFSymbolType::TLS; - default: - return ELFSymbolType::Unknown; + case ELF::STT_NOTYPE: + return ELFSymbolType::NoType; + case ELF::STT_OBJECT: + return ELFSymbolType::Object; + case ELF::STT_FUNC: + return ELFSymbolType::Func; + case ELF::STT_TLS: + return ELFSymbolType::TLS; + default: + return ELFSymbolType::Unknown; } } @@ -259,8 +257,8 @@ static ELFSymbol createELFSym(StringRef SymName, /// @param DynStr StringRef to the dynamic 
string table. template static Error populateSymbols(ELFStub &TargetStub, - const typename ELFT::SymRange DynSym, - StringRef DynStr) { + const typename ELFT::SymRange DynSym, + StringRef DynStr) { // Skips the first symbol since it's the NULL symbol. for (auto RawSym : DynSym.drop_front(1)) { // If a symbol does not have global or weak binding, ignore it. @@ -311,7 +309,7 @@ buildStub(const ELFObjectFile &ElfObj) { if (Error Err = populateDynamic(DynEnt, *DynTable)) return std::move(Err); - // Get pointer to in-memory location of .dynstr section. + // Get pointer to in-memory location of .dynstr section. Expected DynStrPtr = ElfFile->toMappedAddr(DynEnt.StrTabAddr); if (!DynStrPtr) @@ -355,9 +353,8 @@ buildStub(const ELFObjectFile &ElfObj) { if (!DynSymPtr) return appendToError(DynSymPtr.takeError(), "when locating .dynsym section contents"); - Elf_Sym_Range DynSyms = - ArrayRef(reinterpret_cast(*DynSymPtr), - *SymCount); + Elf_Sym_Range DynSyms = ArrayRef( + reinterpret_cast(*DynSymPtr), *SymCount); Error SymReadError = populateSymbols(*DestStub, DynSyms, DynStr); if (SymReadError) return appendToError(std::move(SymReadError), diff --git a/llvm/lib/TextAPI/ELF/ELFStub.cpp b/llvm/lib/InterfaceStub/ELFStub.cpp similarity index 95% rename from llvm/lib/TextAPI/ELF/ELFStub.cpp rename to llvm/lib/InterfaceStub/ELFStub.cpp index f8463497093b14..3c637695d8e714 100644 --- a/llvm/lib/TextAPI/ELF/ELFStub.cpp +++ b/llvm/lib/InterfaceStub/ELFStub.cpp @@ -6,7 +6,7 @@ // //===-----------------------------------------------------------------------===/ -#include "llvm/TextAPI/ELF/ELFStub.h" +#include "llvm/InterfaceStub/ELFStub.h" using namespace llvm; using namespace llvm::elfabi; diff --git a/llvm/lib/InterfaceStub/LLVMBuild.txt b/llvm/lib/InterfaceStub/LLVMBuild.txt new file mode 100644 index 00000000000000..e69544d4f5f67f --- /dev/null +++ b/llvm/lib/InterfaceStub/LLVMBuild.txt @@ -0,0 +1,21 @@ +;===- ./lib/InterfaceStub/LLVMBuild.txt ------------------------*- Conf -*--===; 
+; +; Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +; See https://llvm.org/LICENSE.txt for license information. +; SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = InterfaceStub +parent = Libraries +required_libraries = Object Support diff --git a/llvm/lib/TextAPI/ELF/TBEHandler.cpp b/llvm/lib/InterfaceStub/TBEHandler.cpp similarity index 98% rename from llvm/lib/TextAPI/ELF/TBEHandler.cpp rename to llvm/lib/InterfaceStub/TBEHandler.cpp index cb597d8896e811..06a2f9bf2451b1 100644 --- a/llvm/lib/TextAPI/ELF/TBEHandler.cpp +++ b/llvm/lib/InterfaceStub/TBEHandler.cpp @@ -6,12 +6,12 @@ // //===-----------------------------------------------------------------------===/ -#include "llvm/TextAPI/ELF/TBEHandler.h" -#include "llvm/ADT/StringSwitch.h" +#include "llvm/InterfaceStub/TBEHandler.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/InterfaceStub/ELFStub.h" #include "llvm/Support/Error.h" #include "llvm/Support/YAMLTraits.h" -#include "llvm/TextAPI/ELF/ELFStub.h" using namespace llvm; using namespace llvm::elfabi; diff --git a/llvm/lib/LLVMBuild.txt b/llvm/lib/LLVMBuild.txt index 824abd36fc9986..a81c6a1fe3fa46 100644 --- a/llvm/lib/LLVMBuild.txt +++ b/llvm/lib/LLVMBuild.txt @@ -30,6 +30,7 @@ subdirectories = FuzzMutate LineEditor Linker + InterfaceStub IR IRReader LTO diff --git a/llvm/lib/TextAPI/CMakeLists.txt b/llvm/lib/TextAPI/CMakeLists.txt index 36528f0995d8e3..b63bc64b31f14d 100644 --- a/llvm/lib/TextAPI/CMakeLists.txt +++ b/llvm/lib/TextAPI/CMakeLists.txt @@ -1,6 +1,4 @@ 
add_llvm_component_library(LLVMTextAPI - ELF/ELFStub.cpp - ELF/TBEHandler.cpp MachO/Architecture.cpp MachO/ArchitectureSet.cpp MachO/InterfaceFile.cpp diff --git a/llvm/tools/llvm-elfabi/CMakeLists.txt b/llvm/tools/llvm-elfabi/CMakeLists.txt index bd3ec851887aba..43b4b5b5faa963 100644 --- a/llvm/tools/llvm-elfabi/CMakeLists.txt +++ b/llvm/tools/llvm-elfabi/CMakeLists.txt @@ -1,11 +1,11 @@ set(LLVM_LINK_COMPONENTS + InterfaceStub Object Support TextAPI ) add_llvm_tool(llvm-elfabi - ELFObjHandler.cpp ErrorCollector.cpp llvm-elfabi.cpp ) diff --git a/llvm/tools/llvm-elfabi/LLVMBuild.txt b/llvm/tools/llvm-elfabi/LLVMBuild.txt index b1a80e9e4a15f9..4e8021442fb6b4 100644 --- a/llvm/tools/llvm-elfabi/LLVMBuild.txt +++ b/llvm/tools/llvm-elfabi/LLVMBuild.txt @@ -18,4 +18,4 @@ type = Tool name = llvm-elfabi parent = Tools -required_libraries = Object Support TextAPI +required_libraries = InterfaceStub Object Support TextAPI diff --git a/llvm/tools/llvm-elfabi/llvm-elfabi.cpp b/llvm/tools/llvm-elfabi/llvm-elfabi.cpp index 044b5f77c6d71d..8bf2ad4ed53736 100644 --- a/llvm/tools/llvm-elfabi/llvm-elfabi.cpp +++ b/llvm/tools/llvm-elfabi/llvm-elfabi.cpp @@ -6,16 +6,16 @@ // //===-----------------------------------------------------------------------===/ -#include "ELFObjHandler.h" #include "ErrorCollector.h" +#include "llvm/InterfaceStub/ELFObjHandler.h" +#include "llvm/InterfaceStub/TBEHandler.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Errc.h" #include "llvm/Support/FileOutputBuffer.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/Path.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/WithColor.h" -#include "llvm/TextAPI/ELF/TBEHandler.h" +#include "llvm/Support/raw_ostream.h" #include namespace llvm { diff --git a/llvm/unittests/CMakeLists.txt b/llvm/unittests/CMakeLists.txt index d7dbaeaa32fe87..850bc14b207fd1 100644 --- a/llvm/unittests/CMakeLists.txt +++ b/llvm/unittests/CMakeLists.txt @@ -26,6 +26,7 @@ 
add_subdirectory(Demangle) add_subdirectory(ExecutionEngine) add_subdirectory(Frontend) add_subdirectory(FuzzMutate) +add_subdirectory(InterfaceStub) add_subdirectory(IR) add_subdirectory(LineEditor) add_subdirectory(Linker) diff --git a/llvm/unittests/InterfaceStub/CMakeLists.txt b/llvm/unittests/InterfaceStub/CMakeLists.txt new file mode 100644 index 00000000000000..22ff0af67a3d4b --- /dev/null +++ b/llvm/unittests/InterfaceStub/CMakeLists.txt @@ -0,0 +1,9 @@ +set(LLVM_LINK_COMPONENTS + InterfaceStub +) + +add_llvm_unittest(InterfaceStubTests + ELFYAMLTest.cpp +) + +target_link_libraries(InterfaceStubTests PRIVATE LLVMTestingSupport) diff --git a/llvm/unittests/TextAPI/ELFYAMLTest.cpp b/llvm/unittests/InterfaceStub/ELFYAMLTest.cpp similarity index 98% rename from llvm/unittests/TextAPI/ELFYAMLTest.cpp rename to llvm/unittests/InterfaceStub/ELFYAMLTest.cpp index 8217507b5a5dc5..2b86b89fbbb6ff 100644 --- a/llvm/unittests/TextAPI/ELFYAMLTest.cpp +++ b/llvm/unittests/InterfaceStub/ELFYAMLTest.cpp @@ -7,8 +7,8 @@ //===-----------------------------------------------------------------------===/ #include "llvm/ADT/StringRef.h" -#include "llvm/TextAPI/ELF/ELFStub.h" -#include "llvm/TextAPI/ELF/TBEHandler.h" +#include "llvm/InterfaceStub/ELFStub.h" +#include "llvm/InterfaceStub/TBEHandler.h" #include "llvm/Support/Error.h" #include "llvm/Testing/Support/Error.h" #include "gtest/gtest.h" diff --git a/llvm/unittests/TextAPI/CMakeLists.txt b/llvm/unittests/TextAPI/CMakeLists.txt index 775ec2f1d1e886..d575d57610b96e 100644 --- a/llvm/unittests/TextAPI/CMakeLists.txt +++ b/llvm/unittests/TextAPI/CMakeLists.txt @@ -3,7 +3,6 @@ set(LLVM_LINK_COMPONENTS ) add_llvm_unittest(TextAPITests - ELFYAMLTest.cpp TextStubV1Tests.cpp TextStubV2Tests.cpp TextStubV3Tests.cpp From 25bbceb047a3ce85394d510a16bd3fcfd69b8c75 Mon Sep 17 00:00:00 2001 From: shafik Date: Thu, 13 Aug 2020 10:49:40 -0700 Subject: [PATCH 14/23] [LLDB] Fix how ValueObjectChild handles bit-fields stored in a Scalar in 
UpdateValue() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When bit-field data was stored in a Scalar in ValueObjectChild during UpdateValue() it was extracting the bit-field value. Later on in lldb_private::DumpDataExtractor(…) we were again attempting to extract the bit-field. Which would then not obtain the correct value. This will remove the extra extraction in UpdateValue(). We hit this specific case when values are passed in registers, which we could only reproduce in an optimized build. Differential Revision: https://reviews.llvm.org/D85376 --- lldb/source/Core/ValueObjectChild.cpp | 6 +- .../DW_AT_data_bit_offset-DW_OP_stack_value.s | 312 ++++++++++++++++++ 2 files changed, 313 insertions(+), 5 deletions(-) create mode 100644 lldb/test/Shell/SymbolFile/DWARF/DW_AT_data_bit_offset-DW_OP_stack_value.s diff --git a/lldb/source/Core/ValueObjectChild.cpp b/lldb/source/Core/ValueObjectChild.cpp index 28cb49328f34eb..1059c8f34b3b74 100644 --- a/lldb/source/Core/ValueObjectChild.cpp +++ b/lldb/source/Core/ValueObjectChild.cpp @@ -199,11 +199,7 @@ bool ValueObjectChild::UpdateValue() { // try to extract the child value from the parent's scalar value { Scalar scalar(m_value.GetScalar()); - if (m_bitfield_bit_size) - scalar.ExtractBitfield(m_bitfield_bit_size, - m_bitfield_bit_offset); - else - scalar.ExtractBitfield(8 * m_byte_size, 8 * m_byte_offset); + scalar.ExtractBitfield(8 * m_byte_size, 8 * m_byte_offset); m_value.GetScalar() = scalar; } break; diff --git a/lldb/test/Shell/SymbolFile/DWARF/DW_AT_data_bit_offset-DW_OP_stack_value.s b/lldb/test/Shell/SymbolFile/DWARF/DW_AT_data_bit_offset-DW_OP_stack_value.s new file mode 100644 index 00000000000000..074da09bc61eed --- /dev/null +++ b/lldb/test/Shell/SymbolFile/DWARF/DW_AT_data_bit_offset-DW_OP_stack_value.s @@ -0,0 +1,312 @@ +# RUN: llvm-mc -filetype=obj -o %t -triple x86_64-apple-macosx10.15.0 %s +# RUN: %lldb %t -o "target variable ug" -b | FileCheck %s + +# CHECK: 
(lldb) target variable ug +# CHECK: (U) ug = { +# CHECK: raw = 1688469761 +# CHECK: = (a = 1, b = 1, c = 36, d = 2, e = 36, f = 1) +# CHECK: } + +# We are testing how ValueObject deals with bit-fields when an argument is +# passed by register. Compiling at -O1 allows us to capture this case and +# test it. +# +# typedef union { +# unsigned raw; +# struct { +# unsigned a : 8; +# unsigned b : 8; +# unsigned c : 6; +# unsigned d : 2; +# unsigned e : 6; +# unsigned f : 2; +# }; +# } U; +# +# // This appears first in the debug info and pulls the type definition in... +# static U __attribute__((used)) _type_anchor; +# // ... then our useful variable appears last in the debug info and we can +# // tweak the assembly without needing to edit a lot of offsets by hand. +# static U ug; +# +# extern void f(U); +# +# // Omit debug info for main. +# __attribute__((nodebug)) +# int main() { +# ug.raw = 0x64A40101; +# f(ug); +# f((U)ug.raw); +# } +# +# Compiled as follows: +# +# clang -O1 -gdwarf-4 weird.c -S -o weird.s +# +# Then the DWARF was hand modified to get DW_AT_LOCATION for ug from: +# +# DW_AT_location (DW_OP_addr 0x3f8, DW_OP_deref, DW_OP_constu 0x64a40101, DW_OP_mul, DW_OP_lit0, DW_OP_plus, DW_OP_stack_value) +# +# to this: +# +# DW_AT_location (DW_OP_constu 0x64a40101, DW_OP_stack_value) +# +# to work-around a seperate bug. 
+ +.zerofill __DATA,__bss,__type_anchor,4,2 ## @_type_anchor +.zerofill __DATA,__bss,_ug.0,1,2 ## @ug.0 + .no_dead_strip __type_anchor + .section __DWARF,__debug_str,regular,debug +Linfo_string: + .zero 138 + .asciz "_type_anchor" ## string offset=138 + .asciz "U" ## string offset=151 + .asciz "raw" ## string offset=153 + .asciz "unsigned int" ## string offset=157 + .asciz "a" ## string offset=170 + .asciz "b" ## string offset=172 + .asciz "c" ## string offset=174 + .asciz "d" ## string offset=176 + .asciz "e" ## string offset=178 + .asciz "f" ## string offset=180 + .asciz "ug" ## string offset=182 + .section __DWARF,__debug_abbrev,regular,debug +Lsection_abbrev: + .byte 1 ## Abbreviation Code + .byte 17 ## DW_TAG_compile_unit + .byte 1 ## DW_CHILDREN_yes + .byte 37 ## DW_AT_producer + .byte 14 ## DW_FORM_strp + .byte 19 ## DW_AT_language + .byte 5 ## DW_FORM_data2 + .byte 3 ## DW_AT_name + .byte 14 ## DW_FORM_strp + .ascii "\202|" ## DW_AT_LLVM_sysroot + .byte 14 ## DW_FORM_strp + .ascii "\357\177" ## DW_AT_APPLE_sdk + .byte 14 ## DW_FORM_strp + .byte 16 ## DW_AT_stmt_list + .byte 23 ## DW_FORM_sec_offset + .byte 27 ## DW_AT_comp_dir + .byte 14 ## DW_FORM_strp + .ascii "\341\177" ## DW_AT_APPLE_optimized + .byte 25 ## DW_FORM_flag_present + .byte 0 ## EOM(1) + .byte 0 ## EOM(2) + .byte 2 ## Abbreviation Code + .byte 52 ## DW_TAG_variable + .byte 0 ## DW_CHILDREN_no + .byte 3 ## DW_AT_name + .byte 14 ## DW_FORM_strp + .byte 73 ## DW_AT_type + .byte 19 ## DW_FORM_ref4 + .byte 58 ## DW_AT_decl_file + .byte 11 ## DW_FORM_data1 + .byte 59 ## DW_AT_decl_line + .byte 11 ## DW_FORM_data1 + .byte 2 ## DW_AT_location + .byte 24 ## DW_FORM_exprloc + .byte 0 ## EOM(1) + .byte 0 ## EOM(2) + .byte 3 ## Abbreviation Code + .byte 22 ## DW_TAG_typedef + .byte 0 ## DW_CHILDREN_no + .byte 73 ## DW_AT_type + .byte 19 ## DW_FORM_ref4 + .byte 3 ## DW_AT_name + .byte 14 ## DW_FORM_strp + .byte 58 ## DW_AT_decl_file + .byte 11 ## DW_FORM_data1 + .byte 59 ## DW_AT_decl_line + .byte 11 ## 
DW_FORM_data1 + .byte 0 ## EOM(1) + .byte 0 ## EOM(2) + .byte 4 ## Abbreviation Code + .byte 23 ## DW_TAG_union_type + .byte 1 ## DW_CHILDREN_yes + .byte 11 ## DW_AT_byte_size + .byte 11 ## DW_FORM_data1 + .byte 58 ## DW_AT_decl_file + .byte 11 ## DW_FORM_data1 + .byte 59 ## DW_AT_decl_line + .byte 11 ## DW_FORM_data1 + .byte 0 ## EOM(1) + .byte 0 ## EOM(2) + .byte 5 ## Abbreviation Code + .byte 13 ## DW_TAG_member + .byte 0 ## DW_CHILDREN_no + .byte 3 ## DW_AT_name + .byte 14 ## DW_FORM_strp + .byte 73 ## DW_AT_type + .byte 19 ## DW_FORM_ref4 + .byte 58 ## DW_AT_decl_file + .byte 11 ## DW_FORM_data1 + .byte 59 ## DW_AT_decl_line + .byte 11 ## DW_FORM_data1 + .byte 56 ## DW_AT_data_member_location + .byte 11 ## DW_FORM_data1 + .byte 0 ## EOM(1) + .byte 0 ## EOM(2) + .byte 6 ## Abbreviation Code + .byte 13 ## DW_TAG_member + .byte 0 ## DW_CHILDREN_no + .byte 73 ## DW_AT_type + .byte 19 ## DW_FORM_ref4 + .byte 58 ## DW_AT_decl_file + .byte 11 ## DW_FORM_data1 + .byte 59 ## DW_AT_decl_line + .byte 11 ## DW_FORM_data1 + .byte 56 ## DW_AT_data_member_location + .byte 11 ## DW_FORM_data1 + .byte 0 ## EOM(1) + .byte 0 ## EOM(2) + .byte 7 ## Abbreviation Code + .byte 19 ## DW_TAG_structure_type + .byte 1 ## DW_CHILDREN_yes + .byte 11 ## DW_AT_byte_size + .byte 11 ## DW_FORM_data1 + .byte 58 ## DW_AT_decl_file + .byte 11 ## DW_FORM_data1 + .byte 59 ## DW_AT_decl_line + .byte 11 ## DW_FORM_data1 + .byte 0 ## EOM(1) + .byte 0 ## EOM(2) + .byte 8 ## Abbreviation Code + .byte 13 ## DW_TAG_member + .byte 0 ## DW_CHILDREN_no + .byte 3 ## DW_AT_name + .byte 14 ## DW_FORM_strp + .byte 73 ## DW_AT_type + .byte 19 ## DW_FORM_ref4 + .byte 58 ## DW_AT_decl_file + .byte 11 ## DW_FORM_data1 + .byte 59 ## DW_AT_decl_line + .byte 11 ## DW_FORM_data1 + .byte 13 ## DW_AT_bit_size + .byte 11 ## DW_FORM_data1 + .byte 107 ## DW_AT_data_bit_offset + .byte 11 ## DW_FORM_data1 + .byte 0 ## EOM(1) + .byte 0 ## EOM(2) + .byte 9 ## Abbreviation Code + .byte 36 ## DW_TAG_base_type + .byte 0 ## 
DW_CHILDREN_no + .byte 3 ## DW_AT_name + .byte 14 ## DW_FORM_strp + .byte 62 ## DW_AT_encoding + .byte 11 ## DW_FORM_data1 + .byte 11 ## DW_AT_byte_size + .byte 11 ## DW_FORM_data1 + .byte 0 ## EOM(1) + .byte 0 ## EOM(2) + .byte 0 ## EOM(3) + .section __DWARF,__debug_info,regular,debug +Lsection_info: +Lcu_begin0: +.set Lset0, Ldebug_info_end0-Ldebug_info_start0 ## Length of Unit + .long Lset0 +Ldebug_info_start0: + .short 4 ## DWARF version number +.set Lset1, Lsection_abbrev-Lsection_abbrev ## Offset Into Abbrev. Section + .long Lset1 + .byte 8 ## Address Size (in bytes) + .byte 1 ## Abbrev [1] 0xb:0xd0 DW_TAG_compile_unit + .long 0 ## DW_AT_producer + .short 12 ## DW_AT_language + .long 47 ## DW_AT_name + .long 60 ## DW_AT_LLVM_sysroot + .long 117 ## DW_AT_APPLE_sdk + .long 0 ## DW_AT_stmt_list + .long 133 ## DW_AT_comp_dir + ## DW_AT_APPLE_optimized + .byte 2 ## Abbrev [2] 0x26:0x15 DW_TAG_variable + .long 138 ## DW_AT_name + .long 59 ## DW_AT_type + .byte 1 ## DW_AT_decl_file + .byte 14 ## DW_AT_decl_line + .byte 9 ## DW_AT_location + .byte 3 + .quad __type_anchor + .byte 3 ## Abbrev [3] 0x3b:0xb DW_TAG_typedef + .long 70 ## DW_AT_type + .long 151 ## DW_AT_name + .byte 1 ## DW_AT_decl_file + .byte 11 ## DW_AT_decl_line + .byte 4 ## Abbrev [4] 0x46:0x6c DW_TAG_union_type + .byte 4 ## DW_AT_byte_size + .byte 1 ## DW_AT_decl_file + .byte 1 ## DW_AT_decl_line + .byte 5 ## Abbrev [5] 0x4a:0xc DW_TAG_member + .long 153 ## DW_AT_name + .long 178 ## DW_AT_type + .byte 1 ## DW_AT_decl_file + .byte 2 ## DW_AT_decl_line + .byte 0 ## DW_AT_data_member_location + .byte 6 ## Abbrev [6] 0x56:0x8 DW_TAG_member + .long 94 ## DW_AT_type + .byte 1 ## DW_AT_decl_file + .byte 3 ## DW_AT_decl_line + .byte 0 ## DW_AT_data_member_location + .byte 7 ## Abbrev [7] 0x5e:0x53 DW_TAG_structure_type + .byte 4 ## DW_AT_byte_size + .byte 1 ## DW_AT_decl_file + .byte 3 ## DW_AT_decl_line + .byte 8 ## Abbrev [8] 0x62:0xd DW_TAG_member + .long 170 ## DW_AT_name + .long 178 ## DW_AT_type + .byte 
1 ## DW_AT_decl_file + .byte 4 ## DW_AT_decl_line + .byte 8 ## DW_AT_bit_size + .byte 0 ## DW_AT_data_bit_offset + .byte 8 ## Abbrev [8] 0x6f:0xd DW_TAG_member + .long 172 ## DW_AT_name + .long 178 ## DW_AT_type + .byte 1 ## DW_AT_decl_file + .byte 5 ## DW_AT_decl_line + .byte 8 ## DW_AT_bit_size + .byte 8 ## DW_AT_data_bit_offset + .byte 8 ## Abbrev [8] 0x7c:0xd DW_TAG_member + .long 174 ## DW_AT_name + .long 178 ## DW_AT_type + .byte 1 ## DW_AT_decl_file + .byte 6 ## DW_AT_decl_line + .byte 6 ## DW_AT_bit_size + .byte 16 ## DW_AT_data_bit_offset + .byte 8 ## Abbrev [8] 0x89:0xd DW_TAG_member + .long 176 ## DW_AT_name + .long 178 ## DW_AT_type + .byte 1 ## DW_AT_decl_file + .byte 7 ## DW_AT_decl_line + .byte 2 ## DW_AT_bit_size + .byte 22 ## DW_AT_data_bit_offset + .byte 8 ## Abbrev [8] 0x96:0xd DW_TAG_member + .long 178 ## DW_AT_name + .long 178 ## DW_AT_type + .byte 1 ## DW_AT_decl_file + .byte 8 ## DW_AT_decl_line + .byte 6 ## DW_AT_bit_size + .byte 24 ## DW_AT_data_bit_offset + .byte 8 ## Abbrev [8] 0xa3:0xd DW_TAG_member + .long 180 ## DW_AT_name + .long 178 ## DW_AT_type + .byte 1 ## DW_AT_decl_file + .byte 9 ## DW_AT_decl_line + .byte 2 ## DW_AT_bit_size + .byte 30 ## DW_AT_data_bit_offset + .byte 0 ## End Of Children Mark + .byte 0 ## End Of Children Mark + .byte 9 ## Abbrev [9] 0xb2:0x7 DW_TAG_base_type + .long 157 ## DW_AT_name + .byte 7 ## DW_AT_encoding + .byte 4 ## DW_AT_byte_size + .byte 2 ## Abbrev [2] 0xb9:0x21 DW_TAG_variable + .long 182 ## DW_AT_name + .long 59 ## DW_AT_type + .byte 1 ## DW_AT_decl_file + .byte 17 ## DW_AT_decl_line + .byte 7 ## DW_AT_location + .byte 16 + .ascii "\201\202\220\245\006" + .byte 159 + .byte 0 ## End Of Children Mark +Ldebug_info_end0: From 34a5669ccd8b8c4edd35488a5ece407f0ed77601 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Stefan=20Gr=C3=A4nitz?= Date: Sun, 2 Aug 2020 14:36:02 +0200 Subject: [PATCH 15/23] [ORC] Fix SymbolLookupSet::containsDuplicates() --- llvm/include/llvm/ExecutionEngine/Orc/Core.h | 2 +- 1 file 
changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/include/llvm/ExecutionEngine/Orc/Core.h b/llvm/include/llvm/ExecutionEngine/Orc/Core.h index a117acefd2d361..101017f89def17 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/Core.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/Core.h @@ -287,7 +287,7 @@ class SymbolLookupSet { for (UnderlyingVector::size_type I = 1; I != Symbols.size(); ++I) if (Symbols[I].first == Symbols[I - 1].first) return true; - return true; + return false; } #endif From f12db8cf750bb16515ba635143ca34b0c012968a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Stefan=20Gr=C3=A4nitz?= Date: Sun, 2 Aug 2020 14:37:33 +0200 Subject: [PATCH 16/23] [ORC] cloneToNewContext() can work with a const-ref to ThreadSafeModule --- llvm/include/llvm/ExecutionEngine/Orc/ThreadSafeModule.h | 2 +- llvm/lib/ExecutionEngine/Orc/ThreadSafeModule.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/include/llvm/ExecutionEngine/Orc/ThreadSafeModule.h b/llvm/include/llvm/ExecutionEngine/Orc/ThreadSafeModule.h index 58c96737e58076..82f2b7464953f1 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/ThreadSafeModule.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/ThreadSafeModule.h @@ -162,7 +162,7 @@ using GVModifier = std::function; /// Clones the given module on to a new context. 
ThreadSafeModule -cloneToNewContext(ThreadSafeModule &TSMW, +cloneToNewContext(const ThreadSafeModule &TSMW, GVPredicate ShouldCloneDef = GVPredicate(), GVModifier UpdateClonedDefSource = GVModifier()); diff --git a/llvm/lib/ExecutionEngine/Orc/ThreadSafeModule.cpp b/llvm/lib/ExecutionEngine/Orc/ThreadSafeModule.cpp index 1f4e6f1321150d..2e128dd2374439 100644 --- a/llvm/lib/ExecutionEngine/Orc/ThreadSafeModule.cpp +++ b/llvm/lib/ExecutionEngine/Orc/ThreadSafeModule.cpp @@ -15,7 +15,7 @@ namespace llvm { namespace orc { -ThreadSafeModule cloneToNewContext(ThreadSafeModule &TSM, +ThreadSafeModule cloneToNewContext(const ThreadSafeModule &TSM, GVPredicate ShouldCloneDef, GVModifier UpdateClonedDefSource) { assert(TSM && "Can not clone null module"); From fa4b3147e3368f63e27b86ef66cd35f484ceb6d6 Mon Sep 17 00:00:00 2001 From: River Riddle Date: Thu, 13 Aug 2020 12:04:57 -0700 Subject: [PATCH 17/23] [mlir][DialectConversion] Update the documentation for dialect conversion This revision updates the documentation for dialect conversion, as many concepts have changed/evolved over time. Differential Revision: https://reviews.llvm.org/D85167 --- mlir/docs/DialectConversion.md | 345 +++++++++++++++++++++++---------- 1 file changed, 242 insertions(+), 103 deletions(-) diff --git a/mlir/docs/DialectConversion.md b/mlir/docs/DialectConversion.md index c7174147b72eb0..8a308dd6788224 100644 --- a/mlir/docs/DialectConversion.md +++ b/mlir/docs/DialectConversion.md @@ -7,7 +7,7 @@ of pattern-based operation rewriting patterns. 
[TOC] -To utilize the framework, a few things must be provided: +The dialect conversion framework consists of the following components: * A [Conversion Target](#conversion-target) * A set of [Rewrite Patterns](#rewrite-pattern-specification) @@ -15,41 +15,44 @@ To utilize the framework, a few things must be provided: ## Modes of Conversion -When applying a conversion to a set of operations, there are several conversion -modes that can be selected from: +When applying a conversion to a set of operations, there are several different +conversion modes that may be selected from: * Partial Conversion - A partial conversion will legalize as many operations to the target as possible, but will allow pre-existing operations that were not - explicitly marked as `illegal` to remain unconverted. This allows for - partially lowering parts of the module in the presence of unknown + explicitly marked as "illegal" to remain unconverted. This allows for + partially lowering parts of the input in the presence of unknown operations. - A partial conversion can be applied via `applyPartialConversion`. * Full Conversion - - A full conversion is only successful if all operations are properly - legalized to the given conversion target. This ensures that only known - operations will exist after the conversion process. + - A full conversion legalizes all input operations, and is only successful + if all operations are properly legalized to the given conversion target. + This ensures that only known operations will exist after the conversion + process. - A full conversion can be applied via `applyFullConversion`. * Analysis Conversion - An analysis conversion will analyze which operations are legalizable to - the given conversion target if a conversion were to be applied. Note - that no rewrites, or transformations, are actually applied to the input + the given conversion target if a conversion were to be applied. 
This is + done by performing a 'partial' conversion and recording which operations + would have been successfully converted if successful. Note that no + rewrites, or transformations, are actually applied to the input operations. - An analysis conversion can be applied via `applyAnalysisConversion`. ## Conversion Target -The conversion target is the formal definition of what is considered to be legal +The conversion target is a formal definition of what is considered to be legal during the conversion process. The final operations generated by the conversion framework must be marked as legal on the `ConversionTarget` for the rewrite to -be a success. Existing operations need not always be legal, though; see the -different conversion modes for why. Operations and dialects may be marked with -any of the provided legality actions below: +be a success. Depending on the conversion mode, existing operations need not +always be legal. Operations and dialects may be marked with any of the provided +legality actions below: * Legal @@ -68,7 +71,7 @@ any of the provided legality actions below: * Illegal - This action signals that no instance of a given operation is legal. - Operations marked as `illegal` must always be converted for the + Operations marked as "illegal" must always be converted for the conversion to be successful. This action also allows for selectively marking specific operations as illegal in an otherwise legal dialect. @@ -123,13 +126,12 @@ struct MyTarget : public ConversionTarget { ### Recursive Legality -In some cases, it may be desirable to mark entire regions of operations as -legal. This provides an additional granularity of context to the concept of -"legal". The `ConversionTarget` supports marking operations, that were -previously added as `Legal` or `Dynamic`, as `recursively` legal. Recursive -legality means that if an operation instance is legal, either statically or -dynamically, all of the operations nested within are also considered legal. 
An -operation can be marked via `markOpRecursivelyLegal<>`: +In some cases, it may be desirable to mark entire regions as legal. This +provides an additional granularity of context to the concept of "legal". If an +operation is marked recursively legal, either statically or dynamically, then +all of the operations nested within are also considered legal even if they would +otherwise be considered "illegal". An operation can be marked via +`markOpRecursivelyLegal<>`: ```c++ ConversionTarget &target = ...; @@ -149,14 +151,12 @@ target.markOpRecursivelyLegal([](MyOp op) { ... }); ## Rewrite Pattern Specification After the conversion target has been defined, a set of legalization patterns -must be provided to transform illegal operations into legal ones. The patterns -supplied here, that do not [require type changes](#conversion-patterns), are the -same as those described in the -[quickstart rewrites guide](Tutorials/QuickstartRewrites.md#adding-patterns), but have a -few additional [restrictions](#restrictions). The patterns provided do not need -to generate operations that are directly legal on the target. The framework will -automatically build a graph of conversions to convert non-legal operations into -a set of legal ones. +must be provided to transform illegal operations into legal ones. The structure +of the patterns supplied here is the same as those described in the +[quickstart rewrites guide](Tutorials/QuickstartRewrites.md#adding-patterns). +The patterns provided do not need to generate operations that are directly legal +on the target. The framework will automatically build a graph of conversions to +convert non-legal operations into a set of legal ones. As an example, say you define a target that supports one operation: `foo.add`. 
When providing the following patterns: [`bar.add` -> `baz.add`, `baz.add` -> @@ -165,38 +165,139 @@ When providing the following patterns: [`bar.add` -> `baz.add`, `baz.add` -> means that you don’t have to define a direct legalization pattern for `bar.add` -> `foo.add`. -### Restrictions +### Conversion Patterns + +Along with the general `RewritePattern` classes, the conversion framework +provides a special type of rewrite pattern that can be used when a pattern +relies on interacting with constructs specific to the conversion process, the +`ConversionPattern`. For example, the conversion process does not necessarily +update operations in-place and instead creates a mapping of events such as +replacements and erasures, and only applies them when the entire conversion +process is successful. Certain classes of patterns rely on using the +updated/remapped operands of an operation, such as when the types of results +defined by an operation have changed. The general Rewrite Patterns can no longer +be used in these situations, as the types of the operands of the operation being +matched will not correspond with those expected by the user. This pattern +provides, as an additional argument to the `matchAndRewrite` and `rewrite` +methods, the list of operands that the operation should use after conversion. If +an operand was the result of a non-converted operation, for example if it was +already legal, the original operand is used. This means that the operands +provided always have a 1-1 non-null correspondence with the operands on the +operation. The original operands of the operation are still intact and may be +inspected as normal. These patterns also utilize a special `PatternRewriter`, +`ConversionPatternRewriter`, that provides special hooks for use with the +conversion infrastructure. -The framework processes operations in topological order, trying to legalize them -individually. 
As such, patterns used in the conversion framework have a few -additional restrictions: +```c++ +struct MyConversionPattern : public ConversionPattern { + /// The `matchAndRewrite` hooks on ConversionPatterns take an additional + /// `operands` parameter, containing the remapped operands of the original + /// operation. + virtual LogicalResult + matchAndRewrite(Operation *op, ArrayRef operands, + ConversionPatternRewriter &rewriter) const; +}; +``` -1. If a pattern matches, it must erase or replace the op it matched on. - Operations can *not* be updated in place. -2. Match criteria should not be based on the IR outside of the op itself. The - preceding ops will already have been processed by the framework (although it - may not update uses), and the subsequent IR will not yet be processed. This - can create confusion if a pattern attempts to match against a sequence of - ops (e.g. rewrite A + B -> C). That sort of rewrite should be performed in a - separate pass. +#### Type Safety + +The types of the remapped operands provided to a conversion pattern must be of a +type expected by the pattern. The expected types of a pattern are determined by +a provided [TypeConverter](#type-converter). If no type converter is provided, +the types of the remapped operands are expected to match the types of the +original operands. If a type converter is provided, the types of the remapped +operands are expected to be legal as determined by the converter. If the +remapped operand types are not of an expected type, and a materialization to the +expected type could not be performed, the pattern fails application before the +`matchAndRewrite` hook is invoked. This ensures that patterns do not have to +explicitly ensure type safety, or sanitize the types of the incoming remapped +operands. More information on type conversion is detailed in the +[dedicated section](#type-conversion) below. 
## Type Conversion It is sometimes necessary as part of a conversion to convert the set types of being operated on. In these cases, a `TypeConverter` object may be defined that -details how types should be converted. The `TypeConverter` is used by patterns -and by the general conversion infrastructure to convert the signatures of blocks -and regions. +details how types should be converted when interfacing with a pattern. A +`TypeConverter` may be used to convert the signatures of block arguments and +regions, to define the expected inputs types of the pattern, and to reconcile +type differences in general. ### Type Converter -As stated above, the `TypeConverter` contains several hooks for detailing how to -convert types. Several of these hooks are detailed below: +The `TypeConverter` contains several hooks for detailing how to convert types, +and how to materialize conversions between types in various situations. The two +main aspects of the `TypeConverter` are conversion and materialization. + +A `conversion` describes how a given illegal source `Type` should be converted +to N target types. If the source type is already "legal", it should convert to +itself. Type conversions are specified via the `addConversion` method described +below. + +A `materialization` describes how a set of values should be converted to a +single value of a desired type. An important distinction with a `conversion` is +that a `materialization` can produce IR, whereas a `conversion` cannot. These +materializations are used by the conversion framework to ensure type safety +during the conversion process. There are several types of materializations +depending on the situation. + +* Argument Materialization + + - An argument materialization is used when converting the type of a block + argument during a [signature conversion](#region-signature-conversion). 
+ +* Source Materialization + + - A source materialization converts from a value with a "legal" target + type, back to a specific source type. This is used when an operation is + "legal" during the conversion process, but contains a use of an illegal + type. This may happen during a conversion where some operations are + converted to those with different resultant types, but still retain + users of the original type system. + - This materialization is used in the following situations: + * When a block argument has been converted to a different type, but + the original argument still has users that will remain live after + the conversion process has finished. + * When the result type of an operation has been converted to a + different type, but the original result still has users that will + remain live after the conversion process is finished. + +* Target Materialization + + - A target materialization converts from a value with an "illegal" source + type, to a value of a "legal" type. This is used when a pattern expects + the remapped operands to be of a certain set of types, but the original + input operands have not been converted. This may happen during a + conversion where some operations are converted to those with different + resultant types, but still retain uses of the original type system. + - This materialization is used in the following situations: + * When the remapped operands of a + [conversion pattern](#conversion-patterns) are not legal for the + type conversion provided by the pattern. + +If a converted value is used by an operation that isn't converted, it needs a +conversion back to the `source` type, hence source materialization; if an +unconverted value is used by an operation that is being converted, it needs +conversion to the `target` type, hence target materialization. + +As noted above, the conversion process guarantees that the type contract of the +IR is preserved during the conversion. 
This means that the types of value uses +will not implicitly change during the conversion process. When the type of a +value definition, either block argument or operation result, is being changed, +the users of that definition must also be updated during the conversion process. +If they aren't, a type conversion must be materialized to ensure that a value of +the expected type is still present within the IR. If a target materialization is +required, but cannot be performed, the pattern application fails. If a source +materialization is required, but cannot be performed, the entire conversion +process fails. + +Several of the available hooks are detailed below: ```c++ class TypeConverter { public: - /// Register a conversion function. A conversion function must be convertible + /// Register a conversion function. A conversion function defines how a given + /// source type should be converted. A conversion function must be convertible /// to any of the following forms(where `T` is a class derived from `Type`: /// * Optional(T) /// - This form represents a 1-1 type conversion. It should return nullptr @@ -210,56 +311,53 @@ class TypeConverter { /// existing value are expected to be removed during conversion. If /// `llvm::None` is returned, the converter is allowed to try another /// conversion function to perform the conversion. - /// - /// When attempting to convert a type, e.g. via `convertType`, the - /// `TypeConverter` will invoke each of the converters starting with the one - /// most recently registered. - template - void addConversion(ConversionFnT &&callback); - - /// Register a materialization function, which must be convertibe to the - /// following form - /// `Optional(PatternRewriter &, T, ValueRange, Location)`, - /// where `T` is any subclass of `Type`. This function is responsible for - /// creating an operation, using the PatternRewriter and Location provided, - /// that "casts" a range of values into a single value of the given type `T`. 
- /// It must return a Value of the converted type on success, an `llvm::None` - /// if it failed but other materialization can be attempted, and `nullptr` on - /// unrecoverable failure. It will only be called for (sub)types of `T`. - /// Materialization functions must be provided when a type conversion - /// results in more than one type, or if a type conversion may persist after - /// the conversion has finished. - template - void addMaterialization(FnT &&callback); -}; -``` - -### Conversion Patterns - -When type conversion comes into play, the general Rewrite Patterns can no longer -be used. This is due to the fact that the operands of the operation being -matched will not correspond with the operands of the correct type as determined -by `TypeConverter`. The operation rewrites on type boundaries must thus use a -special pattern, the `ConversionPattern`. This pattern provides, as an -additional argument to the `matchAndRewrite` and `rewrite` methods, the set of -remapped operands corresponding to the desired type. These patterns also utilize -a special `PatternRewriter`, `ConversionPatternRewriter`, that provides special -hooks for use with the conversion infrastructure. + /// Note: When attempting to convert a type, e.g. via 'convertType', the + /// most recently added conversions will be invoked first. + template ::template arg_t<0>> + void addConversion(FnT &&callback) { + registerConversion(wrapCallback(std::forward(callback))); + } -```c++ -struct MyConversionPattern : public ConversionPattern { - /// The `matchAndRewrite` hooks on ConversionPatterns take an additional - /// `operands` parameter, containing the remapped operands of the original - /// operation. 
- virtual LogicalResult - matchAndRewrite(Operation *op, ArrayRef operands, - ConversionPatternRewriter &rewriter) const; + /// Register a materialization function, which must be convertible to the + /// following form: + /// `Optional (OpBuilder &, T, ValueRange, Location)`, + /// where `T` is any subclass of `Type`. + /// This function is responsible for creating an operation, using the + /// OpBuilder and Location provided, that "converts" a range of values into a + /// single value of the given type `T`. It must return a Value of the + /// converted type on success, an `llvm::None` if it failed but other + /// materialization can be attempted, and `nullptr` on unrecoverable failure. + /// It will only be called for (sub)types of `T`. + /// + /// This method registers a materialization that will be called when + /// converting an illegal block argument type, to a legal type. + template ::template arg_t<1>> + void addArgumentMaterialization(FnT &&callback) { + argumentMaterializations.emplace_back( + wrapMaterialization(std::forward(callback))); + } + /// This method registers a materialization that will be called when + /// converting a legal type to an illegal source type. This is used when + /// conversions to an illegal type must persist beyond the main conversion. + template ::template arg_t<1>> + void addSourceMaterialization(FnT &&callback) { + sourceMaterializations.emplace_back( + wrapMaterialization(std::forward(callback))); + } + /// This method registers a materialization that will be called when + /// converting type from an illegal, or source, type to a legal type. + template ::template arg_t<1>> + void addTargetMaterialization(FnT &&callback) { + targetMaterializations.emplace_back( + wrapMaterialization(std::forward(callback))); + } }; ``` -These patterns have the same [restrictions](#restrictions) as the basic rewrite -patterns used in dialect conversion. 
- ### Region Signature Conversion From the perspective of type conversion, the types of block arguments are a bit @@ -268,15 +366,16 @@ different operations. Given this, the conversion of the types for blocks must be done explicitly via a conversion pattern. To convert the types of block arguments within a Region, a custom hook on the `ConversionPatternRewriter` must be invoked; `convertRegionTypes`. This hook uses a provided type converter to -apply type conversions to all blocks within the region, and all blocks that move -into that region. This hook also takes an optional -`TypeConverter::SignatureConversion` parameter that applies a custom conversion -to the entry block of the region. The types of the entry block arguments are -often tied semantically to details on the operation, e.g. FuncOp, AffineForOp, -etc. To convert the signature of just the region entry block, and not any other -blocks within the region, the `applySignatureConversion` hook may be used -instead. A signature conversion, `TypeConverter::SignatureConversion`, can be -built programmatically: +apply type conversions to all blocks within a given region, and all blocks that +move into that region. As noted above, the conversions performed by this method +use the argument materialization hook on the `TypeConverter`. This hook also +takes an optional `TypeConverter::SignatureConversion` parameter that applies a +custom conversion to the entry block of the region. The types of the entry block +arguments are often tied semantically to details on the operation, e.g. FuncOp, +AffineForOp, etc. To convert the signature of just the region entry block, and +not any other blocks within the region, the `applySignatureConversion` hook may +be used instead. 
A signature conversion, `TypeConverter::SignatureConversion`, +can be built programmatically: ```c++ class SignatureConversion { @@ -303,3 +402,43 @@ public: The `TypeConverter` provides several default utilities for signature conversion and legality checking: `convertSignatureArgs`/`convertBlockSignature`/`isLegal(Region *|Type)`. + +## Debugging + +To debug the execution of the dialect conversion framework, +`-debug-only=dialect-conversion` may be used. This command line flag activates +LLVM's debug logging infrastructure solely for the conversion framework. The +output is formatted as a tree structure, mirroring the structure of the +conversion process. This output contains all of the actions performed by the +rewriter, how generated operations get legalized, and why they fail. + +Example output is shown below: + +``` +//===-------------------------------------------===// +Legalizing operation : 'std.return'(0x608000002e20) { + "std.return"() : () -> () + + * Fold { + } -> FAILURE : unable to fold + + * Pattern : 'std.return -> ()' { + ** Insert : 'spv.Return'(0x6070000453e0) + ** Replace : 'std.return'(0x608000002e20) + + //===-------------------------------------------===// + Legalizing operation : 'spv.Return'(0x6070000453e0) { + "spv.Return"() : () -> () + + } -> SUCCESS : operation marked legal by the target + //===-------------------------------------------===// + } -> SUCCESS : pattern applied successfully +} -> SUCCESS +//===-------------------------------------------===// +``` + +This output is describing the legalization of an `std.return` operation. We +first try to legalize by folding the operation, but that is unsuccessful for +`std.return`. From there, a pattern is applied that replaces the `std.return` +with a `spv.Return`. The newly generated `spv.Return` is then processed for +legalization, but is found to already be legal as per the target. 
From f7a13479b809cdeb9d63d0daa0d6ab61f04d5f7a Mon Sep 17 00:00:00 2001 From: River Riddle Date: Thu, 13 Aug 2020 12:05:04 -0700 Subject: [PATCH 18/23] [mlir][docs] Update/Add documentation for MLIRs Pattern Rewrite infrastructure This infrastructure has evolved a lot over the course of MLIRs lifetime, and has never truly been documented outside of rationale or proposals. This revision aims to document the infrastructure and user facing API, with the rationale specific portions moved to the Rationale folder and updated. Differential Revision: https://reviews.llvm.org/D85260 --- mlir/docs/DialectConversion.md | 12 +- mlir/docs/GenericDAGRewriter.md | 415 ------------------ mlir/docs/PatternRewriter.md | 256 +++++++++++ mlir/docs/Rationale/MLIRForGraphAlgorithms.md | 2 +- .../Rationale/RationaleGenericDAGRewriter.md | 286 ++++++++++++ mlir/docs/Tutorials/Toy/Ch-3.md | 2 +- 6 files changed, 550 insertions(+), 423 deletions(-) delete mode 100644 mlir/docs/GenericDAGRewriter.md create mode 100644 mlir/docs/PatternRewriter.md create mode 100644 mlir/docs/Rationale/RationaleGenericDAGRewriter.md diff --git a/mlir/docs/DialectConversion.md b/mlir/docs/DialectConversion.md index 8a308dd6788224..4d3be5ed2a98cd 100644 --- a/mlir/docs/DialectConversion.md +++ b/mlir/docs/DialectConversion.md @@ -151,12 +151,12 @@ target.markOpRecursivelyLegal([](MyOp op) { ... }); ## Rewrite Pattern Specification After the conversion target has been defined, a set of legalization patterns -must be provided to transform illegal operations into legal ones. The structure -of the patterns supplied here is the same as those described in the -[quickstart rewrites guide](Tutorials/QuickstartRewrites.md#adding-patterns). -The patterns provided do not need to generate operations that are directly legal -on the target. The framework will automatically build a graph of conversions to -convert non-legal operations into a set of legal ones. +must be provided to transform illegal operations into legal ones. 
The patterns +supplied here have the same structure and restrictions as those described in the +main [Pattern](PatternRewriter.md) documentation. The patterns provided do not +need to generate operations that are directly legal on the target. The framework +will automatically build a graph of conversions to convert non-legal operations +into a set of legal ones. As an example, say you define a target that supports one operation: `foo.add`. When providing the following patterns: [`bar.add` -> `baz.add`, `baz.add` -> diff --git a/mlir/docs/GenericDAGRewriter.md b/mlir/docs/GenericDAGRewriter.md deleted file mode 100644 index a187c989889016..00000000000000 --- a/mlir/docs/GenericDAGRewriter.md +++ /dev/null @@ -1,415 +0,0 @@ -# Generic DAG Rewriter Infrastructure - -## Introduction and Motivation - -The goal of a compiler IR is to represent code - at various levels of -abstraction which pose different sets of tradeoffs in terms of representational -capabilities and ease of transformation. However, the ability to represent code -is not itself very useful - you also need to be able to implement those -transformations. - -There are many different sorts of compiler transformations, but this document -focuses on a particularly important class of transformation that comes up -repeatedly at scale, and is important for the immediate goals of MLIR: that of -pattern matching on a set of operations and replacing with another set. This is -the key algorithm required to implement the "op fission" algorithm used by the -tf2xla bridge, pattern matching rewrites from TF ops to TF/Lite, peephole -optimizations like "eliminate identity nodes" or "replace x+0 with x", as well -as a useful abstraction to implement optimization algorithms for MLIR graphs at -all levels. 
- -A particular strength of MLIR (and a major difference vs other compiler -infrastructures like LLVM, GCC, XLA, TensorFlow, etc) is that it uses a single -compiler IR to represent code at multiple levels of abstraction: an MLIR -operation can be a "TensorFlow operation", an "XLA HLO", a "TF Lite -FlatBufferModel op", a TPU LLO instruction, an LLVM IR instruction (transitively -including X86, Lanai, CUDA, and other target specific instructions), or anything -else that the MLIR type system can reasonably express. Because MLIR spans such a -wide range of different problems, a single infrastructure for performing -graph-to-graph rewrites can help solve many diverse domain challenges, including -TensorFlow graph level down to the machine code level. - -[Static single assignment](https://en.wikipedia.org/wiki/Static_single_assignment_form) -(SSA) representations like MLIR make it easy to access the operands and "users" -of an operation. As such, a natural abstraction for these graph-to-graph -rewrites is that of DAG pattern matching: clients define DAG tile patterns, and -each pattern includes a result DAG to produce and the cost of the result (or, -inversely, the benefit of doing the replacement). A common infrastructure -efficiently finds and perform the rewrites. - -While this concept is simple, the details are more nuanced. This proposal -defines and explores a set of abstractions that we feel can solve a wide range -of different problems, and can be applied to many different sorts of problems -that MLIR is - and is expected to - face over time. We do this by separating the -pattern definition and matching algorithm from the "driver" of the computation -loop, and make space for the patterns to be defined declaratively in the future. - -## Related Work - -There is a huge amount of related work to consider, given that pretty much every -compiler in existence has to solve this problem many times over. 
Here are a few -graph rewrite systems we have used, along with the pros and cons of this related -work. One unifying problem with all of these is that these systems are only -trying to solve one particular and usually narrow problem: our proposal would -like to solve many of these problems with a single infrastructure. Of these, the -most similar design to our proposal is the LLVM DAG-to-DAG instruction selection -algorithm at the end. - -### Constant folding - -A degenerate but pervasive case of DAG-to-DAG pattern matching is constant -folding: given an operation whose operands contain constants can often be folded -to a result constant value. - -MLIR already has constant folding routines which provide a simpler API than a -general DAG-to-DAG pattern matcher, and we expect it to remain because the -simpler contract makes it applicable in some cases that a generic matcher would -not. For example, a DAG-rewrite can remove arbitrary nodes in the current -function, which could invalidate iterators. Constant folding as an API does not -remove any nodes, it just provides a (list of) constant values and allows the -clients to update their data structures as necessary. - -### AST-Level Pattern Matchers - -The literature is full of source-to-source translators which transform -identities in order to improve performance (e.g. transforming `X*0` into `0`). -One large example that I'm aware of is the GCC `fold` function, which performs -[many optimizations](https://github.com/gcc-mirror/gcc/blob/master/gcc/fold-const.c) -on ASTs. Clang has -[similar routines](http://releases.llvm.org/3.5.0/tools/clang/docs/InternalsManual.html#constant-folding-in-the-clang-ast) -for simple constant folding of expressions (as required by the C++ standard) but -doesn't perform general optimizations on its ASTs. - -The primary downside of tree optimizers is that you can't see across operations -that have multiple uses. 
It is -[well known in literature](https://llvm.org/pubs/2008-06-LCTES-ISelUsingSSAGraphs.pdf) -that DAG pattern matching is more powerful than tree pattern matching, but OTOH, -DAG pattern matching can lead to duplication of computation which needs to be -checked for. - -### "Combiners" and other peephole optimizers - -Compilers end up with a lot of peephole optimizers for various things, e.g. the -GCC -["combine" routines](https://github.com/gcc-mirror/gcc/blob/master/gcc/combine.c) -(which try to merge two machine instructions into a single one), the LLVM -[Inst Combine](http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/InstCombine/) -[pass](https://llvm.org/docs/Passes.html#instcombine-combine-redundant-instructions), -LLVM's -[DAG Combiner](https://github.com/llvm-mirror/llvm/blob/master/lib/CodeGen/SelectionDAG/DAGCombiner.cpp), -the Swift compiler's -[SIL Combiner](https://github.com/apple/swift/tree/master/lib/SILOptimizer/SILCombiner), -etc. These generally match one or more operations and produce zero or more -operations as a result. The LLVM -[Legalization](http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/) -infrastructure has a different outer loop but otherwise works the same way. - -These passes have a lot of diversity, but also have a unifying structure: they -mostly have a worklist outer loop which visits operations. They then use the C++ -visitor pattern (or equivalent) to switch over the class of operation and -dispatch to a method. That method contains a long list of hand-written C++ code -that pattern-matches various special cases. LLVM introduced a "match" function -that allows writing patterns in a somewhat more declarative style using template -metaprogramming (MLIR has similar facilities). 
Here's a simple example: - -```c++ - // Y - (X + 1) --> ~X + Y - if (match(Op1, m_OneUse(m_Add(m_Value(X), m_One())))) - return BinaryOperator::CreateAdd(Builder.CreateNot(X), Op0); -``` - -Here is a somewhat more complicated one (this is not the biggest or most -complicated :) - -```c++ - // C2 is ODD - // LHS = XOR(Y,C1), Y = AND(Z,C2), C1==(C2+1) => LHS == NEG(OR(Z, ~C2)) - // ADD(LHS, RHS) == SUB(RHS, OR(Z, ~C2)) - if (match(LHS, m_Xor(m_Value(Y), m_APInt(C1)))) - if (C1->countTrailingZeros() == 0) - if (match(Y, m_And(m_Value(Z), m_APInt(C2))) && *C1 == (*C2 + 1)) { - Value NewOr = Builder.CreateOr(Z, ~(*C2)); - return Builder.CreateSub(RHS, NewOr, "sub"); - } -``` - -These systems are simple to set up, and pattern matching templates have some -advantages (they are extensible for new sorts of sub-patterns, look compact at -point of use). OTOH, they have lots of well known problems, for example: - -* These patterns are very error prone to write, and contain lots of - redundancies. -* The IR being matched often has identities (e.g. when matching commutative - operators) and the C++ code has to handle it manually - take a look at - [the full code](http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/InstCombine/InstCombineAddSub.cpp?view=markup#l775) - for checkForNegativeOperand that defines the second pattern). -* The matching code compiles slowly, both because it generates tons of code - and because the templates instantiate slowly. -* Adding new patterns (e.g. for count leading zeros in the example above) is - awkward and doesn't often happen. -* The cost model for these patterns is not really defined - it is emergent - based on the order the patterns are matched in code. -* They are non-extensible without rebuilding the compiler. -* It isn't practical to apply theorem provers and other tools to these - patterns - they cannot be reused for other purposes. 
- -In addition to structured "combiners" like these, there are lots of ad-hoc -systems like the -[LLVM Machine code peephole optimizer](http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/PeepholeOptimizer.cpp?view=markup) -which are related. - -### LLVM's DAG-to-DAG Instruction Selection Infrastructure - -The instruction selection subsystem in LLVM is the result of many years worth of -iteration and discovery, driven by the need for LLVM to support code generation -for lots of targets, the complexity of code generators for modern instruction -sets (e.g. X86), and the fanatical pursuit of reusing code across targets. Eli -wrote a -[nice short overview](https://eli.thegreenplace.net/2013/02/25/a-deeper-look-into-the-llvm-code-generator-part-1) -of how this works, and the -[LLVM documentation](https://llvm.org/docs/CodeGenerator.html#select-instructions-from-dag) -describes it in more depth including its advantages and limitations. It allows -writing patterns like this. - -``` -def : Pat<(or GR64:$src, (not (add GR64:$src, 1))), - (BLCI64rr GR64:$src)>; -``` - -This example defines a matcher for the -["blci" instruction](https://en.wikipedia.org/wiki/Bit_Manipulation_Instruction_Sets#TBM_\(Trailing_Bit_Manipulation\)) -in the -[X86 target description](http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.td?view=markup), -there are many others in that file (look for `Pat<>` patterns, since they aren't -entangled in details of the compiler like assembler/disassembler generation -logic). - -For our purposes, there is much to like about this system, for example: - -* It is defined in a declarative format. -* It is extensible to target-defined operations. -* It automates matching across identities, like commutative patterns. -* It allows custom abstractions and intense factoring of target-specific - commonalities. -* It generates compact code - it compiles into a state machine, which is - interpreted. 
-* It allows the instruction patterns to be defined and reused for multiple - purposes. -* The patterns are "type checked" at compile time, detecting lots of bugs - early and eliminating redundancy from the pattern specifications. -* It allows the use of general C++ code for weird/complex cases. - -While there is a lot that is good here, there is also a lot of bad things: - -* All of this machinery is only applicable to instruction selection. Even - directly adjacent problems like the DAGCombiner and Legalizer can't use it. -* This isn't extensible at compiler runtime, you have to rebuild the compiler - to extend it. -* The error messages when failing to match a pattern - [are not exactly optimal](https://www.google.com/search?q=llvm+cannot+select). -* It has lots of implementation problems and limitations (e.g. can't write a - pattern for a multi-result operation) as a result of working with the - awkward SelectionDAG representation and being designed and implemented - lazily. -* This stuff all grew organically over time and has lots of sharp edges. - -### Summary - -MLIR will face a wide range of pattern matching and graph rewrite problems, and -one of the major advantages of having a common representation for code at -multiple levels that it allows us to invest in - and highly leverage - a single -infra for doing this sort of work. - -## Goals - -This proposal includes support for defining pattern matching and rewrite -algorithms on MLIR. We'd like these algorithms to encompass many problems in the -MLIR space, including 1-to-N expansions (e.g. as seen in the TF/XLA bridge when -lowering a "tf.AddN" to multiple "add" HLOs), M-to-1 patterns (as seen in -Grappler optimization passes, e.g. that convert multiple/add into a single -muladd op), as well as general M-to-N patterns (e.g. instruction selection for -target instructions). 
Patterns should have a cost associated with them, and the -common infrastructure should be responsible for sorting out the lowest cost -match for a given application. - -We separate the task of picking a particular locally optimal pattern from a -given root node, the algorithm used to rewrite an entire graph given a -particular set of goals, and the definition of the patterns themselves. We do -this because DAG tile pattern matching is NP complete, which means that there -are no known polynomial time algorithms to optimally solve this problem. -Additionally, we would like to support iterative rewrite algorithms that -progressively transform the input program through multiple steps. Furthermore, -we would like to support many different sorts of clients across the MLIR stack, -and they may have different tolerances for compile time cost, different demands -for optimality, and other algorithmic goals or constraints. - -We aim for MLIR transformations to be easy to implement and reduce the -likelihood for compiler bugs. We expect there to be a very very large number of -patterns that are defined over time, and we believe that these sorts of patterns -will have a very large number of legality/validity constraints - many of which -are difficult to reason about in a consistent way, may be target specific, and -whose implementation may be particularly bug-prone. As such, we aim to design the -API around pattern definition to be simple, resilient to programmer errors, and -allow separation of concerns between the legality of the nodes generated from -the idea of the pattern being defined. - -Finally, error handling is a topmost concern: in addition to allowing patterns -to be defined in a target-independent way that may not apply for all hardware, -we also want failure for any pattern to match to be diagnosable in a reasonable -way. 
To be clear, this is not a solvable problem in general - the space of -malfunction is too great to be fully enumerated and handled optimally, but there -are better and worse ways to handle the situation. MLIR is already designed to -represent the provenance of an operation well. This project aims to propagate -that provenance information precisely, as well as diagnose pattern match -failures with the rationale for why a set of patterns do not apply. - -### Non goals - -This proposal doesn't aim to solve all compiler problems, it is simply a -DAG-to-DAG pattern matching system, starting with a greedy driver algorithm. -Compiler algorithms that require global dataflow analysis (e.g. common -subexpression elimination, conditional constant propagation, and many many -others) will not be directly solved by this infrastructure. - -This proposal is limited to DAG patterns, which (by definition) prevent the -patterns from seeing across cycles in a graph. In an SSA-based IR like MLIR, -this means that these patterns don't see across PHI nodes / basic block -arguments. We consider this acceptable given the set of problems we are trying -to solve - we don't know of any other system that attempts to do so, and -consider the payoff of worrying about this to be low. - -This design includes the ability for DAG patterns to have associated costs -(benefits), but those costs are defined in terms of magic numbers (typically -equal to the number of nodes being replaced). For any given application, the -units of magic numbers will have to be defined. - -## Overall design - -We decompose the problem into four major pieces: - -1. the code that is used to define patterns to match, cost, and their - replacement actions -1. the driver logic to pick the best match for a given root node -1. the client that is implementing some transformation (e.g. a combiner) -1. (future) the subsystem that allows patterns to be described with a - declarative syntax, which sugars step #1. 
- -We sketch the first three of these pieces, each in turn. This is not intended to -be a concrete API proposal, merely to describe the design - -### Defining Patterns - -Each pattern will be an instance of a mlir::Pattern class, whose subclasses -implement methods like this. Note that this API is meant for exposition, the -actual details are different for efficiency and coding standards reasons (e.g. -the memory management of `PatternState` is not specified below, etc): - -```c++ -class Pattern { - /// Return the benefit (the inverse of "cost") of matching this pattern. The - /// benefit of a Pattern is always static - rewrites that may have dynamic - /// benefit can be instantiated multiple times (different Pattern instances) - /// for each benefit that they may return, and be guarded by different match - /// condition predicates. - PatternBenefit getBenefit() const { return benefit; } - - /// Return the root node that this pattern matches. Patterns that can - /// match multiple root types are instantiated once per root. - OperationName getRootKind() const { return rootKind; } - - /// Attempt to match against code rooted at the specified operation, - /// which is the same operation code as getRootKind(). On failure, this - /// returns a None value. On success it a (possibly null) pattern-specific - /// state wrapped in a Some. This state is passed back into its rewrite - /// function if this match is selected. - virtual Optional match(Operation *op) const = 0; - - /// Rewrite the IR rooted at the specified operation with the result of - /// this pattern, generating any new operations with the specified - /// rewriter. If an unexpected error is encountered (an internal - /// compiler error), it is emitted through the normal MLIR diagnostic - /// hooks and the IR is left in a valid state. 
- virtual void rewrite(Operation *op, PatternState *state, - PatternRewriter &rewriter) const; -}; -``` - -In practice, the first patterns we implement will directly subclass and -implement this stuff, but we will define some helpers to reduce boilerplate. -When we have a declarative way to describe patterns, this should be -automatically generated from the description. - -Instances of `Pattern` have a benefit that is static upon construction of the -pattern instance, but may be computed dynamically at pattern initialization -time, e.g. allowing the benefit to be derived from domain specific information, -like the target architecture). This limitation allows us MLIR to (eventually) -perform pattern fusion and compile patterns into an efficient state machine, and -[Thier, Ertl, and Krall](https://dl.acm.org/citation.cfm?id=3179501) have shown -that match predicates eliminate the need for dynamically computed costs in -almost all cases: you can simply instantiate the same pattern one time for each -possible cost and use the predicate to guard the match. - -The two-phase nature of this API (match separate from rewrite) is important for -two reasons: 1) some clients may want to explore different ways to tile the -graph, and only rewrite after committing to one tiling. 2) We want to support -runtime extensibility of the pattern sets, but want to be able to statically -compile the bulk of known patterns into a state machine at "compiler compile -time". Both of these reasons lead to us needing to match multiple patterns -before committing to an answer. - -### Picking and performing a replacement - -In the short term, this API can be very simple, something like this can work and -will be useful for many clients: - -```c++ -class PatternMatcher { - // Create a pattern matcher with a bunch of patterns. This constructor - // looks across all of the specified patterns, and builds an internal - // data structure that allows efficient matching. 
- PatternMatcher(ArrayRef patterns); - - // Given a specific operation, see if there is some rewrite that is - // interesting. If so, return success and return the list of new - // operations that were created. If not, return failure. - bool matchAndRewrite(Operation *op, - SmallVectorImpl &newlyCreatedOps); -}; -``` - -In practice the interesting part of this class is the acceleration structure it -builds internally. It buckets up the patterns by root operation, and sorts them -by their static benefit. When performing a match, it tests any dynamic patterns, -then tests statically known patterns from highest to lowest benefit. - -### First Client: A Greedy Worklist Combiner - -We expect that there will be lots of clients for this, but a simple greedy -worklist-driven combiner should be powerful enough to serve many important ones, -including the -[TF2XLA op expansion logic](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/compiler/tf2xla/kernels), -many of the pattern substitution passes of the -[TOCO compiler](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/lite/toco) -for TF-Lite, many -[Grappler](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/core/grappler) -passes, and other general performance optimizations for applying identities. - -The structure of this algorithm is straight-forward, here is pseudo code: - -* Walk a function in preorder, adding each operation to a worklist. -* While the worklist is non-empty, pull something off the back (processing - things generally in postorder) - * Perform matchAndRewrite on the operation. If failed, continue to the - next operation. - * On success, add the newly created ops to the worklist and continue. - -## Future directions - -It is important to get implementation and usage experience with this, and many -patterns can be defined using this sort of framework. Over time, we can look to -make it easier to declare patterns in a declarative form (e.g. 
with the LLVM -tblgen tool or something newer/better). Once we have that, we can define an -internal abstraction for describing the patterns to match, allowing better high -level optimization of patterns (including fusion of the matching logic across -patterns, which the LLVM instruction selector does) and allow the patterns to be -defined without rebuilding the compiler itself. diff --git a/mlir/docs/PatternRewriter.md b/mlir/docs/PatternRewriter.md new file mode 100644 index 00000000000000..2a2c30d98e04a7 --- /dev/null +++ b/mlir/docs/PatternRewriter.md @@ -0,0 +1,256 @@ +# Pattern Rewriting : Generic DAG-to-DAG Rewriting + +[TOC] + +This document details the design and API of the pattern rewriting infrastructure +present in MLIR, a general DAG-to-DAG transformation framework. This framework +is widely used throughout MLIR for canonicalization, conversion, and general +transformation. + +For an introduction to DAG-to-DAG transformation, and the rationale behind this +framework please take a look at the +[Generic DAG Rewriter Rationale](Rationale/RationaleGenericDAGRewriter.md). + +## Introduction + +The pattern rewriting framework can largely be decomposed into two parts: +Pattern Definition and Pattern Application. + +## Defining Patterns + +Patterns are defined by inheriting from the `RewritePattern` class. This class +represents the base class of all rewrite patterns within MLIR, and is comprised +of the following components: + +### Benefit + +This is the expected benefit of applying a given pattern. This benefit is static +upon construction of the pattern, but may be computed dynamically at pattern +initialization time, e.g. allowing the benefit to be derived from domain +specific information (like the target architecture). 
This limitation allows for +performing pattern fusion and compiling patterns into an efficient state +machine, and +[Thier, Ertl, and Krall](https://dl.acm.org/citation.cfm?id=3179501) have shown +that match predicates eliminate the need for dynamically computed costs in +almost all cases: you can simply instantiate the same pattern one time for each +possible cost and use the predicate to guard the match. + +### Root Operation Name (Optional) + +The name of the root operation that this pattern matches against. If specified, +only operations with the given root name will be provided to the `match` and +`rewrite` implementation. If not specified, any operation type may be provided. +The root operation name should be provided whenever possible, because it +simplifies the analysis of patterns when applying a cost model. To match any +operation type, a special tag must be provided to make the intent explicit: +`MatchAnyOpTypeTag`. + +### `match` and `rewrite` implementation + +This is the chunk of code that matches a given root `Operation` and performs a +rewrite of the IR. A `RewritePattern` can specify this implementation either via +separate `match` and `rewrite` methods, or via a combined `matchAndRewrite` +method. When using the combined `matchAndRewrite` method, no IR mutation should +take place before the match is deemed successful. The combined `matchAndRewrite` +is useful when non-trivially recomputable information is required by the +matching and rewriting phase. See below for examples: + +```c++ +class MyPattern : public RewritePattern { +public: + /// This overload constructs a pattern that only matches operations with the + /// root name of `MyOp`. + MyPattern(PatternBenefit benefit, MLIRContext *context) + : RewritePattern(MyOp::getOperationName(), benefit, context) {} + /// This overload constructs a pattern that matches any operation type. 
+ MyPattern(PatternBenefit benefit) + : RewritePattern(benefit, MatchAnyOpTypeTag()) {} + + /// In this section, the `match` and `rewrite` implementation is specified + /// using the separate hooks. + LogicalResult match(Operation *op) const override { + // The `match` method returns `success()` if the pattern is a match, failure + // otherwise. + // ... + } + void rewrite(Operation *op, PatternRewriter &rewriter) { + // The `rewrite` method performs mutations on the IR rooted at `op` using + // the provided rewriter. All mutations must go through the provided + // rewriter. + } + + /// In this section, the `match` and `rewrite` implementation is specified + /// using a single hook. + LogicalResult matchAndRewrite(Operation *op, PatternRewriter &rewriter) { + // The `matchAndRewrite` method performs both the matching and the mutation. + // Note that the match must reach a successful point before IR mutation may + // take place. + } +}; +``` + +#### Restrictions + +Within the `match` section of a pattern, the following constraints apply: + +* No mutation of the IR is allowed. + +Within the `rewrite` section of a pattern, the following constraints apply: + +* All IR mutations, including creation, *must* be performed by the given + `PatternRewriter`. This class provides hooks for performing all of the + possible mutations that may take place within a pattern. For example, this + means that an operation should not be erased via its `erase` method. To + erase an operation, the appropriate `PatternRewriter` hook (in this case + `eraseOp`) should be used instead. +* The root operation is required to either be: updated in-place, replaced, or + erased. + +### Pattern Rewriter + +A `PatternRewriter` is a special class that allows for a pattern to communicate +with the driver of pattern application. As noted above, *all* IR mutations, +including creations, are required to be performed via the `PatternRewriter` +class. 
This is required because the underlying pattern driver may have state +that would be invalidated when a mutation takes place. Examples of some of the +more prevalent `PatternRewriter` API is shown below, please refer to the +[class documentation](https://github.com/llvm/llvm-project/blob/master/mlir/include/mlir/IR/PatternMatch.h#L235) +for a more up-to-date listing of the available API: + +* Erase an Operation : `eraseOp` + +This method erases an operation that either has no results, or whose results are +all known to have no uses. + +* Notify why a `match` failed : `notifyMatchFailure` + +This method allows for providing a diagnostic message within a `matchAndRewrite` +as to why a pattern failed to match. How this message is displayed back to the +user is determined by the specific pattern driver. + +* Replace an Operation : `replaceOp`/`replaceOpWithNewOp` + +This method replaces an operation's results with a set of provided values, and +erases the operation. + +* Update an Operation in-place : `(start|cancel|finalize)RootUpdate` + +This is a collection of methods that provide a transaction-like API for updating +the attributes, location, operands, or successors of an operation in-place +within a pattern. An in-place update transaction is started with +`startRootUpdate`, and may either be canceled or finalized with +`cancelRootUpdate` and `finalizeRootUpdate` respectively. A convenience wrapper, +`updateRootInPlace`, is provided that wraps a `start` and `finalize` around a +callback. + +* OpBuilder API + +The `PatternRewriter` inherits from the `OpBuilder` class, and thus provides all +of the same functionality present within an `OpBuilder`. This includes operation +creation, as well as many useful attribute and type construction methods. + +## Pattern Application + +After a set of patterns have been defined, they are collected and provided to a +specific driver for application. 
A driver consists of several high-level parts:
+
+* Input `OwningRewritePatternList`
+
+The input patterns to a driver are provided in the form of an
+`OwningRewritePatternList`. This class provides a simplified API for building a
+list of patterns.
+
+* Driver-specific `PatternRewriter`
+
+To ensure that the driver state does not become invalidated by IR mutations
+within the pattern rewriters, a driver must provide a `PatternRewriter` instance
+with the necessary hooks overridden. If a driver does not need to hook into
+certain mutations, a default implementation is provided that will perform the
+mutation directly.
+
+* Pattern Application and Cost Model
+
+Each driver is responsible for defining its own operation visitation order as
+well as pattern cost model, but the final application is performed via a
+`PatternApplicator` class. This class takes as input the
+`OwningRewritePatternList` and transforms the patterns based upon a provided
+cost model. This cost model computes a final benefit for a given rewrite
+pattern, using whatever driver specific information necessary. After a cost
+model has been computed, the driver may begin to match patterns against
+operations using `PatternApplicator::matchAndRewrite`.
+
+An example is shown below:
+
+```c++
+class MyPattern : public RewritePattern {
+public:
+  MyPattern(PatternBenefit benefit, MLIRContext *context)
+      : RewritePattern(MyOp::getOperationName(), benefit, context) {}
+};
+
+/// Populate the pattern list.
+void collectMyPatterns(OwningRewritePatternList &patterns, MLIRContext *ctx) {
+  patterns.insert<MyPattern>(/*benefit=*/1, ctx);
+}
+
+/// Define a custom PatternRewriter for use by the driver.
+class MyPatternRewriter : public PatternRewriter {
+public:
+  MyPatternRewriter(MLIRContext *ctx) : PatternRewriter(ctx) {}
+
+  /// Override the necessary PatternRewriter hooks here.
+};
+
+/// Apply the custom driver to `op`. 
+void applyMyPatternDriver(Operation *op,
+                          const OwningRewritePatternList &patterns) {
+  // Initialize the custom PatternRewriter.
+  MyPatternRewriter rewriter(op->getContext());
+
+  // Create the applicator and apply our cost model.
+  PatternApplicator applicator(patterns);
+  applicator.applyCostModel([](const RewritePattern &pattern) {
+    // Apply a default cost model.
+    // Note: This is just for demonstration, if the default cost model is truly
+    //       desired `applicator.applyDefaultCostModel()` should be used
+    //       instead.
+    return pattern.getBenefit();
+  });
+
+  // Try to match and apply a pattern.
+  LogicalResult result = applicator.matchAndRewrite(op, rewriter);
+  if (failed(result)) {
+    // ... No patterns were applied.
+  }
+  // ... A pattern was successfully applied.
+}
+```
+
+## Common Pattern Drivers
+
+MLIR provides several common pattern drivers that serve a variety of different
+use cases.
+
+### Dialect Conversion Driver
+
+This driver provides a framework in which to perform operation conversions
+between, and within dialects using a concept of "legality". This framework
+allows for transforming illegal operations to those supported by a provided
+conversion target, via a set of pattern-based operation rewriting patterns. This
+framework also provides support for type conversions. More information on this
+driver can be found [here](DialectConversion.md).
+
+### Greedy Pattern Rewrite Driver
+
+This driver performs a post order traversal over the provided operations and
+greedily applies the patterns that locally have the most benefit. The benefit of
+a pattern is decided solely by the benefit specified on the pattern, and the
+relative order of the pattern within the pattern list (when two patterns have
+the same local benefit). Patterns are iteratively applied to operations until a
+fixed point is reached, at which point the driver finishes. This driver may be
+used via the following: `applyPatternsAndFoldGreedily` and
+`applyOpPatternsAndFold`. 
The latter of which only applies patterns to the +provided operation, and will not traverse the IR. + +Note: This driver is the one used by the [canonicalization](Canonicalization.md) +[pass](Passes.md#-canonicalize-canonicalize-operations) in MLIR. diff --git a/mlir/docs/Rationale/MLIRForGraphAlgorithms.md b/mlir/docs/Rationale/MLIRForGraphAlgorithms.md index ac26e5beb9b938..8bd2d9ce8f3543 100644 --- a/mlir/docs/Rationale/MLIRForGraphAlgorithms.md +++ b/mlir/docs/Rationale/MLIRForGraphAlgorithms.md @@ -254,7 +254,7 @@ and the API is easier to work with from an ergonomics perspective. ### Unified Graph Rewriting Infrastructure This is still a work in progress, but we have sightlines towards a -[general rewriting infrastructure](GenericDAGRewriter.md) for transforming DAG +[general rewriting infrastructure](RationaleGenericDAGRewriter.md) for transforming DAG tiles into other DAG tiles, using a declarative pattern format. DAG to DAG rewriting is a generalized solution for many common compiler optimizations, lowerings, and other rewrites and having an IR enables us to invest in building diff --git a/mlir/docs/Rationale/RationaleGenericDAGRewriter.md b/mlir/docs/Rationale/RationaleGenericDAGRewriter.md new file mode 100644 index 00000000000000..289750bdb4abde --- /dev/null +++ b/mlir/docs/Rationale/RationaleGenericDAGRewriter.md @@ -0,0 +1,286 @@ +# Generic DAG Rewriter Infrastructure Rationale + +This document details the rationale behind a general DAG-to-DAG rewrite +infrastructure for MLIR. For up-to-date documentation on the user facing API, +please look at the main [Pattern Rewriting document](../PatternRewriter.md). + +## Introduction and Motivation + +The goal of a compiler IR is to represent code - at various levels of +abstraction which pose different sets of tradeoffs in terms of representational +capabilities and ease of transformation. 
However, the ability to represent code
+is not itself very useful - you also need to be able to implement those
+transformations.
+
+There are many different types of compiler transformations, but this document
+focuses on a particularly important class of transformation that comes up
+repeatedly at scale, and is important for the goals of MLIR: matching one DAG of
+operations, and replacing with another. This is an integral part of many
+compilers and necessary for peephole optimizations like "eliminate identity
+nodes" or "replace x+0 with x", a generalized canonicalization framework (e.g.
+Instruction Combiner in LLVM), as well as a useful abstraction to implement
+optimization algorithms for IR at multiple levels.
+
+A particular strength of MLIR (and a major difference vs other compiler
+infrastructures like LLVM, GCC, XLA, TensorFlow, etc) is that it uses a single
+compiler IR to represent code at multiple levels of abstraction: an MLIR
+operation can be a "TensorFlow operation", an "XLA HLO", an Affine Loop Nest, an
+LLVM IR instruction (transitively including X86, Lanai, PTX, and other target
+specific instructions), or anything else that the MLIR operation system can
+reasonably express. Given that MLIR spans such a wide range of different problem
+scopes, a single infrastructure for performing graph-to-graph rewrites can help
+solve many diverse domain challenges.
+
+[Static single assignment](https://en.wikipedia.org/wiki/Static_single_assignment_form)
+(SSA) representations like MLIR make it easy to access the operands and "users"
+of an operation. As such, a natural abstraction for these graph-to-graph
+rewrites is that of DAG pattern matching: clients define DAG tile patterns
+(where a tile is a sequence of operations defining a subgraph of the DAG), and
+each pattern includes a result DAG to produce and the cost of the result (or,
+inversely, the benefit of doing the replacement). 
A common infrastructure +efficiently finds and performs the rewrites. + +While this concept is simple, the details are more nuanced. This document +defines and explores a set of abstractions that can solve a wide range of +different problems, and be applied to many different sorts of problems that MLIR +is - and is expected to - face over time. We do this by separating the pattern +application algorithm from the "driver" of the computation loop, and make space +for the patterns to be defined declaratively. + +### Constant folding + +A degenerate but pervasive case of DAG-to-DAG pattern matching is constant +folding: an operation whose operands contain constants can often be folded to a +result constant value. + +MLIR operations may override a +[`fold`](../Canonicalization.md/#canonicalizing-with-fold) routine, which +exposes a simpler API compared to a general DAG-to-DAG pattern matcher, and +allows for it to be applicable in cases that a generic matcher would not. For +example, a DAG-rewrite can remove arbitrary nodes in the current function, which +could invalidate iterators. Constant folding as an API does not remove any +nodes, it just provides a (list of) constant values and allows the clients to +update their data structures as necessary. + +## Related Work + +There is a huge amount of related work to consider, given that nearly every +compiler in existence has to solve this problem many times over. One unifying +problem is that all of these systems are designed to solve one particular, and +usually, narrow problem: MLIR on the other hand would like to solve many of +these problems within a single infrastructure. Here are a few related graph +rewrite systems, along with the pros and cons of their work (The most similar +design to the infrastructure present in MLIR is the LLVM DAG-to-DAG instruction +selection algorithm). 
+ +### AST-Level Pattern Matchers + +The literature is full of source-to-source translators which transform +identities in order to improve performance (e.g. transforming `X*0` into `0`). +One large example is the GCC `fold` function, which performs +[many optimizations](https://github.com/gcc-mirror/gcc/blob/master/gcc/fold-const.c) +on ASTs. Clang has +[similar routines](https://clang.llvm.org/docs/InternalsManual.html#constant-folding-in-the-clang-ast) +for simple constant folding of expressions (as required by the C++ standard) but +doesn't perform general optimizations on its ASTs. + +The primary downside of AST optimizers is that you can't see across operations +that have multiple uses. It is +[well known in literature](https://llvm.org/pubs/2008-06-LCTES-ISelUsingSSAGraphs.pdf) +that DAG pattern matching is more powerful than tree pattern matching, but on +the other hand, DAG pattern matching can lead to duplication of computation +which needs to be checked for. + +### "Combiners" and other peephole optimizers + +Compilers end up with a lot of peephole optimizers for various things, e.g. the +GCC +["combine" routines](https://github.com/gcc-mirror/gcc/blob/master/gcc/combine.c) +(which try to merge two machine instructions into a single one), the LLVM +[Inst Combine](https://github.com/llvm/llvm-project/tree/master/llvm/lib/Transforms/InstCombine) +[pass](https://llvm.org/docs/Passes.html#instcombine-combine-redundant-instructions), +LLVM's +[DAG Combiner](https://github.com/llvm-mirror/llvm/blob/master/lib/CodeGen/SelectionDAG/DAGCombiner.cpp), +the Swift compiler's +[SIL Combiner](https://github.com/apple/swift/tree/master/lib/SILOptimizer/SILCombiner), +etc. These generally match one or more operations and produce zero or more +operations as a result. The LLVM +[Legalization](https://github.com/llvm/llvm-project/tree/master/llvm/lib/CodeGen/SelectionDAG) +infrastructure has a different outer loop but otherwise works the same way. 
+ +These passes have a lot of diversity, but also have a unifying structure: they +mostly have a worklist outer loop which visits operations. They then use a +visitor pattern (or equivalent) to switch over the class of operation and +dispatch to a method. That method contains a long list of hand-written C++ code +that pattern-matches various special cases. LLVM introduced a "match" function +that allows writing patterns in a somewhat more declarative style using template +metaprogramming (MLIR has similar facilities). Here's a simple example: + +```c++ + // Y - (X + 1) --> ~X + Y + if (match(Op1, m_OneUse(m_Add(m_Value(X), m_One())))) + return BinaryOperator::CreateAdd(Builder.CreateNot(X), Op0); +``` + +Here is a somewhat more complicated one (this is not the biggest or most +complicated :) + +```c++ + // C2 is ODD + // LHS = XOR(Y,C1), Y = AND(Z,C2), C1==(C2+1) => LHS == NEG(OR(Z, ~C2)) + // ADD(LHS, RHS) == SUB(RHS, OR(Z, ~C2)) + if (match(LHS, m_Xor(m_Value(Y), m_APInt(C1)))) + if (C1->countTrailingZeros() == 0) + if (match(Y, m_And(m_Value(Z), m_APInt(C2))) && *C1 == (*C2 + 1)) { + Value NewOr = Builder.CreateOr(Z, ~(*C2)); + return Builder.CreateSub(RHS, NewOr, "sub"); + } +``` + +These systems are simple to set up, and pattern matching templates have some +advantages (they are extensible for new sorts of sub-patterns, look compact at +point of use). On the other hand, they have lots of well known problems, for +example: + +* These patterns are very error prone to write, and contain lots of + redundancies. +* The IR being matched often has identities (e.g. when matching commutative + operators) and the C++ code has to handle it manually - take a look at + [the full code](https://github.com/llvm/llvm-project/blob/c0b5000bd848303320c03f80fbf84d71e74518c9/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp#L767) + for `checkForNegativeOperand` that defines the second pattern). 
+* The matching code compiles slowly, both because it generates tons of code + and because the templates instantiate slowly. +* Adding new patterns (e.g. for count leading zeros in the example above) is + awkward and doesn't often happen. +* The cost model for these patterns is not really defined - it is emergent + based on the order the patterns are matched in code. +* They are non-extensible without rebuilding the compiler. +* It isn't practical to apply theorem provers and other tools to these + patterns - they cannot be reused for other purposes. + +In addition to structured "combiners" like these, there are lots of ad-hoc +systems like the +[LLVM Machine code peephole optimizer](http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/PeepholeOptimizer.cpp?view=markup) +which are related. + +### LLVM's DAG-to-DAG Instruction Selection Infrastructure + +The instruction selection subsystem in LLVM is the result of many years worth of +iteration and discovery, driven by the need for LLVM to support code generation +for lots of targets, the complexity of code generators for modern instruction +sets (e.g. X86), and the fanatical pursuit of reusing code across targets. Eli +Bendersky wrote a +[nice short overview](https://eli.thegreenplace.net/2013/02/25/a-deeper-look-into-the-llvm-code-generator-part-1) +of how this works, and the +[LLVM documentation](https://llvm.org/docs/CodeGenerator.html#select-instructions-from-dag) +describes it in more depth including its advantages and limitations. It allows +writing patterns like this. 
+ +``` +def : Pat<(or GR64:$src, (not (add GR64:$src, 1))), + (BLCI64rr GR64:$src)>; +``` + +This example defines a matcher for the +["blci" instruction](https://en.wikipedia.org/wiki/Bit_Manipulation_Instruction_Sets#TBM_\(Trailing_Bit_Manipulation\)) +in the +[X86 target description](https://github.com/llvm/llvm-project/blob/master/llvm/lib/Target/X86/X86InstrInfo.td), +there are many others in that file (look for `Pat<>` patterns, since they aren't +entangled in details of the compiler like assembler/disassembler generation +logic). + +For the purposes of MLIR, there is much to like about this system, for example: + +* It is defined in a declarative format. +* It is extensible to target-defined operations. +* It automates matching across identities, like commutative patterns. +* It allows custom abstractions and intense factoring of target-specific + commonalities. +* It generates compact code - it compiles into a state machine, which is + interpreted. +* It allows the instruction patterns to be defined and reused for multiple + purposes. +* The patterns are "type checked" at compile time, detecting lots of bugs + early and eliminating redundancy from the pattern specifications. +* It allows the use of general C++ code for weird/complex cases. + +While there is a lot that is good here, there are also a few undesirable bits: + +* The representation is specifically designed and only applicable for + instruction selection, meaning that the directly adjacent problems like the + DAGCombiner and Legalizer can't use it. +* This isn't extensible at compiler runtime, you have to rebuild the compiler + to extend it. +* The error messages when failing to match a pattern + [are not exactly optimal](https://www.google.com/search?q=llvm+cannot+select). +* It has lots of implementation problems and limitations (e.g. 
can't write a
+  pattern for a multi-result operation) as a result of working with the
+  awkward SelectionDAG representation and being designed and implemented on
+  demand.
+* Organic growth over time has left lots of sharp edges.
+
+### Summary
+
+MLIR faces a wide range of pattern matching and graph rewrite problems, and one
+of the major advantages of having a common representation for code at multiple
+levels is that it allows for investing in - and highly leveraging - a single
+infrastructure for doing this sort of work.
+
+## Goals
+
+We'd like this to encompass many problems in the MLIR space, including 1-to-N
+expansions (e.g. such as in type legalization during instruction selection when
+an add of one bit width may be split into multiple adds of a smaller bit width),
+M-to-1 patterns (e.g. when converting a multiply+add into a single muladd
+operation), as well as general M-to-N patterns (e.g. instruction selection for
+target instructions). Patterns have a benefit associated with them, and the
+common infrastructure should be responsible for sorting out the highest benefit
+match for a given application.
+
+We separate the task of picking a particular optimal pattern from a given root
+node, the algorithm used to rewrite an entire graph given a particular set of
+goals, and the definition of the patterns themselves. We do this because DAG
+tile pattern matching is NP complete. Additionally, we would like to support
+iterative rewrite algorithms that progressively transform the input program
+through multiple steps. Furthermore, we would like to support many different
+sorts of clients across the MLIR stack, and they may have different tolerances
+for compile time cost, different demands for optimality, and other algorithmic
+goals or constraints.
+
+We aim for MLIR transformations to be easy to implement and reduce the
+likelihood for compiler bugs. 
We expect there to be a very large number of +patterns that are defined over time, and we believe that these sorts of patterns +will have a very large number of legality/validity constraints - many of which +are difficult to reason about in a consistent way, may be target specific, and +whose implementation may be particularly bug-prone. As such, we aim to design +the API around pattern definition to be simple, resilient to programmer errors, +and allow separation of concerns between the legality of the nodes generated +from the idea of the pattern being defined. + +Finally, error handling is a topmost concern, we want pattern match failures to +be diagnosable in a reasonable way. This is a difficult problem in general, as +the space of malfunction is too great to be fully enumerated and handled +optimally, but MLIR is already designed to represent the provenance of an +operation well. The aim of the pattern rewriting infrastructure is simply to +propagate that provenance information precisely, as well as diagnose pattern +match failures with the rationale for why a set of patterns do not apply. + +### Non goals + +The pattern infrastructure does not aim to solve all compiler problems, it is +simply a DAG-to-DAG pattern matching system. Compiler algorithms that require +global dataflow analysis (e.g. common subexpression elimination, conditional +constant propagation, and many many others) will not be directly solved by this +infrastructure. + +This infrastructure is limited to DAG patterns, which (by definition) prevent +the patterns from seeing across cycles in a graph. In an SSA-based IR like MLIR, +this means that these patterns don't see across basic block arguments. We +consider this acceptable given the set of problems we are trying to solve - we +don't know of any other system that attempts to do so, and consider the payoff +of worrying about this to be low. 
+ +This design includes the ability for DAG patterns to have associated benefits, +but those benefits are defined in terms of magic numbers (typically equal to the +number of nodes being replaced). For any given application, the units of magic +numbers will have to be defined. diff --git a/mlir/docs/Tutorials/Toy/Ch-3.md b/mlir/docs/Tutorials/Toy/Ch-3.md index 5353b58acddf8d..7976d7c30db599 100644 --- a/mlir/docs/Tutorials/Toy/Ch-3.md +++ b/mlir/docs/Tutorials/Toy/Ch-3.md @@ -13,7 +13,7 @@ We divide compiler transformations into two categories: local and global. In this chapter, we focus on how to leverage the Toy Dialect and its high-level semantics to perform local pattern-match transformations that would be difficult in LLVM. For this, we use MLIR's -[Generic DAG Rewriter](../../GenericDAGRewriter.md). +[Generic DAG Rewriter](../../PatternRewriter.md). There are two methods that can be used to implement pattern-match transformations: 1. Imperative, C++ pattern-match and rewrite 2. Declarative, From c2807b2e56c05080354818c221ed4a35abd8a5c8 Mon Sep 17 00:00:00 2001 From: Alex Lorenz Date: Thu, 13 Aug 2020 12:05:57 -0700 Subject: [PATCH 19/23] [darwin][driver] fix isMacosxVersionLT minimum supported OS version check The previous Driver's triple check only worked for -target, but not for -arch -mmacosx-version-min invocations --- clang/lib/Driver/ToolChains/Darwin.h | 6 +++++- .../macos-apple-silicon-slice-link-libs-darwin-only.cpp | 6 ++++++ clang/test/Driver/macos-apple-silicon-slice-link-libs.cpp | 1 + 3 files changed, 12 insertions(+), 1 deletion(-) create mode 100644 clang/test/Driver/macos-apple-silicon-slice-link-libs-darwin-only.cpp diff --git a/clang/lib/Driver/ToolChains/Darwin.h b/clang/lib/Driver/ToolChains/Darwin.h index 64c252efea7df1..e67b2c5c87cd75 100644 --- a/clang/lib/Driver/ToolChains/Darwin.h +++ b/clang/lib/Driver/ToolChains/Darwin.h @@ -436,7 +436,11 @@ class LLVM_LIBRARY_VISIBILITY Darwin : public MachO { bool isMacosxVersionLT(unsigned V0, 
unsigned V1 = 0, unsigned V2 = 0) const { assert(isTargetMacOS() && getTriple().isMacOSX() && "Unexpected call for non OS X target!"); - VersionTuple MinVers = getTriple().getMinimumSupportedOSVersion(); + // The effective triple might not be initialized yet, so construct a + // pseudo-effective triple to get the minimum supported OS version. + VersionTuple MinVers = + llvm::Triple(getTriple().getArchName(), "apple", "macos") + .getMinimumSupportedOSVersion(); return (!MinVers.empty() && MinVers > TargetVersion ? MinVers : TargetVersion) < VersionTuple(V0, V1, V2); diff --git a/clang/test/Driver/macos-apple-silicon-slice-link-libs-darwin-only.cpp b/clang/test/Driver/macos-apple-silicon-slice-link-libs-darwin-only.cpp new file mode 100644 index 00000000000000..ec3b710c4da8cc --- /dev/null +++ b/clang/test/Driver/macos-apple-silicon-slice-link-libs-darwin-only.cpp @@ -0,0 +1,6 @@ +// RUN: %clang -### -arch arm64 -mmacosx-version-min=10.7 %s 2>&1 | FileCheck -check-prefix=ARM64-10_7 %s +// RUN: %clang -### -arch x86_64 -mmacosx-version-min=10.7 %s 2>&1 | FileCheck -check-prefix=x86_64-10_7 %s +// REQUIRES: system-darwin + +// ARM64-10_7-NOT: -lcrt1.10.6.o +// x86_64-10_7: -lcrt1.10.6.o diff --git a/clang/test/Driver/macos-apple-silicon-slice-link-libs.cpp b/clang/test/Driver/macos-apple-silicon-slice-link-libs.cpp index 522fda34987e9d..4a2a029c736fc9 100644 --- a/clang/test/Driver/macos-apple-silicon-slice-link-libs.cpp +++ b/clang/test/Driver/macos-apple-silicon-slice-link-libs.cpp @@ -1,5 +1,6 @@ // RUN: %clang -### -target arm64-apple-macos10.7 %s 2>&1 | FileCheck -check-prefix=ARM64-10_7 %s // RUN: %clang -### -target x86_64-apple-macos10.7 %s 2>&1 | FileCheck -check-prefix=x86_64-10_7 %s +// RUN: %clang -### -target arm64-apple-darwin6 %s 2>&1 | FileCheck -check-prefix=ARM64-10_7 %s // RUN: %clang -### -target arm64-apple-macos10.5 %s 2>&1 | FileCheck -check-prefix=ARM64-10_5 %s // RUN: %clang -### -target x86_64-apple-macos10.5 %s 2>&1 | FileCheck 
-check-prefix=x86_64-10_5 %s From 5bcd32b7449482d6079d968c08d07a3890c86912 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Stefan=20Gr=C3=A4nitz?= Date: Tue, 11 Aug 2020 13:39:54 +0200 Subject: [PATCH 20/23] [ORC][NFC] Fix typo in comment --- llvm/lib/ExecutionEngine/Orc/LLJIT.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp b/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp index ffe156dcdd62cc..531a71d50b9eca 100644 --- a/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp +++ b/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp @@ -928,7 +928,7 @@ LLJIT::PlatformSupport::~PlatformSupport() {} Error LLJITBuilderState::prepareForConstruction() { - LLVM_DEBUG(dbgs() << "Preparing to create LLIT instance...\n"); + LLVM_DEBUG(dbgs() << "Preparing to create LLJIT instance...\n"); if (!JTMB) { LLVM_DEBUG({ From 5092039644eac33bdd9e494045a9f99654f792b3 Mon Sep 17 00:00:00 2001 From: Nico Weber Date: Thu, 13 Aug 2020 15:43:43 -0400 Subject: [PATCH 21/23] [gn build] (manually) port d650cbc349ccc4 --- .../gn/secondary/llvm/lib/InterfaceStub/BUILD.gn | 13 +++++++++++++ llvm/utils/gn/secondary/llvm/lib/TextAPI/BUILD.gn | 2 -- .../gn/secondary/llvm/tools/llvm-elfabi/BUILD.gn | 2 +- llvm/utils/gn/secondary/llvm/unittests/BUILD.gn | 1 + .../secondary/llvm/unittests/InterfaceStub/BUILD.gn | 9 +++++++++ .../gn/secondary/llvm/unittests/TextAPI/BUILD.gn | 1 - 6 files changed, 24 insertions(+), 4 deletions(-) create mode 100644 llvm/utils/gn/secondary/llvm/lib/InterfaceStub/BUILD.gn create mode 100644 llvm/utils/gn/secondary/llvm/unittests/InterfaceStub/BUILD.gn diff --git a/llvm/utils/gn/secondary/llvm/lib/InterfaceStub/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/InterfaceStub/BUILD.gn new file mode 100644 index 00000000000000..0d157bdd6751f0 --- /dev/null +++ b/llvm/utils/gn/secondary/llvm/lib/InterfaceStub/BUILD.gn @@ -0,0 +1,13 @@ +static_library("InterfaceStub") { + output_name = "LLVMInterfaceStub" + deps = [ + "//llvm/lib/Bitstream/Reader", + 
"//llvm/lib/Support", + ] + + sources = [ + "ELFObjHandler.cpp", + "ELFStub.cpp", + "TBEHandler.cpp", + ] +} diff --git a/llvm/utils/gn/secondary/llvm/lib/TextAPI/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/TextAPI/BUILD.gn index aa695da4bf30d1..6df06f941d4a99 100644 --- a/llvm/utils/gn/secondary/llvm/lib/TextAPI/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/TextAPI/BUILD.gn @@ -6,8 +6,6 @@ static_library("TextAPI") { ] include_dirs = [ "." ] sources = [ - "ELF/ELFStub.cpp", - "ELF/TBEHandler.cpp", "MachO/Architecture.cpp", "MachO/ArchitectureSet.cpp", "MachO/InterfaceFile.cpp", diff --git a/llvm/utils/gn/secondary/llvm/tools/llvm-elfabi/BUILD.gn b/llvm/utils/gn/secondary/llvm/tools/llvm-elfabi/BUILD.gn index dd12e20a924e30..0b3cb1ec41ba2d 100644 --- a/llvm/utils/gn/secondary/llvm/tools/llvm-elfabi/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/tools/llvm-elfabi/BUILD.gn @@ -1,11 +1,11 @@ executable("llvm-elfabi") { deps = [ + "//llvm/lib/InterfaceStub", "//llvm/lib/Object", "//llvm/lib/Support", "//llvm/lib/TextAPI", ] sources = [ - "ELFObjHandler.cpp", "ErrorCollector.cpp", "llvm-elfabi.cpp", ] diff --git a/llvm/utils/gn/secondary/llvm/unittests/BUILD.gn b/llvm/utils/gn/secondary/llvm/unittests/BUILD.gn index 3d960d501e4255..5a235671e5c853 100644 --- a/llvm/utils/gn/secondary/llvm/unittests/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/unittests/BUILD.gn @@ -23,6 +23,7 @@ group("unittests") { "Frontend:LLVMFrontendTests", "FuzzMutate:FuzzMutateTests", "IR:IRTests", + "InterfaceStub:InterfaceStubTests", "LineEditor:LineEditorTests", "Linker:LinkerTests", "MC:MCTests", diff --git a/llvm/utils/gn/secondary/llvm/unittests/InterfaceStub/BUILD.gn b/llvm/utils/gn/secondary/llvm/unittests/InterfaceStub/BUILD.gn new file mode 100644 index 00000000000000..cb508882778c09 --- /dev/null +++ b/llvm/utils/gn/secondary/llvm/unittests/InterfaceStub/BUILD.gn @@ -0,0 +1,9 @@ +import("//llvm/utils/unittest/unittest.gni") + +unittest("InterfaceStubTests") { + deps = [ + 
"//llvm/lib/InterfaceStub", + "//llvm/lib/Testing/Support", + ] + sources = [ "ELFYAMLTest.cpp" ] +} diff --git a/llvm/utils/gn/secondary/llvm/unittests/TextAPI/BUILD.gn b/llvm/utils/gn/secondary/llvm/unittests/TextAPI/BUILD.gn index 8e51943b7e2631..0ebeb4f773bbc3 100644 --- a/llvm/utils/gn/secondary/llvm/unittests/TextAPI/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/unittests/TextAPI/BUILD.gn @@ -6,7 +6,6 @@ unittest("TextAPITests") { "//llvm/lib/TextAPI", ] sources = [ - "ELFYAMLTest.cpp", "TextStubV1Tests.cpp", "TextStubV2Tests.cpp", "TextStubV3Tests.cpp", From 661d83aa386fd2c2df769a2509f7c1bd5ba99a7b Mon Sep 17 00:00:00 2001 From: Nico Weber Date: Thu, 13 Aug 2020 15:48:03 -0400 Subject: [PATCH 22/23] [gn build] (manually) port d650cbc349ccc4 better --- llvm/utils/gn/secondary/llvm/lib/InterfaceStub/BUILD.gn | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/utils/gn/secondary/llvm/lib/InterfaceStub/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/InterfaceStub/BUILD.gn index 0d157bdd6751f0..4778cb590bfa68 100644 --- a/llvm/utils/gn/secondary/llvm/lib/InterfaceStub/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/InterfaceStub/BUILD.gn @@ -1,7 +1,7 @@ static_library("InterfaceStub") { output_name = "LLVMInterfaceStub" deps = [ - "//llvm/lib/Bitstream/Reader", + "//llvm/lib/Object", "//llvm/lib/Support", ] From 21810b0e14287a7b885a7822c6e19609e3b902c8 Mon Sep 17 00:00:00 2001 From: Cameron McInally Date: Thu, 13 Aug 2020 14:47:34 -0500 Subject: [PATCH 23/23] [SVE] Lower fixed length vector integer UMIN/UMAX Differential Revision: https://reviews.llvm.org/D85926 --- .../Target/AArch64/AArch64ISelLowering.cpp | 12 +- .../AArch64/sve-fixed-length-int-minmax.ll | 796 ++++++++++++++++++ 2 files changed, 806 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 6f3475e5f97dfe..50da2bf1b71337 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ 
b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -1001,6 +1001,10 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, setOperationAction(ISD::SMAX, MVT::v2i64, Custom); setOperationAction(ISD::SMIN, MVT::v1i64, Custom); setOperationAction(ISD::SMIN, MVT::v2i64, Custom); + setOperationAction(ISD::UMAX, MVT::v1i64, Custom); + setOperationAction(ISD::UMAX, MVT::v2i64, Custom); + setOperationAction(ISD::UMIN, MVT::v1i64, Custom); + setOperationAction(ISD::UMIN, MVT::v2i64, Custom); } } @@ -1121,6 +1125,8 @@ void AArch64TargetLowering::addTypeForFixedLengthSVE(MVT VT) { setOperationAction(ISD::STORE, VT, Custom); setOperationAction(ISD::SUB, VT, Custom); setOperationAction(ISD::TRUNCATE, VT, Custom); + setOperationAction(ISD::UMAX, VT, Custom); + setOperationAction(ISD::UMIN, VT, Custom); setOperationAction(ISD::XOR, VT, Custom); setOperationAction(ISD::ZERO_EXTEND, VT, Custom); } @@ -3634,12 +3640,14 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op, return LowerToPredicatedOp(Op, DAG, AArch64ISD::SMIN_PRED, /*OverrideNEON=*/true); case ISD::UMIN: - return LowerToPredicatedOp(Op, DAG, AArch64ISD::UMIN_PRED); + return LowerToPredicatedOp(Op, DAG, AArch64ISD::UMIN_PRED, + /*OverrideNEON=*/true); case ISD::SMAX: return LowerToPredicatedOp(Op, DAG, AArch64ISD::SMAX_PRED, /*OverrideNEON=*/true); case ISD::UMAX: - return LowerToPredicatedOp(Op, DAG, AArch64ISD::UMAX_PRED); + return LowerToPredicatedOp(Op, DAG, AArch64ISD::UMAX_PRED, + /*OverrideNEON=*/true); case ISD::SRA: case ISD::SRL: case ISD::SHL: diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-minmax.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-minmax.ll index 6c377786250743..cc9e172de5f889 100644 --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-minmax.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-minmax.ll @@ -765,6 +765,751 @@ define void @smin_v32i64(<32 x i64>* %a, <32 x i64>* %b) #0 { ret void } +; +; UMAX +; + +; Don't use SVE for 64-bit vectors. 
+define <8 x i8> @umax_v8i8(<8 x i8> %op1, <8 x i8> %op2) #0 { +; CHECK-LABEL: umax_v8i8: +; CHECK: umax v0.8b, v0.8b, v1.8b +; CHECK: ret + %res = call <8 x i8> @llvm.umax.v8i8(<8 x i8> %op1, <8 x i8> %op2) + ret <8 x i8> %res +} + +; Don't use SVE for 128-bit vectors. +define <16 x i8> @umax_v16i8(<16 x i8> %op1, <16 x i8> %op2) #0 { +; CHECK-LABEL: umax_v16i8: +; CHECK: umax v0.16b, v0.16b, v1.16b +; CHECK: ret + %res = call <16 x i8> @llvm.umax.v16i8(<16 x i8> %op1, <16 x i8> %op2) + ret <16 x i8> %res +} + +define void @umax_v32i8(<32 x i8>* %a, <32 x i8>* %b) #0 { +; CHECK-LABEL: umax_v32i8: +; CHECK: ptrue [[PG:p[0-9]+]].b, vl32 +; CHECK-DAG: ld1b { [[OP1:z[0-9]+]].b }, [[PG]]/z, [x0] +; CHECK-DAG: ld1b { [[OP2:z[0-9]+]].b }, [[PG]]/z, [x1] +; CHECK-NEXT: umax [[RES:z[0-9]+]].b, [[PG]]/m, [[OP1]].b, [[OP2]].b +; CHECK-NEXT: st1b { [[RES]].b }, [[PG]], [x0] +; CHECK-NEXT: ret + %op1 = load <32 x i8>, <32 x i8>* %a + %op2 = load <32 x i8>, <32 x i8>* %b + %res = call <32 x i8> @llvm.umax.v32i8(<32 x i8> %op1, <32 x i8> %op2) + store <32 x i8> %res, <32 x i8>* %a + ret void +} + +define void @umax_v64i8(<64 x i8>* %a, <64 x i8>* %b) #0 { +; CHECK-LABEL: umax_v64i8: +; VBITS_GE_512: ptrue [[PG:p[0-9]+]].b, vl64 +; VBITS_GE_512-DAG: ld1b { [[OP1:z[0-9]+]].b }, [[PG]]/z, [x0] +; VBITS_GE_512-DAG: ld1b { [[OP2:z[0-9]+]].b }, [[PG]]/z, [x1] +; VBITS_GE_512-NEXT: umax [[RES:z[0-9]+]].b, [[PG]]/m, [[OP1]].b, [[OP2]].b +; VBITS_GE_512-NEXT: st1b { [[RES]].b }, [[PG]], [x0] +; VBITS_GE_512-NEXT: ret +; +; Ensure sensible type legalisation. 
+; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].b, vl32 +; VBITS_EQ_256-DAG: mov w[[A:[0-9]+]], #32 +; VBITS_EQ_256-DAG: ld1b { [[OP1_LO:z[0-9]+]].b }, [[PG]]/z, [x0, x[[A]]] +; VBITS_EQ_256-DAG: ld1b { [[OP1_HI:z[0-9]+]].b }, [[PG]]/z, [x0] +; VBITS_EQ_256-DAG: ld1b { [[OP2_LO:z[0-9]+]].b }, [[PG]]/z, [x1, x[[A]]] +; VBITS_EQ_256-DAG: ld1b { [[OP2_HI:z[0-9]+]].b }, [[PG]]/z, [x1] +; VBITS_EQ_256-DAG: umax [[RES_LO:z[0-9]+]].b, [[PG]]/m, [[OP1_LO]].b, [[OP2_LO]].b +; VBITS_EQ_256-DAG: umax [[RES_HI:z[0-9]+]].b, [[PG]]/m, [[OP1_HI]].b, [[OP2_HI]].b +; VBITS_EQ_256-DAG: st1b { [[RES_LO]].b }, [[PG]], [x0, x[[A]]] +; VBITS_EQ_256-DAG: st1b { [[RES_HI]].b }, [[PG]], [x0] +; VBITS_EQ_256-NEXT: ret + %op1 = load <64 x i8>, <64 x i8>* %a + %op2 = load <64 x i8>, <64 x i8>* %b + %res = call <64 x i8> @llvm.umax.v64i8(<64 x i8> %op1, <64 x i8> %op2) + store <64 x i8> %res, <64 x i8>* %a + ret void +} + +define void @umax_v128i8(<128 x i8>* %a, <128 x i8>* %b) #0 { +; CHECK-LABEL: umax_v128i8: +; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].b, vl128 +; VBITS_GE_1024-DAG: ld1b { [[OP1:z[0-9]+]].b }, [[PG]]/z, [x0] +; VBITS_GE_1024-DAG: ld1b { [[OP2:z[0-9]+]].b }, [[PG]]/z, [x1] +; VBITS_GE_1024-NEXT: umax [[RES:z[0-9]+]].b, [[PG]]/m, [[OP1]].b, [[OP2]].b +; VBITS_GE_1024-NEXT: st1b { [[RES]].b }, [[PG]], [x0] +; VBITS_GE_1024-NEXT: ret + %op1 = load <128 x i8>, <128 x i8>* %a + %op2 = load <128 x i8>, <128 x i8>* %b + %res = call <128 x i8> @llvm.umax.v128i8(<128 x i8> %op1, <128 x i8> %op2) + store <128 x i8> %res, <128 x i8>* %a + ret void +} + +define void @umax_v256i8(<256 x i8>* %a, <256 x i8>* %b) #0 { +; CHECK-LABEL: umax_v256i8: +; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].b, vl256 +; VBITS_GE_2048-DAG: ld1b { [[OP1:z[0-9]+]].b }, [[PG]]/z, [x0] +; VBITS_GE_2048-DAG: ld1b { [[OP2:z[0-9]+]].b }, [[PG]]/z, [x1] +; VBITS_GE_2048-NEXT: umax [[RES:z[0-9]+]].b, [[PG]]/m, [[OP1]].b, [[OP2]].b +; VBITS_GE_2048-NEXT: st1b { [[RES]].b }, [[PG]], [x0] +; VBITS_GE_2048-NEXT: ret + %op1 = load <256 x 
i8>, <256 x i8>* %a + %op2 = load <256 x i8>, <256 x i8>* %b + %res = call <256 x i8> @llvm.umax.v256i8(<256 x i8> %op1, <256 x i8> %op2) + store <256 x i8> %res, <256 x i8>* %a + ret void +} + +; Don't use SVE for 64-bit vectors. +define <4 x i16> @umax_v4i16(<4 x i16> %op1, <4 x i16> %op2) #0 { +; CHECK-LABEL: umax_v4i16: +; CHECK: umax v0.4h, v0.4h, v1.4h +; CHECK-NEXT: ret + %res = call <4 x i16> @llvm.umax.v4i16(<4 x i16> %op1, <4 x i16> %op2) + ret <4 x i16> %res +} + +; Don't use SVE for 128-bit vectors. +define <8 x i16> @umax_v8i16(<8 x i16> %op1, <8 x i16> %op2) #0 { +; CHECK-LABEL: umax_v8i16: +; CHECK: umax v0.8h, v0.8h, v1.8h +; CHECK-NEXT: ret + %res = call <8 x i16> @llvm.umax.v8i16(<8 x i16> %op1, <8 x i16> %op2) + ret <8 x i16> %res +} + +define void @umax_v16i16(<16 x i16>* %a, <16 x i16>* %b) #0 { +; CHECK-LABEL: umax_v16i16: +; CHECK: ptrue [[PG:p[0-9]+]].h, vl16 +; CHECK-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0] +; CHECK-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1] +; CHECK-NEXT: umax [[RES:z[0-9]+]].h, [[PG]]/m, [[OP1]].h, [[OP2]].h +; CHECK-NEXT: st1h { [[RES]].h }, [[PG]], [x0] +; CHECK-NEXT: ret + %op1 = load <16 x i16>, <16 x i16>* %a + %op2 = load <16 x i16>, <16 x i16>* %b + %res = call <16 x i16> @llvm.umax.v16i16(<16 x i16> %op1, <16 x i16> %op2) + store <16 x i16> %res, <16 x i16>* %a + ret void +} + +define void @umax_v32i16(<32 x i16>* %a, <32 x i16>* %b) #0 { +; CHECK-LABEL: umax_v32i16: +; VBITS_GE_512: ptrue [[PG:p[0-9]+]].h, vl32 +; VBITS_GE_512-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0] +; VBITS_GE_512-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1] +; VBITS_GE_512-NEXT: umax [[RES:z[0-9]+]].h, [[PG]]/m, [[OP1]].h, [[OP2]].h +; VBITS_GE_512-NEXT: st1h { [[RES]].h }, [[PG]], [x0] +; VBITS_GE_512-NEXT: ret + +; Ensure sensible type legalisation. 
+; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16 +; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 +; VBITS_EQ_256-DAG: add x[[B_HI:[0-9]+]], x1, #32 +; VBITS_EQ_256-DAG: ld1h { [[OP1_LO:z[0-9]+]].h }, [[PG]]/z, [x0] +; VBITS_EQ_256-DAG: ld1h { [[OP1_HI:z[0-9]+]].h }, [[PG]]/z, [x[[A_HI]]] +; VBITS_EQ_256-DAG: ld1h { [[OP2_LO:z[0-9]+]].h }, [[PG]]/z, [x1] +; VBITS_EQ_256-DAG: ld1h { [[OP2_HI:z[0-9]+]].h }, [[PG]]/z, [x[[B_HI]]] +; VBITS_EQ_256-DAG: umax [[RES_LO:z[0-9]+]].h, [[PG]]/m, [[OP1_LO]].h, [[OP2_LO]].h +; VBITS_EQ_256-DAG: umax [[RES_HI:z[0-9]+]].h, [[PG]]/m, [[OP1_HI]].h, [[OP2_HI]].h +; VBITS_EQ_256-DAG: st1h { [[RES_LO]].h }, [[PG]], [x0] +; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x[[A_HI]] +; VBITS_EQ_256-NEXT: ret + %op1 = load <32 x i16>, <32 x i16>* %a + %op2 = load <32 x i16>, <32 x i16>* %b + %res = call <32 x i16> @llvm.umax.v32i16(<32 x i16> %op1, <32 x i16> %op2) + store <32 x i16> %res, <32 x i16>* %a + ret void +} + +define void @umax_v64i16(<64 x i16>* %a, <64 x i16>* %b) #0 { +; CHECK-LABEL: umax_v64i16: +; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].h, vl64 +; VBITS_GE_1024-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0] +; VBITS_GE_1024-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1] +; VBITS_GE_1024-NEXT: umax [[RES:z[0-9]+]].h, [[PG]]/m, [[OP1]].h, [[OP2]].h +; VBITS_GE_1024-NEXT: st1h { [[RES]].h }, [[PG]], [x0] +; VBITS_GE_1024-NEXT: ret + %op1 = load <64 x i16>, <64 x i16>* %a + %op2 = load <64 x i16>, <64 x i16>* %b + %res = call <64 x i16> @llvm.umax.v64i16(<64 x i16> %op1, <64 x i16> %op2) + store <64 x i16> %res, <64 x i16>* %a + ret void +} + +define void @umax_v128i16(<128 x i16>* %a, <128 x i16>* %b) #0 { +; CHECK-LABEL: umax_v128i16: +; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].h, vl128 +; VBITS_GE_2048-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0] +; VBITS_GE_2048-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1] +; VBITS_GE_2048-NEXT: umax [[RES:z[0-9]+]].h, [[PG]]/m, [[OP1]].h, [[OP2]].h +; VBITS_GE_2048-NEXT: st1h { 
[[RES]].h }, [[PG]], [x0] +; VBITS_GE_2048-NEXT: ret + %op1 = load <128 x i16>, <128 x i16>* %a + %op2 = load <128 x i16>, <128 x i16>* %b + %res = call <128 x i16> @llvm.umax.v128i16(<128 x i16> %op1, <128 x i16> %op2) + store <128 x i16> %res, <128 x i16>* %a + ret void +} + +; Don't use SVE for 64-bit vectors. +define <2 x i32> @umax_v2i32(<2 x i32> %op1, <2 x i32> %op2) #0 { +; CHECK-LABEL: umax_v2i32: +; CHECK: umax v0.2s, v0.2s, v1.2s +; CHECK-NEXT: ret + %res = call <2 x i32> @llvm.umax.v2i32(<2 x i32> %op1, <2 x i32> %op2) + ret <2 x i32> %res +} + +; Don't use SVE for 128-bit vectors. +define <4 x i32> @umax_v4i32(<4 x i32> %op1, <4 x i32> %op2) #0 { +; CHECK-LABEL: umax_v4i32: +; CHECK: umax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret + %res = call <4 x i32> @llvm.umax.v4i32(<4 x i32> %op1, <4 x i32> %op2) + ret <4 x i32> %res +} + +define void @umax_v8i32(<8 x i32>* %a, <8 x i32>* %b) #0 { +; CHECK-LABEL: umax_v8i32: +; CHECK: ptrue [[PG:p[0-9]+]].s, vl8 +; CHECK-DAG: ld1w { [[OP1:z[0-9]+]].s }, [[PG]]/z, [x0] +; CHECK-DAG: ld1w { [[OP2:z[0-9]+]].s }, [[PG]]/z, [x1] +; CHECK-NEXT: umax [[RES:z[0-9]+]].s, [[PG]]/m, [[OP1]].s, [[OP2]].s +; CHECK-NEXT: st1w { [[RES]].s }, [[PG]], [x0] +; CHECK-NEXT: ret + %op1 = load <8 x i32>, <8 x i32>* %a + %op2 = load <8 x i32>, <8 x i32>* %b + %res = call <8 x i32> @llvm.umax.v8i32(<8 x i32> %op1, <8 x i32> %op2) + store <8 x i32> %res, <8 x i32>* %a + ret void +} + +define void @umax_v16i32(<16 x i32>* %a, <16 x i32>* %b) #0 { +; CHECK-LABEL: umax_v16i32: +; VBITS_GE_512: ptrue [[PG:p[0-9]+]].s, vl16 +; VBITS_GE_512-DAG: ld1w { [[OP1:z[0-9]+]].s }, [[PG]]/z, [x0] +; VBITS_GE_512-DAG: ld1w { [[OP2:z[0-9]+]].s }, [[PG]]/z, [x1] +; VBITS_GE_512-NEXT: umax [[RES:z[0-9]+]].s, [[PG]]/m, [[OP1]].s, [[OP2]].s +; VBITS_GE_512-NEXT: st1w { [[RES]].s }, [[PG]], [x0] +; VBITS_GE_512-NEXT: ret + +; Ensure sensible type legalisation. 
+; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8 +; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 +; VBITS_EQ_256-DAG: add x[[B_HI:[0-9]+]], x1, #32 +; VBITS_EQ_256-DAG: ld1w { [[OP1_LO:z[0-9]+]].s }, [[PG]]/z, [x0] +; VBITS_EQ_256-DAG: ld1w { [[OP1_HI:z[0-9]+]].s }, [[PG]]/z, [x[[A_HI]]] +; VBITS_EQ_256-DAG: ld1w { [[OP2_LO:z[0-9]+]].s }, [[PG]]/z, [x1] +; VBITS_EQ_256-DAG: ld1w { [[OP2_HI:z[0-9]+]].s }, [[PG]]/z, [x[[B_HI]]] +; VBITS_EQ_256-DAG: umax [[RES_LO:z[0-9]+]].s, [[PG]]/m, [[OP1_LO]].s, [[OP2_LO]].s +; VBITS_EQ_256-DAG: umax [[RES_HI:z[0-9]+]].s, [[PG]]/m, [[OP1_HI]].s, [[OP2_HI]].s +; VBITS_EQ_256-DAG: st1w { [[RES_LO]].s }, [[PG]], [x0] +; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x[[A_HI]] +; VBITS_EQ_256-NEXT: ret + %op1 = load <16 x i32>, <16 x i32>* %a + %op2 = load <16 x i32>, <16 x i32>* %b + %res = call <16 x i32> @llvm.umax.v16i32(<16 x i32> %op1, <16 x i32> %op2) + store <16 x i32> %res, <16 x i32>* %a + ret void +} + +define void @umax_v32i32(<32 x i32>* %a, <32 x i32>* %b) #0 { +; CHECK-LABEL: umax_v32i32: +; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].s, vl32 +; VBITS_GE_1024-DAG: ld1w { [[OP1:z[0-9]+]].s }, [[PG]]/z, [x0] +; VBITS_GE_1024-DAG: ld1w { [[OP2:z[0-9]+]].s }, [[PG]]/z, [x1] +; VBITS_GE_1024-NEXT: umax [[RES:z[0-9]+]].s, [[PG]]/m, [[OP1]].s, [[OP2]].s +; VBITS_GE_1024-NEXT: st1w { [[RES]].s }, [[PG]], [x0] +; VBITS_GE_1024-NEXT: ret + %op1 = load <32 x i32>, <32 x i32>* %a + %op2 = load <32 x i32>, <32 x i32>* %b + %res = call <32 x i32> @llvm.umax.v32i32(<32 x i32> %op1, <32 x i32> %op2) + store <32 x i32> %res, <32 x i32>* %a + ret void +} + +define void @umax_v64i32(<64 x i32>* %a, <64 x i32>* %b) #0 { +; CHECK-LABEL: umax_v64i32: +; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].s, vl64 +; VBITS_GE_2048-DAG: ld1w { [[OP1:z[0-9]+]].s }, [[PG]]/z, [x0] +; VBITS_GE_2048-DAG: ld1w { [[OP2:z[0-9]+]].s }, [[PG]]/z, [x1] +; VBITS_GE_2048-NEXT: umax [[RES:z[0-9]+]].s, [[PG]]/m, [[OP1]].s, [[OP2]].s +; VBITS_GE_2048-NEXT: st1w { [[RES]].s }, 
[[PG]], [x0] +; VBITS_GE_2048-NEXT: ret + %op1 = load <64 x i32>, <64 x i32>* %a + %op2 = load <64 x i32>, <64 x i32>* %b + %res = call <64 x i32> @llvm.umax.v64i32(<64 x i32> %op1, <64 x i32> %op2) + store <64 x i32> %res, <64 x i32>* %a + ret void +} + +; Vector i64 max are not legal for NEON so use SVE when available. +define <1 x i64> @umax_v1i64(<1 x i64> %op1, <1 x i64> %op2) #0 { +; CHECK-LABEL: umax_v1i64: +; CHECK: ptrue [[PG:p[0-9]+]].d, vl1 +; CHECK-NEXT: umax z0.d, [[PG]]/m, z0.d, z1.d +; CHECK-NEXT: ret + %res = call <1 x i64> @llvm.umax.v1i64(<1 x i64> %op1, <1 x i64> %op2) + ret <1 x i64> %res +} + +; Vector i64 max are not legal for NEON so use SVE when available. +define <2 x i64> @umax_v2i64(<2 x i64> %op1, <2 x i64> %op2) #0 { +; CHECK-LABEL: umax_v2i64: +; CHECK: ptrue [[PG:p[0-9]+]].d, vl2 +; CHECK-NEXT: umax z0.d, [[PG]]/m, z0.d, z1.d +; CHECK-NEXT: ret + %res = call <2 x i64> @llvm.umax.v2i64(<2 x i64> %op1, <2 x i64> %op2) + ret <2 x i64> %res +} + +define void @umax_v4i64(<4 x i64>* %a, <4 x i64>* %b) #0 { +; CHECK-LABEL: umax_v4i64: +; CHECK: ptrue [[PG:p[0-9]+]].d, vl4 +; CHECK-DAG: ld1d { [[OP1:z[0-9]+]].d }, [[PG]]/z, [x0] +; CHECK-DAG: ld1d { [[OP2:z[0-9]+]].d }, [[PG]]/z, [x1] +; CHECK-NEXT: umax [[RES:z[0-9]+]].d, [[PG]]/m, [[OP1]].d, [[OP2]].d +; CHECK-NEXT: st1d { [[RES]].d }, [[PG]], [x0] +; CHECK-NEXT: ret + %op1 = load <4 x i64>, <4 x i64>* %a + %op2 = load <4 x i64>, <4 x i64>* %b + %res = call <4 x i64> @llvm.umax.v4i64(<4 x i64> %op1, <4 x i64> %op2) + store <4 x i64> %res, <4 x i64>* %a + ret void +} + +define void @umax_v8i64(<8 x i64>* %a, <8 x i64>* %b) #0 { +; CHECK-LABEL: umax_v8i64: +; VBITS_GE_512: ptrue [[PG:p[0-9]+]].d, vl8 +; VBITS_GE_512-DAG: ld1d { [[OP1:z[0-9]+]].d }, [[PG]]/z, [x0] +; VBITS_GE_512-DAG: ld1d { [[OP2:z[0-9]+]].d }, [[PG]]/z, [x1] +; VBITS_GE_512-NEXT: umax [[RES:z[0-9]+]].d, [[PG]]/m, [[OP1]].d, [[OP2]].d +; VBITS_GE_512-NEXT: st1d { [[RES]].d }, [[PG]], [x0] +; VBITS_GE_512-NEXT: ret + +; Ensure 
sensible type legalisation. +; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4 +; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 +; VBITS_EQ_256-DAG: add x[[B_HI:[0-9]+]], x1, #32 +; VBITS_EQ_256-DAG: ld1d { [[OP1_LO:z[0-9]+]].d }, [[PG]]/z, [x0] +; VBITS_EQ_256-DAG: ld1d { [[OP1_HI:z[0-9]+]].d }, [[PG]]/z, [x[[A_HI]]] +; VBITS_EQ_256-DAG: ld1d { [[OP2_LO:z[0-9]+]].d }, [[PG]]/z, [x1] +; VBITS_EQ_256-DAG: ld1d { [[OP2_HI:z[0-9]+]].d }, [[PG]]/z, [x[[B_HI]]] +; VBITS_EQ_256-DAG: umax [[RES_LO:z[0-9]+]].d, [[PG]]/m, [[OP1_LO]].d, [[OP2_LO]].d +; VBITS_EQ_256-DAG: umax [[RES_HI:z[0-9]+]].d, [[PG]]/m, [[OP1_HI]].d, [[OP2_HI]].d +; VBITS_EQ_256-DAG: st1d { [[RES_LO]].d }, [[PG]], [x0] +; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x[[A_HI]] +; VBITS_EQ_256-NEXT: ret + %op1 = load <8 x i64>, <8 x i64>* %a + %op2 = load <8 x i64>, <8 x i64>* %b + %res = call <8 x i64> @llvm.umax.v8i64(<8 x i64> %op1, <8 x i64> %op2) + store <8 x i64> %res, <8 x i64>* %a + ret void +} + +define void @umax_v16i64(<16 x i64>* %a, <16 x i64>* %b) #0 { +; CHECK-LABEL: umax_v16i64: +; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].d, vl16 +; VBITS_GE_1024-DAG: ld1d { [[OP1:z[0-9]+]].d }, [[PG]]/z, [x0] +; VBITS_GE_1024-DAG: ld1d { [[OP2:z[0-9]+]].d }, [[PG]]/z, [x1] +; VBITS_GE_1024-NEXT: umax [[RES:z[0-9]+]].d, [[PG]]/m, [[OP1]].d, [[OP2]].d +; VBITS_GE_1024-NEXT: st1d { [[RES]].d }, [[PG]], [x0] +; VBITS_GE_1024-NEXT: ret + %op1 = load <16 x i64>, <16 x i64>* %a + %op2 = load <16 x i64>, <16 x i64>* %b + %res = call <16 x i64> @llvm.umax.v16i64(<16 x i64> %op1, <16 x i64> %op2) + store <16 x i64> %res, <16 x i64>* %a + ret void +} + +define void @umax_v32i64(<32 x i64>* %a, <32 x i64>* %b) #0 { +; CHECK-LABEL: umax_v32i64: +; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].d, vl32 +; VBITS_GE_2048-DAG: ld1d { [[OP1:z[0-9]+]].d }, [[PG]]/z, [x0] +; VBITS_GE_2048-DAG: ld1d { [[OP2:z[0-9]+]].d }, [[PG]]/z, [x1] +; VBITS_GE_2048-NEXT: umax [[RES:z[0-9]+]].d, [[PG]]/m, [[OP1]].d, [[OP2]].d +; VBITS_GE_2048-NEXT: 
st1d { [[RES]].d }, [[PG]], [x0] +; VBITS_GE_2048-NEXT: ret + %op1 = load <32 x i64>, <32 x i64>* %a + %op2 = load <32 x i64>, <32 x i64>* %b + %res = call <32 x i64> @llvm.umax.v32i64(<32 x i64> %op1, <32 x i64> %op2) + store <32 x i64> %res, <32 x i64>* %a + ret void +} + +; +; UMIN +; + +; Don't use SVE for 64-bit vectors. +define <8 x i8> @umin_v8i8(<8 x i8> %op1, <8 x i8> %op2) #0 { +; CHECK-LABEL: umin_v8i8: +; CHECK: umin v0.8b, v0.8b, v1.8b +; CHECK-NEXT: ret + %res = call <8 x i8> @llvm.umin.v8i8(<8 x i8> %op1, <8 x i8> %op2) + ret <8 x i8> %res +} + +; Don't use SVE for 128-bit vectors. +define <16 x i8> @umin_v16i8(<16 x i8> %op1, <16 x i8> %op2) #0 { +; CHECK-LABEL: umin_v16i8: +; CHECK: umin v0.16b, v0.16b, v1.16b +; CHECK: ret + %res = call <16 x i8> @llvm.umin.v16i8(<16 x i8> %op1, <16 x i8> %op2) + ret <16 x i8> %res +} + +define void @umin_v32i8(<32 x i8>* %a, <32 x i8>* %b) #0 { +; CHECK-LABEL: umin_v32i8: +; CHECK: ptrue [[PG:p[0-9]+]].b, vl32 +; CHECK-DAG: ld1b { [[OP1:z[0-9]+]].b }, [[PG]]/z, [x0] +; CHECK-DAG: ld1b { [[OP2:z[0-9]+]].b }, [[PG]]/z, [x1] +; CHECK-NEXT: umin [[RES:z[0-9]+]].b, [[PG]]/m, [[OP1]].b, [[OP2]].b +; CHECK-NEXT: st1b { [[RES]].b }, [[PG]], [x0] +; CHECK-NEXT: ret + %op1 = load <32 x i8>, <32 x i8>* %a + %op2 = load <32 x i8>, <32 x i8>* %b + %res = call <32 x i8> @llvm.umin.v32i8(<32 x i8> %op1, <32 x i8> %op2) + store <32 x i8> %res, <32 x i8>* %a + ret void +} + +define void @umin_v64i8(<64 x i8>* %a, <64 x i8>* %b) #0 { +; CHECK-LABEL: umin_v64i8: +; VBITS_GE_512: ptrue [[PG:p[0-9]+]].b, vl64 +; VBITS_GE_512-DAG: ld1b { [[OP1:z[0-9]+]].b }, [[PG]]/z, [x0] +; VBITS_GE_512-DAG: ld1b { [[OP2:z[0-9]+]].b }, [[PG]]/z, [x1] +; VBITS_GE_512-NEXT: umin [[RES:z[0-9]+]].b, [[PG]]/m, [[OP1]].b, [[OP2]].b +; VBITS_GE_512-NEXT: st1b { [[RES]].b }, [[PG]], [x0] +; VBITS_GE_512-NEXT: ret +; +; Ensure sensible type legalisation. 
+; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].b, vl32 +; VBITS_EQ_256-DAG: mov w[[A:[0-9]+]], #32 +; VBITS_EQ_256-DAG: ld1b { [[OP1_LO:z[0-9]+]].b }, [[PG]]/z, [x0, x[[A]]] +; VBITS_EQ_256-DAG: ld1b { [[OP1_HI:z[0-9]+]].b }, [[PG]]/z, [x0] +; VBITS_EQ_256-DAG: ld1b { [[OP2_LO:z[0-9]+]].b }, [[PG]]/z, [x1, x[[A]]] +; VBITS_EQ_256-DAG: ld1b { [[OP2_HI:z[0-9]+]].b }, [[PG]]/z, [x1] +; VBITS_EQ_256-DAG: umin [[RES_LO:z[0-9]+]].b, [[PG]]/m, [[OP1_LO]].b, [[OP2_LO]].b +; VBITS_EQ_256-DAG: umin [[RES_HI:z[0-9]+]].b, [[PG]]/m, [[OP1_HI]].b, [[OP2_HI]].b +; VBITS_EQ_256-DAG: st1b { [[RES_LO]].b }, [[PG]], [x0, x[[A]]] +; VBITS_EQ_256-DAG: st1b { [[RES_HI]].b }, [[PG]], [x0] + %op1 = load <64 x i8>, <64 x i8>* %a + %op2 = load <64 x i8>, <64 x i8>* %b + %res = call <64 x i8> @llvm.umin.v64i8(<64 x i8> %op1, <64 x i8> %op2) + store <64 x i8> %res, <64 x i8>* %a + ret void +} + +define void @umin_v128i8(<128 x i8>* %a, <128 x i8>* %b) #0 { +; CHECK-LABEL: umin_v128i8: +; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].b, vl128 +; VBITS_GE_1024-DAG: ld1b { [[OP1:z[0-9]+]].b }, [[PG]]/z, [x0] +; VBITS_GE_1024-DAG: ld1b { [[OP2:z[0-9]+]].b }, [[PG]]/z, [x1] +; VBITS_GE_1024-NEXT: umin [[RES:z[0-9]+]].b, [[PG]]/m, [[OP1]].b, [[OP2]].b +; VBITS_GE_1024-NEXT: st1b { [[RES]].b }, [[PG]], [x0] +; VBITS_GE_1024-NEXT: ret + %op1 = load <128 x i8>, <128 x i8>* %a + %op2 = load <128 x i8>, <128 x i8>* %b + %res = call <128 x i8> @llvm.umin.v128i8(<128 x i8> %op1, <128 x i8> %op2) + store <128 x i8> %res, <128 x i8>* %a + ret void +} + +define void @umin_v256i8(<256 x i8>* %a, <256 x i8>* %b) #0 { +; CHECK-LABEL: umin_v256i8: +; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].b, vl256 +; VBITS_GE_2048-DAG: ld1b { [[OP1:z[0-9]+]].b }, [[PG]]/z, [x0] +; VBITS_GE_2048-DAG: ld1b { [[OP2:z[0-9]+]].b }, [[PG]]/z, [x1] +; VBITS_GE_2048-NEXT: umin [[RES:z[0-9]+]].b, [[PG]]/m, [[OP1]].b, [[OP2]].b +; VBITS_GE_2048-NEXT: st1b { [[RES]].b }, [[PG]], [x0] +; VBITS_GE_2048-NEXT: ret + %op1 = load <256 x i8>, <256 x i8>* %a + %op2 
= load <256 x i8>, <256 x i8>* %b + %res = call <256 x i8> @llvm.umin.v256i8(<256 x i8> %op1, <256 x i8> %op2) + store <256 x i8> %res, <256 x i8>* %a + ret void +} + +; Don't use SVE for 64-bit vectors. +define <4 x i16> @umin_v4i16(<4 x i16> %op1, <4 x i16> %op2) #0 { +; CHECK-LABEL: umin_v4i16: +; CHECK: umin v0.4h, v0.4h, v1.4h +; CHECK-NEXT: ret + %res = call <4 x i16> @llvm.umin.v4i16(<4 x i16> %op1, <4 x i16> %op2) + ret <4 x i16> %res +} + +; Don't use SVE for 128-bit vectors. +define <8 x i16> @umin_v8i16(<8 x i16> %op1, <8 x i16> %op2) #0 { +; CHECK-LABEL: umin_v8i16: +; CHECK: umin v0.8h, v0.8h, v1.8h +; CHECK-NEXT: ret + %res = call <8 x i16> @llvm.umin.v8i16(<8 x i16> %op1, <8 x i16> %op2) + ret <8 x i16> %res +} + +define void @umin_v16i16(<16 x i16>* %a, <16 x i16>* %b) #0 { +; CHECK-LABEL: umin_v16i16: +; CHECK: ptrue [[PG:p[0-9]+]].h, vl16 +; CHECK-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0] +; CHECK-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1] +; CHECK-NEXT: umin [[RES:z[0-9]+]].h, [[PG]]/m, [[OP1]].h, [[OP2]].h +; CHECK-NEXT: st1h { [[RES]].h }, [[PG]], [x0] +; CHECK-NEXT: ret + %op1 = load <16 x i16>, <16 x i16>* %a + %op2 = load <16 x i16>, <16 x i16>* %b + %res = call <16 x i16> @llvm.umin.v16i16(<16 x i16> %op1, <16 x i16> %op2) + store <16 x i16> %res, <16 x i16>* %a + ret void +} + +define void @umin_v32i16(<32 x i16>* %a, <32 x i16>* %b) #0 { +; CHECK-LABEL: umin_v32i16: +; VBITS_GE_512: ptrue [[PG:p[0-9]+]].h, vl32 +; VBITS_GE_512-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0] +; VBITS_GE_512-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1] +; VBITS_GE_512-NEXT: umin [[RES:z[0-9]+]].h, [[PG]]/m, [[OP1]].h, [[OP2]].h +; VBITS_GE_512-NEXT: st1h { [[RES]].h }, [[PG]], [x0] +; VBITS_GE_512-NEXT: ret + +; Ensure sensible type legalisation. 
+; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16 +; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 +; VBITS_EQ_256-DAG: add x[[B_HI:[0-9]+]], x1, #32 +; VBITS_EQ_256-DAG: ld1h { [[OP1_LO:z[0-9]+]].h }, [[PG]]/z, [x0] +; VBITS_EQ_256-DAG: ld1h { [[OP1_HI:z[0-9]+]].h }, [[PG]]/z, [x[[A_HI]]] +; VBITS_EQ_256-DAG: ld1h { [[OP2_LO:z[0-9]+]].h }, [[PG]]/z, [x1] +; VBITS_EQ_256-DAG: ld1h { [[OP2_HI:z[0-9]+]].h }, [[PG]]/z, [x[[B_HI]]] +; VBITS_EQ_256-DAG: umin [[RES_LO:z[0-9]+]].h, [[PG]]/m, [[OP1_LO]].h, [[OP2_LO]].h +; VBITS_EQ_256-DAG: umin [[RES_HI:z[0-9]+]].h, [[PG]]/m, [[OP1_HI]].h, [[OP2_HI]].h +; VBITS_EQ_256-DAG: st1h { [[RES_LO]].h }, [[PG]], [x0] +; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x[[A_HI]] +; VBITS_EQ_256-NEXT: ret + %op1 = load <32 x i16>, <32 x i16>* %a + %op2 = load <32 x i16>, <32 x i16>* %b + %res = call <32 x i16> @llvm.umin.v32i16(<32 x i16> %op1, <32 x i16> %op2) + store <32 x i16> %res, <32 x i16>* %a + ret void +} + +define void @umin_v64i16(<64 x i16>* %a, <64 x i16>* %b) #0 { +; CHECK-LABEL: umin_v64i16: +; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].h, vl64 +; VBITS_GE_1024-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0] +; VBITS_GE_1024-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1] +; VBITS_GE_1024-NEXT: umin [[RES:z[0-9]+]].h, [[PG]]/m, [[OP1]].h, [[OP2]].h +; VBITS_GE_1024-NEXT: st1h { [[RES]].h }, [[PG]], [x0] +; VBITS_GE_1024-NEXT: ret + %op1 = load <64 x i16>, <64 x i16>* %a + %op2 = load <64 x i16>, <64 x i16>* %b + %res = call <64 x i16> @llvm.umin.v64i16(<64 x i16> %op1, <64 x i16> %op2) + store <64 x i16> %res, <64 x i16>* %a + ret void +} + +define void @umin_v128i16(<128 x i16>* %a, <128 x i16>* %b) #0 { +; CHECK-LABEL: umin_v128i16: +; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].h, vl128 +; VBITS_GE_2048-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0] +; VBITS_GE_2048-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1] +; VBITS_GE_2048-NEXT: umin [[RES:z[0-9]+]].h, [[PG]]/m, [[OP1]].h, [[OP2]].h +; VBITS_GE_2048-NEXT: st1h { 
[[RES]].h }, [[PG]], [x0] +; VBITS_GE_2048-NEXT: ret + %op1 = load <128 x i16>, <128 x i16>* %a + %op2 = load <128 x i16>, <128 x i16>* %b + %res = call <128 x i16> @llvm.umin.v128i16(<128 x i16> %op1, <128 x i16> %op2) + store <128 x i16> %res, <128 x i16>* %a + ret void +} + +; Don't use SVE for 64-bit vectors. +define <2 x i32> @umin_v2i32(<2 x i32> %op1, <2 x i32> %op2) #0 { +; CHECK-LABEL: umin_v2i32: +; CHECK: umin v0.2s, v0.2s, v1.2s +; CHECK-NEXT: ret + %res = call <2 x i32> @llvm.umin.v2i32(<2 x i32> %op1, <2 x i32> %op2) + ret <2 x i32> %res +} + +; Don't use SVE for 128-bit vectors. +define <4 x i32> @umin_v4i32(<4 x i32> %op1, <4 x i32> %op2) #0 { +; CHECK-LABEL: umin_v4i32: +; CHECK: umin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret + %res = call <4 x i32> @llvm.umin.v4i32(<4 x i32> %op1, <4 x i32> %op2) + ret <4 x i32> %res +} + +define void @umin_v8i32(<8 x i32>* %a, <8 x i32>* %b) #0 { +; CHECK-LABEL: umin_v8i32: +; CHECK: ptrue [[PG:p[0-9]+]].s, vl8 +; CHECK-DAG: ld1w { [[OP1:z[0-9]+]].s }, [[PG]]/z, [x0] +; CHECK-DAG: ld1w { [[OP2:z[0-9]+]].s }, [[PG]]/z, [x1] +; CHECK-NEXT: umin [[RES:z[0-9]+]].s, [[PG]]/m, [[OP1]].s, [[OP2]].s +; CHECK-NEXT: st1w { [[RES]].s }, [[PG]], [x0] +; CHECK-NEXT: ret + %op1 = load <8 x i32>, <8 x i32>* %a + %op2 = load <8 x i32>, <8 x i32>* %b + %res = call <8 x i32> @llvm.umin.v8i32(<8 x i32> %op1, <8 x i32> %op2) + store <8 x i32> %res, <8 x i32>* %a + ret void +} + +define void @umin_v16i32(<16 x i32>* %a, <16 x i32>* %b) #0 { +; CHECK-LABEL: umin_v16i32: +; VBITS_GE_512: ptrue [[PG:p[0-9]+]].s, vl16 +; VBITS_GE_512-DAG: ld1w { [[OP1:z[0-9]+]].s }, [[PG]]/z, [x0] +; VBITS_GE_512-DAG: ld1w { [[OP2:z[0-9]+]].s }, [[PG]]/z, [x1] +; VBITS_GE_512-NEXT: umin [[RES:z[0-9]+]].s, [[PG]]/m, [[OP1]].s, [[OP2]].s +; VBITS_GE_512-NEXT: st1w { [[RES]].s }, [[PG]], [x0] +; VBITS_GE_512-NEXT: ret + +; Ensure sensible type legalisation. 
+; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8 +; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 +; VBITS_EQ_256-DAG: add x[[B_HI:[0-9]+]], x1, #32 +; VBITS_EQ_256-DAG: ld1w { [[OP1_LO:z[0-9]+]].s }, [[PG]]/z, [x0] +; VBITS_EQ_256-DAG: ld1w { [[OP1_HI:z[0-9]+]].s }, [[PG]]/z, [x[[A_HI]]] +; VBITS_EQ_256-DAG: ld1w { [[OP2_LO:z[0-9]+]].s }, [[PG]]/z, [x1] +; VBITS_EQ_256-DAG: ld1w { [[OP2_HI:z[0-9]+]].s }, [[PG]]/z, [x[[B_HI]]] +; VBITS_EQ_256-DAG: umin [[RES_LO:z[0-9]+]].s, [[PG]]/m, [[OP1_LO]].s, [[OP2_LO]].s +; VBITS_EQ_256-DAG: umin [[RES_HI:z[0-9]+]].s, [[PG]]/m, [[OP1_HI]].s, [[OP2_HI]].s +; VBITS_EQ_256-DAG: st1w { [[RES_LO]].s }, [[PG]], [x0] +; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x[[A_HI]] +; VBITS_EQ_256-NEXT: ret + %op1 = load <16 x i32>, <16 x i32>* %a + %op2 = load <16 x i32>, <16 x i32>* %b + %res = call <16 x i32> @llvm.umin.v16i32(<16 x i32> %op1, <16 x i32> %op2) + store <16 x i32> %res, <16 x i32>* %a + ret void +} + +define void @umin_v32i32(<32 x i32>* %a, <32 x i32>* %b) #0 { +; CHECK-LABEL: umin_v32i32: +; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].s, vl32 +; VBITS_GE_1024-DAG: ld1w { [[OP1:z[0-9]+]].s }, [[PG]]/z, [x0] +; VBITS_GE_1024-DAG: ld1w { [[OP2:z[0-9]+]].s }, [[PG]]/z, [x1] +; VBITS_GE_1024-NEXT: umin [[RES:z[0-9]+]].s, [[PG]]/m, [[OP1]].s, [[OP2]].s +; VBITS_GE_1024-NEXT: st1w { [[RES]].s }, [[PG]], [x0] +; VBITS_GE_1024-NEXT: ret + %op1 = load <32 x i32>, <32 x i32>* %a + %op2 = load <32 x i32>, <32 x i32>* %b + %res = call <32 x i32> @llvm.umin.v32i32(<32 x i32> %op1, <32 x i32> %op2) + store <32 x i32> %res, <32 x i32>* %a + ret void +} + +define void @umin_v64i32(<64 x i32>* %a, <64 x i32>* %b) #0 { +; CHECK-LABEL: umin_v64i32: +; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].s, vl64 +; VBITS_GE_2048-DAG: ld1w { [[OP1:z[0-9]+]].s }, [[PG]]/z, [x0] +; VBITS_GE_2048-DAG: ld1w { [[OP2:z[0-9]+]].s }, [[PG]]/z, [x1] +; VBITS_GE_2048-NEXT: umin [[RES:z[0-9]+]].s, [[PG]]/m, [[OP1]].s, [[OP2]].s +; VBITS_GE_2048-NEXT: st1w { [[RES]].s }, 
[[PG]], [x0] +; VBITS_GE_2048-NEXT: ret + %op1 = load <64 x i32>, <64 x i32>* %a + %op2 = load <64 x i32>, <64 x i32>* %b + %res = call <64 x i32> @llvm.umin.v64i32(<64 x i32> %op1, <64 x i32> %op2) + store <64 x i32> %res, <64 x i32>* %a + ret void +} + +; Vector i64 min are not legal for NEON so use SVE when available. +define <1 x i64> @umin_v1i64(<1 x i64> %op1, <1 x i64> %op2) #0 { +; CHECK-LABEL: umin_v1i64: +; CHECK: ptrue [[PG:p[0-9]+]].d, vl1 +; CHECK-NEXT: umin z0.d, [[PG]]/m, z0.d, z1.d +; CHECK-NEXT: ret + %res = call <1 x i64> @llvm.umin.v1i64(<1 x i64> %op1, <1 x i64> %op2) + ret <1 x i64> %res +} + +; Vector i64 min are not legal for NEON so use SVE when available. +define <2 x i64> @umin_v2i64(<2 x i64> %op1, <2 x i64> %op2) #0 { +; CHECK-LABEL: umin_v2i64: +; CHECK: ptrue [[PG:p[0-9]+]].d, vl2 +; CHECK-NEXT: umin z0.d, [[PG]]/m, z0.d, z1.d +; CHECK-NEXT: ret + %res = call <2 x i64> @llvm.umin.v2i64(<2 x i64> %op1, <2 x i64> %op2) + ret <2 x i64> %res +} + +define void @umin_v4i64(<4 x i64>* %a, <4 x i64>* %b) #0 { +; CHECK-LABEL: umin_v4i64: +; CHECK: ptrue [[PG:p[0-9]+]].d, vl4 +; CHECK-DAG: ld1d { [[OP1:z[0-9]+]].d }, [[PG]]/z, [x0] +; CHECK-DAG: ld1d { [[OP2:z[0-9]+]].d }, [[PG]]/z, [x1] +; CHECK-NEXT: umin [[RES:z[0-9]+]].d, [[PG]]/m, [[OP1]].d, [[OP2]].d +; CHECK-NEXT: st1d { [[RES]].d }, [[PG]], [x0] +; CHECK-NEXT: ret + %op1 = load <4 x i64>, <4 x i64>* %a + %op2 = load <4 x i64>, <4 x i64>* %b + %res = call <4 x i64> @llvm.umin.v4i64(<4 x i64> %op1, <4 x i64> %op2) + store <4 x i64> %res, <4 x i64>* %a + ret void +} + +define void @umin_v8i64(<8 x i64>* %a, <8 x i64>* %b) #0 { +; CHECK-LABEL: umin_v8i64: +; VBITS_GE_512: ptrue [[PG:p[0-9]+]].d, vl8 +; VBITS_GE_512-DAG: ld1d { [[OP1:z[0-9]+]].d }, [[PG]]/z, [x0] +; VBITS_GE_512-DAG: ld1d { [[OP2:z[0-9]+]].d }, [[PG]]/z, [x1] +; VBITS_GE_512-NEXT: umin [[RES:z[0-9]+]].d, [[PG]]/m, [[OP1]].d, [[OP2]].d +; VBITS_GE_512-NEXT: st1d { [[RES]].d }, [[PG]], [x0] +; VBITS_GE_512-NEXT: ret + +; Ensure 
sensible type legalisation. +; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4 +; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 +; VBITS_EQ_256-DAG: add x[[B_HI:[0-9]+]], x1, #32 +; VBITS_EQ_256-DAG: ld1d { [[OP1_LO:z[0-9]+]].d }, [[PG]]/z, [x0] +; VBITS_EQ_256-DAG: ld1d { [[OP1_HI:z[0-9]+]].d }, [[PG]]/z, [x[[A_HI]]] +; VBITS_EQ_256-DAG: ld1d { [[OP2_LO:z[0-9]+]].d }, [[PG]]/z, [x1] +; VBITS_EQ_256-DAG: ld1d { [[OP2_HI:z[0-9]+]].d }, [[PG]]/z, [x[[B_HI]]] +; VBITS_EQ_256-DAG: umin [[RES_LO:z[0-9]+]].d, [[PG]]/m, [[OP1_LO]].d, [[OP2_LO]].d +; VBITS_EQ_256-DAG: umin [[RES_HI:z[0-9]+]].d, [[PG]]/m, [[OP1_HI]].d, [[OP2_HI]].d +; VBITS_EQ_256-DAG: st1d { [[RES_LO]].d }, [[PG]], [x0] +; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x[[A_HI]] +; VBITS_EQ_256-NEXT: ret + %op1 = load <8 x i64>, <8 x i64>* %a + %op2 = load <8 x i64>, <8 x i64>* %b + %res = call <8 x i64> @llvm.umin.v8i64(<8 x i64> %op1, <8 x i64> %op2) + store <8 x i64> %res, <8 x i64>* %a + ret void +} + +define void @umin_v16i64(<16 x i64>* %a, <16 x i64>* %b) #0 { +; CHECK-LABEL: umin_v16i64: +; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].d, vl16 +; VBITS_GE_1024-DAG: ld1d { [[OP1:z[0-9]+]].d }, [[PG]]/z, [x0] +; VBITS_GE_1024-DAG: ld1d { [[OP2:z[0-9]+]].d }, [[PG]]/z, [x1] +; VBITS_GE_1024-NEXT: umin [[RES:z[0-9]+]].d, [[PG]]/m, [[OP1]].d, [[OP2]].d +; VBITS_GE_1024-NEXT: st1d { [[RES]].d }, [[PG]], [x0] +; VBITS_GE_1024-NEXT: ret + %op1 = load <16 x i64>, <16 x i64>* %a + %op2 = load <16 x i64>, <16 x i64>* %b + %res = call <16 x i64> @llvm.umin.v16i64(<16 x i64> %op1, <16 x i64> %op2) + store <16 x i64> %res, <16 x i64>* %a + ret void +} + +define void @umin_v32i64(<32 x i64>* %a, <32 x i64>* %b) #0 { +; CHECK-LABEL: umin_v32i64: +; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].d, vl32 +; VBITS_GE_2048-DAG: ld1d { [[OP1:z[0-9]+]].d }, [[PG]]/z, [x0] +; VBITS_GE_2048-DAG: ld1d { [[OP2:z[0-9]+]].d }, [[PG]]/z, [x1] +; VBITS_GE_2048-NEXT: umin [[RES:z[0-9]+]].d, [[PG]]/m, [[OP1]].d, [[OP2]].d +; VBITS_GE_2048-NEXT: 
st1d { [[RES]].d }, [[PG]], [x0] +; VBITS_GE_2048-NEXT: ret + %op1 = load <32 x i64>, <32 x i64>* %a + %op2 = load <32 x i64>, <32 x i64>* %b + %res = call <32 x i64> @llvm.umin.v32i64(<32 x i64> %op1, <32 x i64> %op2) + store <32 x i64> %res, <32 x i64>* %a + ret void +} + attributes #0 = { "target-features"="+sve" } declare <8 x i8> @llvm.smin.v8i8(<8 x i8>, <8 x i8>) @@ -816,3 +1561,54 @@ declare <4 x i64> @llvm.smax.v4i64(<4 x i64>, <4 x i64>) declare <8 x i64> @llvm.smax.v8i64(<8 x i64>, <8 x i64>) declare <16 x i64> @llvm.smax.v16i64(<16 x i64>, <16 x i64>) declare <32 x i64> @llvm.smax.v32i64(<32 x i64>, <32 x i64>) + +declare <8 x i8> @llvm.umin.v8i8(<8 x i8>, <8 x i8>) +declare <16 x i8> @llvm.umin.v16i8(<16 x i8>, <16 x i8>) +declare <32 x i8> @llvm.umin.v32i8(<32 x i8>, <32 x i8>) +declare <64 x i8> @llvm.umin.v64i8(<64 x i8>, <64 x i8>) +declare <128 x i8> @llvm.umin.v128i8(<128 x i8>, <128 x i8>) +declare <256 x i8> @llvm.umin.v256i8(<256 x i8>, <256 x i8>) +declare <4 x i16> @llvm.umin.v4i16(<4 x i16>, <4 x i16>) +declare <8 x i16> @llvm.umin.v8i16(<8 x i16>, <8 x i16>) +declare <16 x i16> @llvm.umin.v16i16(<16 x i16>, <16 x i16>) +declare <32 x i16> @llvm.umin.v32i16(<32 x i16>, <32 x i16>) +declare <64 x i16> @llvm.umin.v64i16(<64 x i16>, <64 x i16>) +declare <128 x i16> @llvm.umin.v128i16(<128 x i16>, <128 x i16>) +declare <2 x i32> @llvm.umin.v2i32(<2 x i32>, <2 x i32>) +declare <4 x i32> @llvm.umin.v4i32(<4 x i32>, <4 x i32>) +declare <8 x i32> @llvm.umin.v8i32(<8 x i32>, <8 x i32>) +declare <16 x i32> @llvm.umin.v16i32(<16 x i32>, <16 x i32>) +declare <32 x i32> @llvm.umin.v32i32(<32 x i32>, <32 x i32>) +declare <64 x i32> @llvm.umin.v64i32(<64 x i32>, <64 x i32>) +declare <1 x i64> @llvm.umin.v1i64(<1 x i64>, <1 x i64>) +declare <2 x i64> @llvm.umin.v2i64(<2 x i64>, <2 x i64>) +declare <4 x i64> @llvm.umin.v4i64(<4 x i64>, <4 x i64>) +declare <8 x i64> @llvm.umin.v8i64(<8 x i64>, <8 x i64>) +declare <16 x i64> @llvm.umin.v16i64(<16 x i64>, <16 
x i64>) +declare <32 x i64> @llvm.umin.v32i64(<32 x i64>, <32 x i64>) + +declare <8 x i8> @llvm.umax.v8i8(<8 x i8>, <8 x i8>) +declare <16 x i8> @llvm.umax.v16i8(<16 x i8>, <16 x i8>) +declare <32 x i8> @llvm.umax.v32i8(<32 x i8>, <32 x i8>) +declare <64 x i8> @llvm.umax.v64i8(<64 x i8>, <64 x i8>) +declare <128 x i8> @llvm.umax.v128i8(<128 x i8>, <128 x i8>) +declare <256 x i8> @llvm.umax.v256i8(<256 x i8>, <256 x i8>) +declare <4 x i16> @llvm.umax.v4i16(<4 x i16>, <4 x i16>) +declare <8 x i16> @llvm.umax.v8i16(<8 x i16>, <8 x i16>) +declare <16 x i16> @llvm.umax.v16i16(<16 x i16>, <16 x i16>) +declare <32 x i16> @llvm.umax.v32i16(<32 x i16>, <32 x i16>) +declare <64 x i16> @llvm.umax.v64i16(<64 x i16>, <64 x i16>) +declare <128 x i16> @llvm.umax.v128i16(<128 x i16>, <128 x i16>) +declare <2 x i32> @llvm.umax.v2i32(<2 x i32>, <2 x i32>) +declare <4 x i32> @llvm.umax.v4i32(<4 x i32>, <4 x i32>) +declare <8 x i32> @llvm.umax.v8i32(<8 x i32>, <8 x i32>) +declare <16 x i32> @llvm.umax.v16i32(<16 x i32>, <16 x i32>) +declare <32 x i32> @llvm.umax.v32i32(<32 x i32>, <32 x i32>) +declare <64 x i32> @llvm.umax.v64i32(<64 x i32>, <64 x i32>) +declare <1 x i64> @llvm.umax.v1i64(<1 x i64>, <1 x i64>) +declare <2 x i64> @llvm.umax.v2i64(<2 x i64>, <2 x i64>) +declare <4 x i64> @llvm.umax.v4i64(<4 x i64>, <4 x i64>) +declare <8 x i64> @llvm.umax.v8i64(<8 x i64>, <8 x i64>) +declare <16 x i64> @llvm.umax.v16i64(<16 x i64>, <16 x i64>) +declare <32 x i64> @llvm.umax.v32i64(<32 x i64>, <32 x i64>) +