From 7b0612088289e4f57e089a792c5e6d1123a2b6e3 Mon Sep 17 00:00:00 2001
From: Jon Roelofs
Date: Thu, 27 May 2021 15:11:55 -0700
Subject: [PATCH] [AArch64][GISel] and+or+shl => bfi

This fixes a GISEL vs SDAG regression that showed up at -Os in 256.bzip2

In `_getAndMoveToFrontDecode`:

gisel:
```
and w9, w0, #0xff
orr w9, w9, w8, lsl #8
```

sdag:
```
bfi w0, w8, #8, #24
```

Differential revision: https://reviews.llvm.org/D103291
---
 .../GISel/AArch64InstructionSelector.cpp      |  45 +++++
 .../GlobalISel/select-bitfield-insert.ll      | 157 ++++++++++++++++++
 2 files changed, 202 insertions(+)
 create mode 100644 llvm/test/CodeGen/AArch64/GlobalISel/select-bitfield-insert.ll

diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index d989064cf6901..76eb404fa63cf 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -2163,6 +2163,51 @@ bool AArch64InstructionSelector::earlySelect(MachineInstr &I) {
     I.eraseFromParent();
     return true;
   }
+  case TargetOpcode::G_OR: {
+    // Look for operations that take the lower `Width=Size-ShiftImm` bits of
+    // `ShiftSrc` and insert them into the upper `Width` bits of `MaskSrc` via
+    // shifting and masking that we can replace with a BFI (encoded as a BFM).
+    Register Dst = I.getOperand(0).getReg();
+    LLT Ty = MRI.getType(Dst);
+
+    if (!Ty.isScalar())
+      return false;
+
+    unsigned Size = Ty.getSizeInBits();
+    if (Size != 32 && Size != 64)
+      return false;
+
+    Register ShiftSrc;
+    int64_t ShiftImm;
+    Register MaskSrc;
+    int64_t MaskImm;
+    if (!mi_match(
+            Dst, MRI,
+            m_GOr(m_OneNonDBGUse(m_GShl(m_Reg(ShiftSrc), m_ICst(ShiftImm))),
+                  m_OneNonDBGUse(m_GAnd(m_Reg(MaskSrc), m_ICst(MaskImm))))))
+      return false;
+
+    // The shift amount must lie in [1, Size). A zero shift would give
+    // Immr == Size, which BFM cannot encode, and a negative or >= 64 shift
+    // would make `1ULL << ShiftImm` below undefined behaviour.
+    if (ShiftImm <= 0 || ShiftImm >= Size)
+      return false;
+
+    // The mask must keep exactly the low `ShiftImm` bits, i.e. the bits the
+    // shifted value leaves clear.
+    if (((1ULL << ShiftImm) - 1ULL) != uint64_t(MaskImm))
+      return false;
+
+    // BFI Rd, Rn, #lsb, #width is an alias of
+    // BFM Rd, Rn, #((Size - lsb) % Size), #(width - 1), with lsb = ShiftImm
+    // and width = Size - ShiftImm.
+    int64_t Immr = Size - ShiftImm;
+    int64_t Imms = Size - ShiftImm - 1;
+    unsigned Opc = Size == 32 ? AArch64::BFMWri : AArch64::BFMXri;
+    emitInstr(Opc, {Dst}, {MaskSrc, ShiftSrc, Immr, Imms}, MIB);
+    I.eraseFromParent();
+    return true;
+  }
   default:
     return false;
   }
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-bitfield-insert.ll b/llvm/test/CodeGen/AArch64/GlobalISel/select-bitfield-insert.ll
new file mode 100644
index 0000000000000..d7ba81a89d012
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-bitfield-insert.ll
@@ -0,0 +1,157 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -o - -verify-machineinstrs -global-isel=1 -global-isel-abort=1 | FileCheck %s --check-prefixes=CHECK,GISEL
+; RUN: llc < %s -o - -verify-machineinstrs -global-isel=0 | FileCheck %s --check-prefixes=CHECK,SDAG
+
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+target triple = "arm64-apple-ios14.5.0"
+
+define i32 @bfi_w_31(i32 %in1, i32 %in2) {
+; CHECK-LABEL: bfi_w_31:
+; CHECK:       ; %bb.0: ; %bb
+; CHECK-NEXT:    bfi w1, w0, #31, #1
+; CHECK-NEXT:    mov w0, w1
+; CHECK-NEXT:    ret
+bb:
+  %tmp3 = shl i32 %in1, 31
+  %tmp4 = and i32 %in2, 2147483647
+  %out = or i32 %tmp3, %tmp4
+  ret i32 %out
+}
+
+define i32 @bfi_w_8(i32 %in1, i32 %in2) {
+; CHECK-LABEL: bfi_w_8:
+; CHECK:       ; %bb.0: ; %bb
+; CHECK-NEXT:    bfi w1, w0, #8, #24
+; CHECK-NEXT:    mov w0, w1
+; CHECK-NEXT:    ret
+bb:
+  %tmp3 = shl i32 %in1, 8
+  %tmp4 = and i32 %in2, 255
+  %out = or i32 %tmp3, %tmp4
+  ret i32 %out
+}
+
+define i32 @bfi_w_1(i32 %in1, i32 %in2) {
+; CHECK-LABEL: bfi_w_1:
+; CHECK:       ; %bb.0: ; %bb
+; CHECK-NEXT:    bfi w1, w0, #1, #31
+; CHECK-NEXT:    mov w0, w1
+; CHECK-NEXT:    ret
+bb:
+  %tmp3 = shl i32 %in1, 1
+  %tmp4 = and i32 %in2, 1
+  %out = or i32 %tmp3, %tmp4
+  ret i32 %out
+}
+
+define i64 @bfi_x_63(i64 %in1, i64 %in2) {
+; CHECK-LABEL: bfi_x_63:
+; CHECK:       ; %bb.0: ; %bb
+; CHECK-NEXT:    bfi x1, x0, #63, #1
+; CHECK-NEXT:    mov x0, x1
+; CHECK-NEXT:    ret
+bb:
+  %tmp3 = shl i64 %in1, 63
+  %tmp4 = and i64 %in2, 9223372036854775807
+  %out = or i64 %tmp3, %tmp4
+  ret i64 %out
+}
+
+define i64 @bfi_x_31(i64 %in1, i64 %in2) {
+; CHECK-LABEL: bfi_x_31:
+; CHECK:       ; %bb.0: ; %bb
+; CHECK-NEXT:    bfi x1, x0, #31, #33
+; CHECK-NEXT:    mov x0, x1
+; CHECK-NEXT:    ret
+bb:
+  %tmp3 = shl i64 %in1, 31
+  %tmp4 = and i64 %in2, 2147483647
+  %out = or i64 %tmp3, %tmp4
+  ret i64 %out
+}
+
+define i64 @bfi_x_8(i64 %in1, i64 %in2) {
+; CHECK-LABEL: bfi_x_8:
+; CHECK:       ; %bb.0: ; %bb
+; CHECK-NEXT:    bfi x1, x0, #8, #56
+; CHECK-NEXT:    mov x0, x1
+; CHECK-NEXT:    ret
+bb:
+  %tmp3 = shl i64 %in1, 8
+  %tmp4 = and i64 %in2, 255
+  %out = or i64 %tmp3, %tmp4
+  ret i64 %out
+}
+
+define i64 @bfi_x_1(i64 %in1, i64 %in2) {
+; CHECK-LABEL: bfi_x_1:
+; CHECK:       ; %bb.0: ; %bb
+; CHECK-NEXT:    bfi x1, x0, #1, #63
+; CHECK-NEXT:    mov x0, x1
+; CHECK-NEXT:    ret
+bb:
+  %tmp3 = shl i64 %in1, 1
+  %tmp4 = and i64 %in2, 1
+  %out = or i64 %tmp3, %tmp4
+  ret i64 %out
+}
+
+define i64 @bfi_x_1_swapped(i64 %in1, i64 %in2) {
+; CHECK-LABEL: bfi_x_1_swapped:
+; CHECK:       ; %bb.0: ; %bb
+; CHECK-NEXT:    bfi x1, x0, #1, #63
+; CHECK-NEXT:    mov x0, x1
+; CHECK-NEXT:    ret
+bb:
+  %tmp3 = shl i64 %in1, 1
+  %tmp4 = and i64 %in2, 1
+  %out = or i64 %tmp4, %tmp3
+  ret i64 %out
+}
+
+define i64 @extra_use1(i64 %in1, i64 %in2, i64* %p) {
+; GISEL-LABEL: extra_use1:
+; GISEL:       ; %bb.0: ; %bb
+; GISEL-NEXT:    lsl x8, x0, #1
+; GISEL-NEXT:    and x9, x1, #0x1
+; GISEL-NEXT:    orr x0, x8, x9
+; GISEL-NEXT:    str x8, [x2]
+; GISEL-NEXT:    ret
+;
+; SDAG-LABEL: extra_use1:
+; SDAG:       ; %bb.0: ; %bb
+; SDAG-NEXT:    bfi x1, x0, #1, #63
+; SDAG-NEXT:    lsl x8, x0, #1
+; SDAG-NEXT:    mov x0, x1
+; SDAG-NEXT:    str x8, [x2]
+; SDAG-NEXT:    ret
+bb:
+  %tmp3 = shl i64 %in1, 1
+  %tmp4 = and i64 %in2, 1
+  %out = or i64 %tmp3, %tmp4
+  store i64 %tmp3, i64* %p
+  ret i64 %out
+}
+
+define i64 @extra_use2(i64 %in1, i64 %in2, i64* %p) {
+; GISEL-LABEL: extra_use2:
+; GISEL:       ; %bb.0: ; %bb
+; GISEL-NEXT:    and x8, x1, #0x1
+; GISEL-NEXT:    orr x0, x8, x0, lsl #1
+; GISEL-NEXT:    str x8, [x2]
+; GISEL-NEXT:    ret
+;
+; SDAG-LABEL: extra_use2:
+; SDAG:       ; %bb.0: ; %bb
+; SDAG-NEXT:    and x8, x1, #0x1
+; SDAG-NEXT:    bfi x1, x0, #1, #63
+; SDAG-NEXT:    mov x0, x1
+; SDAG-NEXT:    str x8, [x2]
+; SDAG-NEXT:    ret
+bb:
+  %tmp3 = shl i64 %in1, 1
+  %tmp4 = and i64 %in2, 1
+  %out = or i64 %tmp3, %tmp4
+  store i64 %tmp4, i64* %p
+  ret i64 %out
+}