From 95479a57be7251a6fc2f5cfe3820f210ec6c1e35 Mon Sep 17 00:00:00 2001
From: hanbeom
Date: Tue, 23 Apr 2024 02:41:51 +0900
Subject: [PATCH] [AArch64] Prevent (shl (srl x, c1), c2) -> (and (shift x, c3)) when load

Currently, the `DAGCombiner` replaces shift pairs of the form
`(shl (srl x, c1), c2)` with an `AND` of a shifted value. However, in
certain cases such as `(shl (srl x, c1), 2)` the transform is not needed
if the result is only used as a `load` address.

Consider the following case:
```
lsr x8, x8, #56
and x8, x8, #0xfc
ldr w0, [x2, x8]
ret
```

In this case, we can remove the `AND` by changing the addressing mode of
the `LDR` to `[X2, X8, LSL #2]` and increasing the right-shift amount
from 56 to 58.

After the change:
```
lsr x8, x8, #58
ldr w0, [x2, x8, lsl #2]
ret
```

This patch checks whether a `(shl (srl x, c1), 2)` that feeds a `load`
address can be kept as-is instead of being transformed into an `AND`.
---
 llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 15 +++++++++++++++
 llvm/test/CodeGen/AArch64/arm64-fold-lshr.ll    | 14 ++++++--------
 2 files changed, 21 insertions(+), 8 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index f552f91929201c..3ff7ecc101af9f 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -16918,6 +16918,21 @@ bool AArch64TargetLowering::shouldFoldConstantShiftPairToMask(
     return (!C1 || !C2 || C1->getZExtValue() >= C2->getZExtValue());
   }
 
+  // We do not need to fold when this shift is only used to form a load
+  // address: (ldr x, (add x, (shl (srl x, c1) 2)))
+  if (N->getOpcode() == ISD::SHL && N->hasOneUse()) {
+    auto C2 = dyn_cast<ConstantSDNode>(N->getOperand(1));
+    if (C2 && C2->getZExtValue() <= 2) {
+      auto ShouldADD = *N->use_begin();
+      if (ShouldADD && ShouldADD->getOpcode() == ISD::ADD) {
+        if (auto ShouldLOAD = dyn_cast<LoadSDNode>(*ShouldADD->use_begin())) {
+          if (isIndexedLoadLegal(ISD::PRE_INC, ShouldLOAD->getMemoryVT()))
+            return false;
+        }
+      }
+    }
+  }
+
   return true;
 }
 
diff --git a/llvm/test/CodeGen/AArch64/arm64-fold-lshr.ll b/llvm/test/CodeGen/AArch64/arm64-fold-lshr.ll
index 04c2d4417b1fef..1b625cb41bffb5 100644
--- a/llvm/test/CodeGen/AArch64/arm64-fold-lshr.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-fold-lshr.ll
@@ -19,9 +19,8 @@ define i32 @load_shr63(i64 %a, i64 %b, ptr %table) {
 ; CHECK-LABEL: load_shr63:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    mul x8, x1, x0
-; CHECK-NEXT:    lsr x8, x8, #61
-; CHECK-NEXT:    and x8, x8, #0x4
-; CHECK-NEXT:    ldr w0, [x2, x8]
+; CHECK-NEXT:    lsr x8, x8, #63
+; CHECK-NEXT:    ldr w0, [x2, x8, lsl #2]
 ; CHECK-NEXT:    ret
 entry:
   %mul = mul i64 %b, %a
@@ -35,8 +34,8 @@ define i32 @load_shr2(i64 %a, i64 %b, ptr %table) {
 ; CHECK-LABEL: load_shr2:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    mul x8, x1, x0
-; CHECK-NEXT:    and x8, x8, #0xfffffffffffffffc
-; CHECK-NEXT:    ldr w0, [x2, x8]
+; CHECK-NEXT:    lsr x8, x8, #2
+; CHECK-NEXT:    ldr w0, [x2, x8, lsl #2]
 ; CHECK-NEXT:    ret
 entry:
   %mul = mul i64 %b, %a
@@ -50,9 +49,8 @@ define i32 @load_shr1(i64 %a, i64 %b, ptr %table) {
 ; CHECK-LABEL: load_shr1:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    mul x8, x1, x0
-; CHECK-NEXT:    lsl x8, x8, #1
-; CHECK-NEXT:    and x8, x8, #0xfffffffffffffffc
-; CHECK-NEXT:    ldr w0, [x2, x8]
+; CHECK-NEXT:    lsr x8, x8, #1
+; CHECK-NEXT:    ldr w0, [x2, x8, lsl #2]
 ; CHECK-NEXT:    ret
 entry:
   %mul = mul i64 %b, %a
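
For readers unfamiliar with where this DAG pattern comes from, here is a minimal C++ sketch (illustrative only, not part of the patch) of source code that lowers to the `(shl (srl x, c1), 2)` addressing pattern exercised by the `load_shr63` test above; the function and parameter names simply mirror that test.
```cpp
#include <cstdint>

// Mirrors the load_shr63 test: indexing an i32 table implies a scale of 4
// (the shl by 2), and the index itself is the top bit of the 64-bit
// product (the srl by 63).
uint32_t load_shr63(uint64_t a, uint64_t b, const uint32_t *table) {
  uint64_t mul = b * a;
  // With this patch, the scale is expected to stay in the LDR addressing
  // mode ([x2, x8, lsl #2]) instead of being folded into an AND mask.
  return table[mul >> 63];
}
```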