[AArch64] Change how we do bit computations for bfi #142646

AZero13 · 2025-06-03T17:43:59Z

Instead of relying on known 0s, we can instead focus on what we do know about the final result, and bxfil that.

llvmbot · 2025-06-03T17:44:37Z

@llvm/pr-subscribers-backend-aarch64

Author: AZero13 (AZero13)

Changes

Instead of relying on known 0s, we can instead focus on what we do know about the final result, and bxfil that.

Full diff: https://github.com/llvm/llvm-project/pull/142646.diff

2 Files Affected:

(modified) llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp (+40-18)
(modified) llvm/test/CodeGen/AArch64/bitfield-insert.ll (+29)

diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index 34f6db9374cb5..b52f2c03f5503 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -3316,28 +3316,50 @@ static bool tryBitfieldInsertOpFromOrAndImm(SDNode *N, SelectionDAG *CurDAG) {
       !isOpcWithIntImmediate(And.getNode(), ISD::AND, MaskImm))
     return false;
 
-  // Compute the Known Zero for the AND as this allows us to catch more general
-  // cases than just looking for AND with imm.
-  KnownBits Known = CurDAG->computeKnownBits(And);
+  // In the event that computeKnownBits gives us more bits than we bargained
+  // for, and cause the the result to not be a shifted mask, we
+  // should just focus on the maskImm
 
-  // Non-zero in the sense that they're not provably zero, which is the key
-  // point if we want to use this value.
-  uint64_t NotKnownZero = (~Known.Zero).getZExtValue();
+  // We can optimize any chain of bitwise operations that results in a series of
+  // contiguous bits being set if we know the end result will have those bits
+  // set.
 
-  // The KnownZero mask must be a shifted mask (e.g., 1110..011, 11100..00).
-  if (!isShiftedMask(Known.Zero.getZExtValue(), VT))
-    return false;
+  KnownBits Known = CurDAG->computeKnownBits(And.getOperand(0));
+  uint64_t OldOne = Known.One.getZExtValue();
+  uint64_t OldZero = Known.Zero.getZExtValue();
 
-  // The bits being inserted must only set those bits that are known to be zero.
-  if ((OrImm & NotKnownZero) != 0) {
-    // FIXME:  It's okay if the OrImm sets NotKnownZero bits to 1, but we don't
-    // currently handle this case.
-    return false;
+  Known &= KnownBits::makeConstant(APInt(BitWidth, MaskImm));
+  Known |= KnownBits::makeConstant(APInt(BitWidth, OrImm));
+
+  // Now we need to see if any KnownBits changed
+  uint64_t NewOneVal = Known.One.getZExtValue();
+  uint64_t NewZeroVal = Known.Zero.getZExtValue();
+
+  uint64_t ChangedBits = (NewOneVal ^ OldOne) | (NewZeroVal ^ OldZero);
+
+  // Now ensure that all changed bits are still in known territory.
+  // 6. Ensure all changed bits are still in known territory.
+
+  // 7. Find smallest bitfield insert that encompasses all changed bits and any
+  // known bits between them.
+  if (ChangedBits == 0) {
+    // No change? Then we can just replace the node with the 1st AND operand.
+    CurDAG->ReplaceAllUsesWith(SDValue(N, 0), And->getOperand(0));
+    return true;
   }
 
-  // BFI/BFXIL dst, src, #lsb, #width.
-  int LSB = llvm::countr_one(NotKnownZero);
-  int Width = BitWidth - APInt(BitWidth, NotKnownZero).popcount();
+  // 4) Find your span:
+  APInt ChangedBitsAPInt = APInt(BitWidth, ChangedBits);
+  int LSB = ChangedBitsAPInt.countr_zero();
+  int MSB = BitWidth - ChangedBitsAPInt.countl_zero();
+  int Width = MSB - LSB;
+  uint64_t SpanMask = ((1ULL << Width) - 1) << LSB;
+
+  // 5) Bail if *any* bit in that run is still unknown:
+  uint64_t Unknown = ~(NewOneVal | NewZeroVal);
+  if ((Unknown & SpanMask) != 0)
+    return false;
+
 
   // BFI/BFXIL is an alias of BFM, so translate to BFM operands.
   unsigned ImmR = (BitWidth - LSB) % BitWidth;
@@ -3348,7 +3370,7 @@ static bool tryBitfieldInsertOpFromOrAndImm(SDNode *N, SelectionDAG *CurDAG) {
   // ORR.  A BFXIL will use the same constant as the original ORR, so the code
   // should be no worse in this case.
   bool IsBFI = LSB != 0;
-  uint64_t BFIImm = OrImm >> LSB;
+  uint64_t BFIImm = (NewOneVal & SpanMask) >> LSB;
   if (IsBFI && !AArch64_AM::isLogicalImmediate(BFIImm, BitWidth)) {
     // We have a BFI instruction and we know the constant can't be materialized
     // with a ORR-immediate with the zero register.
diff --git a/llvm/test/CodeGen/AArch64/bitfield-insert.ll b/llvm/test/CodeGen/AArch64/bitfield-insert.ll
index eefb862c5313c..556c7ecde792f 100644
--- a/llvm/test/CodeGen/AArch64/bitfield-insert.ll
+++ b/llvm/test/CodeGen/AArch64/bitfield-insert.ll
@@ -753,3 +753,32 @@ entry:
   store i16 %or, ptr %p
   ret i16 %and1
 }
+
+define i32 @test1_known_not_zero(i32 %a) {
+; CHECK-LABEL: test1_known_not_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #12288 // =0x3000
+; CHECK-NEXT:    and w9, w0, #0xfffffffe
+; CHECK-NEXT:    movk w8, #7, lsl #16
+; CHECK-NEXT:    and w9, w9, #0xfff00fff
+; CHECK-NEXT:    orr w0, w9, w8
+; CHECK-NEXT:    ret
+  %and1 = and i32 %a, -1044482
+  %or = or disjoint i32 %and1, 471040
+  ret i32 %or
+}
+
+define i32 @hole_test(i32 %a) {
+; CHECK-LABEL: hole_test:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mul w9, w0, w0
+; CHECK-NEXT:    mov w8, #65533 // =0xfffd
+; CHECK-NEXT:    movk w8, #65525, lsl #16
+; CHECK-NEXT:    orr w9, w9, #0xff0000
+; CHECK-NEXT:    and w0, w9, w8
+; CHECK-NEXT:    ret
+  %2 = mul i32 %a, %a
+  %3 = and i32 %2, -4128771
+  %4 = or i32 %3, 16056320
+  ret i32 %4
+}

github-actions · 2025-06-03T17:48:10Z

✅ With the latest revision this PR passed the C/C++ code formatter.

Instead of relying on known 0s, we can instead focus on what we do know about the final result, and bxfil that.

AZero13 force-pushed the bxfil branch from b952c7e to ae267f0 Compare June 3, 2025 17:44

llvmbot added the backend:AArch64 label Jun 3, 2025

AZero13 force-pushed the bxfil branch from ae267f0 to 5c9dc1d Compare June 3, 2025 17:49

AZero13 added 2 commits June 4, 2025 10:48

Test known zero with regressions

b10979e

[AArch64] Change how we do bit computations for bfi

03387c9

Instead of relying on known 0s, we can instead focus on what we do know about the final result, and bxfil that.

AZero13 force-pushed the bxfil branch from 5c9dc1d to 03387c9 Compare June 4, 2025 14:48

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

[AArch64] Change how we do bit computations for bfi #142646

[AArch64] Change how we do bit computations for bfi #142646

Uh oh!

AZero13 commented Jun 3, 2025

Uh oh!

llvmbot commented Jun 3, 2025

Uh oh!

github-actions bot commented Jun 3, 2025 •

edited

Loading

Uh oh!

Uh oh!

[AArch64] Change how we do bit computations for bfi #142646

Are you sure you want to change the base?

[AArch64] Change how we do bit computations for bfi #142646

Uh oh!

Conversation

AZero13 commented Jun 3, 2025

Uh oh!

llvmbot commented Jun 3, 2025

Uh oh!

github-actions bot commented Jun 3, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

Uh oh!

github-actions bot commented Jun 3, 2025 •

edited

Loading