Skip to content

Commit f4615fe

Browse files
committed
Revert "[DAG] Extend SearchForAndLoads with any_extend handling"
This caused builds to fail with:

    llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp:5638: bool (anonymous namespace)::DAGCombiner::BackwardsPropagateMask(llvm::SDNode *): Assertion `NewLoad && "Shouldn't be masking the load if it can't be narrowed"' failed.

See the code review for a link to a reproducer.

> This extends the code in SearchForAndLoads to be able to look through
> ANY_EXTEND nodes, which can be created from mismatching IR types where
> the AND node we begin from only demands the low parts of the register.
> That turns zext and sext into any_extends as only the low bits are
> demanded. To be able to look through ANY_EXTEND nodes we need to handle
> mismatching types in a few places, potentially truncating the mask to
> the size of the final load.
>
> Differential Revision: https://reviews.llvm.org/D117457

This reverts commit 5780087.
1 parent 9e68557 commit f4615fe

File tree

4 files changed: +92 −84 lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 12 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -5491,8 +5491,6 @@ bool DAGCombiner::SearchForAndLoads(SDNode *N,
54915491

54925492
// Some constants may need fixing up later if they are too large.
54935493
if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
5494-
if (Mask->getValueType(0) != C->getValueType(0))
5495-
return false;
54965494
if ((N->getOpcode() == ISD::OR || N->getOpcode() == ISD::XOR) &&
54975495
(Mask->getAPIntValue() & C->getAPIntValue()) != C->getAPIntValue())
54985496
NodesWithConsts.insert(N);
@@ -5526,17 +5524,16 @@ bool DAGCombiner::SearchForAndLoads(SDNode *N,
55265524
case ISD::AssertZext: {
55275525
unsigned ActiveBits = Mask->getAPIntValue().countTrailingOnes();
55285526
EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
5529-
EVT VT = Op.getOpcode() == ISD::AssertZext
5530-
? cast<VTSDNode>(Op.getOperand(1))->getVT()
5531-
: Op.getOperand(0).getValueType();
5527+
EVT VT = Op.getOpcode() == ISD::AssertZext ?
5528+
cast<VTSDNode>(Op.getOperand(1))->getVT() :
5529+
Op.getOperand(0).getValueType();
55325530

55335531
// We can accept extending nodes if the mask is wider or an equal
55345532
// width to the original type.
55355533
if (ExtVT.bitsGE(VT))
55365534
continue;
55375535
break;
55385536
}
5539-
case ISD::ANY_EXTEND:
55405537
case ISD::OR:
55415538
case ISD::XOR:
55425539
case ISD::AND:
@@ -5596,14 +5593,12 @@ bool DAGCombiner::BackwardsPropagateMask(SDNode *N) {
55965593
// masking.
55975594
if (FixupNode) {
55985595
LLVM_DEBUG(dbgs() << "First, need to fix up: "; FixupNode->dump());
5599-
SDValue MaskOpT = DAG.getZExtOrTrunc(MaskOp, SDLoc(FixupNode),
5600-
FixupNode->getValueType(0));
5601-
SDValue And =
5602-
DAG.getNode(ISD::AND, SDLoc(FixupNode), FixupNode->getValueType(0),
5603-
SDValue(FixupNode, 0), MaskOpT);
5596+
SDValue And = DAG.getNode(ISD::AND, SDLoc(FixupNode),
5597+
FixupNode->getValueType(0),
5598+
SDValue(FixupNode, 0), MaskOp);
56045599
DAG.ReplaceAllUsesOfValueWith(SDValue(FixupNode, 0), And);
56055600
if (And.getOpcode() == ISD ::AND)
5606-
DAG.UpdateNodeOperands(And.getNode(), SDValue(FixupNode, 0), MaskOpT);
5601+
DAG.UpdateNodeOperands(And.getNode(), SDValue(FixupNode, 0), MaskOp);
56075602
}
56085603

56095604
// Narrow any constants that need it.
@@ -5612,27 +5607,23 @@ bool DAGCombiner::BackwardsPropagateMask(SDNode *N) {
56125607
SDValue Op1 = LogicN->getOperand(1);
56135608

56145609
if (isa<ConstantSDNode>(Op0))
5615-
std::swap(Op0, Op1);
5610+
std::swap(Op0, Op1);
56165611

5617-
SDValue MaskOpT =
5618-
DAG.getZExtOrTrunc(MaskOp, SDLoc(Op1), Op1.getValueType());
5619-
SDValue And =
5620-
DAG.getNode(ISD::AND, SDLoc(Op1), Op1.getValueType(), Op1, MaskOpT);
5612+
SDValue And = DAG.getNode(ISD::AND, SDLoc(Op1), Op1.getValueType(),
5613+
Op1, MaskOp);
56215614

56225615
DAG.UpdateNodeOperands(LogicN, Op0, And);
56235616
}
56245617

56255618
// Create narrow loads.
56265619
for (auto *Load : Loads) {
56275620
LLVM_DEBUG(dbgs() << "Propagate AND back to: "; Load->dump());
5628-
SDValue MaskOpT =
5629-
DAG.getZExtOrTrunc(MaskOp, SDLoc(Load), Load->getValueType(0));
56305621
SDValue And = DAG.getNode(ISD::AND, SDLoc(Load), Load->getValueType(0),
5631-
SDValue(Load, 0), MaskOpT);
5622+
SDValue(Load, 0), MaskOp);
56325623
DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), And);
56335624
if (And.getOpcode() == ISD ::AND)
56345625
And = SDValue(
5635-
DAG.UpdateNodeOperands(And.getNode(), SDValue(Load, 0), MaskOpT), 0);
5626+
DAG.UpdateNodeOperands(And.getNode(), SDValue(Load, 0), MaskOp), 0);
56365627
SDValue NewLoad = reduceLoadWidth(And.getNode());
56375628
assert(NewLoad &&
56385629
"Shouldn't be masking the load if it can't be narrowed");

llvm/test/CodeGen/AArch64/combine-andintoload.ll

Lines changed: 68 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -5,14 +5,16 @@
55
define i64 @load32_and16_and(i32* %p, i64 %y) {
66
; CHECK-LABEL: load32_and16_and:
77
; CHECK: // %bb.0:
8-
; CHECK-NEXT: ldrh w8, [x0]
9-
; CHECK-NEXT: and w0, w1, w8
8+
; CHECK-NEXT: ldr w8, [x0]
9+
; CHECK-NEXT: and w8, w1, w8
10+
; CHECK-NEXT: and x0, x8, #0xffff
1011
; CHECK-NEXT: ret
1112
;
1213
; CHECKBE-LABEL: load32_and16_and:
1314
; CHECKBE: // %bb.0:
14-
; CHECKBE-NEXT: ldrh w8, [x0, #2]
15-
; CHECKBE-NEXT: and w0, w1, w8
15+
; CHECKBE-NEXT: ldr w8, [x0]
16+
; CHECKBE-NEXT: and w8, w1, w8
17+
; CHECKBE-NEXT: and x0, x8, #0xffff
1618
; CHECKBE-NEXT: ret
1719
%x = load i32, i32* %p, align 4
1820
%xz = zext i32 %x to i64
@@ -24,14 +26,16 @@ define i64 @load32_and16_and(i32* %p, i64 %y) {
2426
define i64 @load32_and16_andr(i32* %p, i64 %y) {
2527
; CHECK-LABEL: load32_and16_andr:
2628
; CHECK: // %bb.0:
27-
; CHECK-NEXT: ldrh w8, [x0]
28-
; CHECK-NEXT: and w0, w1, w8
29+
; CHECK-NEXT: ldr w8, [x0]
30+
; CHECK-NEXT: and w8, w1, w8
31+
; CHECK-NEXT: and x0, x8, #0xffff
2932
; CHECK-NEXT: ret
3033
;
3134
; CHECKBE-LABEL: load32_and16_andr:
3235
; CHECKBE: // %bb.0:
33-
; CHECKBE-NEXT: ldrh w8, [x0, #2]
34-
; CHECKBE-NEXT: and w0, w1, w8
36+
; CHECKBE-NEXT: ldr w8, [x0]
37+
; CHECKBE-NEXT: and w8, w1, w8
38+
; CHECKBE-NEXT: and x0, x8, #0xffff
3539
; CHECKBE-NEXT: ret
3640
%x = load i32, i32* %p, align 4
3741
%xz = zext i32 %x to i64
@@ -43,14 +47,16 @@ define i64 @load32_and16_andr(i32* %p, i64 %y) {
4347
define i64 @load32_and16_and_sext(i32* %p, i64 %y) {
4448
; CHECK-LABEL: load32_and16_and_sext:
4549
; CHECK: // %bb.0:
46-
; CHECK-NEXT: ldrh w8, [x0]
47-
; CHECK-NEXT: and w0, w1, w8
50+
; CHECK-NEXT: ldr w8, [x0]
51+
; CHECK-NEXT: and w8, w1, w8
52+
; CHECK-NEXT: and x0, x8, #0xffff
4853
; CHECK-NEXT: ret
4954
;
5055
; CHECKBE-LABEL: load32_and16_and_sext:
5156
; CHECKBE: // %bb.0:
52-
; CHECKBE-NEXT: ldrh w8, [x0, #2]
53-
; CHECKBE-NEXT: and w0, w1, w8
57+
; CHECKBE-NEXT: ldr w8, [x0]
58+
; CHECKBE-NEXT: and w8, w1, w8
59+
; CHECKBE-NEXT: and x0, x8, #0xffff
5460
; CHECKBE-NEXT: ret
5561
%x = load i32, i32* %p, align 4
5662
%xz = sext i32 %x to i64
@@ -62,16 +68,16 @@ define i64 @load32_and16_and_sext(i32* %p, i64 %y) {
6268
define i64 @load32_and16_or(i32* %p, i64 %y) {
6369
; CHECK-LABEL: load32_and16_or:
6470
; CHECK: // %bb.0:
65-
; CHECK-NEXT: ldrh w8, [x0]
66-
; CHECK-NEXT: and w9, w1, #0xffff
67-
; CHECK-NEXT: orr w0, w9, w8
71+
; CHECK-NEXT: ldr w8, [x0]
72+
; CHECK-NEXT: orr w8, w1, w8
73+
; CHECK-NEXT: and x0, x8, #0xffff
6874
; CHECK-NEXT: ret
6975
;
7076
; CHECKBE-LABEL: load32_and16_or:
7177
; CHECKBE: // %bb.0:
72-
; CHECKBE-NEXT: ldrh w8, [x0, #2]
73-
; CHECKBE-NEXT: and w9, w1, #0xffff
74-
; CHECKBE-NEXT: orr w0, w9, w8
78+
; CHECKBE-NEXT: ldr w8, [x0]
79+
; CHECKBE-NEXT: orr w8, w1, w8
80+
; CHECKBE-NEXT: and x0, x8, #0xffff
7581
; CHECKBE-NEXT: ret
7682
%x = load i32, i32* %p, align 4
7783
%xz = zext i32 %x to i64
@@ -164,14 +170,16 @@ define i64 @load16_and16(i16* %p, i64 %y) {
164170
define i64 @load16_and8(i16* %p, i64 %y) {
165171
; CHECK-LABEL: load16_and8:
166172
; CHECK: // %bb.0:
167-
; CHECK-NEXT: ldrb w8, [x0]
168-
; CHECK-NEXT: and w0, w1, w8
173+
; CHECK-NEXT: ldrh w8, [x0]
174+
; CHECK-NEXT: and w8, w1, w8
175+
; CHECK-NEXT: and x0, x8, #0xff
169176
; CHECK-NEXT: ret
170177
;
171178
; CHECKBE-LABEL: load16_and8:
172179
; CHECKBE: // %bb.0:
173-
; CHECKBE-NEXT: ldrb w8, [x0, #1]
174-
; CHECKBE-NEXT: and w0, w1, w8
180+
; CHECKBE-NEXT: ldrh w8, [x0]
181+
; CHECKBE-NEXT: and w8, w1, w8
182+
; CHECKBE-NEXT: and x0, x8, #0xff
175183
; CHECKBE-NEXT: ret
176184
%x = load i16, i16* %p, align 4
177185
%xz = zext i16 %x to i64
@@ -224,13 +232,15 @@ define i64 @load8_and16_zext(i8* %p, i8 %y) {
224232
; CHECK-LABEL: load8_and16_zext:
225233
; CHECK: // %bb.0:
226234
; CHECK-NEXT: ldrb w8, [x0]
227-
; CHECK-NEXT: and w0, w1, w8
235+
; CHECK-NEXT: and w8, w1, w8
236+
; CHECK-NEXT: and x0, x8, #0xff
228237
; CHECK-NEXT: ret
229238
;
230239
; CHECKBE-LABEL: load8_and16_zext:
231240
; CHECKBE: // %bb.0:
232241
; CHECKBE-NEXT: ldrb w8, [x0]
233-
; CHECKBE-NEXT: and w0, w1, w8
242+
; CHECKBE-NEXT: and w8, w1, w8
243+
; CHECKBE-NEXT: and x0, x8, #0xff
234244
; CHECKBE-NEXT: ret
235245
%x = load i8, i8* %p, align 4
236246
%xz = zext i8 %x to i64
@@ -286,14 +296,16 @@ define i64 @load8_and16_or(i8* %p, i64 %y) {
286296
define i64 @load16_and8_manyext(i16* %p, i32 %y) {
287297
; CHECK-LABEL: load16_and8_manyext:
288298
; CHECK: // %bb.0:
289-
; CHECK-NEXT: ldrb w8, [x0]
290-
; CHECK-NEXT: and w0, w1, w8
299+
; CHECK-NEXT: ldrh w8, [x0]
300+
; CHECK-NEXT: and w8, w1, w8
301+
; CHECK-NEXT: and x0, x8, #0xff
291302
; CHECK-NEXT: ret
292303
;
293304
; CHECKBE-LABEL: load16_and8_manyext:
294305
; CHECKBE: // %bb.0:
295-
; CHECKBE-NEXT: ldrb w8, [x0, #1]
296-
; CHECKBE-NEXT: and w0, w1, w8
306+
; CHECKBE-NEXT: ldrh w8, [x0]
307+
; CHECKBE-NEXT: and w8, w1, w8
308+
; CHECKBE-NEXT: and x0, x8, #0xff
297309
; CHECKBE-NEXT: ret
298310
%x = load i16, i16* %p, align 4
299311
%xz = zext i16 %x to i32
@@ -306,16 +318,18 @@ define i64 @load16_and8_manyext(i16* %p, i32 %y) {
306318
define i64 @multiple_load(i16* %p, i32* %q) {
307319
; CHECK-LABEL: multiple_load:
308320
; CHECK: // %bb.0:
309-
; CHECK-NEXT: ldrb w8, [x0]
310-
; CHECK-NEXT: ldrb w9, [x1]
311-
; CHECK-NEXT: and w0, w9, w8
321+
; CHECK-NEXT: ldrh w8, [x0]
322+
; CHECK-NEXT: ldr w9, [x1]
323+
; CHECK-NEXT: and w8, w9, w8
324+
; CHECK-NEXT: and x0, x8, #0xff
312325
; CHECK-NEXT: ret
313326
;
314327
; CHECKBE-LABEL: multiple_load:
315328
; CHECKBE: // %bb.0:
316-
; CHECKBE-NEXT: ldrb w8, [x0, #1]
317-
; CHECKBE-NEXT: ldrb w9, [x1, #3]
318-
; CHECKBE-NEXT: and w0, w9, w8
329+
; CHECKBE-NEXT: ldrh w8, [x0]
330+
; CHECKBE-NEXT: ldr w9, [x1]
331+
; CHECKBE-NEXT: and w8, w9, w8
332+
; CHECKBE-NEXT: and x0, x8, #0xff
319333
; CHECKBE-NEXT: ret
320334
%x = load i16, i16* %p, align 4
321335
%xz = zext i16 %x to i64
@@ -329,16 +343,18 @@ define i64 @multiple_load(i16* %p, i32* %q) {
329343
define i64 @multiple_load_or(i16* %p, i32* %q) {
330344
; CHECK-LABEL: multiple_load_or:
331345
; CHECK: // %bb.0:
332-
; CHECK-NEXT: ldrb w8, [x0]
333-
; CHECK-NEXT: ldrb w9, [x1]
334-
; CHECK-NEXT: orr w0, w9, w8
346+
; CHECK-NEXT: ldrh w8, [x0]
347+
; CHECK-NEXT: ldr w9, [x1]
348+
; CHECK-NEXT: orr w8, w9, w8
349+
; CHECK-NEXT: and x0, x8, #0xff
335350
; CHECK-NEXT: ret
336351
;
337352
; CHECKBE-LABEL: multiple_load_or:
338353
; CHECKBE: // %bb.0:
339-
; CHECKBE-NEXT: ldrb w8, [x0, #1]
340-
; CHECKBE-NEXT: ldrb w9, [x1, #3]
341-
; CHECKBE-NEXT: orr w0, w9, w8
354+
; CHECKBE-NEXT: ldrh w8, [x0]
355+
; CHECKBE-NEXT: ldr w9, [x1]
356+
; CHECKBE-NEXT: orr w8, w9, w8
357+
; CHECKBE-NEXT: and x0, x8, #0xff
342358
; CHECKBE-NEXT: ret
343359
%x = load i16, i16* %p, align 4
344360
%xz = zext i16 %x to i64
@@ -352,16 +368,16 @@ define i64 @multiple_load_or(i16* %p, i32* %q) {
352368
define i64 @load32_and16_zexty(i32* %p, i32 %y) {
353369
; CHECK-LABEL: load32_and16_zexty:
354370
; CHECK: // %bb.0:
355-
; CHECK-NEXT: ldrh w8, [x0]
356-
; CHECK-NEXT: and w9, w1, #0xffff
357-
; CHECK-NEXT: orr w0, w9, w8
371+
; CHECK-NEXT: ldr w8, [x0]
372+
; CHECK-NEXT: orr w8, w1, w8
373+
; CHECK-NEXT: and x0, x8, #0xffff
358374
; CHECK-NEXT: ret
359375
;
360376
; CHECKBE-LABEL: load32_and16_zexty:
361377
; CHECKBE: // %bb.0:
362-
; CHECKBE-NEXT: ldrh w8, [x0, #2]
363-
; CHECKBE-NEXT: and w9, w1, #0xffff
364-
; CHECKBE-NEXT: orr w0, w9, w8
378+
; CHECKBE-NEXT: ldr w8, [x0]
379+
; CHECKBE-NEXT: orr w8, w1, w8
380+
; CHECKBE-NEXT: and x0, x8, #0xffff
365381
; CHECKBE-NEXT: ret
366382
%x = load i32, i32* %p, align 4
367383
%xz = zext i32 %x to i64
@@ -374,16 +390,16 @@ define i64 @load32_and16_zexty(i32* %p, i32 %y) {
374390
define i64 @load32_and16_sexty(i32* %p, i32 %y) {
375391
; CHECK-LABEL: load32_and16_sexty:
376392
; CHECK: // %bb.0:
377-
; CHECK-NEXT: ldrh w8, [x0]
378-
; CHECK-NEXT: and w9, w1, #0xffff
379-
; CHECK-NEXT: orr w0, w9, w8
393+
; CHECK-NEXT: ldr w8, [x0]
394+
; CHECK-NEXT: orr w8, w1, w8
395+
; CHECK-NEXT: and x0, x8, #0xffff
380396
; CHECK-NEXT: ret
381397
;
382398
; CHECKBE-LABEL: load32_and16_sexty:
383399
; CHECKBE: // %bb.0:
384-
; CHECKBE-NEXT: ldrh w8, [x0, #2]
385-
; CHECKBE-NEXT: and w9, w1, #0xffff
386-
; CHECKBE-NEXT: orr w0, w9, w8
400+
; CHECKBE-NEXT: ldr w8, [x0]
401+
; CHECKBE-NEXT: orr w8, w1, w8
402+
; CHECKBE-NEXT: and x0, x8, #0xffff
387403
; CHECKBE-NEXT: ret
388404
%x = load i32, i32* %p, align 4
389405
%xz = zext i32 %x to i64

llvm/test/CodeGen/X86/pr35763.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,10 +10,10 @@
1010
define dso_local void @PR35763() {
1111
; CHECK-LABEL: PR35763:
1212
; CHECK: # %bb.0: # %entry
13-
; CHECK-NEXT: movzwl z(%rip), %eax
14-
; CHECK-NEXT: movzwl z+2(%rip), %ecx
15-
; CHECK-NEXT: orl %eax, %ecx
16-
; CHECK-NEXT: movq %rcx, tf_3_var_136(%rip)
13+
; CHECK-NEXT: movl z(%rip), %eax
14+
; CHECK-NEXT: orl z+2(%rip), %eax
15+
; CHECK-NEXT: movzwl %ax, %eax
16+
; CHECK-NEXT: movq %rax, tf_3_var_136(%rip)
1717
; CHECK-NEXT: movl z+6(%rip), %eax
1818
; CHECK-NEXT: movzbl z+10(%rip), %ecx
1919
; CHECK-NEXT: shlq $32, %rcx

llvm/test/CodeGen/X86/pr35765.ll

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -13,13 +13,14 @@ define dso_local void @PR35765() {
1313
; CHECK-NEXT: addb $-118, %cl
1414
; CHECK-NEXT: movl $4, %eax
1515
; CHECK-NEXT: shll %cl, %eax
16-
; CHECK-NEXT: movzwl s2(%rip), %ecx
17-
; CHECK-NEXT: notl %ecx
18-
; CHECK-NEXT: orl x(%rip), %ecx
19-
; CHECK-NEXT: orl $63488, %ecx # imm = 0xF800
20-
; CHECK-NEXT: movzwl %cx, %ecx
21-
; CHECK-NEXT: xorl %eax, %ecx
22-
; CHECK-NEXT: movslq %ecx, %rax
16+
; CHECK-NEXT: movzwl x(%rip), %ecx
17+
; CHECK-NEXT: movzwl s2(%rip), %edx
18+
; CHECK-NEXT: notl %edx
19+
; CHECK-NEXT: orl $63488, %edx # imm = 0xF800
20+
; CHECK-NEXT: movzwl %dx, %edx
21+
; CHECK-NEXT: orl %ecx, %edx
22+
; CHECK-NEXT: xorl %eax, %edx
23+
; CHECK-NEXT: movslq %edx, %rax
2324
; CHECK-NEXT: movq %rax, ll(%rip)
2425
; CHECK-NEXT: retq
2526
entry:

0 commit comments

Comments
 (0)