Skip to content

Commit c396c3d

Browse files
committed
Address review comment
1 parent 9373e0b commit c396c3d

33 files changed

+2284
-2237
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 28 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -14815,6 +14815,13 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
1481514815
if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG, DL, Level))
1481614816
return Res;
1481714817

14818+
if (N0.getOpcode() == ISD::FREEZE && N0.hasOneUse() && !VT.isVector()) {
14819+
SDValue Res =
14820+
DAG.getFreeze(DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N0.getOperand(0)));
14821+
return DAG.getNode(ISD::AssertSext, DL, VT, Res,
14822+
DAG.getValueType(N0.getOperand(0).getValueType()));
14823+
}
14824+
1481814825
return SDValue();
1481914826
}
1482014827

@@ -15194,6 +15201,13 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
1519415201
return SDValue(CSENode, 0);
1519515202
}
1519615203

15204+
if (N0.getOpcode() == ISD::FREEZE && N0.hasOneUse() && !VT.isVector()) {
15205+
SDValue Res =
15206+
DAG.getFreeze(DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)));
15207+
return DAG.getNode(ISD::AssertZext, DL, VT, Res,
15208+
DAG.getValueType(N0.getOperand(0).getValueType()));
15209+
}
15210+
1519715211
return SDValue();
1519815212
}
1519915213

@@ -15362,6 +15376,10 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
1536215376
if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG, DL, Level))
1536315377
return Res;
1536415378

15379+
if (N0.getOpcode() == ISD::FREEZE && N0.hasOneUse())
15380+
return DAG.getFreeze(
15381+
DAG.getNode(ISD::ANY_EXTEND, DL, VT, N0.getOperand(0)));
15382+
1536515383
return SDValue();
1536615384
}
1536715385

@@ -16911,6 +16929,11 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
1691116929
return LegalShuffle;
1691216930
}
1691316931

16932+
if (N0.getOpcode() == ISD::FREEZE && N0.hasOneUse()) {
16933+
SDLoc DL(N);
16934+
return DAG.getFreeze(DAG.getNode(ISD::BITCAST, DL, VT, N0.getOperand(0)));
16935+
}
16936+
1691416937
return SDValue();
1691516938
}
1691616939

@@ -16943,23 +16966,11 @@ SDValue DAGCombiner::visitFREEZE(SDNode *N) {
1694316966
// example https://reviews.llvm.org/D136529#4120959.
1694416967
if (N0.getOpcode() == ISD::SRA || N0.getOpcode() == ISD::SRL)
1694516968
return SDValue();
16946-
16947-
// fold: bitcast(freeze(load)) -> freeze(bitcast(load))
16948-
// fold: sext(freeze(load)) -> freeze(sext(load))
16949-
// fold: zext(freeze(load)) -> freeze(zext(load))
16950-
// This allows the conversion to potentially fold into the load.
16951-
if (N0.getOpcode() == ISD::LOAD && N->hasOneUse()) {
16952-
SDNode *User = *N->user_begin();
16953-
unsigned UserOpcode = User->getOpcode();
16954-
if (UserOpcode == ISD::BITCAST || UserOpcode == ISD::SIGN_EXTEND ||
16955-
UserOpcode == ISD::ZERO_EXTEND) {
16956-
SDValue NewConv =
16957-
DAG.getNode(UserOpcode, SDLoc(User), User->getValueType(0), N0);
16958-
SDValue FrozenConv = DAG.getFreeze(NewConv);
16959-
DAG.ReplaceAllUsesWith(User, FrozenConv.getNode());
16960-
return SDValue(N, 0);
16961-
}
16962-
}
16969+
// Avoid folding extensions and bitcasts. Each of these operations handles
16970+
// FREEZE in its own visitor.
16971+
if (N0.getOpcode() == ISD::ANY_EXTEND || N0.getOpcode() == ISD::SIGN_EXTEND ||
16972+
N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::BITCAST)
16973+
return SDValue();
1696316974

1696416975
// Fold freeze(op(x, ...)) -> op(freeze(x), ...).
1696516976
// Try to push freeze through instructions that propagate but don't produce

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 5 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -3448,19 +3448,11 @@ bool X86TargetLowering::isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT,
34483448
if (!Subtarget.hasDQI() && BitcastVT == MVT::v8i1 && LoadVT == MVT::i8)
34493449
return false;
34503450

3451-
// With low alignment, don't convert integer vectors to large scalar loads,
3452-
// because otherwise they get broken into many small scalar loads.
3453-
if (LoadVT.isVector() && LoadVT.isInteger() && !BitcastVT.isVector() &&
3454-
BitcastVT.isInteger()) {
3455-
const DataLayout &DL = DAG.getDataLayout();
3456-
unsigned MinAlign = DL.getPointerSize();
3457-
// Aligned well, will legalize into a clean sequence of loads.
3458-
if (MMO.getAlign() >= MinAlign)
3459-
return true;
3460-
// Aligned poorly for a large enough scalar.
3461-
if (BitcastVT.getSizeInBits() > 2 * DL.getPointerSizeInBits())
3462-
return false;
3463-
}
3451+
// If we have a large vector type (even if illegal), don't bitcast to large
3452+
// (illegal) scalar types. Better to load fewer vectors and extract.
3453+
if (LoadVT.isVector() && !BitcastVT.isVector() && LoadVT.isInteger() &&
3454+
BitcastVT.isInteger() && (LoadVT.getSizeInBits() % 128) == 0)
3455+
return false;
34643456

34653457
// If both types are legal vectors, it's always ok to convert them.
34663458
if (LoadVT.isVector() && BitcastVT.isVector() &&

llvm/test/CodeGen/AArch64/freeze-bitcast-ext-load.ll

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,8 +26,7 @@ define i32 @test_sext_freeze_load_i8(ptr %p) {
2626
define i64 @test_sext_freeze_load_i32(ptr %p) {
2727
; CHECK-LABEL: test_sext_freeze_load_i32:
2828
; CHECK: // %bb.0:
29-
; CHECK-NEXT: ldr w8, [x0]
30-
; CHECK-NEXT: sxtw x0, w8
29+
; CHECK-NEXT: ldrsw x0, [x0]
3130
; CHECK-NEXT: ret
3231
%v = load i32, ptr %p
3332
%f = freeze i32 %v

llvm/test/CodeGen/AArch64/freeze.ll

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -376,10 +376,14 @@ define i32 @freeze_anonstruct() {
376376
}
377377

378378
define i32 @freeze_anonstruct2() {
379-
; CHECK-LABEL: freeze_anonstruct2:
380-
; CHECK: // %bb.0:
381-
; CHECK-NEXT: add w0, w8, w8, uxth
382-
; CHECK-NEXT: ret
379+
; CHECK-SD-LABEL: freeze_anonstruct2:
380+
; CHECK-SD: // %bb.0:
381+
; CHECK-SD-NEXT: ret
382+
;
383+
; CHECK-GI-LABEL: freeze_anonstruct2:
384+
; CHECK-GI: // %bb.0:
385+
; CHECK-GI-NEXT: add w0, w8, w8, uxth
386+
; CHECK-GI-NEXT: ret
383387
%y1 = freeze {i32, i16} undef
384388
%v1 = extractvalue {i32, i16} %y1, 0
385389
%v2 = extractvalue {i32, i16} %y1, 1

llvm/test/CodeGen/AArch64/pr66603.ll

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,7 @@
55
define i32 @PR66603(double %x) nounwind {
66
; CHECK-LABEL: PR66603:
77
; CHECK: // %bb.0:
8-
; CHECK-NEXT: fcvtzs w8, d0
9-
; CHECK-NEXT: sxtb w0, w8
8+
; CHECK-NEXT: fcvtzs w0, d0
109
; CHECK-NEXT: ret
1110
%as_i8 = fptosi double %x to i8
1211
%frozen_i8 = freeze i8 %as_i8

llvm/test/CodeGen/AArch64/vector-compress.ll

Lines changed: 12 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -12,15 +12,16 @@ define <4 x i32> @test_compress_v4i32(<4 x i32> %vec, <4 x i1> %mask) {
1212
; CHECK-NEXT: shl.4s v1, v1, #31
1313
; CHECK-NEXT: cmlt.4s v1, v1, #0
1414
; CHECK-NEXT: mov.s w9, v1[1]
15-
; CHECK-NEXT: fmov w11, s1
1615
; CHECK-NEXT: mov.s w10, v1[2]
17-
; CHECK-NEXT: and x12, x11, #0x1
16+
; CHECK-NEXT: fmov w11, s1
1817
; CHECK-NEXT: bfi x8, x11, #2, #1
19-
; CHECK-NEXT: mov x11, sp
18+
; CHECK-NEXT: and x11, x11, #0x1
2019
; CHECK-NEXT: and x9, x9, #0x1
21-
; CHECK-NEXT: add x9, x12, x9
20+
; CHECK-NEXT: and w10, w10, #0x1
21+
; CHECK-NEXT: add x9, x11, x9
22+
; CHECK-NEXT: mov x11, sp
2223
; CHECK-NEXT: st1.s { v0 }[1], [x8]
23-
; CHECK-NEXT: sub w10, w9, w10
24+
; CHECK-NEXT: add w10, w9, w10
2425
; CHECK-NEXT: orr x9, x11, x9, lsl #2
2526
; CHECK-NEXT: bfi x11, x10, #2, #2
2627
; CHECK-NEXT: st1.s { v0 }[2], [x9]
@@ -420,15 +421,16 @@ define <3 x i32> @test_compress_narrow(<3 x i32> %vec, <3 x i1> %mask) {
420421
; CHECK-NEXT: shl.4s v1, v1, #31
421422
; CHECK-NEXT: cmlt.4s v1, v1, #0
422423
; CHECK-NEXT: mov.s w8, v1[1]
423-
; CHECK-NEXT: fmov w10, s1
424424
; CHECK-NEXT: mov.s w9, v1[2]
425-
; CHECK-NEXT: and x12, x10, #0x1
425+
; CHECK-NEXT: fmov w10, s1
426426
; CHECK-NEXT: bfi x11, x10, #2, #1
427-
; CHECK-NEXT: mov x10, sp
427+
; CHECK-NEXT: and x10, x10, #0x1
428428
; CHECK-NEXT: and x8, x8, #0x1
429-
; CHECK-NEXT: add x8, x12, x8
429+
; CHECK-NEXT: and w9, w9, #0x1
430+
; CHECK-NEXT: add x8, x10, x8
431+
; CHECK-NEXT: mov x10, sp
430432
; CHECK-NEXT: st1.s { v0 }[1], [x11]
431-
; CHECK-NEXT: sub w9, w8, w9
433+
; CHECK-NEXT: add w9, w8, w9
432434
; CHECK-NEXT: orr x8, x10, x8, lsl #2
433435
; CHECK-NEXT: bfi x10, x9, #2, #2
434436
; CHECK-NEXT: st1.s { v0 }[2], [x8]

llvm/test/CodeGen/AArch64/vselect-ext.ll

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -594,10 +594,10 @@ define void @extension_in_loop_v16i8_to_v16i32(ptr %src, ptr %dst) {
594594
; CHECK-NEXT: add x8, x8, #16
595595
; CHECK-NEXT: cmp x8, #128
596596
; CHECK-NEXT: cmge.16b v5, v4, #0
597-
; CHECK-NEXT: tbl.16b v7, { v4 }, v0
598-
; CHECK-NEXT: tbl.16b v16, { v4 }, v1
599-
; CHECK-NEXT: tbl.16b v18, { v4 }, v2
600-
; CHECK-NEXT: tbl.16b v4, { v4 }, v3
597+
; CHECK-NEXT: tbl.16b v7, { v4 }, v3
598+
; CHECK-NEXT: tbl.16b v16, { v4 }, v2
599+
; CHECK-NEXT: tbl.16b v18, { v4 }, v1
600+
; CHECK-NEXT: tbl.16b v4, { v4 }, v0
601601
; CHECK-NEXT: sshll2.8h v6, v5, #0
602602
; CHECK-NEXT: sshll.8h v5, v5, #0
603603
; CHECK-NEXT: sshll2.4s v17, v6, #0
@@ -664,10 +664,10 @@ define void @extension_in_loop_as_shuffle_v16i8_to_v16i32(ptr %src, ptr %dst) {
664664
; CHECK-NEXT: add x8, x8, #16
665665
; CHECK-NEXT: cmp x8, #128
666666
; CHECK-NEXT: cmge.16b v5, v4, #0
667-
; CHECK-NEXT: tbl.16b v7, { v4 }, v0
668-
; CHECK-NEXT: tbl.16b v16, { v4 }, v1
669-
; CHECK-NEXT: tbl.16b v18, { v4 }, v2
670-
; CHECK-NEXT: tbl.16b v4, { v4 }, v3
667+
; CHECK-NEXT: tbl.16b v7, { v4 }, v3
668+
; CHECK-NEXT: tbl.16b v16, { v4 }, v2
669+
; CHECK-NEXT: tbl.16b v18, { v4 }, v1
670+
; CHECK-NEXT: tbl.16b v4, { v4 }, v0
671671
; CHECK-NEXT: sshll2.8h v6, v5, #0
672672
; CHECK-NEXT: sshll.8h v5, v5, #0
673673
; CHECK-NEXT: sshll2.4s v17, v6, #0
@@ -735,10 +735,10 @@ define void @shuffle_in_loop_is_no_extend_v16i8_to_v16i32(ptr %src, ptr %dst) {
735735
; CHECK-NEXT: add x8, x8, #16
736736
; CHECK-NEXT: cmp x8, #128
737737
; CHECK-NEXT: cmge.16b v5, v4, #0
738-
; CHECK-NEXT: tbl.16b v7, { v4 }, v0
739-
; CHECK-NEXT: tbl.16b v16, { v4 }, v1
740-
; CHECK-NEXT: tbl.16b v18, { v4 }, v2
741-
; CHECK-NEXT: tbl.16b v4, { v4 }, v3
738+
; CHECK-NEXT: tbl.16b v7, { v4 }, v3
739+
; CHECK-NEXT: tbl.16b v16, { v4 }, v2
740+
; CHECK-NEXT: tbl.16b v18, { v4 }, v1
741+
; CHECK-NEXT: tbl.16b v4, { v4 }, v0
742742
; CHECK-NEXT: sshll2.8h v6, v5, #0
743743
; CHECK-NEXT: sshll.8h v5, v5, #0
744744
; CHECK-NEXT: sshll2.4s v17, v6, #0

llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7769,7 +7769,7 @@ define amdgpu_kernel void @sdiv_i64_pow2_shl_denom(ptr addrspace(1) %out, i64 %x
77697769
;
77707770
; GFX6-LABEL: sdiv_i64_pow2_shl_denom:
77717771
; GFX6: ; %bb.0:
7772-
; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0xd
7772+
; GFX6-NEXT: s_load_dword s0, s[4:5], 0xd
77737773
; GFX6-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x9
77747774
; GFX6-NEXT: s_mov_b32 s3, 0xf000
77757775
; GFX6-NEXT: s_mov_b32 s2, -1
@@ -7938,7 +7938,7 @@ define amdgpu_kernel void @sdiv_i64_pow2_shl_denom(ptr addrspace(1) %out, i64 %x
79387938
;
79397939
; GFX9-LABEL: sdiv_i64_pow2_shl_denom:
79407940
; GFX9: ; %bb.0:
7941-
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x34
7941+
; GFX9-NEXT: s_load_dword s0, s[4:5], 0x34
79427942
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
79437943
; GFX9-NEXT: s_lshl_b64 s[0:1], 0x1000, s0
79447944
; GFX9-NEXT: s_ashr_i32 s6, s1, 31
@@ -9037,7 +9037,7 @@ define amdgpu_kernel void @srem_i64_pow2_shl_denom(ptr addrspace(1) %out, i64 %x
90379037
;
90389038
; GFX6-LABEL: srem_i64_pow2_shl_denom:
90399039
; GFX6: ; %bb.0:
9040-
; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0xd
9040+
; GFX6-NEXT: s_load_dword s0, s[4:5], 0xd
90419041
; GFX6-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x9
90429042
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
90439043
; GFX6-NEXT: s_lshl_b64 s[0:1], 0x1000, s0
@@ -9208,7 +9208,7 @@ define amdgpu_kernel void @srem_i64_pow2_shl_denom(ptr addrspace(1) %out, i64 %x
92089208
;
92099209
; GFX9-LABEL: srem_i64_pow2_shl_denom:
92109210
; GFX9: ; %bb.0:
9211-
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x34
9211+
; GFX9-NEXT: s_load_dword s0, s[4:5], 0x34
92129212
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
92139213
; GFX9-NEXT: s_lshl_b64 s[0:1], 0x1000, s0
92149214
; GFX9-NEXT: s_ashr_i32 s2, s1, 31

0 commit comments

Comments
 (0)