
Commit ba64b04

[DAGCombiner][X86] Push bitcast/ext through freeze for loads
1 parent d6b22a3 commit ba64b04

7 files changed: 171 additions (+), 29 deletions (-)


llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 17 additions & 0 deletions
@@ -16944,6 +16944,23 @@ SDValue DAGCombiner::visitFREEZE(SDNode *N) {
   if (N0.getOpcode() == ISD::SRA || N0.getOpcode() == ISD::SRL)
     return SDValue();
 
+  // fold: bitcast(freeze(load)) -> freeze(bitcast(load))
+  // fold: sext(freeze(load)) -> freeze(sext(load))
+  // fold: zext(freeze(load)) -> freeze(zext(load))
+  // This allows the conversion to potentially fold into the load.
+  if (N0.getOpcode() == ISD::LOAD && N->hasOneUse()) {
+    SDNode *User = *N->user_begin();
+    unsigned UserOpcode = User->getOpcode();
+    if (UserOpcode == ISD::BITCAST || UserOpcode == ISD::SIGN_EXTEND ||
+        UserOpcode == ISD::ZERO_EXTEND) {
+      SDValue NewConv =
+          DAG.getNode(UserOpcode, SDLoc(User), User->getValueType(0), N0);
+      SDValue FrozenConv = DAG.getFreeze(NewConv);
+      DAG.ReplaceAllUsesWith(User, FrozenConv.getNode());
+      return SDValue(N, 0);
+    }
+  }
+
   // Fold freeze(op(x, ...)) -> op(freeze(x), ...).
   // Try to push freeze through instructions that propagate but don't produce
   // poison as far as possible. If an operand of freeze follows three

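For reference, the effect of this combine can be seen on a minimal IR sketch (hypothetical example, mirroring the new tests added below): the freeze previously sat between the load and the extend, blocking the extend from folding into the load; after the combine, the extend is applied to the load directly and the freeze wraps the result, so the backend can select a single extending load.

; Hypothetical illustration of the fold sext(freeze(load)) -> freeze(sext(load)).
define i32 @sext_freeze_load_sketch(ptr %p) {
  %v = load i8, ptr %p
  %f = freeze i8 %v        ; freeze used to separate the load from the sext
  %e = sext i8 %f to i32   ; now selectable as a sign-extending load (e.g. ldrsb/movsbl)
  ret i32 %e
}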
llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 14 additions & 0 deletions
@@ -3448,6 +3448,20 @@ bool X86TargetLowering::isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT,
   if (!Subtarget.hasDQI() && BitcastVT == MVT::v8i1 && LoadVT == MVT::i8)
     return false;
 
+  // With low alignment, don't convert integer vectors to large scalar loads,
+  // because otherwise they get broken into many small scalar loads.
+  if (LoadVT.isVector() && LoadVT.isInteger() && !BitcastVT.isVector() &&
+      BitcastVT.isInteger()) {
+    const DataLayout &DL = DAG.getDataLayout();
+    unsigned MinAlign = DL.getPointerSize();
+    // Aligned well, will legalize into a clean sequence of loads.
+    if (MMO.getAlign() >= MinAlign)
+      return true;
+    // Aligned poorly for a large enough scalar.
+    if (BitcastVT.getSizeInBits() > 2 * DL.getPointerSizeInBits())
+      return false;
+  }
+
   // If both types are legal vectors, it's always ok to convert them.
   if (LoadVT.isVector() && BitcastVT.isVector() &&
       isTypeLegal(LoadVT) && isTypeLegal(BitcastVT))

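A minimal hypothetical example of the case this heuristic guards against (not part of the commit): on x86-64, DL.getPointerSize() is 8 bytes and 2 * DL.getPointerSizeInBits() is 128 bits. Bitcasting a loaded <8 x i32> to the 256-bit scalar i256 is therefore still reported as beneficial when the load is aligned to 8 bytes or more, but rejected when it is under-aligned, since an under-aligned i256 scalar load would otherwise be broken into many small scalar loads.

; Hypothetical IR: at align 1 the i256 form is rejected and the vector load is kept;
; at align 8 (>= pointer size) the bitcast conversion is still allowed.
define i256 @bitcast_v8i32_to_i256_sketch(ptr %p) {
  %v = load <8 x i32>, ptr %p, align 1
  %s = bitcast <8 x i32> %v to i256
  ret i256 %s
}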
llvm/test/CodeGen/X86/avx10_2_512bf16-arith.ll

Lines changed: 1 addition & 1 deletion
@@ -94,8 +94,8 @@ define <32 x bfloat> @test_int_x86_avx10_maskz_sub_bf16_512(<32 x bfloat> %src,
 ;
 ; X86-LABEL: test_int_x86_avx10_maskz_sub_bf16_512:
 ; X86: # %bb.0:
-; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
+; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
 ; X86-NEXT: vsubbf16 %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf5,0x75,0xc9,0x5c,0xc2]
 ; X86-NEXT: vsubbf16 (%eax), %zmm1, %zmm1 # encoding: [0x62,0xf5,0x75,0x48,0x5c,0x08]
 ; X86-NEXT: vsubbf16 %zmm1, %zmm0, %zmm0 {%k1} # encoding: [0x62,0xf5,0x7d,0x49,0x5c,0xc1]

llvm/test/CodeGen/X86/avx10_2bf16-arith.ll

Lines changed: 2 additions & 2 deletions
@@ -147,8 +147,8 @@ define <16 x bfloat> @test_int_x86_avx10_maskz_sub_bf16_256(<16 x bfloat> %src,
 ;
 ; X86-LABEL: test_int_x86_avx10_maskz_sub_bf16_256:
 ; X86: # %bb.0:
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
 ; X86-NEXT: vsubbf16 %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x75,0xa9,0x5c,0xc2]
 ; X86-NEXT: vsubbf16 (%eax), %ymm1, %ymm1 # encoding: [0x62,0xf5,0x75,0x28,0x5c,0x08]
 ; X86-NEXT: vsubbf16 %ymm1, %ymm0, %ymm0 {%k1} # encoding: [0x62,0xf5,0x7d,0x29,0x5c,0xc1]
@@ -201,8 +201,8 @@ define <8 x bfloat> @test_int_x86_avx10_maskz_sub_bf16_128(<8 x bfloat> %src, <8
 ;
 ; X86-LABEL: test_int_x86_avx10_maskz_sub_bf16_128:
 ; X86: # %bb.0:
-; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
 ; X86-NEXT: vsubbf16 %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0x75,0x89,0x5c,0xc2]
 ; X86-NEXT: vsubbf16 (%eax), %xmm1, %xmm1 # encoding: [0x62,0xf5,0x75,0x08,0x5c,0x08]
 ; X86-NEXT: vsubbf16 %xmm1, %xmm0, %xmm0 {%k1} # encoding: [0x62,0xf5,0x7d,0x09,0x5c,0xc1]

llvm/test/CodeGen/X86/avx512-ext.ll

Lines changed: 12 additions & 20 deletions
@@ -212,11 +212,9 @@ define <32 x i16> @zext_32x8mem_to_32x16(ptr%i , <32 x i1> %mask) nounwind readn
 ; KNL-NEXT: vextracti128 $1, %ymm0, %xmm1
 ; KNL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
 ; KNL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
-; KNL-NEXT: vmovdqu (%rdi), %ymm2
-; KNL-NEXT: vpmovzxbw {{.*#+}} ymm3 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero
-; KNL-NEXT: vextracti128 $1, %ymm2, %xmm2
-; KNL-NEXT: vpmovzxbw {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero
-; KNL-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2
+; KNL-NEXT: vpmovzxbw {{.*#+}} ymm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
+; KNL-NEXT: vpmovzxbw {{.*#+}} ymm3 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
+; KNL-NEXT: vinserti64x4 $1, %ymm3, %zmm2, %zmm2
 ; KNL-NEXT: vpsllw $15, %ymm0, %ymm0
 ; KNL-NEXT: vpsraw $15, %ymm0, %ymm0
 ; KNL-NEXT: vpsllw $15, %ymm1, %ymm1
@@ -237,11 +235,9 @@ define <32 x i16> @zext_32x8mem_to_32x16(ptr%i , <32 x i1> %mask) nounwind readn
 ; AVX512DQNOBW-NEXT: vextracti128 $1, %ymm0, %xmm1
 ; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
 ; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
-; AVX512DQNOBW-NEXT: vmovdqu (%rdi), %ymm2
-; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm3 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero
-; AVX512DQNOBW-NEXT: vextracti128 $1, %ymm2, %xmm2
-; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero
-; AVX512DQNOBW-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2
+; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
+; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm3 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
+; AVX512DQNOBW-NEXT: vinserti64x4 $1, %ymm3, %zmm2, %zmm2
 ; AVX512DQNOBW-NEXT: vpsllw $15, %ymm0, %ymm0
 ; AVX512DQNOBW-NEXT: vpsraw $15, %ymm0, %ymm0
 ; AVX512DQNOBW-NEXT: vpsllw $15, %ymm1, %ymm1
@@ -261,11 +257,9 @@ define <32 x i16> @sext_32x8mem_to_32x16(ptr%i , <32 x i1> %mask) nounwind readn
 ; KNL-NEXT: vextracti128 $1, %ymm0, %xmm1
 ; KNL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
 ; KNL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
-; KNL-NEXT: vmovdqu (%rdi), %ymm2
-; KNL-NEXT: vpmovsxbw %xmm2, %ymm3
-; KNL-NEXT: vextracti128 $1, %ymm2, %xmm2
-; KNL-NEXT: vpmovsxbw %xmm2, %ymm2
-; KNL-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2
+; KNL-NEXT: vpmovsxbw (%rdi), %ymm2
+; KNL-NEXT: vpmovsxbw 16(%rdi), %ymm3
+; KNL-NEXT: vinserti64x4 $1, %ymm3, %zmm2, %zmm2
 ; KNL-NEXT: vpsllw $15, %ymm0, %ymm0
 ; KNL-NEXT: vpsraw $15, %ymm0, %ymm0
 ; KNL-NEXT: vpsllw $15, %ymm1, %ymm1
@@ -286,11 +280,9 @@ define <32 x i16> @sext_32x8mem_to_32x16(ptr%i , <32 x i1> %mask) nounwind readn
 ; AVX512DQNOBW-NEXT: vextracti128 $1, %ymm0, %xmm1
 ; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
 ; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
-; AVX512DQNOBW-NEXT: vmovdqu (%rdi), %ymm2
-; AVX512DQNOBW-NEXT: vpmovsxbw %xmm2, %ymm3
-; AVX512DQNOBW-NEXT: vextracti128 $1, %ymm2, %xmm2
-; AVX512DQNOBW-NEXT: vpmovsxbw %xmm2, %ymm2
-; AVX512DQNOBW-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2
+; AVX512DQNOBW-NEXT: vpmovsxbw (%rdi), %ymm2
+; AVX512DQNOBW-NEXT: vpmovsxbw 16(%rdi), %ymm3
+; AVX512DQNOBW-NEXT: vinserti64x4 $1, %ymm3, %zmm2, %zmm2
 ; AVX512DQNOBW-NEXT: vpsllw $15, %ymm0, %ymm0
 ; AVX512DQNOBW-NEXT: vpsraw $15, %ymm0, %ymm0
 ; AVX512DQNOBW-NEXT: vpsllw $15, %ymm1, %ymm1

Lines changed: 119 additions & 0 deletions (new file)
@@ -0,0 +1,119 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=aarch64 | FileCheck %s
+
+define double @test_bitcast_freeze_load(ptr %p) {
+; CHECK-LABEL: test_bitcast_freeze_load:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr d0, [x0]
+; CHECK-NEXT: ret
+  %v = load <2 x float>, ptr %p
+  %f = freeze <2 x float> %v
+  %b = bitcast <2 x float> %f to double
+  ret double %b
+}
+
+define i32 @test_sext_freeze_load_i8(ptr %p) {
+; CHECK-LABEL: test_sext_freeze_load_i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldrsb w0, [x0]
+; CHECK-NEXT: ret
+  %v = load i8, ptr %p
+  %f = freeze i8 %v
+  %e = sext i8 %f to i32
+  ret i32 %e
+}
+
+define i64 @test_sext_freeze_load_i32(ptr %p) {
+; CHECK-LABEL: test_sext_freeze_load_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr w8, [x0]
+; CHECK-NEXT: sxtw x0, w8
+; CHECK-NEXT: ret
+  %v = load i32, ptr %p
+  %f = freeze i32 %v
+  %e = sext i32 %f to i64
+  ret i64 %e
+}
+
+define i64 @test_sext_freeze_load_i16(ptr %p) {
+; CHECK-LABEL: test_sext_freeze_load_i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldrsh x0, [x0]
+; CHECK-NEXT: ret
+  %v = load i16, ptr %p
+  %f = freeze i16 %v
+  %e = sext i16 %f to i64
+  ret i64 %e
+}
+
+define i32 @test_zext_freeze_load_i8(ptr %p) {
+; CHECK-LABEL: test_zext_freeze_load_i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldrb w0, [x0]
+; CHECK-NEXT: ret
+  %v = load i8, ptr %p
+  %f = freeze i8 %v
+  %e = zext i8 %f to i32
+  ret i32 %e
+}
+
+define i64 @test_zext_freeze_load_i32(ptr %p) {
+; CHECK-LABEL: test_zext_freeze_load_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr w0, [x0]
+; CHECK-NEXT: ret
+  %v = load i32, ptr %p
+  %f = freeze i32 %v
+  %e = zext i32 %f to i64
+  ret i64 %e
+}
+
+define i64 @test_zext_freeze_load_i16(ptr %p) {
+; CHECK-LABEL: test_zext_freeze_load_i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldrh w0, [x0]
+; CHECK-NEXT: ret
+  %v = load i16, ptr %p
+  %f = freeze i16 %v
+  %e = zext i16 %f to i64
+  ret i64 %e
+}
+
+define i32 @test_sext_freeze_load_multiuse(ptr %p) {
+; CHECK-LABEL: test_sext_freeze_load_multiuse:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldrb w8, [x0]
+; CHECK-NEXT: sxtb w9, w8
+; CHECK-NEXT: add w0, w9, w8, uxtb
+; CHECK-NEXT: ret
+  %v = load i8, ptr %p
+  %f = freeze i8 %v
+  %e = sext i8 %f to i32
+  %z = zext i8 %f to i32
+  %r = add i32 %e, %z
+  ret i32 %r
+}
+
+define <4 x i32> @test_sext_freeze_load_v4i16(ptr %p) {
+; CHECK-LABEL: test_sext_freeze_load_v4i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr d0, [x0]
+; CHECK-NEXT: sshll v0.4s, v0.4h, #0
+; CHECK-NEXT: ret
+  %v = load <4 x i16>, ptr %p
+  %f = freeze <4 x i16> %v
+  %e = sext <4 x i16> %f to <4 x i32>
+  ret <4 x i32> %e
+}
+
+define <4 x i32> @test_zext_freeze_load_v4i16(ptr %p) {
+; CHECK-LABEL: test_zext_freeze_load_v4i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr d0, [x0]
+; CHECK-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-NEXT: ret
+  %v = load <4 x i16>, ptr %p
+  %f = freeze <4 x i16> %v
+  %e = zext <4 x i16> %f to <4 x i32>
+  ret <4 x i32> %e
+}

llvm/test/CodeGen/X86/widen-load-of-small-alloca-with-zero-upper-half.ll

Lines changed: 6 additions & 6 deletions
@@ -171,17 +171,17 @@ define void @load_2byte_chunk_of_4byte_alloca_with_zero_upper_half(ptr %src, i64
 define void @load_1byte_chunk_of_8byte_alloca_with_zero_upper_half(ptr %src, i64 %byteOff, ptr %dst) nounwind {
 ; X64-NO-BMI2-LABEL: load_1byte_chunk_of_8byte_alloca_with_zero_upper_half:
 ; X64-NO-BMI2: # %bb.0:
-; X64-NO-BMI2-NEXT: leal (,%rsi,8), %ecx
 ; X64-NO-BMI2-NEXT: movl (%rdi), %eax
+; X64-NO-BMI2-NEXT: leal (,%rsi,8), %ecx
 ; X64-NO-BMI2-NEXT: # kill: def $cl killed $cl killed $ecx
 ; X64-NO-BMI2-NEXT: shrq %cl, %rax
 ; X64-NO-BMI2-NEXT: movb %al, (%rdx)
 ; X64-NO-BMI2-NEXT: retq
 ;
 ; X64-BMI2-LABEL: load_1byte_chunk_of_8byte_alloca_with_zero_upper_half:
 ; X64-BMI2: # %bb.0:
-; X64-BMI2-NEXT: shll $3, %esi
 ; X64-BMI2-NEXT: movl (%rdi), %eax
+; X64-BMI2-NEXT: shll $3, %esi
 ; X64-BMI2-NEXT: shrxq %rsi, %rax, %rax
 ; X64-BMI2-NEXT: movb %al, (%rdx)
 ; X64-BMI2-NEXT: retq
@@ -248,17 +248,17 @@ define void @load_1byte_chunk_of_8byte_alloca_with_zero_upper_half(ptr %src, i64
 define void @load_2byte_chunk_of_8byte_alloca_with_zero_upper_half(ptr %src, i64 %byteOff, ptr %dst) nounwind {
 ; X64-NO-BMI2-LABEL: load_2byte_chunk_of_8byte_alloca_with_zero_upper_half:
 ; X64-NO-BMI2: # %bb.0:
-; X64-NO-BMI2-NEXT: leal (,%rsi,8), %ecx
 ; X64-NO-BMI2-NEXT: movl (%rdi), %eax
+; X64-NO-BMI2-NEXT: leal (,%rsi,8), %ecx
 ; X64-NO-BMI2-NEXT: # kill: def $cl killed $cl killed $ecx
 ; X64-NO-BMI2-NEXT: shrq %cl, %rax
 ; X64-NO-BMI2-NEXT: movw %ax, (%rdx)
 ; X64-NO-BMI2-NEXT: retq
 ;
 ; X64-BMI2-LABEL: load_2byte_chunk_of_8byte_alloca_with_zero_upper_half:
 ; X64-BMI2: # %bb.0:
-; X64-BMI2-NEXT: shll $3, %esi
 ; X64-BMI2-NEXT: movl (%rdi), %eax
+; X64-BMI2-NEXT: shll $3, %esi
 ; X64-BMI2-NEXT: shrxq %rsi, %rax, %rax
 ; X64-BMI2-NEXT: movw %ax, (%rdx)
 ; X64-BMI2-NEXT: retq
@@ -324,17 +324,17 @@ define void @load_2byte_chunk_of_8byte_alloca_with_zero_upper_half(ptr %src, i64
 define void @load_4byte_chunk_of_8byte_alloca_with_zero_upper_half(ptr %src, i64 %byteOff, ptr %dst) nounwind {
 ; X64-NO-BMI2-LABEL: load_4byte_chunk_of_8byte_alloca_with_zero_upper_half:
 ; X64-NO-BMI2: # %bb.0:
-; X64-NO-BMI2-NEXT: leal (,%rsi,8), %ecx
 ; X64-NO-BMI2-NEXT: movl (%rdi), %eax
+; X64-NO-BMI2-NEXT: leal (,%rsi,8), %ecx
 ; X64-NO-BMI2-NEXT: # kill: def $cl killed $cl killed $ecx
 ; X64-NO-BMI2-NEXT: shrq %cl, %rax
 ; X64-NO-BMI2-NEXT: movl %eax, (%rdx)
 ; X64-NO-BMI2-NEXT: retq
 ;
 ; X64-BMI2-LABEL: load_4byte_chunk_of_8byte_alloca_with_zero_upper_half:
 ; X64-BMI2: # %bb.0:
-; X64-BMI2-NEXT: shll $3, %esi
 ; X64-BMI2-NEXT: movl (%rdi), %eax
+; X64-BMI2-NEXT: shll $3, %esi
 ; X64-BMI2-NEXT: shrxq %rsi, %rax, %rax
 ; X64-BMI2-NEXT: movl %eax, (%rdx)
 ; X64-BMI2-NEXT: retq
