Skip to content

Commit 539584c

Browse files
committed
[X86] Remove extra MOV after widening atomic load
This change adds patterns to optimize out an extra MOV present after widening the atomic load. commit-id:45989503
1 parent bd488e4 commit 539584c

File tree

3 files changed

+30
-20
lines changed

3 files changed

+30
-20
lines changed

llvm/lib/Target/X86/X86InstrCompiler.td

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1204,6 +1204,13 @@ def : Pat<(i16 (atomic_load_nonext_16 addr:$src)), (MOV16rm addr:$src)>;
12041204
def : Pat<(i32 (atomic_load_nonext_32 addr:$src)), (MOV32rm addr:$src)>;
12051205
def : Pat<(i64 (atomic_load_nonext_64 addr:$src)), (MOV64rm addr:$src)>;
12061206

1207+
// Select an atomic load that directly feeds scalar_to_vector as a single
// load-into-XMM move, instead of loading into a GPR and then moving the value
// across (the test updates in this commit show the two-instruction sequences
// collapsing into one instruction).
// NOTE(review): none of these patterns carries a Requires<[...]> predicate,
// whereas the floating-point store pattern that follows uses
// Requires<[UseSSE1]>. The atomic-unordered.ll update in this commit now
// expects a non-VEX `movd` immediately before a `vmovq` -- confirm whether
// SSE2-gated and AVX (VEX-encoded) variants are needed.
// NOTE(review): the first pattern matches a zero-extended *16-bit* atomic load
// but selects the same instruction as the 32-bit pattern below it. If
// MOVDI2PDIrm performs a 32-bit memory read, this widens the access beyond the
// 16-bit location and no longer guarantees the zeroed upper 16 bits that the
// zext requires -- TODO confirm before relying on it.
def : Pat<(v4i32 (scalar_to_vector (i32 (zext (i16 (atomic_load_16 addr:$src)))))),
1208+
(MOVDI2PDIrm addr:$src)>; // load atomic <2 x i8>
1209+
def : Pat<(v4i32 (scalar_to_vector (i32 (atomic_load_32 addr:$src)))),
1210+
(MOVDI2PDIrm addr:$src)>; // load atomic <2 x i16> (the <4 x i8> test in this commit changes the same way)
1211+
def : Pat<(v2i64 (scalar_to_vector (i64 (atomic_load_64 addr:$src)))),
1212+
(MOV64toPQIrm addr:$src)>; // load atomic <2 x i32,float> (the <4 x i16> and <2 x ptr addrspace(270)> tests change the same way)
1213+
12071214
// Floating point loads/stores.
12081215
def : Pat<(atomic_store_32 (i32 (bitconvert (f32 FR32:$src))), addr:$dst),
12091216
(MOVSSmr addr:$dst, FR32:$src)>, Requires<[UseSSE1]>;

llvm/test/CodeGen/X86/atomic-load-store.ll

Lines changed: 22 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -165,20 +165,23 @@ define <2 x i8> @atomic_vec2_i8(ptr %x) {
165165
}
166166

167167
define <2 x i16> @atomic_vec2_i16(ptr %x) {
168-
; CHECK-LABEL: atomic_vec2_i16:
169-
; CHECK: ## %bb.0:
170-
; CHECK-NEXT: movl (%rdi), %eax
171-
; CHECK-NEXT: movd %eax, %xmm0
172-
; CHECK-NEXT: retq
168+
; CHECK3-LABEL: atomic_vec2_i16:
169+
; CHECK3: ## %bb.0:
170+
; CHECK3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
171+
; CHECK3-NEXT: retq
172+
;
173+
; CHECK0-LABEL: atomic_vec2_i16:
174+
; CHECK0: ## %bb.0:
175+
; CHECK0-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
176+
; CHECK0-NEXT: retq
173177
%ret = load atomic <2 x i16>, ptr %x acquire, align 4
174178
ret <2 x i16> %ret
175179
}
176180

177181
define <2 x ptr addrspace(270)> @atomic_vec2_ptr270(ptr %x) {
178182
; CHECK-LABEL: atomic_vec2_ptr270:
179183
; CHECK: ## %bb.0:
180-
; CHECK-NEXT: movq (%rdi), %rax
181-
; CHECK-NEXT: movq %rax, %xmm0
184+
; CHECK-NEXT: movq (%rdi), %xmm0
182185
; CHECK-NEXT: retq
183186
%ret = load atomic <2 x ptr addrspace(270)>, ptr %x acquire, align 8
184187
ret <2 x ptr addrspace(270)> %ret
@@ -187,8 +190,7 @@ define <2 x ptr addrspace(270)> @atomic_vec2_ptr270(ptr %x) {
187190
define <2 x i32> @atomic_vec2_i32_align(ptr %x) {
188191
; CHECK-LABEL: atomic_vec2_i32_align:
189192
; CHECK: ## %bb.0:
190-
; CHECK-NEXT: movq (%rdi), %rax
191-
; CHECK-NEXT: movq %rax, %xmm0
193+
; CHECK-NEXT: movq (%rdi), %xmm0
192194
; CHECK-NEXT: retq
193195
%ret = load atomic <2 x i32>, ptr %x acquire, align 8
194196
ret <2 x i32> %ret
@@ -197,8 +199,7 @@ define <2 x i32> @atomic_vec2_i32_align(ptr %x) {
197199
define <2 x float> @atomic_vec2_float_align(ptr %x) {
198200
; CHECK-LABEL: atomic_vec2_float_align:
199201
; CHECK: ## %bb.0:
200-
; CHECK-NEXT: movq (%rdi), %rax
201-
; CHECK-NEXT: movq %rax, %xmm0
202+
; CHECK-NEXT: movq (%rdi), %xmm0
202203
; CHECK-NEXT: retq
203204
%ret = load atomic <2 x float>, ptr %x acquire, align 8
204205
ret <2 x float> %ret
@@ -354,20 +355,23 @@ define <2 x i32> @atomic_vec2_i32(ptr %x) nounwind {
354355
}
355356

356357
define <4 x i8> @atomic_vec4_i8(ptr %x) nounwind {
357-
; CHECK-LABEL: atomic_vec4_i8:
358-
; CHECK: ## %bb.0:
359-
; CHECK-NEXT: movl (%rdi), %eax
360-
; CHECK-NEXT: movd %eax, %xmm0
361-
; CHECK-NEXT: retq
358+
; CHECK3-LABEL: atomic_vec4_i8:
359+
; CHECK3: ## %bb.0:
360+
; CHECK3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
361+
; CHECK3-NEXT: retq
362+
;
363+
; CHECK0-LABEL: atomic_vec4_i8:
364+
; CHECK0: ## %bb.0:
365+
; CHECK0-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
366+
; CHECK0-NEXT: retq
362367
%ret = load atomic <4 x i8>, ptr %x acquire, align 4
363368
ret <4 x i8> %ret
364369
}
365370

366371
define <4 x i16> @atomic_vec4_i16(ptr %x) nounwind {
367372
; CHECK-LABEL: atomic_vec4_i16:
368373
; CHECK: ## %bb.0:
369-
; CHECK-NEXT: movq (%rdi), %rax
370-
; CHECK-NEXT: movq %rax, %xmm0
374+
; CHECK-NEXT: movq (%rdi), %xmm0
371375
; CHECK-NEXT: retq
372376
%ret = load atomic <4 x i16>, ptr %x acquire, align 8
373377
ret <4 x i16> %ret

llvm/test/CodeGen/X86/atomic-unordered.ll

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2275,8 +2275,7 @@ define i64 @load_i16_anyext_i64(ptr %ptr) {
22752275
;
22762276
; CHECK-O3-LABEL: load_i16_anyext_i64:
22772277
; CHECK-O3: # %bb.0:
2278-
; CHECK-O3-NEXT: movzwl (%rdi), %eax
2279-
; CHECK-O3-NEXT: vmovd %eax, %xmm0
2278+
; CHECK-O3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
22802279
; CHECK-O3-NEXT: vmovq %xmm0, %rax
22812280
; CHECK-O3-NEXT: retq
22822281
%v = load atomic i16, ptr %ptr unordered, align 8

0 commit comments

Comments
 (0)