
Commit 7775d9a

Enable unaligned loads on x86 using cmpxchg
We can do this by using cmpxchg; it is really the only way. The big concern is that an unaligned locked access which straddles a cache line becomes a split lock, and x86 CPUs can either handle that in hardware or raise an exception (for example, when split-lock detection is enabled), so I am unsure how to deal with that.
1 parent 80f8e1e commit 7775d9a
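
The trick, in miniature: a compare-exchange whose expected and desired values are both zero never modifies memory, yet always returns an atomic snapshot of the location, so it can stand in for an atomic load even when the pointer is not naturally aligned. Below is a minimal C++ sketch of the idea using the GCC/Clang __atomic builtins rather than LLVM's IR-level expandAtomicLoadToCmpXchg; the wrapper name atomic_load_via_cas is illustrative only.

```cpp
#include <cstdint>

// Sketch only: emulate an atomic 32-bit load with a compare-and-swap,
// the same trick expandAtomicLoadToCmpXchg applies at the IR level.
// cmpxchg(ptr, 0, 0) either succeeds (the value was 0 and stays 0) or
// fails and reports the current value; either way we obtain an atomic
// snapshot of *ptr without logically modifying it.
uint32_t atomic_load_via_cas(uint32_t *ptr) {
  uint32_t expected = 0;
  // On failure the builtin writes the observed value back into
  // `expected`; on success the observed value was already 0.
  __atomic_compare_exchange_n(ptr, &expected, 0, /*weak=*/false,
                              __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
  return expected;
}
```

Note the cost this implies: the load becomes a locked read-modify-write, so it needs a writable mapping and always asserts ownership of the line(s) it touches, which is where the split-lock concern above comes from.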

File tree

4 files changed: +140 −148 lines


llvm/lib/CodeGen/AtomicExpandPass.cpp

Lines changed: 25 additions & 2 deletions
@@ -232,8 +232,22 @@ template <typename Inst>
 static bool atomicSizeSupported(const TargetLowering *TLI, Inst *I) {
   unsigned Size = getAtomicOpSize(I);
   Align Alignment = I->getAlign();
-  return Alignment >= Size &&
-         Size <= TLI->getMaxAtomicSizeInBitsSupported() / 8;
+
+  // X86 we can do unaligned loads
+  return Size <= TLI->getMaxAtomicSizeInBitsSupported() / 8 &&
+         (Alignment >= Size || TLI->supportsUnalignedAtomics());
+}
+
+template <typename Inst>
+static bool canLowerAtomicAsUnaligned(const TargetLowering *TLI, Inst *I) {
+  if (!TLI->supportsUnalignedAtomics())
+    return false;
+  unsigned Size = getAtomicOpSize(I);
+  Align Alignment = I->getAlign();
+
+  // X86 we can do unaligned loads
+  return Size <= TLI->getMaxAtomicSizeInBitsSupported() / 8 &&
+         (Alignment < Size);
 }
 
 bool AtomicExpandImpl::processAtomicInstr(Instruction *I) {
@@ -510,6 +524,10 @@ AtomicExpandImpl::convertAtomicXchgToIntegerType(AtomicRMWInst *RMWI) {
 }
 
 bool AtomicExpandImpl::tryExpandAtomicLoad(LoadInst *LI) {
+
+  if (canLowerAtomicAsUnaligned(TLI, LI))
+    return expandAtomicLoadToCmpXchg(LI);
+
   switch (TLI->shouldExpandAtomicLoadInIR(LI)) {
   case TargetLoweringBase::AtomicExpansionKind::None:
     return false;
@@ -532,6 +550,11 @@ bool AtomicExpandImpl::tryExpandAtomicLoad(LoadInst *LI) {
 }
 
 bool AtomicExpandImpl::tryExpandAtomicStore(StoreInst *SI) {
+  if (canLowerAtomicAsUnaligned(TLI, SI)) {
+    expandAtomicStore(SI);
+    return true;
+  }
+
   switch (TLI->shouldExpandAtomicStoreInIR(SI)) {
   case TargetLoweringBase::AtomicExpansionKind::None:
     return false;
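
The store side needs no new expansion helper: expandAtomicStore turns the store into an atomic exchange whose result is dropped, which x86 lowers to a plain xchg (implicitly locked), as the regenerated tests below show. A C-level sketch of the same transformation, again a hedged illustration using the __atomic builtins rather than the pass itself (the wrapper name atomic_store_via_xchg is illustrative only):

```cpp
#include <cstdint>

// Sketch only: an atomic store emulated as an exchange whose previous
// value is discarded. On x86, xchg with a memory operand is implicitly
// locked, so this behaves as a seq_cst store even at an unaligned
// address (modulo the split-lock caveat from the commit message).
void atomic_store_via_xchg(uint32_t *ptr, uint32_t v) {
  (void)__atomic_exchange_n(ptr, v, __ATOMIC_SEQ_CST);
}
```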

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 3 additions & 0 deletions
@@ -137,6 +137,9 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
 
   // Set up the TargetLowering object.
 
+  // X86 supports unaligned atomic memory accesses via cmpxchg8b and cmpxchg16b
+  setSupportsUnalignedAtomics(true);
+
   // X86 is weird. It always uses i8 for shift amounts and setcc results.
   setBooleanContents(ZeroOrOneBooleanContent);
   // X86-SSE is even stranger. It uses -1 or 0 for vector masks.
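
The comment above names cmpxchg8b/cmpxchg16b because those are the widest compare-exchange instructions x86 offers, and the 16-byte form additionally requires the cx16 feature. A hedged illustration of a 16-byte CAS at the source level follows (the function name cas16 is illustrative; compiler behavior varies, e.g. Clang with -mcx16 can inline lock cmpxchg16b, while without cx16 the operation becomes a __atomic_* libcall, which is why the i128 cases in atomic-unaligned.ll below still check for __atomic_load and friends):

```cpp
// Sketch only. __int128 and the __atomic builtins are GCC/Clang
// extensions; whether this inlines to lock cmpxchg16b or falls back to
// a libatomic call depends on the compiler and on -mcx16.
bool cas16(__int128 *p, __int128 *expected, __int128 desired) {
  return __atomic_compare_exchange_n(p, expected, desired, /*weak=*/false,
                                     __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
}
```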

llvm/test/CodeGen/X86/atomic-unaligned.ll

Lines changed: 82 additions & 9 deletions
@@ -1,15 +1,24 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
 ; RUN: llc -mtriple=x86_64 < %s | FileCheck %s
 
 ; Quick test to ensure that atomics which are not naturally-aligned
 ; emit unsized libcalls, and aren't emitted as native instructions or
 ; sized libcalls.
 define void @test_i32(ptr %a) nounwind {
 ; CHECK-LABEL: test_i32:
-; CHECK: callq __atomic_load
-; CHECK: callq __atomic_store
-; CHECK: callq __atomic_exchange
-; CHECK: callq __atomic_compare_exchange
-; CHECK: callq __atomic_compare_exchange
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xorl %ecx, %ecx
+; CHECK-NEXT:    xorl %eax, %eax
+; CHECK-NEXT:    lock cmpxchgl %ecx, (%rdi)
+; CHECK-NEXT:    movl $1, %ecx
+; CHECK-NEXT:    movl $1, %eax
+; CHECK-NEXT:    xchgl %eax, (%rdi)
+; CHECK-NEXT:    movl $1, %eax
+; CHECK-NEXT:    xchgl %eax, (%rdi)
+; CHECK-NEXT:    lock addl $2, (%rdi)
+; CHECK-NEXT:    xorl %eax, %eax
+; CHECK-NEXT:    lock cmpxchgl %ecx, (%rdi)
+; CHECK-NEXT:    retq
   %t0 = load atomic i32, ptr %a seq_cst, align 2
   store atomic i32 1, ptr %a seq_cst, align 2
   %t1 = atomicrmw xchg ptr %a, i32 1 seq_cst, align 2
@@ -20,10 +29,74 @@ define void @test_i32(ptr %a) nounwind {
 
 define void @test_i128(ptr %a) nounwind {
 ; CHECK-LABEL: test_i128:
-; CHECK: callq __atomic_load
-; CHECK: callq __atomic_store
-; CHECK: callq __atomic_exchange
-; CHECK: callq __atomic_compare_exchange
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    pushq %r15
+; CHECK-NEXT:    pushq %r14
+; CHECK-NEXT:    pushq %rbx
+; CHECK-NEXT:    subq $32, %rsp
+; CHECK-NEXT:    movq %rdi, %rbx
+; CHECK-NEXT:    movq %rsp, %r14
+; CHECK-NEXT:    movl $16, %edi
+; CHECK-NEXT:    movq %rbx, %rsi
+; CHECK-NEXT:    movq %r14, %rdx
+; CHECK-NEXT:    movl $5, %ecx
+; CHECK-NEXT:    callq __atomic_load@PLT
+; CHECK-NEXT:    movq $0, {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    movq $1, (%rsp)
+; CHECK-NEXT:    movq %rsp, %rdx
+; CHECK-NEXT:    movl $16, %edi
+; CHECK-NEXT:    movq %rbx, %rsi
+; CHECK-NEXT:    movl $5, %ecx
+; CHECK-NEXT:    callq __atomic_store@PLT
+; CHECK-NEXT:    movq $0, {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    movq $1, (%rsp)
+; CHECK-NEXT:    movq %rsp, %rdx
+; CHECK-NEXT:    leaq {{[0-9]+}}(%rsp), %r15
+; CHECK-NEXT:    movl $16, %edi
+; CHECK-NEXT:    movq %rbx, %rsi
+; CHECK-NEXT:    movq %r15, %rcx
+; CHECK-NEXT:    movl $5, %r8d
+; CHECK-NEXT:    callq __atomic_exchange@PLT
+; CHECK-NEXT:    movq (%rbx), %rdx
+; CHECK-NEXT:    movq 8(%rbx), %rcx
+; CHECK-NEXT:    .p2align 4
+; CHECK-NEXT:  .LBB1_1: # %atomicrmw.start
+; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    movq %rdx, %rax
+; CHECK-NEXT:    addq $2, %rax
+; CHECK-NEXT:    movq %rdx, (%rsp)
+; CHECK-NEXT:    movq %rcx, {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    adcq $0, %rcx
+; CHECK-NEXT:    movq %rcx, {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    movq %rax, {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    movl $16, %edi
+; CHECK-NEXT:    movq %rbx, %rsi
+; CHECK-NEXT:    movq %r14, %rdx
+; CHECK-NEXT:    movq %r15, %rcx
+; CHECK-NEXT:    movl $5, %r8d
+; CHECK-NEXT:    movl $5, %r9d
+; CHECK-NEXT:    callq __atomic_compare_exchange@PLT
+; CHECK-NEXT:    movq (%rsp), %rdx
+; CHECK-NEXT:    movq {{[0-9]+}}(%rsp), %rcx
+; CHECK-NEXT:    testb %al, %al
+; CHECK-NEXT:    je .LBB1_1
+; CHECK-NEXT:  # %bb.2: # %atomicrmw.end
+; CHECK-NEXT:    xorps %xmm0, %xmm0
+; CHECK-NEXT:    movaps %xmm0, (%rsp)
+; CHECK-NEXT:    movq $0, {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    movq $1, {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    movq %rsp, %rdx
+; CHECK-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
+; CHECK-NEXT:    movl $16, %edi
+; CHECK-NEXT:    movq %rbx, %rsi
+; CHECK-NEXT:    movl $5, %r8d
+; CHECK-NEXT:    movl $5, %r9d
+; CHECK-NEXT:    callq __atomic_compare_exchange@PLT
+; CHECK-NEXT:    addq $32, %rsp
+; CHECK-NEXT:    popq %rbx
+; CHECK-NEXT:    popq %r14
+; CHECK-NEXT:    popq %r15
+; CHECK-NEXT:    retq
   %t0 = load atomic i128, ptr %a seq_cst, align 8
   store atomic i128 1, ptr %a seq_cst, align 8
   %t1 = atomicrmw xchg ptr %a, i128 1 seq_cst, align 8

llvm/test/CodeGen/X86/atomic-unordered.ll

Lines changed: 30 additions & 137 deletions
@@ -64,30 +64,18 @@ define void @store_i16(ptr %ptr, i16 %v) {
 define i16 @load_i16_unaligned(ptr %ptr) {
 ; CHECK-O0-LABEL: load_i16_unaligned:
 ; CHECK-O0:       # %bb.0:
-; CHECK-O0-NEXT:    pushq %rax
-; CHECK-O0-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-O0-NEXT:    movq %rdi, %rsi
-; CHECK-O0-NEXT:    movl $2, %edi
-; CHECK-O0-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
-; CHECK-O0-NEXT:    xorl %ecx, %ecx
-; CHECK-O0-NEXT:    callq __atomic_load@PLT
-; CHECK-O0-NEXT:    movw {{[0-9]+}}(%rsp), %ax
-; CHECK-O0-NEXT:    popq %rcx
-; CHECK-O0-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-O0-NEXT:    xorl %eax, %eax
+; CHECK-O0-NEXT:    movw %ax, %cx
+; CHECK-O0-NEXT:    movw %cx, %ax
+; CHECK-O0-NEXT:    lock cmpxchgw %cx, (%rdi)
+; CHECK-O0-NEXT:    sete %cl
 ; CHECK-O0-NEXT:    retq
 ;
 ; CHECK-O3-LABEL: load_i16_unaligned:
 ; CHECK-O3:       # %bb.0:
-; CHECK-O3-NEXT:    pushq %rax
-; CHECK-O3-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-O3-NEXT:    movq %rdi, %rsi
-; CHECK-O3-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
-; CHECK-O3-NEXT:    movl $2, %edi
 ; CHECK-O3-NEXT:    xorl %ecx, %ecx
-; CHECK-O3-NEXT:    callq __atomic_load@PLT
-; CHECK-O3-NEXT:    movzwl {{[0-9]+}}(%rsp), %eax
-; CHECK-O3-NEXT:    popq %rcx
-; CHECK-O3-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-O3-NEXT:    xorl %eax, %eax
+; CHECK-O3-NEXT:    lock cmpxchgw %cx, (%rdi)
 ; CHECK-O3-NEXT:    retq
   %v = load atomic i16, ptr %ptr unordered, align 1
   ret i16 %v
@@ -97,33 +85,13 @@ define i16 @load_i16_unaligned(ptr %ptr) {
 define void @store_i16_unaligned(ptr %ptr, i16 %v) {
 ; CHECK-O0-LABEL: store_i16_unaligned:
 ; CHECK-O0:       # %bb.0:
-; CHECK-O0-NEXT:    pushq %rax
-; CHECK-O0-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-O0-NEXT:    movl %esi, %eax
-; CHECK-O0-NEXT:    movq %rdi, %rsi
-; CHECK-O0-NEXT:    # kill: def $ax killed $ax killed $eax
-; CHECK-O0-NEXT:    movw %ax, {{[0-9]+}}(%rsp)
-; CHECK-O0-NEXT:    movl $2, %edi
-; CHECK-O0-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
-; CHECK-O0-NEXT:    xorl %ecx, %ecx
-; CHECK-O0-NEXT:    callq __atomic_store@PLT
-; CHECK-O0-NEXT:    popq %rax
-; CHECK-O0-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-O0-NEXT:    movw %si, %ax
+; CHECK-O0-NEXT:    xchgw %ax, (%rdi)
 ; CHECK-O0-NEXT:    retq
 ;
 ; CHECK-O3-LABEL: store_i16_unaligned:
 ; CHECK-O3:       # %bb.0:
-; CHECK-O3-NEXT:    pushq %rax
-; CHECK-O3-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-O3-NEXT:    movq %rdi, %rax
-; CHECK-O3-NEXT:    movw %si, {{[0-9]+}}(%rsp)
-; CHECK-O3-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
-; CHECK-O3-NEXT:    movl $2, %edi
-; CHECK-O3-NEXT:    movq %rax, %rsi
-; CHECK-O3-NEXT:    xorl %ecx, %ecx
-; CHECK-O3-NEXT:    callq __atomic_store@PLT
-; CHECK-O3-NEXT:    popq %rax
-; CHECK-O3-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-O3-NEXT:    xchgw %si, (%rdi)
 ; CHECK-O3-NEXT:    retq
   store atomic i16 %v, ptr %ptr unordered, align 1
   ret void
@@ -150,65 +118,27 @@ define void @store_i32(ptr %ptr, i32 %v) {
 define i32 @load_i32_unaligned(ptr %ptr) {
 ; CHECK-O0-LABEL: load_i32_unaligned:
 ; CHECK-O0:       # %bb.0:
-; CHECK-O0-NEXT:    pushq %rax
-; CHECK-O0-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-O0-NEXT:    movq %rdi, %rsi
-; CHECK-O0-NEXT:    movl $4, %edi
-; CHECK-O0-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
 ; CHECK-O0-NEXT:    xorl %ecx, %ecx
-; CHECK-O0-NEXT:    callq __atomic_load@PLT
-; CHECK-O0-NEXT:    movl {{[0-9]+}}(%rsp), %eax
-; CHECK-O0-NEXT:    popq %rcx
-; CHECK-O0-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-O0-NEXT:    movl %ecx, %eax
+; CHECK-O0-NEXT:    lock cmpxchgl %ecx, (%rdi)
+; CHECK-O0-NEXT:    sete %cl
 ; CHECK-O0-NEXT:    retq
 ;
 ; CHECK-O3-LABEL: load_i32_unaligned:
 ; CHECK-O3:       # %bb.0:
-; CHECK-O3-NEXT:    pushq %rax
-; CHECK-O3-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-O3-NEXT:    movq %rdi, %rsi
-; CHECK-O3-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
-; CHECK-O3-NEXT:    movl $4, %edi
 ; CHECK-O3-NEXT:    xorl %ecx, %ecx
-; CHECK-O3-NEXT:    callq __atomic_load@PLT
-; CHECK-O3-NEXT:    movl {{[0-9]+}}(%rsp), %eax
-; CHECK-O3-NEXT:    popq %rcx
-; CHECK-O3-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-O3-NEXT:    xorl %eax, %eax
+; CHECK-O3-NEXT:    lock cmpxchgl %ecx, (%rdi)
 ; CHECK-O3-NEXT:    retq
   %v = load atomic i32, ptr %ptr unordered, align 1
   ret i32 %v
 }
 
 define void @store_i32_unaligned(ptr %ptr, i32 %v) {
-; CHECK-O0-LABEL: store_i32_unaligned:
-; CHECK-O0:       # %bb.0:
-; CHECK-O0-NEXT:    pushq %rax
-; CHECK-O0-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-O0-NEXT:    movl %esi, %eax
-; CHECK-O0-NEXT:    movq %rdi, %rsi
-; CHECK-O0-NEXT:    movl %eax, {{[0-9]+}}(%rsp)
-; CHECK-O0-NEXT:    movl $4, %edi
-; CHECK-O0-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
-; CHECK-O0-NEXT:    xorl %ecx, %ecx
-; CHECK-O0-NEXT:    callq __atomic_store@PLT
-; CHECK-O0-NEXT:    popq %rax
-; CHECK-O0-NEXT:    .cfi_def_cfa_offset 8
-; CHECK-O0-NEXT:    retq
-;
-; CHECK-O3-LABEL: store_i32_unaligned:
-; CHECK-O3:       # %bb.0:
-; CHECK-O3-NEXT:    pushq %rax
-; CHECK-O3-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-O3-NEXT:    movq %rdi, %rax
-; CHECK-O3-NEXT:    movl %esi, {{[0-9]+}}(%rsp)
-; CHECK-O3-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
-; CHECK-O3-NEXT:    movl $4, %edi
-; CHECK-O3-NEXT:    movq %rax, %rsi
-; CHECK-O3-NEXT:    xorl %ecx, %ecx
-; CHECK-O3-NEXT:    callq __atomic_store@PLT
-; CHECK-O3-NEXT:    popq %rax
-; CHECK-O3-NEXT:    .cfi_def_cfa_offset 8
-; CHECK-O3-NEXT:    retq
+; CHECK-LABEL: store_i32_unaligned:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xchgl %esi, (%rdi)
+; CHECK-NEXT:    retq
   store atomic i32 %v, ptr %ptr unordered, align 1
   ret void
 }
@@ -234,65 +164,28 @@ define void @store_i64(ptr %ptr, i64 %v) {
 define i64 @load_i64_unaligned(ptr %ptr) {
 ; CHECK-O0-LABEL: load_i64_unaligned:
 ; CHECK-O0:       # %bb.0:
-; CHECK-O0-NEXT:    pushq %rax
-; CHECK-O0-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-O0-NEXT:    movq %rdi, %rsi
-; CHECK-O0-NEXT:    movl $8, %edi
-; CHECK-O0-NEXT:    movq %rsp, %rdx
-; CHECK-O0-NEXT:    xorl %ecx, %ecx
-; CHECK-O0-NEXT:    callq __atomic_load@PLT
-; CHECK-O0-NEXT:    movq (%rsp), %rax
-; CHECK-O0-NEXT:    popq %rcx
-; CHECK-O0-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-O0-NEXT:    xorl %eax, %eax
+; CHECK-O0-NEXT:    movl %eax, %ecx
+; CHECK-O0-NEXT:    movq %rcx, %rax
+; CHECK-O0-NEXT:    lock cmpxchgq %rcx, (%rdi)
+; CHECK-O0-NEXT:    sete %cl
 ; CHECK-O0-NEXT:    retq
 ;
 ; CHECK-O3-LABEL: load_i64_unaligned:
 ; CHECK-O3:       # %bb.0:
-; CHECK-O3-NEXT:    pushq %rax
-; CHECK-O3-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-O3-NEXT:    movq %rdi, %rsi
-; CHECK-O3-NEXT:    movq %rsp, %rdx
-; CHECK-O3-NEXT:    movl $8, %edi
 ; CHECK-O3-NEXT:    xorl %ecx, %ecx
-; CHECK-O3-NEXT:    callq __atomic_load@PLT
-; CHECK-O3-NEXT:    movq (%rsp), %rax
-; CHECK-O3-NEXT:    popq %rcx
-; CHECK-O3-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-O3-NEXT:    xorl %eax, %eax
+; CHECK-O3-NEXT:    lock cmpxchgq %rcx, (%rdi)
 ; CHECK-O3-NEXT:    retq
   %v = load atomic i64, ptr %ptr unordered, align 1
   ret i64 %v
 }
 
 define void @store_i64_unaligned(ptr %ptr, i64 %v) {
-; CHECK-O0-LABEL: store_i64_unaligned:
-; CHECK-O0:       # %bb.0:
-; CHECK-O0-NEXT:    pushq %rax
-; CHECK-O0-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-O0-NEXT:    movq %rsi, %rax
-; CHECK-O0-NEXT:    movq %rdi, %rsi
-; CHECK-O0-NEXT:    movq %rax, (%rsp)
-; CHECK-O0-NEXT:    movl $8, %edi
-; CHECK-O0-NEXT:    movq %rsp, %rdx
-; CHECK-O0-NEXT:    xorl %ecx, %ecx
-; CHECK-O0-NEXT:    callq __atomic_store@PLT
-; CHECK-O0-NEXT:    popq %rax
-; CHECK-O0-NEXT:    .cfi_def_cfa_offset 8
-; CHECK-O0-NEXT:    retq
-;
-; CHECK-O3-LABEL: store_i64_unaligned:
-; CHECK-O3:       # %bb.0:
-; CHECK-O3-NEXT:    pushq %rax
-; CHECK-O3-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-O3-NEXT:    movq %rdi, %rax
-; CHECK-O3-NEXT:    movq %rsi, (%rsp)
-; CHECK-O3-NEXT:    movq %rsp, %rdx
-; CHECK-O3-NEXT:    movl $8, %edi
-; CHECK-O3-NEXT:    movq %rax, %rsi
-; CHECK-O3-NEXT:    xorl %ecx, %ecx
-; CHECK-O3-NEXT:    callq __atomic_store@PLT
-; CHECK-O3-NEXT:    popq %rax
-; CHECK-O3-NEXT:    .cfi_def_cfa_offset 8
-; CHECK-O3-NEXT:    retq
+; CHECK-LABEL: store_i64_unaligned:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xchgq %rsi, (%rdi)
+; CHECK-NEXT:    retq
   store atomic i64 %v, ptr %ptr unordered, align 1
   ret void
 }
