[X86] Enable unaligned loads on x86 using cmpxchg #142645

Status: Open. Wants to merge 3 commits into base branch main.
27 changes: 25 additions & 2 deletions llvm/lib/CodeGen/AtomicExpandPass.cpp
@@ -232,8 +232,22 @@ template <typename Inst>
static bool atomicSizeSupported(const TargetLowering *TLI, Inst *I) {
unsigned Size = getAtomicOpSize(I);
Align Alignment = I->getAlign();
return Alignment >= Size &&
Size <= TLI->getMaxAtomicSizeInBitsSupported() / 8;

// Targets that support unaligned atomics (e.g. X86 via cmpxchg) can also handle under-aligned accesses.
return Size <= TLI->getMaxAtomicSizeInBitsSupported() / 8 &&
(Alignment >= Size || TLI->supportsUnalignedAtomics());
}

template <typename Inst>
static bool canLowerAtomicAsUnaligned(const TargetLowering *TLI, Inst *I) {
if (!TLI->supportsUnalignedAtomics())
return false;
unsigned Size = getAtomicOpSize(I);
Align Alignment = I->getAlign();

// Within the supported atomic width, an under-aligned access can be lowered via cmpxchg (e.g. on X86).
return Size <= TLI->getMaxAtomicSizeInBitsSupported() / 8 &&
(Alignment < Size);
}

bool AtomicExpandImpl::processAtomicInstr(Instruction *I) {
@@ -510,6 +524,10 @@ AtomicExpandImpl::convertAtomicXchgToIntegerType(AtomicRMWInst *RMWI) {
}

bool AtomicExpandImpl::tryExpandAtomicLoad(LoadInst *LI) {

if (canLowerAtomicAsUnaligned(TLI, LI))
return expandAtomicLoadToCmpXchg(LI);

switch (TLI->shouldExpandAtomicLoadInIR(LI)) {
case TargetLoweringBase::AtomicExpansionKind::None:
return false;
@@ -532,6 +550,11 @@ bool AtomicExpandImpl::tryExpandAtomicLoad(LoadInst *LI) {
}

bool AtomicExpandImpl::tryExpandAtomicStore(StoreInst *SI) {
if (canLowerAtomicAsUnaligned(TLI, SI)) {
expandAtomicStore(SI);
return true;
}

switch (TLI->shouldExpandAtomicStoreInIR(SI)) {
case TargetLoweringBase::AtomicExpansionKind::None:
return false;
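For reference, a minimal IR sketch of the load expansion this path invokes (expandAtomicLoadToCmpXchg already exists in the pass; the value names here are illustrative, not taken from its output): the under-aligned atomic load is rewritten as a compare-exchange of the location against a dummy zero value, and the loaded result is the first element of the returned pair.

; before: an i32 atomic load that is only 2-byte aligned
%v = load atomic i32, ptr %p seq_cst, align 2

; after (sketch): a no-op cmpxchg that returns the current memory contents
%pair = cmpxchg ptr %p, i32 0, i32 0 seq_cst seq_cst, align 2
%v = extractvalue { i32, i1 } %pair, 0

On X86 this selects to the lock cmpxchgl sequence visible in the updated test below.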
3 changes: 3 additions & 0 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -137,6 +137,9 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,

// Set up the TargetLowering object.

// X86 supports unaligned atomic memory accesses via cmpxchg
setSupportsUnalignedAtomics(true);

// X86 is weird. It always uses i8 for shift amounts and setcc results.
setBooleanContents(ZeroOrOneBooleanContent);
// X86-SSE is even stranger. It uses -1 or 0 for vector masks.
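With the hook enabled for X86, the store path added in AtomicExpandPass also fires for under-aligned atomic stores. A minimal sketch, assuming expandAtomicStore keeps its usual behavior of rewriting the store as an exchange whose result is discarded (names illustrative):

; before: an i32 atomic store that is only 2-byte aligned
store atomic i32 1, ptr %p seq_cst, align 2

; after (sketch): an atomicrmw xchg carrying the stored value
%old = atomicrmw xchg ptr %p, i32 1 seq_cst, align 2

The xchg in turn lowers to the xchgl instruction seen in the CHECK lines below.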
91 changes: 82 additions & 9 deletions llvm/test/CodeGen/X86/atomic-unaligned.ll
@@ -1,15 +1,24 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=x86_64 < %s | FileCheck %s

; Test lowering of atomics that are not naturally aligned: accesses within the
; target's supported atomic width are expanded to cmpxchg/xchg-based sequences,
; while oversized ones still emit unsized __atomic libcalls.
define void @test_i32(ptr %a) nounwind {
; CHECK-LABEL: test_i32:
; CHECK: callq __atomic_load
; CHECK: callq __atomic_store
; CHECK: callq __atomic_exchange
; CHECK: callq __atomic_compare_exchange
; CHECK: callq __atomic_compare_exchange
; CHECK: # %bb.0:
; CHECK-NEXT: xorl %ecx, %ecx
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: lock cmpxchgl %ecx, (%rdi)
; CHECK-NEXT: movl $1, %ecx
; CHECK-NEXT: movl $1, %eax
; CHECK-NEXT: xchgl %eax, (%rdi)
; CHECK-NEXT: movl $1, %eax
; CHECK-NEXT: xchgl %eax, (%rdi)
; CHECK-NEXT: lock addl $2, (%rdi)
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: lock cmpxchgl %ecx, (%rdi)
; CHECK-NEXT: retq
%t0 = load atomic i32, ptr %a seq_cst, align 2
store atomic i32 1, ptr %a seq_cst, align 2
%t1 = atomicrmw xchg ptr %a, i32 1 seq_cst, align 2
@@ -20,10 +29,74 @@ define void @test_i32(ptr %a) nounwind {

define void @test_i128(ptr %a) nounwind {
; CHECK-LABEL: test_i128:
; CHECK: callq __atomic_load
; CHECK: callq __atomic_store
; CHECK: callq __atomic_exchange
; CHECK: callq __atomic_compare_exchange
; CHECK: # %bb.0:
; CHECK-NEXT: pushq %r15
; CHECK-NEXT: pushq %r14
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: subq $32, %rsp
; CHECK-NEXT: movq %rdi, %rbx
; CHECK-NEXT: movq %rsp, %r14
; CHECK-NEXT: movl $16, %edi
; CHECK-NEXT: movq %rbx, %rsi
; CHECK-NEXT: movq %r14, %rdx
; CHECK-NEXT: movl $5, %ecx
; CHECK-NEXT: callq __atomic_load@PLT
; CHECK-NEXT: movq $0, {{[0-9]+}}(%rsp)
; CHECK-NEXT: movq $1, (%rsp)
; CHECK-NEXT: movq %rsp, %rdx
; CHECK-NEXT: movl $16, %edi
; CHECK-NEXT: movq %rbx, %rsi
; CHECK-NEXT: movl $5, %ecx
; CHECK-NEXT: callq __atomic_store@PLT
; CHECK-NEXT: movq $0, {{[0-9]+}}(%rsp)
; CHECK-NEXT: movq $1, (%rsp)
; CHECK-NEXT: movq %rsp, %rdx
; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %r15
; CHECK-NEXT: movl $16, %edi
; CHECK-NEXT: movq %rbx, %rsi
; CHECK-NEXT: movq %r15, %rcx
; CHECK-NEXT: movl $5, %r8d
; CHECK-NEXT: callq __atomic_exchange@PLT
; CHECK-NEXT: movq (%rbx), %rdx
; CHECK-NEXT: movq 8(%rbx), %rcx
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: .LBB1_1: # %atomicrmw.start
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: movq %rdx, %rax
; CHECK-NEXT: addq $2, %rax
; CHECK-NEXT: movq %rdx, (%rsp)
; CHECK-NEXT: movq %rcx, {{[0-9]+}}(%rsp)
; CHECK-NEXT: adcq $0, %rcx
; CHECK-NEXT: movq %rcx, {{[0-9]+}}(%rsp)
; CHECK-NEXT: movq %rax, {{[0-9]+}}(%rsp)
; CHECK-NEXT: movl $16, %edi
; CHECK-NEXT: movq %rbx, %rsi
; CHECK-NEXT: movq %r14, %rdx
; CHECK-NEXT: movq %r15, %rcx
; CHECK-NEXT: movl $5, %r8d
; CHECK-NEXT: movl $5, %r9d
; CHECK-NEXT: callq __atomic_compare_exchange@PLT
; CHECK-NEXT: movq (%rsp), %rdx
; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rcx
; CHECK-NEXT: testb %al, %al
; CHECK-NEXT: je .LBB1_1
; CHECK-NEXT: # %bb.2: # %atomicrmw.end
; CHECK-NEXT: xorps %xmm0, %xmm0
; CHECK-NEXT: movaps %xmm0, (%rsp)
; CHECK-NEXT: movq $0, {{[0-9]+}}(%rsp)
; CHECK-NEXT: movq $1, {{[0-9]+}}(%rsp)
; CHECK-NEXT: movq %rsp, %rdx
; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rcx
; CHECK-NEXT: movl $16, %edi
; CHECK-NEXT: movq %rbx, %rsi
; CHECK-NEXT: movl $5, %r8d
; CHECK-NEXT: movl $5, %r9d
; CHECK-NEXT: callq __atomic_compare_exchange@PLT
; CHECK-NEXT: addq $32, %rsp
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: popq %r14
; CHECK-NEXT: popq %r15
; CHECK-NEXT: retq
%t0 = load atomic i128, ptr %a seq_cst, align 8
store atomic i128 1, ptr %a seq_cst, align 8
%t1 = atomicrmw xchg ptr %a, i128 1 seq_cst, align 8