Skip to content

Atomic compare_exchange(_weak) functions produce overly complicated asm code on thumbv7(e)m-none-eabi(hf) targets #79418

Open
@qwerty19106

Description

This code performs an atomic increment:

#![no_std]

use core::sync::atomic::*;

/// Atomically increments the value behind `ptr` by one using a
/// compare-exchange retry loop and returns the value it held before the
/// increment.
///
/// Every atomic operation in this function uses `Ordering::Relaxed`.
pub extern "C" fn atomic_usize_inc(ptr: &AtomicUsize) -> usize {
    // Initial snapshot; serves as the expected value for the first attempt.
    let mut old = ptr.load(Ordering::Relaxed);

    loop {
        // The candidate value is computed before the compare-exchange is issued.
        let new = old + 1;

        match ptr.compare_exchange_weak(old, new, Ordering::Relaxed, Ordering::Relaxed) {
            // Exchange succeeded: `_old` is the previous value, which is returned.
            Ok(_old) => break _old,
            // Exchange failed (the value differed, or the weak variant failed
            // spuriously): retry with the value that was actually observed.
            Err(_old) => old = _old,
        }
    }
}

Expected asm:

example::atomic_usize_inc:
1:
        ldr     r1, [r0]
        add    r3, r1, #1

        ldrex   r2, [r0]
        cmp     r2, r1
        itt       ne
        clrexne
        bne     1b
        strex   r1, r3, [r0]
        cbz    r1, 2f
        b       1b
2:
        movs    r0, r2

Produced asm (godbolt):

example::atomic_usize_inc:
        ldr     r2, [r0]
.LBB0_1:
        ldrex   r1, [r0]
        cmp     r1, r2
        bne     .LBB0_4
        adds    r2, #1
        strex   r3, r2, [r0]
        cbnz    r3, .LBB0_5
        movs    r2, #1
        b       .LBB0_6
.LBB0_4:
        clrex
.LBB0_5:
        movs    r2, #0
.LBB0_6:
        cbnz    r2, .LBB0_25
        ldrex   r2, [r0]
        cmp     r2, r1
        bne     .LBB0_10
        adds    r1, #1
        strex   r3, r1, [r0]
        cbnz    r3, .LBB0_11
        movs    r1, #1
        b       .LBB0_12
.LBB0_10:
        clrex
.LBB0_11:
        movs    r1, #0
.LBB0_12:
        cbnz    r1, .LBB0_24
        ldrex   r1, [r0]
        cmp     r1, r2
        bne     .LBB0_16
        adds    r2, #1
        strex   r3, r2, [r0]
        cbnz    r3, .LBB0_17
        movs    r2, #1
        b       .LBB0_18
.LBB0_16:
        clrex
.LBB0_17:
        movs    r2, #0
.LBB0_18:
        cbnz    r2, .LBB0_25
        ldrex   r2, [r0]
        cmp     r2, r1
        bne     .LBB0_22
        adds    r1, #1
        strex   r3, r1, [r0]
        cbnz    r3, .LBB0_23
        movs    r1, #1
        cmp     r1, #0
        beq     .LBB0_1
        b       .LBB0_24
.LBB0_22:
        clrex
.LBB0_23:
        movs    r1, #0
        cmp     r1, #0
        beq     .LBB0_1
.LBB0_24:
        mov     r1, r2
.LBB0_25:
        mov     r0, r1
        bx      lr

Code size is very important on Cortex-M targets (thumbv*) because some controllers have only 20KB flash!

Besides, the increment instruction (adds r2, #1) was moved into the ldrex/strex section. The code thereby loses an advantage of compare_exchange: the new value can be evaluated before ldrex, which reduces the number of ticks during which the Exclusive Monitor is set.

This particular example is contrived, because we could simply call fetch_add. But other tasks do require compare_exchange_weak, for example an atomic increment with a maximum limit (pseudocode):

// Pseudocode, not valid Rust: `atomic { ... }` marks the region that is meant
// to execute as a single indivisible step.
// Returns `(false, old)` when the counter is already at `max`, otherwise
// stores `old + 1` and returns `(true, old)`.
pub extern "C" fn atomic_usize_inc_with_max(ptr: &AtomicUsize) -> (bool, usize) {
    atomic {
        let old = ptr.load(Ordering::Relaxed);
        // NOTE(review): `max` is not defined in this snippet — presumably an
        // upper bound supplied elsewhere.
        if old == max {
            // Limit reached: leave the value unchanged and report failure.
            return (false, old);
        }

        // Below the limit: write the incremented value and report success
        // together with the previous value.
        ptr.store(old + 1, Ordering::Relaxed);
        return (true, old);
    }
}

Meta

rustc --version --verbose:

rustc 1.50.0-nightly (1c389ffef 2020-11-24)

Activity

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Metadata

Assignees

No one assigned

    Labels

    C-enhancement — Category: An issue proposing an enhancement or a PR with one.
    C-optimization — Category: An issue highlighting optimization opportunities or PRs implementing such
    I-heavy — Issue: Problems and improvements with respect to binary size of generated code.
    O-Arm — Target: 32-bit Arm processors (armv6, armv7, thumb...), including 64-bit Arm in AArch32 state
    T-compiler — Relevant to the compiler team, which will review and decide on the PR/issue.
    WG-embedded — Working group: Embedded systems

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions