Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

x86_64: Add portable_atomic_vmovdqa_atomic cfg #59

Draft
wants to merge 1 commit into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/.cspell/project-dictionary.txt
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,7 @@ reentrancy
rsbegin
rsend
rsil
sandybridge
sbcs
sched
selgr
Expand Down
11 changes: 11 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -275,6 +275,12 @@ jobs:
RUSTDOCFLAGS: ${{ env.RUSTDOCFLAGS }} -C target-feature=+cmpxchg16b --cfg portable_atomic_no_outline_atomics
RUSTFLAGS: ${{ env.RUSTFLAGS }} -C target-feature=+cmpxchg16b --cfg portable_atomic_no_outline_atomics
if: matrix.target == '' && !contains(matrix.rust, 'i686') || startsWith(matrix.target, 'x86_64')
# Sandy Bridge (the first Intel chip that introduced AVX) with portable_atomic_vmovdqa_atomic cfg
- run: tools/test.sh -vv $TARGET $DOCTEST_XCOMPILE $BUILD_STD $REPORT_TIME
env:
RUSTDOCFLAGS: ${{ env.RUSTDOCFLAGS }} -C target-cpu=sandybridge -C target-feature=+cmpxchg16b --cfg portable_atomic_vmovdqa_atomic
RUSTFLAGS: ${{ env.RUSTFLAGS }} -C target-cpu=sandybridge -C target-feature=+cmpxchg16b --cfg portable_atomic_vmovdqa_atomic
if: matrix.target == '' && !contains(matrix.rust, 'i686') || startsWith(matrix.target, 'x86_64')
# aarch64 +lse
# As of QEMU 8.0, QEMU has not yet implemented FEAT_LSE2: https://linaro.atlassian.net/browse/QEMU-300
# FEAT_LSE2 is tested on Cirrus CI's aarch64 macOS VM.
Expand Down Expand Up @@ -510,6 +516,11 @@ jobs:
# The vmovdqa load/store path has been tested above; disable outline-atomics and test the cmpxchg16b load/store path.
RUSTDOCFLAGS: ${{ env.RUSTDOCFLAGS }} -C target-feature=+cmpxchg16b --cfg portable_atomic_no_outline_atomics
RUSTFLAGS: ${{ env.RUSTFLAGS }} -C target-feature=+cmpxchg16b --cfg portable_atomic_no_outline_atomics
# Sandy Bridge (the first Intel chip that introduced AVX) with portable_atomic_vmovdqa_atomic cfg
- run: tools/test.sh -vv 2>&1 | ts -i '%.s '
env:
RUSTDOCFLAGS: ${{ env.RUSTDOCFLAGS }} -C target-cpu=sandybridge -C target-feature=+cmpxchg16b --cfg portable_atomic_vmovdqa_atomic
RUSTFLAGS: ${{ env.RUSTFLAGS }} -C target-cpu=sandybridge -C target-feature=+cmpxchg16b --cfg portable_atomic_vmovdqa_atomic

codegen:
needs: tidy
Expand Down
9 changes: 8 additions & 1 deletion src/imp/atomic128/detect/x86_64.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,13 @@
// Adapted from https://github.com/rust-lang/stdarch.

#![cfg_attr(any(not(target_feature = "sse"), portable_atomic_sanitize_thread), allow(dead_code))]
#![cfg_attr(
any(
not(target_feature = "sse"),
portable_atomic_vmovdqa_atomic,
portable_atomic_sanitize_thread,
),
allow(dead_code)
)]

// Miri doesn't support inline assembly used in __cpuid: https://github.com/rust-lang/miri/issues/932
// SGX doesn't support CPUID: https://github.com/rust-lang/stdarch/blob/a0c30f3e3c75adcd6ee7efc94014ebcead61c507/crates/core_arch/src/x86/cpuid.rs#L102-L105
Expand Down
168 changes: 114 additions & 54 deletions src/imp/atomic128/x86_64.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@
// - atomic-maybe-uninit https://github.com/taiki-e/atomic-maybe-uninit
//
// Generated asm:
// - x86_64 (+cmpxchg16b) https://godbolt.org/z/WPvfn16sY
// - x86_64 (+cmpxchg16b) https://godbolt.org/z/f9rT3eEs8
// - x86_64 (+cmpxchg16b,+avx,vmovdqa_atomic) https://godbolt.org/z/feWx41Moa

include!("macros.rs");

Expand Down Expand Up @@ -37,12 +38,18 @@ macro_rules! debug_assert_cmpxchg16b {
}
};
}
#[cfg(not(any(portable_atomic_no_outline_atomics, target_env = "sgx")))]
#[cfg(any(
not(any(portable_atomic_no_outline_atomics, target_env = "sgx")),
all(portable_atomic_vmovdqa_atomic, target_feature = "avx"),
))]
#[cfg(target_feature = "sse")]
macro_rules! debug_assert_vmovdqa_atomic {
() => {{
debug_assert_cmpxchg16b!();
debug_assert!(detect::detect().has_vmovdqa_atomic());
#[cfg(not(all(portable_atomic_vmovdqa_atomic, target_feature = "avx")))]
{
debug_assert!(detect::detect().has_vmovdqa_atomic());
}
}};
}

Expand Down Expand Up @@ -140,7 +147,10 @@ unsafe fn cmpxchg16b(dst: *mut u128, old: u128, new: u128) -> (u128, bool) {
//
// Do not use vector registers on targets such as x86_64-unknown-none unless SSE is explicitly enabled.
// https://doc.rust-lang.org/nightly/rustc/platform-support/x86_64-unknown-none.html
#[cfg(not(any(portable_atomic_no_outline_atomics, target_env = "sgx")))]
#[cfg(any(
not(any(portable_atomic_no_outline_atomics, target_env = "sgx")),
all(portable_atomic_vmovdqa_atomic, target_feature = "avx"),
))]
#[cfg(target_feature = "sse")]
#[target_feature(enable = "avx")]
#[inline]
Expand All @@ -162,7 +172,10 @@ unsafe fn atomic_load_vmovdqa(src: *mut u128) -> u128 {
core::mem::transmute(out)
}
}
#[cfg(not(any(portable_atomic_no_outline_atomics, target_env = "sgx")))]
#[cfg(any(
not(any(portable_atomic_no_outline_atomics, target_env = "sgx")),
all(portable_atomic_vmovdqa_atomic, target_feature = "avx"),
))]
#[cfg(target_feature = "sse")]
#[target_feature(enable = "avx")]
#[inline]
Expand Down Expand Up @@ -199,8 +212,15 @@ unsafe fn atomic_store_vmovdqa(dst: *mut u128, val: u128, order: Ordering) {

#[cfg(not(all(
any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b"),
any(portable_atomic_no_outline_atomics, target_env = "sgx", not(target_feature = "sse")),
any(
all(
any(portable_atomic_no_outline_atomics, target_env = "sgx"),
not(all(portable_atomic_vmovdqa_atomic, target_feature = "avx")),
),
not(target_feature = "sse"),
),
)))]
#[cfg(not(all(portable_atomic_vmovdqa_atomic, target_feature = "avx")))]
macro_rules! load_store_detect {
(
vmovdqa = $vmovdqa:ident
Expand Down Expand Up @@ -250,36 +270,56 @@ unsafe fn atomic_load(src: *mut u128, _order: Ordering) -> u128 {
// SGX doesn't support CPUID.
#[cfg(all(
any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b"),
any(portable_atomic_no_outline_atomics, target_env = "sgx", not(target_feature = "sse")),
any(
all(
any(portable_atomic_no_outline_atomics, target_env = "sgx"),
not(all(portable_atomic_vmovdqa_atomic, target_feature = "avx")),
),
not(target_feature = "sse"),
),
))]
// SAFETY: the caller must uphold the safety contract.
// cfg guarantees that CMPXCHG16B is available at compile-time.
unsafe {
// cmpxchg16b is always SeqCst.
atomic_load_cmpxchg16b(src)
_atomic_load_cmpxchg16b(src)
}
#[cfg(not(all(
any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b"),
any(portable_atomic_no_outline_atomics, target_env = "sgx", not(target_feature = "sse")),
any(
all(
any(portable_atomic_no_outline_atomics, target_env = "sgx"),
not(all(portable_atomic_vmovdqa_atomic, target_feature = "avx")),
),
not(target_feature = "sse"),
),
)))]
// SAFETY: the caller must uphold the safety contract.
unsafe {
ifunc!(unsafe fn(src: *mut u128) -> u128 {
load_store_detect! {
vmovdqa = atomic_load_vmovdqa
cmpxchg16b = atomic_load_cmpxchg16b
// Use SeqCst because cmpxchg16b and atomic load by vmovdqa are always SeqCst.
fallback = atomic_load_seqcst
}
})
{
#[cfg(all(portable_atomic_vmovdqa_atomic, target_feature = "avx"))]
// SAFETY: the caller must uphold the safety contract.
unsafe {
atomic_load_vmovdqa(src)
}
#[cfg(not(all(portable_atomic_vmovdqa_atomic, target_feature = "avx")))]
// SAFETY: the caller must uphold the safety contract.
unsafe {
ifunc!(unsafe fn(src: *mut u128) -> u128 {
load_store_detect! {
vmovdqa = atomic_load_vmovdqa
cmpxchg16b = _atomic_load_cmpxchg16b
// Use SeqCst because cmpxchg16b and atomic load by vmovdqa are always SeqCst.
fallback = atomic_load_seqcst
}
})
}
}
}
#[cfg_attr(
not(any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b")),
target_feature(enable = "cmpxchg16b")
)]
#[inline]
unsafe fn atomic_load_cmpxchg16b(src: *mut u128) -> u128 {
unsafe fn _atomic_load_cmpxchg16b(src: *mut u128) -> u128 {
debug_assert!(src as usize % 16 == 0);
debug_assert_cmpxchg16b!();

Expand Down Expand Up @@ -328,60 +368,80 @@ unsafe fn atomic_store(dst: *mut u128, val: u128, order: Ordering) {
// SGX doesn't support CPUID.
#[cfg(all(
any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b"),
any(portable_atomic_no_outline_atomics, target_env = "sgx", not(target_feature = "sse")),
any(
all(
any(portable_atomic_no_outline_atomics, target_env = "sgx"),
not(all(portable_atomic_vmovdqa_atomic, target_feature = "avx")),
),
not(target_feature = "sse"),
),
))]
// SAFETY: the caller must uphold the safety contract.
// cfg guarantees that CMPXCHG16B is available at compile-time.
unsafe {
// cmpxchg16b is always SeqCst.
let _ = order;
atomic_store_cmpxchg16b(dst, val);
_atomic_store_cmpxchg16b(dst, val);
}
#[cfg(not(all(
any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b"),
any(portable_atomic_no_outline_atomics, target_env = "sgx", not(target_feature = "sse")),
any(
all(
any(portable_atomic_no_outline_atomics, target_env = "sgx"),
not(all(portable_atomic_vmovdqa_atomic, target_feature = "avx")),
),
not(target_feature = "sse"),
),
)))]
// SAFETY: the caller must uphold the safety contract.
unsafe {
#[cfg(target_feature = "sse")]
fn_alias! {
#[target_feature(enable = "avx")]
unsafe fn(dst: *mut u128, val: u128);
// atomic store by vmovdqa has at least release semantics.
atomic_store_vmovdqa_non_seqcst = atomic_store_vmovdqa(Ordering::Release);
atomic_store_vmovdqa_seqcst = atomic_store_vmovdqa(Ordering::SeqCst);
{
#[cfg(all(portable_atomic_vmovdqa_atomic, target_feature = "avx"))]
// SAFETY: the caller must uphold the safety contract.
unsafe {
atomic_store_vmovdqa(dst, val, order);
}
match order {
// Relaxed and Release stores are equivalent in all implementations
// that may be called here (vmovdqa, asm-based cmpxchg16b, and fallback).
// core::arch's cmpxchg16b will never be called here.
Ordering::Relaxed | Ordering::Release => {
ifunc!(unsafe fn(dst: *mut u128, val: u128) {
load_store_detect! {
vmovdqa = atomic_store_vmovdqa_non_seqcst
cmpxchg16b = atomic_store_cmpxchg16b
fallback = atomic_store_non_seqcst
}
});
#[cfg(not(all(portable_atomic_vmovdqa_atomic, target_feature = "avx")))]
// SAFETY: the caller must uphold the safety contract.
unsafe {
#[cfg(target_feature = "sse")]
fn_alias! {
#[target_feature(enable = "avx")]
unsafe fn(dst: *mut u128, val: u128);
// atomic store by vmovdqa has at least release semantics.
atomic_store_vmovdqa_non_seqcst = atomic_store_vmovdqa(Ordering::Release);
atomic_store_vmovdqa_seqcst = atomic_store_vmovdqa(Ordering::SeqCst);
}
Ordering::SeqCst => {
ifunc!(unsafe fn(dst: *mut u128, val: u128) {
load_store_detect! {
vmovdqa = atomic_store_vmovdqa_seqcst
cmpxchg16b = atomic_store_cmpxchg16b
fallback = atomic_store_seqcst
}
});
match order {
// Relaxed and Release stores are equivalent in all implementations
// that may be called here (vmovdqa, asm-based cmpxchg16b, and fallback).
// core::arch's cmpxchg16b will never be called here.
Ordering::Relaxed | Ordering::Release => {
ifunc!(unsafe fn(dst: *mut u128, val: u128) {
load_store_detect! {
vmovdqa = atomic_store_vmovdqa_non_seqcst
cmpxchg16b = _atomic_store_cmpxchg16b
fallback = atomic_store_non_seqcst
}
});
}
Ordering::SeqCst => {
ifunc!(unsafe fn(dst: *mut u128, val: u128) {
load_store_detect! {
vmovdqa = atomic_store_vmovdqa_seqcst
cmpxchg16b = _atomic_store_cmpxchg16b
fallback = atomic_store_seqcst
}
});
}
_ => unreachable!("{:?}", order),
}
_ => unreachable!("{:?}", order),
}
}
}
#[cfg_attr(
not(any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b")),
target_feature(enable = "cmpxchg16b")
)]
unsafe fn atomic_store_cmpxchg16b(dst: *mut u128, val: u128) {
unsafe fn _atomic_store_cmpxchg16b(dst: *mut u128, val: u128) {
// SAFETY: the caller must uphold the safety contract.
unsafe {
// cmpxchg16b is always SeqCst.
Expand Down
4 changes: 4 additions & 0 deletions tools/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,7 @@ known_cfgs=(
portable_atomic_disable_fiq
portable_atomic_no_outline_atomics
portable_atomic_outline_atomics
portable_atomic_vmovdqa_atomic

# Not public APIs
portable_atomic_test_outline_atomics_detect_false
Expand Down Expand Up @@ -567,6 +568,9 @@ build() {
x_cargo "${args[@]}" "$@"
;;
esac
# Sandy Bridge (the first Intel chip that introduced AVX) with portable_atomic_vmovdqa_atomic cfg
RUSTFLAGS="${target_rustflags} -C target-cpu=sandybridge -C target-feature=+cmpxchg16b --cfg portable_atomic_vmovdqa_atomic" \
x_cargo "${args[@]}" --target-dir target/vmovdqa_atomic "$@"
;;
aarch64* | arm64*)
# macOS is skipped because it is +lse,+lse2 by default
Expand Down