Skip to content

[x86] expose cpuid, xgetbv, pushfd, popfd #166

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
120 changes: 120 additions & 0 deletions src/x86/cpuid.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
//! `cpuid` intrinsics

#[cfg(test)]
use stdsimd_test::assert_instr;

/// Result of the `cpuid` instruction.
///
/// Holds the values that the instruction leaves in the `EAX`, `EBX`, `ECX`
/// and `EDX` registers. How those values are to be interpreted depends on
/// the query that was issued.
#[derive(Copy, Clone, Debug, Eq, Ord, PartialEq, PartialOrd)]
#[cfg_attr(feature = "cargo-clippy", allow(stutter))]
pub struct CpuidResult {
    /// EAX register.
    pub eax: u32,
    /// EBX register.
    pub ebx: u32,
    /// ECX register.
    pub ecx: u32,
    /// EDX register.
    pub edx: u32,
}

/// `cpuid` instruction.
///
/// The [CPUID Wikipedia page][wiki_cpuid] contains how to query which
/// information using the `eax` and `ecx` registers, and the format in
/// which this information is returned in `eax...edx`.
///
/// The `has_cpuid()` intrinsics can be used to query whether the `cpuid`
/// instruction is available.
///
/// The definitive references are:
/// - [Intel 64 and IA-32 Architectures Software Developer's Manual Volume 2:
/// Instruction Set Reference, A-Z][intel64_ref].
/// - [AMD64 Architecture Programmer's Manual, Volume 3: General-Purpose and
/// System Instructions][amd64_ref].
///
/// [wiki_cpuid]: https://en.wikipedia.org/wiki/CPUID
/// [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
/// [amd64_ref]: http://support.amd.com/TechDocs/24594.pdf
#[inline(always)]
#[cfg_attr(test, assert_instr(cpuid))]
#[cfg_attr(feature = "cargo-clippy", allow(stutter))]
pub unsafe fn __cpuid_count(eax: u32, ecx: u32) -> CpuidResult {
let mut r = ::std::mem::uninitialized::<CpuidResult>();
asm!("cpuid"
: "={eax}"(r.eax), "={ebx}"(r.ebx), "={ecx}"(r.ecx), "={edx}"(r.edx)
: "{eax}"(eax), "{ecx}"(ecx)
: :);
r
}

/// `cpuid` instruction with the `ecx` input fixed to zero.
///
/// Equivalent to `__cpuid_count(eax, 0)`; see `__cpuid_count` for details.
#[inline(always)]
#[cfg_attr(test, assert_instr(cpuid))]
#[cfg_attr(feature = "cargo-clippy", allow(stutter))]
pub unsafe fn __cpuid(eax: u32) -> CpuidResult {
    let ecx_input: u32 = 0;
    __cpuid_count(eax, ecx_input)
}

/// Does the host support the `cpuid` instruction?
///
/// On `x86_64` this always returns `true`. On `x86` the instruction is not
/// always available, so the ID bit of EFLAGS is flipped and read back:
/// `cpuid` is reported as available only if the flip sticks.
///
/// On `x86` the flipped ID bit is left in EFLAGS; the previous value is not
/// restored.
#[inline(always)]
#[cfg_attr(feature = "cargo-clippy", allow(stutter))]
pub fn has_cpuid() -> bool {
    #[cfg(target_arch = "x86_64")]
    {
        true
    }
    #[cfg(target_arch = "x86")]
    {
        use super::ia32::{__readeflags, __writeeflags};

        // Mask of the ID bit (bit 21) in EFLAGS.
        const EFLAGS_ID_BIT: u32 = 0x0020_0000;

        // On `x86` the `cpuid` instruction is not always available.
        // This follows the approach indicated in:
        // http://wiki.osdev.org/CPUID#Checking_CPUID_availability
        unsafe {
            // Read EFLAGS:
            let eflags: u32 = __readeflags();

            // Invert the ID bit in EFLAGS. This must be an XOR: an OR would
            // leave the value unchanged whenever the bit is already set, and
            // the check below would then wrongly report "no `cpuid`".
            let eflags_mod: u32 = eflags ^ EFLAGS_ID_BIT;

            // Store the modified EFLAGS (ID bit may or may not be inverted)
            __writeeflags(eflags_mod);

            // Read EFLAGS again:
            let eflags_after: u32 = __readeflags();

            // Check if the ID bit changed. Only that bit is compared so that
            // unrelated flag bits differing between the two reads cannot
            // influence the result.
            ((eflags_after ^ eflags) & EFLAGS_ID_BIT) != 0
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Every target exercised by CI is expected to provide `cpuid`.
    // FIXME: add targets without `cpuid` to CI
    #[test]
    fn test_always_has_cpuid() {
        assert!(has_cpuid());
    }

    /// On `x86`, EFLAGS read after the probe must differ from the value read
    /// before it exactly when `cpuid` is reported as available.
    #[cfg(target_arch = "x86")]
    #[test]
    fn test_has_cpuid() {
        use vendor::__readeflags;
        unsafe {
            let before = __readeflags();

            match has_cpuid() {
                true => assert!(before != __readeflags()),
                false => assert!(before == __readeflags()),
            }
        }
    }
}
50 changes: 50 additions & 0 deletions src/x86/ia32.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
//! `i386/ia32` intrinsics

/// Reads EFLAGS.
///
/// Pushes the flags register onto the stack with `pushfd`, pops it into a
/// register, and returns that value as a `u32`.
///
/// This variant is only compiled for 32-bit `x86` targets.
#[cfg(target_arch = "x86")]
#[inline(always)]
pub unsafe fn __readeflags() -> u32 {
// Assigned exactly once, as the output operand of the asm statement below.
let eflags: u32;
// NOTE(review): `volatile` presumably keeps repeated reads from being merged
// or removed by the optimizer — confirm against the `asm!` documentation.
asm!("pushfd; popl $0" : "=r"(eflags) : : : "volatile");
eflags
}

/// Reads EFLAGS.
///
/// Pushes the flags register onto the stack with `pushfq`, pops it into a
/// register, and returns that value as a `u64`.
///
/// This variant is only compiled for `x86_64` targets.
#[cfg(target_arch = "x86_64")]
#[inline(always)]
pub unsafe fn __readeflags() -> u64 {
// Assigned exactly once, as the output operand of the asm statement below.
let eflags: u64;
// NOTE(review): `volatile` presumably keeps repeated reads from being merged
// or removed by the optimizer — confirm against the `asm!` documentation.
asm!("pushfq; popq $0" : "=r"(eflags) : : : "volatile");
eflags
}

/// Writes EFLAGS.
///
/// Pushes `eflags` onto the stack and pops it into the flags register with
/// `popfd`.
///
/// This variant is only compiled for 32-bit `x86` targets.
#[cfg(target_arch = "x86")]
#[inline(always)]
pub unsafe fn __writeeflags(eflags: u32) {
// The condition codes / flags are declared as clobbered because the
// statement overwrites the flags register.
asm!("pushl $0; popfd" : : "r"(eflags) : "cc", "flags" : "volatile");
}

/// Writes EFLAGS.
///
/// Pushes `eflags` onto the stack and pops it into the flags register with
/// `popfq`.
///
/// This variant is only compiled for `x86_64` targets.
#[cfg(target_arch = "x86_64")]
#[inline(always)]
pub unsafe fn __writeeflags(eflags: u64) {
// The condition codes / flags are declared as clobbered because the
// statement overwrites the flags register.
asm!("pushq $0; popfq" : : "r"(eflags) : "cc", "flags" : "volatile");
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Round trip: writing back the value just read from EFLAGS must yield
    /// the same value on the next read.
    #[test]
    fn test_eflags() {
        unsafe {
            let initial = __readeflags();
            __writeeflags(initial);
            let reread = __readeflags();
            assert_eq!(initial, reread);
        }
    }
}
8 changes: 8 additions & 0 deletions src/x86/mod.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
//! `x86` and `x86_64` intrinsics.

pub use self::ia32::*;
pub use self::cpuid::*;
pub use self::xsave::*;

pub use self::sse::*;
pub use self::sse2::*;
pub use self::sse3::*;
Expand Down Expand Up @@ -28,6 +32,10 @@ mod macros;
#[macro_use]
mod runtime;

mod ia32;
mod cpuid;
mod xsave;

mod sse;
mod sse2;
mod sse3;
Expand Down
63 changes: 29 additions & 34 deletions src/x86/runtime.rs
Original file line number Diff line number Diff line change
Expand Up @@ -159,31 +159,37 @@ fn test_bit(x: usize, bit: u32) -> bool {
/// [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
/// [amd64_ref]: http://support.amd.com/TechDocs/24594.pdf
fn detect_features() -> usize {
let extended_features_ebx;
let proc_info_ecx;
let proc_info_edx;
use super::cpuid::{__cpuid, has_cpuid, CpuidResult};
let mut value: usize = 0;

unsafe {
/// To obtain all feature flags we need two CPUID queries:
// If the x86 CPU does not support the CPUID instruction then it is too
// old to support any of the currently-detectable features.
if !has_cpuid() {
return value;
}

/// 1. EAX=1, ECX=0: Queries "Processor Info and Feature Bits"
/// This gives us most of the CPU features in ECX and EDX (see
/// below).
asm!("cpuid"
: "={ecx}"(proc_info_ecx), "={edx}"(proc_info_edx)
: "{eax}"(0x0000_0001_u32), "{ecx}"(0 as u32)
: :);
// Calling `cpuid` from here on is safe because the CPU has the `cpuid`
// instruction.

/// 2. EAX=7, ECX=0: Queries "Extended Features"
/// This gives us information about bmi,bmi2, and avx2 support
/// (see below); the result in ECX is not currently needed.
asm!("cpuid"
: "={ebx}"(extended_features_ebx)
: "{eax}"(0x0000_0007_u32), "{ecx}"(0 as u32)
: :);
}
// 1. EAX=1, ECX=0: Queries "Processor Info and Feature Bits";
// Contains information about most x86 features.
let CpuidResult {
ecx: proc_info_ecx,
edx: proc_info_edx,
..
} = unsafe { __cpuid(0x0000_0001_u32) };

let mut value: usize = 0;
// 2. EAX=7, ECX=0: Queries "Extended Features";
// Contains information about bmi,bmi2, and avx2 support.
let CpuidResult {
ebx: extended_features_ebx,
..
} = unsafe { __cpuid(0x0000_0007_u32) };

let proc_info_ecx = proc_info_ecx as usize;
let proc_info_edx = proc_info_edx as usize;

let extended_features_ebx = extended_features_ebx as usize;

if test_bit(extended_features_ebx, 3) {
value = set_bit(value, __Feature::bmi as u32);
Expand Down Expand Up @@ -233,21 +239,10 @@ fn detect_features() -> usize {
// org/mozilla-central/file/64bab5cbb9b6/mozglue/build/SSE.cpp#l190
//
if test_bit(proc_info_ecx, 26) && test_bit(proc_info_ecx, 27) {
/// XGETBV: reads the contents of the extended control
/// register (XCR).
unsafe fn xgetbv(xcr_no: u32) -> u64 {
let eax: u32;
let edx: u32;
// xgetbv
asm!("xgetbv"
: "={eax}"(eax), "={edx}"(edx)
: "{ecx}"(xcr_no)
: :);
((edx as u64) << 32) | (eax as u64)
}
use super::xsave::_xgetbv;

// This is safe because on x86 `xgetbv` is always available.
if unsafe { xgetbv(0) } & 6 == 6 {
if unsafe { _xgetbv(0) } & 6 == 6 {
if test_bit(proc_info_ecx, 28) {
value = set_bit(value, __Feature::avx as u32);
}
Expand Down
8 changes: 6 additions & 2 deletions src/x86/sse2.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1792,7 +1792,9 @@ pub unsafe fn _mm_cvtsd_si64(a: f64x2) -> i64 {
#[inline(always)]
#[target_feature = "+sse2"]
#[cfg_attr(test, assert_instr(cvtsd2si))]
pub unsafe fn _mm_cvtsd_si64x(a: f64x2) -> i64 { _mm_cvtsd_si64(a) }
pub unsafe fn _mm_cvtsd_si64x(a: f64x2) -> i64 {
_mm_cvtsd_si64(a)
}

/// Convert the lower double-precision (64-bit) floating-point element in `b`
/// to a single-precision (32-bit) floating-point element, store the result in
Expand Down Expand Up @@ -1857,7 +1859,9 @@ pub unsafe fn _mm_cvttsd_si64(a: f64x2) -> i64 {
#[inline(always)]
#[target_feature = "+sse2"]
#[cfg_attr(test, assert_instr(cvttsd2si))]
pub unsafe fn _mm_cvttsd_si64x(a: f64x2) -> i64 { _mm_cvttsd_si64(a) }
pub unsafe fn _mm_cvttsd_si64x(a: f64x2) -> i64 {
_mm_cvttsd_si64(a)
}

/// Convert packed single-precision (32-bit) floating-point elements in `a` to
/// packed 32-bit integers with truncation.
Expand Down
22 changes: 22 additions & 0 deletions src/x86/xsave.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
//! `xsave` target feature intrinsics

#[cfg(test)]
use stdsimd_test::assert_instr;

/// Reads the extended control register `XCR` selected by `xcr_no`.
///
/// `xcr_no` is passed to the `xgetbv` instruction in `ECX`. The instruction
/// leaves its result in `EDX:EAX`; the two 32-bit halves are combined into a
/// single `u64` with `EDX` in the upper half.
#[inline(always)]
// #[target_feature = "+xsave"] // FIXME: see
// https://github.com/rust-lang-nursery/stdsimd/issues/167
#[cfg_attr(test, assert_instr(xgetbv))]
pub unsafe fn _xgetbv(xcr_no: u32) -> u64 {
    let low_half: u32;
    let high_half: u32;

    asm!("xgetbv"
         : "={eax}"(low_half), "={edx}"(high_half)
         : "{ecx}"(xcr_no)
         : :);

    (u64::from(high_half) << 32) | u64::from(low_half)
}