rust-lang · gnzlbg · Nov 2, 2017 · Nov 4, 2017
diff --git a/src/x86/cpuid.rs b/src/x86/cpuid.rs
@@ -0,0 +1,120 @@
+//! `cpuid` intrinsics
+
+#[cfg(test)]
+use stdsimd_test::assert_instr;
+
+/// Register values produced by a single execution of the `cpuid` instruction.
+#[derive(Copy, Clone, Debug, Eq, Ord, PartialEq, PartialOrd)]
+#[cfg_attr(feature = "cargo-clippy", allow(stutter))]
+pub struct CpuidResult {
+    /// Contents of the EAX register after `cpuid`.
+    pub eax: u32,
+    /// Contents of the EBX register after `cpuid`.
+    pub ebx: u32,
+    /// Contents of the ECX register after `cpuid`.
+    pub ecx: u32,
+    /// Contents of the EDX register after `cpuid`.
+    pub edx: u32,
+}
+
+/// Executes `cpuid` with `eax` and `ecx` as inputs and returns the four output registers.
+///
+/// `eax` and `ecx` are loaded into EAX and ECX before the instruction runs; see the
+/// [CPUID Wikipedia page][wiki_cpuid] for what each input queries and the format of `eax...edx`.
+///
+/// This function executes `cpuid` unconditionally; the `has_cpuid()` intrinsic can be
+/// used beforehand to query whether the instruction is available.
+///
+/// The definitive references are:
+/// - [Intel 64 and IA-32 Architectures Software Developer's Manual Volume 2:
+///   Instruction Set Reference, A-Z][intel64_ref].
+/// - [AMD64 Architecture Programmer's Manual, Volume 3: General-Purpose and
+///   System Instructions][amd64_ref].
+///
+/// [wiki_cpuid]: https://en.wikipedia.org/wiki/CPUID
+/// [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
+/// [amd64_ref]: http://support.amd.com/TechDocs/24594.pdf
+#[inline(always)]
+#[cfg_attr(test, assert_instr(cpuid))]
+#[cfg_attr(feature = "cargo-clippy", allow(stutter))]
+pub unsafe fn __cpuid_count(eax: u32, ecx: u32) -> CpuidResult {
+    // No initialisers: the four `asm!` output operands below assign every one of these.
+    let (out_eax, out_ebx, out_ecx, out_edx): (u32, u32, u32, u32);
+    asm!("cpuid"
+         : "={eax}"(out_eax), "={ebx}"(out_ebx), "={ecx}"(out_ecx), "={edx}"(out_edx)
+         : "{eax}"(eax), "{ecx}"(ecx)
+         : :);
+    CpuidResult { eax: out_eax, ebx: out_ebx, ecx: out_ecx, edx: out_edx }
+}
+
+/// Executes `cpuid` with the given `eax` input and ECX set to zero.
+///
+/// Equivalent to `__cpuid_count(eax, 0)`; see `__cpuid_count` for details and references.
+#[inline(always)]
+#[cfg_attr(test, assert_instr(cpuid))]
+#[cfg_attr(feature = "cargo-clippy", allow(stutter))]
+pub unsafe fn __cpuid(eax: u32) -> CpuidResult {
+    __cpuid_count(eax, 0)
+}
+
+/// Returns whether the host supports `cpuid`: always on `x86_64`, probed via EFLAGS on `x86`.
+#[inline(always)]
+#[cfg_attr(feature = "cargo-clippy", allow(stutter))]
+pub fn has_cpuid() -> bool {
+    #[cfg(target_arch = "x86_64")]
+    {
+        true
+    }
+    #[cfg(target_arch = "x86")]
+    {
+        use super::ia32::{__readeflags, __writeeflags};
+
+        // On `x86` the `cpuid` instruction is not always available. It exists if software
+        // can change the ID bit (bit 21) of EFLAGS; this follows the approach indicated in:
+        // http://wiki.osdev.org/CPUID#Checking_CPUID_availability
+        unsafe {
+            // Read EFLAGS:
+            let eflags: u32 = __readeflags();
+            // Invert the ID bit in EFLAGS. XOR (not OR) so the probe still sees a change
+            // when the bit is already set, e.g. left set by an earlier call:
+            let eflags_mod: u32 = eflags ^ 0x0020_0000;
+
+            // Store the modified EFLAGS (ID bit may or may not be inverted)
+            __writeeflags(eflags_mod);
+
+            // Read EFLAGS again:
+            let eflags_after: u32 = __readeflags();
+            // Check if the ID bit changed (the original EFLAGS value is not restored
+            // afterwards, so the bit stays toggled):
+            eflags_after != eflags
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_always_has_cpuid() {
+        // All currently-tested targets have the instruction, so the probe must succeed.
+        // FIXME: add targets without `cpuid` to CI
+        assert!(has_cpuid());
+    }
+    // On 32-bit `x86`, `has_cpuid` must report support exactly when EFLAGS differs afterwards.
+    #[cfg(target_arch = "x86")]
+    #[test]
+    fn test_has_cpuid() {
+        use vendor::__readeflags;
+        unsafe {
+            let before = __readeflags();
+            // `has_cpuid` does not write the original EFLAGS back, so any change is still visible.
+            if has_cpuid() {
+                assert!(before != __readeflags());
+            } else {
+                assert!(before == __readeflags());
+            }
+        }
+    }
+
+}
diff --git a/src/x86/ia32.rs b/src/x86/ia32.rs
@@ -0,0 +1,50 @@
+//! `i386/ia32` intrinsics
+
+/// Reads the 32-bit EFLAGS register by pushing it onto the stack and popping it into a register.
+#[cfg(target_arch = "x86")]
+#[inline(always)]
+pub unsafe fn __readeflags() -> u32 {
+    let eflags: u32;
+    asm!("pushfd; popl $0" : "=r"(eflags) : : : "volatile");
+    eflags
+}
+
+/// Reads EFLAGS as a 64-bit value on `x86_64` by executing `pushfq` and popping into a register.
+#[cfg(target_arch = "x86_64")]
+#[inline(always)]
+pub unsafe fn __readeflags() -> u64 {
+    let eflags: u64;
+    asm!("pushfq; popq $0" : "=r"(eflags) : : : "volatile");
+    eflags
+}
+
+/// Writes `eflags` to the EFLAGS register by pushing the value and executing `popfd`.
+#[cfg(target_arch = "x86")]
+#[inline(always)]
+pub unsafe fn __writeeflags(eflags: u32) {
+    asm!("pushl $0; popfd" : : "r"(eflags) : "cc", "flags" : "volatile");
+}
+
+/// Writes the 64-bit `eflags` value to EFLAGS on `x86_64` by pushing it and executing `popfq`.
+#[cfg(target_arch = "x86_64")]
+#[inline(always)]
+pub unsafe fn __writeeflags(eflags: u64) {
+    asm!("pushq $0; popfq" : : "r"(eflags) : "cc", "flags" : "volatile");
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_eflags() {
+        // Round trip: writing back the value that was just read
+        // must leave the flags register unchanged.
+        let (before, after) = unsafe {
+            let before = __readeflags();
+            __writeeflags(before);
+            (before, __readeflags())
+        };
+        assert_eq!(before, after);
+    }
+}
diff --git a/src/x86/mod.rs b/src/x86/mod.rs
@@ -1,5 +1,9 @@
 //! `x86` and `x86_64` intrinsics.
 
+pub use self::ia32::*;
+pub use self::cpuid::*;
+pub use self::xsave::*;
+
 pub use self::sse::*;
 pub use self::sse2::*;
 pub use self::sse3::*;
@@ -28,6 +32,10 @@ mod macros;
 #[macro_use]
 mod runtime;
 
+mod ia32;
+mod cpuid;
+mod xsave;
+
 mod sse;
 mod sse2;
 mod sse3;

diff --git a/src/x86/runtime.rs b/src/x86/runtime.rs
@@ -159,31 +159,37 @@ fn test_bit(x: usize, bit: u32) -> bool {
 /// [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
 /// [amd64_ref]: http://support.amd.com/TechDocs/24594.pdf
 fn detect_features() -> usize {
-    let extended_features_ebx;
-    let proc_info_ecx;
-    let proc_info_edx;
+    use super::cpuid::{__cpuid, has_cpuid, CpuidResult};
+    let mut value: usize = 0;
 
-    unsafe {
-        /// To obtain all feature flags we need two CPUID queries:
+    // If the x86 CPU does not support the CPUID instruction then it is too
+    // old to support any of the currently-detectable features.
+    if !has_cpuid() {
+        return value;
+    }
 
-        /// 1. EAX=1, ECX=0: Queries "Processor Info and Feature Bits"
-        /// This gives us most of the CPU features in ECX and EDX (see
-        /// below).
-        asm!("cpuid"
-             : "={ecx}"(proc_info_ecx), "={edx}"(proc_info_edx)
-             : "{eax}"(0x0000_0001_u32), "{ecx}"(0 as u32)
-             : :);
+    // Calling `cpuid` from here on is safe because the CPU has the `cpuid`
+    // instruction.
 
-        /// 2. EAX=7, ECX=0: Queries "Extended Features"
-        /// This gives us information about bmi,bmi2, and avx2 support
-        /// (see below); the result in ECX is not currently needed.
-        asm!("cpuid"
-             : "={ebx}"(extended_features_ebx)
-             : "{eax}"(0x0000_0007_u32), "{ecx}"(0 as u32)
-             : :);
-    }
+    // 1. EAX=1, ECX=0: Queries "Processor Info and Feature Bits";
+    // Contains information about most x86 features.
+    let CpuidResult {
+        ecx: proc_info_ecx,
+        edx: proc_info_edx,
+        ..
+    } = unsafe { __cpuid(0x0000_0001_u32) };
 
-    let mut value: usize = 0;
+    // 2. EAX=7, ECX=0: Queries "Extended Features";
+    // Contains information about bmi, bmi2, and avx2 support.
+    let CpuidResult {
+        ebx: extended_features_ebx,
+        ..
+    } = unsafe { __cpuid(0x0000_0007_u32) };
+
+    let proc_info_ecx = proc_info_ecx as usize;
+    let proc_info_edx = proc_info_edx as usize;
+
+    let extended_features_ebx = extended_features_ebx as usize;
 
     if test_bit(extended_features_ebx, 3) {
         value = set_bit(value, __Feature::bmi as u32);
@@ -233,21 +239,10 @@ fn detect_features() -> usize {
     // org/mozilla-central/file/64bab5cbb9b6/mozglue/build/SSE.cpp#l190
     //
     if test_bit(proc_info_ecx, 26) && test_bit(proc_info_ecx, 27) {
-        /// XGETBV: reads the contents of the extended control
-        /// register (XCR).
-        unsafe fn xgetbv(xcr_no: u32) -> u64 {
-            let eax: u32;
-            let edx: u32;
-            // xgetbv
-            asm!("xgetbv"
-                 : "={eax}"(eax),  "={edx}"(edx)
-                 : "{ecx}"(xcr_no)
-                 : :);
-            ((edx as u64) << 32) | (eax as u64)
-        }
+        use super::xsave::_xgetbv;
 
         // This is safe because on x86 `xgetbv` is always available.
-        if unsafe { xgetbv(0) } & 6 == 6 {
+        if unsafe { _xgetbv(0) } & 6 == 6 {
             if test_bit(proc_info_ecx, 28) {
                 value = set_bit(value, __Feature::avx as u32);
             }

diff --git a/src/x86/sse2.rs b/src/x86/sse2.rs
@@ -1792,7 +1792,9 @@ pub unsafe fn _mm_cvtsd_si64(a: f64x2) -> i64 {
 #[inline(always)]
 #[target_feature = "+sse2"]
 #[cfg_attr(test, assert_instr(cvtsd2si))]
-pub unsafe fn _mm_cvtsd_si64x(a: f64x2) -> i64 { _mm_cvtsd_si64(a) }
+pub unsafe fn _mm_cvtsd_si64x(a: f64x2) -> i64 {
+    _mm_cvtsd_si64(a)
+}
 
 /// Convert the lower double-precision (64-bit) floating-point element in `b`
 /// to a single-precision (32-bit) floating-point element, store the result in
@@ -1857,7 +1859,9 @@ pub unsafe fn _mm_cvttsd_si64(a: f64x2) -> i64 {
 #[inline(always)]
 #[target_feature = "+sse2"]
 #[cfg_attr(test, assert_instr(cvttsd2si))]
-pub unsafe fn _mm_cvttsd_si64x(a: f64x2) -> i64 { _mm_cvttsd_si64(a) }
+pub unsafe fn _mm_cvttsd_si64x(a: f64x2) -> i64 {
+    _mm_cvttsd_si64(a)
+}
 
 /// Convert packed single-precision (32-bit) floating-point elements in `a` to
 /// packed 32-bit integers with truncation.

diff --git a/src/x86/xsave.rs b/src/x86/xsave.rs
@@ -0,0 +1,22 @@
+//! `xsave` target feature intrinsics
+
+#[cfg(test)]
+use stdsimd_test::assert_instr;
+
+/// Executes `xgetbv` with `xcr_no` in ECX and returns the selected extended control
+/// register (`XCR`) as one `u64`: EDX supplies the upper 32 bits, EAX the lower 32.
+#[inline(always)]
+// #[target_feature = "+xsave"] // FIXME: see
+// https://github.com/rust-lang-nursery/stdsimd/issues/167
+#[cfg_attr(test, assert_instr(xgetbv))]
+pub unsafe fn _xgetbv(xcr_no: u32) -> u64 {
+    let eax: u32;
+    let edx: u32;
+    // The register number is passed in ECX; the two result halves come back in EAX and EDX.
+    asm!("xgetbv"
+         : "={eax}"(eax),  "={edx}"(edx)
+         : "{ecx}"(xcr_no)
+         : :);
+    // Reassemble the 64-bit value from the two 32-bit halves.
+    ((edx as u64) << 32) | (eax as u64)
+}