Open
Description
I tried this code (Godbolt):
Rust Code
#![no_std]
#![allow(non_camel_case_types)]
#![feature(repr_simd, platform_intrinsics)]
/// Four-lane `f32` vector used as the operand type for every method below.
///
/// `#[repr(simd)]` relies on the nightly `repr_simd` feature enabled at the
/// top of this file. Only `Debug` is derived, so the by-value `self` methods
/// consume the vector they are called on.
#[repr(simd)]
#[derive(Debug)]
pub struct f32x4(f32, f32, f32, f32);
// Generic float SIMD intrinsics, declared through the nightly-only
// "platform-intrinsic" ABI (enabled by the `platform_intrinsics` feature at
// the top of this file). Each is generic over `T`; in this file `T` is always
// `f32x4`.
// NOTE(review): rustc is assumed to require `T` to be a `#[repr(simd)]` float
// vector for these -- confirm against the compiler's intrinsic checks.
extern "platform-intrinsic" {
// Square root.
fn simd_fsqrt<T>(x: T) -> T;
// Absolute value.
fn simd_fabs<T>(x: T) -> T;
// Sine.
fn simd_fsin<T>(x: T) -> T;
// Cosine.
fn simd_fcos<T>(x: T) -> T;
// Round up (backs `f32x4::ceil`).
fn simd_ceil<T>(x: T) -> T;
// Exponential (backs `f32x4::fexp`).
fn simd_fexp<T>(x: T) -> T;
// Base-2 exponential (backs `f32x4::fexp2`).
fn simd_fexp2<T>(x: T) -> T;
// Round down (backs `f32x4::floor`).
fn simd_floor<T>(x: T) -> T;
// Multiply-add of three vectors (backs `f32x4::mul_add`).
fn simd_fma<T>(x: T, y: T, z: T) -> T;
// Logarithm (backs `f32x4::flog`).
fn simd_flog<T>(x: T) -> T;
// Base-10 logarithm (backs `f32x4::flog10`).
fn simd_flog10<T>(x: T) -> T;
// Base-2 logarithm (backs `f32x4::flog2`).
fn simd_flog2<T>(x: T) -> T;
// Power with a vector exponent (backs `f32x4::powf`).
fn simd_fpow<T>(x: T, y: T) -> T;
// Power with a single scalar `i32` exponent (backs `f32x4::powi`).
fn simd_fpowi<T>(x: T, y: i32) -> T;
// Truncate (backs `f32x4::trunc`).
fn simd_trunc<T>(x: T) -> T;
// Round to nearest (backs `f32x4::round`).
fn simd_round<T>(x: T) -> T;
}
/// Thin by-value wrappers that forward `f32x4` to the float SIMD intrinsics.
///
/// Every method hands its operand(s) straight to one intrinsic and returns the
/// result unchanged; no extra logic is layered on top.
///
/// NOTE(review): each `SAFETY` note below assumes the intrinsics only require a
/// `#[repr(simd)]` float vector operand -- confirm against rustc's checks.
impl f32x4 {
    // ---- Rounding ----

    /// Rounds each lane up by forwarding to `simd_ceil`.
    pub fn ceil(self) -> Self {
        // SAFETY: `Self` is a `#[repr(simd)]` vector of `f32` lanes.
        unsafe { simd_ceil::<Self>(self) }
    }

    /// Rounds each lane down by forwarding to `simd_floor`.
    pub fn floor(self) -> Self {
        // SAFETY: `Self` is a `#[repr(simd)]` vector of `f32` lanes.
        unsafe { simd_floor::<Self>(self) }
    }

    /// Rounds each lane to a nearby integral value by forwarding to `simd_round`.
    pub fn round(self) -> Self {
        // SAFETY: `Self` is a `#[repr(simd)]` vector of `f32` lanes.
        unsafe { simd_round::<Self>(self) }
    }

    /// Truncates each lane by forwarding to `simd_trunc`.
    pub fn trunc(self) -> Self {
        // SAFETY: `Self` is a `#[repr(simd)]` vector of `f32` lanes.
        unsafe { simd_trunc::<Self>(self) }
    }

    // ---- Arithmetic ----

    /// Multiply-add: forwards `(self, y, z)` to `simd_fma`, in that order.
    pub fn mul_add(self, y: Self, z: Self) -> Self {
        // SAFETY: all three operands are the same `#[repr(simd)]` `f32` vector type.
        unsafe { simd_fma::<Self>(self, y, z) }
    }

    /// Absolute value of each lane, via `simd_fabs`.
    pub fn abs(self) -> Self {
        // SAFETY: `Self` is a `#[repr(simd)]` vector of `f32` lanes.
        unsafe { simd_fabs::<Self>(self) }
    }

    /// Square root of each lane, via `simd_fsqrt`.
    pub fn sqrt(self) -> Self {
        // SAFETY: `Self` is a `#[repr(simd)]` vector of `f32` lanes.
        unsafe { simd_fsqrt::<Self>(self) }
    }

    /// Raises the vector to the single scalar integer power `exp`, via `simd_fpowi`.
    pub fn powi(self, exp: i32) -> Self {
        // SAFETY: `Self` is a `#[repr(simd)]` vector of `f32` lanes; `exp` is a plain `i32`.
        unsafe { simd_fpowi::<Self>(self, exp) }
    }

    /// Raises the vector to the vector power `exp`, via `simd_fpow`.
    pub fn powf(self, exp: Self) -> Self {
        // SAFETY: both operands are the same `#[repr(simd)]` `f32` vector type.
        unsafe { simd_fpow::<Self>(self, exp) }
    }

    // ---- Logarithms and exponentials ----

    /// Base-2 logarithm, via `simd_flog2`.
    pub fn flog2(self) -> Self {
        // SAFETY: `Self` is a `#[repr(simd)]` vector of `f32` lanes.
        unsafe { simd_flog2::<Self>(self) }
    }

    /// Base-10 logarithm, via `simd_flog10`.
    pub fn flog10(self) -> Self {
        // SAFETY: `Self` is a `#[repr(simd)]` vector of `f32` lanes.
        unsafe { simd_flog10::<Self>(self) }
    }

    /// Logarithm, via `simd_flog`.
    pub fn flog(self) -> Self {
        // SAFETY: `Self` is a `#[repr(simd)]` vector of `f32` lanes.
        unsafe { simd_flog::<Self>(self) }
    }

    /// Exponential, via `simd_fexp`.
    pub fn fexp(self) -> Self {
        // SAFETY: `Self` is a `#[repr(simd)]` vector of `f32` lanes.
        unsafe { simd_fexp::<Self>(self) }
    }

    /// Base-2 exponential, via `simd_fexp2`.
    pub fn fexp2(self) -> Self {
        // SAFETY: `Self` is a `#[repr(simd)]` vector of `f32` lanes.
        unsafe { simd_fexp2::<Self>(self) }
    }

    // ---- Trigonometry ----

    /// Cosine, via `simd_fcos`.
    pub fn cos(self) -> Self {
        // SAFETY: `Self` is a `#[repr(simd)]` vector of `f32` lanes.
        unsafe { simd_fcos::<Self>(self) }
    }

    /// Sine, via `simd_fsin`.
    pub fn sin(self) -> Self {
        // SAFETY: `Self` is a `#[repr(simd)]` vector of `f32` lanes.
        unsafe { simd_fsin::<Self>(self) }
    }
}
Instead, this happened: most of these methods compiled to per-lane scalar calls into libm!
When sufficient vector features are enabled, these do compile to vectorized assembly instructions. The problem is that compiling without those features produces code that depends on libm, which is not allowed in `core`. We will have to either solve this or push most of our implementation of `SimdF32` and `SimdF64` into `std` rather than `core`.
Notable winners on x64: `simd_fsqrt` and `simd_fabs` become vector instructions just fine. I'm worried about them on x86_32 or Arm architectures, though.
Meta
`rustc --version --verbose`:
rustc 1.52.0-nightly (d1206f950 2021-02-15)
binary: rustc
commit-hash: d1206f950ffb76c76e1b74a19ae33c2b7d949454
commit-date: 2021-02-15
host: x86_64-unknown-linux-gnu
release: 1.52.0-nightly
LLVM version: 11.0.1
x86 Assembly
<&T as core::fmt::Debug>::fmt:
movq (%rdi), %rdi
jmpq *_ZN4core3fmt5float50_$LT$impl$u20$core..fmt..Debug$u20$for$u20$f32$GT$3fmt17hf2084266ae57b528E@GOTPCREL(%rip)
core::ptr::drop_in_place<&f32>:
retq
example::f32x4::ceil:
pushq %r14
pushq %rbx
subq $56, %rsp
movq %rdi, %r14
movaps (%rsi), %xmm0
movaps %xmm0, 16(%rsp)
shufps $255, %xmm0, %xmm0
movq ceilf@GOTPCREL(%rip), %rbx
callq *%rbx
movaps %xmm0, (%rsp)
movaps 16(%rsp), %xmm0
movhlps %xmm0, %xmm0
callq *%rbx
unpcklps (%rsp), %xmm0
movaps %xmm0, (%rsp)
movaps 16(%rsp), %xmm0
callq *%rbx
movaps %xmm0, 32(%rsp)
movaps 16(%rsp), %xmm0
shufps $85, %xmm0, %xmm0
callq *%rbx
movaps 32(%rsp), %xmm1
unpcklps %xmm0, %xmm1
unpcklpd (%rsp), %xmm1
movaps %xmm1, (%r14)
movq %r14, %rax
addq $56, %rsp
popq %rbx
popq %r14
retq
example::f32x4::floor:
pushq %r14
pushq %rbx
subq $56, %rsp
movq %rdi, %r14
movaps (%rsi), %xmm0
movaps %xmm0, 16(%rsp)
shufps $255, %xmm0, %xmm0
movq floorf@GOTPCREL(%rip), %rbx
callq *%rbx
movaps %xmm0, (%rsp)
movaps 16(%rsp), %xmm0
movhlps %xmm0, %xmm0
callq *%rbx
unpcklps (%rsp), %xmm0
movaps %xmm0, (%rsp)
movaps 16(%rsp), %xmm0
callq *%rbx
movaps %xmm0, 32(%rsp)
movaps 16(%rsp), %xmm0
shufps $85, %xmm0, %xmm0
callq *%rbx
movaps 32(%rsp), %xmm1
unpcklps %xmm0, %xmm1
unpcklpd (%rsp), %xmm1
movaps %xmm1, (%r14)
movq %r14, %rax
addq $56, %rsp
popq %rbx
popq %r14
retq
example::f32x4::round:
pushq %r14
pushq %rbx
subq $56, %rsp
movq %rdi, %r14
movaps (%rsi), %xmm0
movaps %xmm0, 16(%rsp)
shufps $255, %xmm0, %xmm0
movq roundf@GOTPCREL(%rip), %rbx
callq *%rbx
movaps %xmm0, (%rsp)
movaps 16(%rsp), %xmm0
movhlps %xmm0, %xmm0
callq *%rbx
unpcklps (%rsp), %xmm0
movaps %xmm0, (%rsp)
movaps 16(%rsp), %xmm0
callq *%rbx
movaps %xmm0, 32(%rsp)
movaps 16(%rsp), %xmm0
shufps $85, %xmm0, %xmm0
callq *%rbx
movaps 32(%rsp), %xmm1
unpcklps %xmm0, %xmm1
unpcklpd (%rsp), %xmm1
movaps %xmm1, (%r14)
movq %r14, %rax
addq $56, %rsp
popq %rbx
popq %r14
retq
example::f32x4::trunc:
pushq %r14
pushq %rbx
subq $56, %rsp
movq %rdi, %r14
movaps (%rsi), %xmm0
movaps %xmm0, 16(%rsp)
shufps $255, %xmm0, %xmm0
movq truncf@GOTPCREL(%rip), %rbx
callq *%rbx
movaps %xmm0, (%rsp)
movaps 16(%rsp), %xmm0
movhlps %xmm0, %xmm0
callq *%rbx
unpcklps (%rsp), %xmm0
movaps %xmm0, (%rsp)
movaps 16(%rsp), %xmm0
callq *%rbx
movaps %xmm0, 32(%rsp)
movaps 16(%rsp), %xmm0
shufps $85, %xmm0, %xmm0
callq *%rbx
movaps 32(%rsp), %xmm1
unpcklps %xmm0, %xmm1
unpcklpd (%rsp), %xmm1
movaps %xmm1, (%r14)
movq %r14, %rax
addq $56, %rsp
popq %rbx
popq %r14
retq
example::f32x4::mul_add:
movaps (%rsi), %xmm0
mulps (%rdx), %xmm0
movq %rdi, %rax
addps (%rcx), %xmm0
movaps %xmm0, (%rdi)
retq
.LCPI7_0:
.long 0x7fffffff
.long 0x7fffffff
.long 0x7fffffff
.long 0x7fffffff
example::f32x4::abs:
movq %rdi, %rax
movaps (%rsi), %xmm0
andps .LCPI7_0(%rip), %xmm0
movaps %xmm0, (%rdi)
retq
.LCPI8_0:
.long 0xbf000000
.long 0xbf000000
.long 0xbf000000
.long 0xbf000000
.LCPI8_1:
.long 0xc0400000
.long 0xc0400000
.long 0xc0400000
.long 0xc0400000
.LCPI8_2:
.long 0x7fffffff
.long 0x7fffffff
.long 0x7fffffff
.long 0x7fffffff
.LCPI8_3:
.long 0x00800000
.long 0x00800000
.long 0x00800000
.long 0x00800000
example::f32x4::sqrt:
movaps (%rsi), %xmm0
rsqrtps %xmm0, %xmm1
movaps %xmm0, %xmm2
mulps %xmm1, %xmm2
movaps .LCPI8_0(%rip), %xmm3
mulps %xmm2, %xmm3
mulps %xmm1, %xmm2
addps .LCPI8_1(%rip), %xmm2
movq %rdi, %rax
mulps %xmm3, %xmm2
andps .LCPI8_2(%rip), %xmm0
movaps .LCPI8_3(%rip), %xmm1
cmpleps %xmm0, %xmm1
andps %xmm2, %xmm1
movaps %xmm1, (%rdi)
retq
example::f32x4::powi:
pushq %rbp
pushq %r14
pushq %rbx
subq $48, %rsp
movl %edx, %ebp
movq %rdi, %r14
movaps (%rsi), %xmm0
movaps %xmm0, 16(%rsp)
shufps $255, %xmm0, %xmm0
movq __powisf2@GOTPCREL(%rip), %rbx
movl %edx, %edi
callq *%rbx
movaps %xmm0, (%rsp)
movaps 16(%rsp), %xmm0
movhlps %xmm0, %xmm0
movl %ebp, %edi
callq *%rbx
unpcklps (%rsp), %xmm0
movaps %xmm0, (%rsp)
movaps 16(%rsp), %xmm0
movl %ebp, %edi
callq *%rbx
movaps %xmm0, 32(%rsp)
movaps 16(%rsp), %xmm0
shufps $85, %xmm0, %xmm0
movl %ebp, %edi
callq *%rbx
movaps 32(%rsp), %xmm1
unpcklps %xmm0, %xmm1
unpcklpd (%rsp), %xmm1
movaps %xmm1, (%r14)
movq %r14, %rax
addq $48, %rsp
popq %rbx
popq %r14
popq %rbp
retq
example::f32x4::powf:
pushq %r14
pushq %rbx
subq $72, %rsp
movq %rdi, %r14
movaps (%rsi), %xmm0
movaps %xmm0, 32(%rsp)
movaps (%rdx), %xmm1
movaps %xmm1, 16(%rsp)
shufps $255, %xmm0, %xmm0
shufps $255, %xmm1, %xmm1
movq powf@GOTPCREL(%rip), %rbx
callq *%rbx
movaps %xmm0, (%rsp)
movaps 32(%rsp), %xmm0
movhlps %xmm0, %xmm0
movaps 16(%rsp), %xmm1
movhlps %xmm1, %xmm1
callq *%rbx
unpcklps (%rsp), %xmm0
movaps %xmm0, (%rsp)
movaps 32(%rsp), %xmm0
movaps 16(%rsp), %xmm1
callq *%rbx
movaps %xmm0, 48(%rsp)
movaps 32(%rsp), %xmm0
shufps $85, %xmm0, %xmm0
movaps 16(%rsp), %xmm1
shufps $85, %xmm1, %xmm1
callq *%rbx
movaps 48(%rsp), %xmm1
unpcklps %xmm0, %xmm1
unpcklpd (%rsp), %xmm1
movaps %xmm1, (%r14)
movq %r14, %rax
addq $72, %rsp
popq %rbx
popq %r14
retq
example::f32x4::flog2:
pushq %r14
pushq %rbx
subq $56, %rsp
movq %rdi, %r14
movaps (%rsi), %xmm0
movaps %xmm0, 16(%rsp)
shufps $255, %xmm0, %xmm0
movq log2f@GOTPCREL(%rip), %rbx
callq *%rbx
movaps %xmm0, (%rsp)
movaps 16(%rsp), %xmm0
movhlps %xmm0, %xmm0
callq *%rbx
unpcklps (%rsp), %xmm0
movaps %xmm0, (%rsp)
movaps 16(%rsp), %xmm0
callq *%rbx
movaps %xmm0, 32(%rsp)
movaps 16(%rsp), %xmm0
shufps $85, %xmm0, %xmm0
callq *%rbx
movaps 32(%rsp), %xmm1
unpcklps %xmm0, %xmm1
unpcklpd (%rsp), %xmm1
movaps %xmm1, (%r14)
movq %r14, %rax
addq $56, %rsp
popq %rbx
popq %r14
retq
example::f32x4::flog10:
pushq %r14
pushq %rbx
subq $56, %rsp
movq %rdi, %r14
movaps (%rsi), %xmm0
movaps %xmm0, 16(%rsp)
shufps $255, %xmm0, %xmm0
movq log10f@GOTPCREL(%rip), %rbx
callq *%rbx
movaps %xmm0, (%rsp)
movaps 16(%rsp), %xmm0
movhlps %xmm0, %xmm0
callq *%rbx
unpcklps (%rsp), %xmm0
movaps %xmm0, (%rsp)
movaps 16(%rsp), %xmm0
callq *%rbx
movaps %xmm0, 32(%rsp)
movaps 16(%rsp), %xmm0
shufps $85, %xmm0, %xmm0
callq *%rbx
movaps 32(%rsp), %xmm1
unpcklps %xmm0, %xmm1
unpcklpd (%rsp), %xmm1
movaps %xmm1, (%r14)
movq %r14, %rax
addq $56, %rsp
popq %rbx
popq %r14
retq
example::f32x4::flog:
pushq %r14
pushq %rbx
subq $56, %rsp
movq %rdi, %r14
movaps (%rsi), %xmm0
movaps %xmm0, 16(%rsp)
shufps $255, %xmm0, %xmm0
movq logf@GOTPCREL(%rip), %rbx
callq *%rbx
movaps %xmm0, (%rsp)
movaps 16(%rsp), %xmm0
movhlps %xmm0, %xmm0
callq *%rbx
unpcklps (%rsp), %xmm0
movaps %xmm0, (%rsp)
movaps 16(%rsp), %xmm0
callq *%rbx
movaps %xmm0, 32(%rsp)
movaps 16(%rsp), %xmm0
shufps $85, %xmm0, %xmm0
callq *%rbx
movaps 32(%rsp), %xmm1
unpcklps %xmm0, %xmm1
unpcklpd (%rsp), %xmm1
movaps %xmm1, (%r14)
movq %r14, %rax
addq $56, %rsp
popq %rbx
popq %r14
retq
example::f32x4::fexp:
pushq %r14
pushq %rbx
subq $56, %rsp
movq %rdi, %r14
movaps (%rsi), %xmm0
movaps %xmm0, 16(%rsp)
shufps $255, %xmm0, %xmm0
movq expf@GOTPCREL(%rip), %rbx
callq *%rbx
movaps %xmm0, (%rsp)
movaps 16(%rsp), %xmm0
movhlps %xmm0, %xmm0
callq *%rbx
unpcklps (%rsp), %xmm0
movaps %xmm0, (%rsp)
movaps 16(%rsp), %xmm0
callq *%rbx
movaps %xmm0, 32(%rsp)
movaps 16(%rsp), %xmm0
shufps $85, %xmm0, %xmm0
callq *%rbx
movaps 32(%rsp), %xmm1
unpcklps %xmm0, %xmm1
unpcklpd (%rsp), %xmm1
movaps %xmm1, (%r14)
movq %r14, %rax
addq $56, %rsp
popq %rbx
popq %r14
retq
example::f32x4::fexp2:
pushq %r14
pushq %rbx
subq $56, %rsp
movq %rdi, %r14
movaps (%rsi), %xmm0
movaps %xmm0, 16(%rsp)
shufps $255, %xmm0, %xmm0
movq exp2f@GOTPCREL(%rip), %rbx
callq *%rbx
movaps %xmm0, (%rsp)
movaps 16(%rsp), %xmm0
movhlps %xmm0, %xmm0
callq *%rbx
unpcklps (%rsp), %xmm0
movaps %xmm0, (%rsp)
movaps 16(%rsp), %xmm0
callq *%rbx
movaps %xmm0, 32(%rsp)
movaps 16(%rsp), %xmm0
shufps $85, %xmm0, %xmm0
callq *%rbx
movaps 32(%rsp), %xmm1
unpcklps %xmm0, %xmm1
unpcklpd (%rsp), %xmm1
movaps %xmm1, (%r14)
movq %r14, %rax
addq $56, %rsp
popq %rbx
popq %r14
retq
example::f32x4::cos:
pushq %r14
pushq %rbx
subq $56, %rsp
movq %rdi, %r14
movaps (%rsi), %xmm0
movaps %xmm0, 16(%rsp)
shufps $255, %xmm0, %xmm0
movq cosf@GOTPCREL(%rip), %rbx
callq *%rbx
movaps %xmm0, (%rsp)
movaps 16(%rsp), %xmm0
movhlps %xmm0, %xmm0
callq *%rbx
unpcklps (%rsp), %xmm0
movaps %xmm0, (%rsp)
movaps 16(%rsp), %xmm0
callq *%rbx
movaps %xmm0, 32(%rsp)
movaps 16(%rsp), %xmm0
shufps $85, %xmm0, %xmm0
callq *%rbx
movaps 32(%rsp), %xmm1
unpcklps %xmm0, %xmm1
unpcklpd (%rsp), %xmm1
movaps %xmm1, (%r14)
movq %r14, %rax
addq $56, %rsp
popq %rbx
popq %r14
retq
example::f32x4::sin:
pushq %r14
pushq %rbx
subq $56, %rsp
movq %rdi, %r14
movaps (%rsi), %xmm0
movaps %xmm0, 16(%rsp)
shufps $255, %xmm0, %xmm0
movq sinf@GOTPCREL(%rip), %rbx
callq *%rbx
movaps %xmm0, (%rsp)
movaps 16(%rsp), %xmm0
movhlps %xmm0, %xmm0
callq *%rbx
unpcklps (%rsp), %xmm0
movaps %xmm0, (%rsp)
movaps 16(%rsp), %xmm0
callq *%rbx
movaps %xmm0, 32(%rsp)
movaps 16(%rsp), %xmm0
shufps $85, %xmm0, %xmm0
callq *%rbx
movaps 32(%rsp), %xmm1
unpcklps %xmm0, %xmm1
unpcklpd (%rsp), %xmm1
movaps %xmm1, (%r14)
movq %r14, %rax
addq $56, %rsp
popq %rbx
popq %r14
retq
<example::f32x4 as core::fmt::Debug>::fmt:
pushq %rbp
pushq %r15
pushq %r14
pushq %r13
pushq %r12
pushq %rbx
subq $40, %rsp
movq %rdi, %rbx
leaq 4(%rdi), %r12
leaq 8(%rdi), %r13
leaq 12(%rdi), %r15
leaq .L__unnamed_1(%rip), %rdx
leaq 16(%rsp), %r14
movl $5, %ecx
movq %r14, %rdi
callq *core::fmt::Formatter::debug_tuple@GOTPCREL(%rip)
movq %rbx, 8(%rsp)
leaq .L__unnamed_2(%rip), %rbx
movq core::fmt::builders::DebugTuple::field@GOTPCREL(%rip), %rbp
leaq 8(%rsp), %rsi
movq %r14, %rdi
movq %rbx, %rdx
callq *%rbp
movq %r12, 8(%rsp)
leaq 8(%rsp), %rsi
movq %r14, %rdi
movq %rbx, %rdx
callq *%rbp
movq %r13, 8(%rsp)
leaq 8(%rsp), %rsi
movq %r14, %rdi
movq %rbx, %rdx
callq *%rbp
movq %r15, 8(%rsp)
leaq 8(%rsp), %rsi
movq %r14, %rdi
movq %rbx, %rdx
callq *%rbp
movq %r14, %rdi
callq *core::fmt::builders::DebugTuple::finish@GOTPCREL(%rip)
addq $40, %rsp
popq %rbx
popq %r12
popq %r13
popq %r14
popq %r15
popq %rbp
retq
.L__unnamed_1:
.ascii "f32x4"
.L__unnamed_2:
.quad core::ptr::drop_in_place<&f32>
.quad 8
.quad 8
.quad <&T as core::fmt::Debug>::fmt
AArch64 Assembly
<&T as core::fmt::Debug>::fmt:
ldr x0, [x0]
b _ZN4core3fmt5float50_$LT$impl$u20$core..fmt..Debug$u20$for$u20$f32$GT$3fmt17h68f66863527610f0E
core::ptr::drop_in_place<&f32>:
ret
example::f32x4::ceil:
ldr q0, [x0]
frintp v0.4s, v0.4s
str q0, [x8]
ret
example::f32x4::floor:
ldr q0, [x0]
frintm v0.4s, v0.4s
str q0, [x8]
ret
example::f32x4::round:
ldr q0, [x0]
frinta v0.4s, v0.4s
str q0, [x8]
ret
example::f32x4::trunc:
ldr q0, [x0]
frintz v0.4s, v0.4s
str q0, [x8]
ret
example::f32x4::mul_add:
ldr q0, [x0]
ldr q1, [x1]
ldr q2, [x2]
fmla v2.4s, v1.4s, v0.4s
str q2, [x8]
ret
example::f32x4::abs:
ldr q0, [x0]
fabs v0.4s, v0.4s
str q0, [x8]
ret
example::f32x4::sqrt:
ldr q0, [x0]
fsqrt v0.4s, v0.4s
str q0, [x8]
ret
example::f32x4::powi:
sub sp, sp, #64
str x30, [sp, #32]
stp x20, x19, [sp, #48]
ldr q0, [x0]
mov w0, w1
mov w19, w1
mov x20, x8
str q0, [sp, #16]
mov s0, v0.s[1]
bl __powisf2
str d0, [sp]
ldr q0, [sp, #16]
mov w0, w19
bl __powisf2
ldr q1, [sp]
mov w0, w19
mov v0.s[1], v1.s[0]
str q0, [sp]
ldr q0, [sp, #16]
mov s0, v0.s[2]
bl __powisf2
ldr q1, [sp]
mov w0, w19
mov v1.s[2], v0.s[0]
ldr q0, [sp, #16]
str q1, [sp]
mov s0, v0.s[3]
bl __powisf2
ldr q1, [sp]
ldr x30, [sp, #32]
mov v1.s[3], v0.s[0]
str q1, [x20]
ldp x20, x19, [sp, #48]
add sp, sp, #64
ret
example::f32x4::powf:
sub sp, sp, #64
stp x30, x19, [sp, #48]
ldr q0, [x0]
ldr q1, [x1]
mov x19, x8
stp q1, q0, [sp, #16]
mov s0, v0.s[1]
mov s1, v1.s[1]
bl powf
str d0, [sp]
ldp q1, q0, [sp, #16]
bl powf
ldr q1, [sp]
mov v0.s[1], v1.s[0]
str q0, [sp]
ldp q1, q0, [sp, #16]
mov s0, v0.s[2]
mov s1, v1.s[2]
bl powf
ldr q1, [sp]
mov v1.s[2], v0.s[0]
str q1, [sp]
ldp q1, q0, [sp, #16]
mov s0, v0.s[3]
mov s1, v1.s[3]
bl powf
ldr q1, [sp]
mov v1.s[3], v0.s[0]
str q1, [x19]
ldp x30, x19, [sp, #48]
add sp, sp, #64
ret
example::f32x4::flog2:
sub sp, sp, #48
stp x30, x19, [sp, #32]
ldr q0, [x0]
mov x19, x8
str q0, [sp, #16]
mov s0, v0.s[1]
bl log2f
str d0, [sp]
ldr q0, [sp, #16]
bl log2f
ldr q1, [sp]
mov v0.s[1], v1.s[0]
str q0, [sp]
ldr q0, [sp, #16]
mov s0, v0.s[2]
bl log2f
ldr q1, [sp]
mov v1.s[2], v0.s[0]
ldr q0, [sp, #16]
str q1, [sp]
mov s0, v0.s[3]
bl log2f
ldr q1, [sp]
mov v1.s[3], v0.s[0]
str q1, [x19]
ldp x30, x19, [sp, #32]
add sp, sp, #48
ret
example::f32x4::flog10:
sub sp, sp, #48
stp x30, x19, [sp, #32]
ldr q0, [x0]
mov x19, x8
str q0, [sp, #16]
mov s0, v0.s[1]
bl log10f
str d0, [sp]
ldr q0, [sp, #16]
bl log10f
ldr q1, [sp]
mov v0.s[1], v1.s[0]
str q0, [sp]
ldr q0, [sp, #16]
mov s0, v0.s[2]
bl log10f
ldr q1, [sp]
mov v1.s[2], v0.s[0]
ldr q0, [sp, #16]
str q1, [sp]
mov s0, v0.s[3]
bl log10f
ldr q1, [sp]
mov v1.s[3], v0.s[0]
str q1, [x19]
ldp x30, x19, [sp, #32]
add sp, sp, #48
ret
example::f32x4::flog:
sub sp, sp, #48
stp x30, x19, [sp, #32]
ldr q0, [x0]
mov x19, x8
str q0, [sp, #16]
mov s0, v0.s[1]
bl logf
str d0, [sp]
ldr q0, [sp, #16]
bl logf
ldr q1, [sp]
mov v0.s[1], v1.s[0]
str q0, [sp]
ldr q0, [sp, #16]
mov s0, v0.s[2]
bl logf
ldr q1, [sp]
mov v1.s[2], v0.s[0]
ldr q0, [sp, #16]
str q1, [sp]
mov s0, v0.s[3]
bl logf
ldr q1, [sp]
mov v1.s[3], v0.s[0]
str q1, [x19]
ldp x30, x19, [sp, #32]
add sp, sp, #48
ret
example::f32x4::fexp:
sub sp, sp, #48
stp x30, x19, [sp, #32]
ldr q0, [x0]
mov x19, x8
str q0, [sp, #16]
mov s0, v0.s[1]
bl expf
str d0, [sp]
ldr q0, [sp, #16]
bl expf
ldr q1, [sp]
mov v0.s[1], v1.s[0]
str q0, [sp]
ldr q0, [sp, #16]
mov s0, v0.s[2]
bl expf
ldr q1, [sp]
mov v1.s[2], v0.s[0]
ldr q0, [sp, #16]
str q1, [sp]
mov s0, v0.s[3]
bl expf
ldr q1, [sp]
mov v1.s[3], v0.s[0]
str q1, [x19]
ldp x30, x19, [sp, #32]
add sp, sp, #48
ret
example::f32x4::fexp2:
sub sp, sp, #48
stp x30, x19, [sp, #32]
ldr q0, [x0]
mov x19, x8
str q0, [sp, #16]
mov s0, v0.s[1]
bl exp2f
str d0, [sp]
ldr q0, [sp, #16]
bl exp2f
ldr q1, [sp]
mov v0.s[1], v1.s[0]
str q0, [sp]
ldr q0, [sp, #16]
mov s0, v0.s[2]
bl exp2f
ldr q1, [sp]
mov v1.s[2], v0.s[0]
ldr q0, [sp, #16]
str q1, [sp]
mov s0, v0.s[3]
bl exp2f
ldr q1, [sp]
mov v1.s[3], v0.s[0]
str q1, [x19]
ldp x30, x19, [sp, #32]
add sp, sp, #48
ret
example::f32x4::cos:
sub sp, sp, #48
stp x30, x19, [sp, #32]
ldr q0, [x0]
mov x19, x8
str q0, [sp, #16]
mov s0, v0.s[1]
bl cosf
str d0, [sp]
ldr q0, [sp, #16]
bl cosf
ldr q1, [sp]
mov v0.s[1], v1.s[0]
str q0, [sp]
ldr q0, [sp, #16]
mov s0, v0.s[2]
bl cosf
ldr q1, [sp]
mov v1.s[2], v0.s[0]
ldr q0, [sp, #16]
str q1, [sp]
mov s0, v0.s[3]
bl cosf
ldr q1, [sp]
mov v1.s[3], v0.s[0]
str q1, [x19]
ldp x30, x19, [sp, #32]
add sp, sp, #48
ret
example::f32x4::sin:
sub sp, sp, #48
stp x30, x19, [sp, #32]
ldr q0, [x0]
mov x19, x8
str q0, [sp, #16]
mov s0, v0.s[1]
bl sinf
str d0, [sp]
ldr q0, [sp, #16]
bl sinf
ldr q1, [sp]
mov v0.s[1], v1.s[0]
str q0, [sp]
ldr q0, [sp, #16]
mov s0, v0.s[2]
bl sinf
ldr q1, [sp]
mov v1.s[2], v0.s[0]
ldr q0, [sp, #16]
str q1, [sp]
mov s0, v0.s[3]
bl sinf
ldr q1, [sp]
mov v1.s[3], v0.s[0]
str q1, [x19]
ldp x30, x19, [sp, #32]
add sp, sp, #48
ret
<example::f32x4 as core::fmt::Debug>::fmt:
sub sp, sp, #80
str x30, [sp, #32]
stp x22, x21, [sp, #48]
stp x20, x19, [sp, #64]
mov x9, x1
adrp x1, .L__unnamed_1
mov x19, x0
add x20, x0, #4
add x21, x0, #8
add x22, x0, #12
add x1, x1, :lo12:.L__unnamed_1
add x8, sp, #8
mov w2, #5
mov x0, x9
bl core::fmt::Formatter::debug_tuple
str x19, [sp, #40]
adrp x19, .L__unnamed_2
add x19, x19, :lo12:.L__unnamed_2
add x0, sp, #8
add x1, sp, #40
mov x2, x19
bl core::fmt::builders::DebugTuple::field
add x0, sp, #8
add x1, sp, #40
mov x2, x19
str x20, [sp, #40]
bl core::fmt::builders::DebugTuple::field
add x0, sp, #8
add x1, sp, #40
mov x2, x19
str x21, [sp, #40]
bl core::fmt::builders::DebugTuple::field
add x0, sp, #8
add x1, sp, #40
mov x2, x19
str x22, [sp, #40]
bl core::fmt::builders::DebugTuple::field
add x0, sp, #8
bl core::fmt::builders::DebugTuple::finish
ldp x20, x19, [sp, #64]
ldp x22, x21, [sp, #48]
ldr x30, [sp, #32]
add sp, sp, #80
ret
.L__unnamed_1:
.ascii "f32x4"
.L__unnamed_2:
.xword core::ptr::drop_in_place<&f32>
.xword 8
.xword 8
.xword <&T as core::fmt::Debug>::fmt