1111//! other `#[target_feature]` functions.
1212//! As such, once you have used the [`trampoline!`] macro, you can call any intrinsic in [`core::arch`].
1313//!
14- //! This crate also has modules which contain tokens for each Rust target features.
15- //! These allow safely validating that a target feature is available, and obtaining a token.
14+ //! This crate also has modules which contain a token for each Rust target feature.
15+ //! These each have a `try_new` constructor, which validates whether the corresponding
16+ //! target feature is available, then creates a token if it is.
1617//! These are grouped by architecture:
1718//!
1819//! - [`x86`] contains the tokens for both the x86 and x86-64 targets.
19- //! It also contains tokens for each x86-64 microarchitecture level, see [`x86::V1`] for details.
20+ //! It also contains a token for each x86-64 microarchitecture level, see [`x86::V1`] for details.
2021//! <!-- TODO: Other architectures -->
2122//!
2223//! # Examples
2324//!
24- //! At the time of writing, it is not possible to turn scalar values into <abbr title="Single InstructionMultiple Data">SIMD</abbr>
25+ //! At the time of writing, it is not possible to turn scalar values into <abbr title="Single Instruction Multiple Data">SIMD</abbr>
2526//! vector types safely using only the standard library.
2627//! These examples use [bytemuck](https://crates.io/crates/bytemuck) for this.
2728//!
28- //! <!-- TODO -->
29+ //! Note: These examples are currently pending.
30+ //! <!-- TODO: Merge the initial PR, so as to not get the examples bogged down -->
2931//!
3032//! Note that for `aarch64`'s neon, you will want to enable bytemuck's `aarch64_simd` feature.
3133//! This is also the case for WASM with `wasm_simd`, but note that this crate
3234//! [isn't needed on WASM][attributes.codegen.target_feature.wasm], as it is safe to
33- //! call `#[target_features]` on that platform.
35+ //! call `#[target_feature]` functions on that platform.
3436//!
3537//! # Crate Feature Flags
3638//!
37- //! <!-- TODO -->
39+ //! <!-- TODO: We currently only have the "std" feature, which does nothing. -->
3840//!
3941//! # Implementation
4042//!
6769#[ cfg( any( target_arch = "x86" , target_arch = "x86_64" , doc) ) ]
6870pub mod x86;
6971
70- pub mod trampoline ;
72+ pub mod support ;
7173
7274#[ cfg( feature = "std" ) ]
7375extern crate std;
7476
75- /// Token that a set of target feature is available.
77+ /// Token which proves that a set of target feature is available.
7678///
7779/// Note that this trait is only meaningful when there are values of this type.
7880/// That is, to enable the target features in `FEATURES`, you *must* have a value
7981/// of this type.
8082///
81- /// Values which implement this trait are used in the second argument to [`trampoline!`],
83+ /// Values which implement this trait are used in the first argument to [`trampoline!`],
8284/// which is a safe abstraction over enabling target features.
8385///
8486/// # Safety
@@ -104,43 +106,58 @@ pub unsafe trait TargetFeatureToken: Copy {
104106///
105107/// This is effectively a stable implementation of the "Struct Target Features" Rust feature,
106108/// which at the time of writing is neither in stable or nightly Rust.
107- /// This macro can be used to make SIMD dispatch safe in addition to make explicit SIMD, both safely .
109+ /// This macro can be used to make both SIMD dispatch and explicit SIMD safe .
108110///
109111/// # Reference
110112///
111- /// These reference examples presume that you have (values in brackets are the "variables"):
113+ /// These reference examples presume that you have the following.
114+ /// The parts of the examples referring to each prerequisite are provided in the brackets:
112115///
113- /// - An expression (`token`) of a type (`Token`) which is `TargetFeatureToken` for some target features (`"f1,f2,f3"`);
114- /// - A function (signature `fn uses_simd(val: [f32; 4]) -> [f32; 4]`) which is safe but enables a subset of those target features (`"f1,f2"`);
116+ /// - An expression (`token`) of a type (`Token`) which implements `TargetFeatureToken` for some target features (`"f1,f2,f3"`);
117+ /// - A function (signature `fn uses_simd(val: [f32; 4]) -> [f32; 4]`) which is safe but enables a subset
118+ /// of those target features (annotated `#[target_feature(enable = "f1,f2")]`);
115119/// - Local values of types corresponding to the argument types (`a` of type `[f32; 4]`)
116120///
117121/// ```rust,ignore
118- /// trampoline!(Token = token => "f1,f2", uses_simd(a: [f32; 4]) -> [f32; 4])
122+ /// trampoline!(Token = token => "f1,f2,f3", uses_simd(a: [f32; 4]) -> [f32; 4])
123+ /// // Or equivalently, as `uses_simd` doesn't require `f3`:
124+ /// trampoline!(Token = token => "f1,f2", uses_simd(a: [f32; 4]) -> [f32; 4]);
119125/// ```
120126///
121- /// Multiple tokens are also supported by providing them in a sequence in square brackets:
127+ /// Multiple tokens are also supported by providing them in a sequence in square brackets.
128+ /// The target feature string must be a subset of the total features made available by the tokens:
122129///
123130/// ```rust,ignore
124131/// trampoline!([Token = token, Sse = my_sse] => "f1,f2,sse", uses_simd(a: [f32; 4]) -> [f32; 4])
125132/// ```
126133///
134+ /// This is fully validated for safety, so the following example would fail to compile:
135+ ///
136+ /// ```rust,ignore,compile_fail
137+ /// // ERROR: call to function `uses_simd` with `#[target_feature]` is unsafe and requires unsafe block
138+ /// // in order for the call to be safe, the context requires the following additional target feature: f2
139+ /// trampoline!(Token = token => "f1", uses_simd(a: [f32; 4]) -> [f32; 4]);
140+ /// ```
141+ ///
127142/// A more advanced syntax is available if you need to use generics.
128- /// That syntax is explained in comments around the macro's definition, which can be seen above .
143+ /// That syntax is explained in comments around the macro's definition.
129144/// For reference, the implementation used to implement [`vectorize`](TargetFeatureToken::vectorize) for `"sse"` is:
130145///
131146/// ```rust,ignore
132147/// trampoline!([Sse = self] => "sse", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() })
133148/// ```
134149///
135- /// There is also support for where clauses after the return type.
150+ /// There is also support for a where clause, after the return type.
136151///
137152/// # Motivation
138153///
139- /// In Fearless SIMD, this macro has two primary use cases:
154+ /// In Fearless SIMD, this macro is used in three ways primary use cases:
140155///
141- /// 1) To dispatch to a specialised SIMD implementation of a function using target specific
142- /// instructions which will be more efficient than generic version written using the portable subset.
156+ /// 1) By end-users, to dispatch to a specialised SIMD implementation of a function using target specific
157+ /// instructions, which will be more efficient than generic version written using the portable subset.
143158/// 2) To implement the portable subset of SIMD operations.
159+ /// 3) To implement the `dispatch!` macro and `Simd::vectorize`, which allows SIMD intrinsics to
160+ /// be correctly inlined when writing portable SIMD code.
144161///
145162/// To expand on use case 1, when using Fearless SIMD you will often be writing functions which are
146163/// instantiated for multiple different SIMD levels (using generics).
@@ -176,12 +193,6 @@ pub unsafe trait TargetFeatureToken: Copy {
176193/// trampoline!(Token = token => "f1,f2", uses_simd(a: [f32; 4]) -> [f32; 4])
177194/// ```
178195///
179- /// Note that a function only operating on 128 bytes is probably too small for checking
180- /// whether a token exists just for it is worthwhile.
181- /// However, if you have amorphised the cost of that check between many function calls,
182- /// the `trampoline!` macro itself compiles down to a function call.
183- /// (This would be the case when this macro is being used to implement the portable subset of SIMD operations)
184- ///
185196// TODO: We could write an example for each of ARM, x86, and conditionally compile it in?
186197/// Note that our examples are all ignored as there is no target feature which is available on every platform,
187198/// but we need these docs to compile for users on any platform.
@@ -229,12 +240,14 @@ macro_rules! trampoline {
229240 // We validate that we actually have a token of each claimed type.
230241 let _: $token_type = $token;
231242 ) +
232- // We use a const item rather than a const block to ensure that.
233- // This does mean that you can no longer use tokens "generically", but it's hard to think of
234- // cases where that would be usable anyway.
243+ // We use a const item rather than a const block to ensure that the const evaluation happens eagerly,
244+ // ensuring that we don't create functions which look valid but actually will always fail when actually codegenned.
245+ // This does mean that you can't use tokens "generically", but it's hard to think of cases where that
246+ // would be usable anyway. For any case where that is valid, you can always manually create the
247+ // "subsetted" token/tokens beforehand using the `From` impls.
235248 const _: ( ) = {
236249 // And that the claimed types justify enabling the enabled target features.
237- $crate:: trampoline :: is_feature_subset( $to_enable, [ $( <$token_type as $crate:: TargetFeatureToken >:: FEATURES ) ,+] )
250+ $crate:: support :: is_feature_subset( $to_enable, [ $( <$token_type as $crate:: TargetFeatureToken >:: FEATURES ) ,+] )
238251 // TODO: Better failure message here (i.e. at least concatting the set of requested features)
239252 . unwrap( ) ;
240253 } ;
@@ -319,13 +332,13 @@ mod example_expansion {
319332 { sse_mul_f32s ( a, b) }
320333 }
321334 let _: Sse = sse;
322- const {
323- crate :: trampoline :: is_feature_subset (
335+ const _ : ( ) = {
336+ crate :: support :: is_feature_subset (
324337 "sse" ,
325338 [ <Sse as crate :: TargetFeatureToken >:: FEATURES ] ,
326339 )
327340 . unwrap ( ) ;
328- }
341+ } ;
329342 #[ allow( clippy:: redundant_locals, reason = "Required for consistency/safety." ) ]
330343 let a = a;
331344 #[ allow( clippy:: redundant_locals, reason = "Required for consistency/safety." ) ]
0 commit comments