Skip to content

add nvptx_target_feature #138689

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 17 additions & 5 deletions compiler/rustc_codegen_llvm/src/llvm_util.rs
Original file line number Diff line number Diff line change
Expand Up @@ -274,6 +274,15 @@ pub(crate) fn to_llvm_features<'a>(sess: &Session, s: &'a str) -> Option<LLVMFea
// Filter out features that are not supported by the current LLVM version
("aarch64", "fpmr") => None, // only existed in 18
("arm", "fp16") => Some(LLVMFeature::new("fullfp16")),
// NVPTX targets added in LLVM 20
("nvptx64", "sm_100") if get_version().0 < 20 => None,
("nvptx64", "sm_100a") if get_version().0 < 20 => None,
("nvptx64", "sm_101") if get_version().0 < 20 => None,
("nvptx64", "sm_101a") if get_version().0 < 20 => None,
("nvptx64", "sm_120") if get_version().0 < 20 => None,
("nvptx64", "sm_120a") if get_version().0 < 20 => None,
("nvptx64", "ptx86") if get_version().0 < 20 => None,
("nvptx64", "ptx87") if get_version().0 < 20 => None,
// Filter out features that are not supported by the current LLVM version
("loongarch64", "div32" | "lam-bh" | "lamcas" | "ld-seq-sa" | "scq")
if get_version().0 < 20 =>
Expand Down Expand Up @@ -337,11 +346,12 @@ pub(crate) fn target_config(sess: &Session) -> TargetConfig {
let target_machine = create_informational_target_machine(sess, true);
// Compute which of the known target features are enabled in the 'base' target machine. We only
// consider "supported" features; "forbidden" features are not reflected in `cfg` as of now.
let mut cpu_implied_features: Vec<(bool, Symbol)> = Vec::new();
let mut features: FxHashSet<Symbol> = sess
.target
.rust_target_features()
.iter()
.filter(|(feature, _, _)| {
.filter(|(feature, _, implied)| {
// skip checking special features, as LLVM may not understand them
if RUSTC_SPECIAL_FEATURES.contains(feature) {
return true;
Expand All @@ -356,6 +366,7 @@ pub(crate) fn target_config(sess: &Session) -> TargetConfig {
return false;
}
}
cpu_implied_features.extend(implied.iter().map(|f| (true, Symbol::intern(f))));
true
} else {
false
Expand All @@ -364,14 +375,15 @@ pub(crate) fn target_config(sess: &Session) -> TargetConfig {
.map(|(feature, _, _)| Symbol::intern(feature))
.collect();

// Add enabled and remove disabled features.
for (enabled, feature) in
// Parse -Ctarget-feature=+feature1,-feature2
let cg_target_features =
sess.opts.cg.target_feature.split(',').filter_map(|s| match s.chars().next() {
Some('+') => Some((true, Symbol::intern(&s[1..]))),
Some('-') => Some((false, Symbol::intern(&s[1..]))),
_ => None,
})
{
});
// Add features implied by -Ctarget-cpu followed by enabling/removing those specified by -Ctarget-feature
for (enabled, feature) in cpu_implied_features.into_iter().chain(cg_target_features) {
if enabled {
// Also add all transitively implied features.

Expand Down
1 change: 1 addition & 0 deletions compiler/rustc_feature/src/unstable.rs
Original file line number Diff line number Diff line change
Expand Up @@ -327,6 +327,7 @@ declare_features! (
(unstable, m68k_target_feature, "1.85.0", Some(134328)),
(unstable, mips_target_feature, "1.27.0", Some(44839)),
(unstable, movrs_target_feature, "1.88.0", Some(137976)),
(unstable, nvptx_target_feature, "CURRENT_RUSTC_VERSION", Some(44839)),
(unstable, powerpc_target_feature, "1.27.0", Some(44839)),
(unstable, prfchw_target_feature, "1.78.0", Some(44839)),
(unstable, riscv_target_feature, "1.45.0", Some(44839)),
Expand Down
1 change: 1 addition & 0 deletions compiler/rustc_span/src/symbol.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1482,6 +1482,7 @@ symbols! {
not,
notable_trait,
note,
nvptx_target_feature,
object_safe_for_dispatch,
of,
off,
Expand Down
68 changes: 68 additions & 0 deletions compiler/rustc_target/src/target_features.rs
Original file line number Diff line number Diff line change
Expand Up @@ -512,6 +512,71 @@ const MIPS_FEATURES: &[(&str, Stability, ImpliedFeatures)] = &[
// tidy-alphabetical-end
];

/// Target features recognized for the `nvptx64` architecture.
///
/// Every entry is gated behind the unstable `nvptx_target_feature` feature gate.
/// The table holds two independent chains:
///
/// * `sm_*`: each entry implies the entry listed before it in the chain, so enabling one
///   `sm_*` feature transitively enables all earlier ones. The `a`-suffixed entries
///   (`sm_90a`, `sm_100a`, `sm_101a`, `sm_120a`) imply their un-suffixed base and are
///   not implied by any other entry.
/// * `ptx*`: each entry implies the immediately preceding PTX version.
///
/// `sm_100` and later, as well as `ptx86` and `ptx87`, are filtered out by the LLVM
/// backend when the LLVM major version is below 20.
const NVPTX_FEATURES: &[(&str, Stability, ImpliedFeatures)] = &[
// `sm_*` entries with a two-digit number.
// tidy-alphabetical-start
("sm_20", Unstable(sym::nvptx_target_feature), &[]),
("sm_21", Unstable(sym::nvptx_target_feature), &["sm_20"]),
("sm_30", Unstable(sym::nvptx_target_feature), &["sm_21"]),
("sm_32", Unstable(sym::nvptx_target_feature), &["sm_30"]),
("sm_35", Unstable(sym::nvptx_target_feature), &["sm_32"]),
("sm_37", Unstable(sym::nvptx_target_feature), &["sm_35"]),
("sm_50", Unstable(sym::nvptx_target_feature), &["sm_37"]),
("sm_52", Unstable(sym::nvptx_target_feature), &["sm_50"]),
("sm_53", Unstable(sym::nvptx_target_feature), &["sm_52"]),
("sm_60", Unstable(sym::nvptx_target_feature), &["sm_53"]),
("sm_61", Unstable(sym::nvptx_target_feature), &["sm_60"]),
("sm_62", Unstable(sym::nvptx_target_feature), &["sm_61"]),
("sm_70", Unstable(sym::nvptx_target_feature), &["sm_62"]),
("sm_72", Unstable(sym::nvptx_target_feature), &["sm_70"]),
("sm_75", Unstable(sym::nvptx_target_feature), &["sm_72"]),
("sm_80", Unstable(sym::nvptx_target_feature), &["sm_75"]),
("sm_86", Unstable(sym::nvptx_target_feature), &["sm_80"]),
("sm_87", Unstable(sym::nvptx_target_feature), &["sm_86"]),
("sm_89", Unstable(sym::nvptx_target_feature), &["sm_87"]),
("sm_90", Unstable(sym::nvptx_target_feature), &["sm_89"]),
("sm_90a", Unstable(sym::nvptx_target_feature), &["sm_90"]),
// tidy-alphabetical-end
// `sm_*` entries with a three-digit number.
// NOTE(review): presumably kept in a separate tidy block so they can follow `sm_90a`
// in numeric order while still passing the alphabetical check -- confirm.
// tidy-alphabetical-start
("sm_100", Unstable(sym::nvptx_target_feature), &["sm_90"]),
("sm_100a", Unstable(sym::nvptx_target_feature), &["sm_100"]),
("sm_101", Unstable(sym::nvptx_target_feature), &["sm_100"]),
("sm_101a", Unstable(sym::nvptx_target_feature), &["sm_101"]),
("sm_120", Unstable(sym::nvptx_target_feature), &["sm_101"]),
("sm_120a", Unstable(sym::nvptx_target_feature), &["sm_120"]),
// tidy-alphabetical-end
// PTX ISA versions; each one implies the previous version.
// tidy-alphabetical-start
("ptx32", Unstable(sym::nvptx_target_feature), &[]),
("ptx40", Unstable(sym::nvptx_target_feature), &["ptx32"]),
("ptx41", Unstable(sym::nvptx_target_feature), &["ptx40"]),
("ptx42", Unstable(sym::nvptx_target_feature), &["ptx41"]),
("ptx43", Unstable(sym::nvptx_target_feature), &["ptx42"]),
("ptx50", Unstable(sym::nvptx_target_feature), &["ptx43"]),
("ptx60", Unstable(sym::nvptx_target_feature), &["ptx50"]),
("ptx61", Unstable(sym::nvptx_target_feature), &["ptx60"]),
("ptx62", Unstable(sym::nvptx_target_feature), &["ptx61"]),
("ptx63", Unstable(sym::nvptx_target_feature), &["ptx62"]),
("ptx64", Unstable(sym::nvptx_target_feature), &["ptx63"]),
("ptx65", Unstable(sym::nvptx_target_feature), &["ptx64"]),
("ptx70", Unstable(sym::nvptx_target_feature), &["ptx65"]),
("ptx71", Unstable(sym::nvptx_target_feature), &["ptx70"]),
("ptx72", Unstable(sym::nvptx_target_feature), &["ptx71"]),
("ptx73", Unstable(sym::nvptx_target_feature), &["ptx72"]),
("ptx74", Unstable(sym::nvptx_target_feature), &["ptx73"]),
("ptx75", Unstable(sym::nvptx_target_feature), &["ptx74"]),
("ptx76", Unstable(sym::nvptx_target_feature), &["ptx75"]),
("ptx77", Unstable(sym::nvptx_target_feature), &["ptx76"]),
("ptx78", Unstable(sym::nvptx_target_feature), &["ptx77"]),
("ptx80", Unstable(sym::nvptx_target_feature), &["ptx78"]),
("ptx81", Unstable(sym::nvptx_target_feature), &["ptx80"]),
("ptx82", Unstable(sym::nvptx_target_feature), &["ptx81"]),
("ptx83", Unstable(sym::nvptx_target_feature), &["ptx82"]),
("ptx84", Unstable(sym::nvptx_target_feature), &["ptx83"]),
("ptx85", Unstable(sym::nvptx_target_feature), &["ptx84"]),
("ptx86", Unstable(sym::nvptx_target_feature), &["ptx85"]),
("ptx87", Unstable(sym::nvptx_target_feature), &["ptx86"]),
// tidy-alphabetical-end
];

static RISCV_FEATURES: &[(&str, Stability, ImpliedFeatures)] = &[
// tidy-alphabetical-start
("a", Stable, &["zaamo", "zalrsc"]),
Expand Down Expand Up @@ -770,6 +835,7 @@ pub fn all_rust_features() -> impl Iterator<Item = (&'static str, Stability)> {
.chain(HEXAGON_FEATURES.iter())
.chain(POWERPC_FEATURES.iter())
.chain(MIPS_FEATURES.iter())
.chain(NVPTX_FEATURES.iter())
.chain(RISCV_FEATURES.iter())
.chain(WASM_FEATURES.iter())
.chain(BPF_FEATURES.iter())
Expand Down Expand Up @@ -835,6 +901,7 @@ impl Target {
"x86" | "x86_64" => X86_FEATURES,
"hexagon" => HEXAGON_FEATURES,
"mips" | "mips32r6" | "mips64" | "mips64r6" => MIPS_FEATURES,
"nvptx64" => NVPTX_FEATURES,
"powerpc" | "powerpc64" => POWERPC_FEATURES,
"riscv32" | "riscv64" => RISCV_FEATURES,
"wasm32" | "wasm64" => WASM_FEATURES,
Expand All @@ -861,6 +928,7 @@ impl Target {
"sparc" | "sparc64" => SPARC_FEATURES_FOR_CORRECT_VECTOR_ABI,
"hexagon" => HEXAGON_FEATURES_FOR_CORRECT_VECTOR_ABI,
"mips" | "mips32r6" | "mips64" | "mips64r6" => MIPS_FEATURES_FOR_CORRECT_VECTOR_ABI,
"nvptx64" => &[], // no vector ABI
"bpf" | "m68k" => &[], // no vector ABI
"csky" => CSKY_FEATURES_FOR_CORRECT_VECTOR_ABI,
// FIXME: for some tier3 targets, we are overly cautious and always give warnings
Expand Down
1 change: 1 addition & 0 deletions library/core/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,7 @@
// Target features:
// tidy-alphabetical-start
#![cfg_attr(bootstrap, feature(avx512_target_feature))]
#![cfg_attr(not(bootstrap), feature(nvptx_target_feature))]
#![feature(aarch64_unstable_target_feature)]
#![feature(arm_target_feature)]
#![feature(hexagon_target_feature)]
Expand Down
34 changes: 34 additions & 0 deletions src/doc/rustc/src/platform-support/nvptx64-nvidia-cuda.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,40 @@ platform.
[@RDambrosio016](https://github.com/RDambrosio016)
[@kjetilkjeka](https://github.com/kjetilkjeka)

## Requirements

This target is `no_std` and will typically be built with crate-type `cdylib` and `-C linker-flavor=llbc`, which generates PTX.
The necessary components for this workflow are:

- `rustup toolchain add nightly`
- `rustup component add llvm-tools --toolchain nightly`
- `rustup component add llvm-bitcode-linker --toolchain nightly`

There are two options for using the core library:

- `rustup component add rust-src --toolchain nightly` and build using `-Z build-std=core`.
- `rustup target add nvptx64-nvidia-cuda --toolchain nightly` to install a prebuilt `core` for this target.

### Target and features

It is necessary to specify the target processor with `-C target-cpu`, for example `-C target-cpu=sm_89`. This implies two target features: `sm_89` and `ptx78` (and all preceding features within `sm_*` and `ptx*`). Rust will default to using the oldest PTX version that supports the target processor (see [this table](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#release-notes-ptx-release-history)), which maximizes driver compatibility.
One can use `-C target-feature=+ptx80` to choose a later PTX version without changing the target (the default `ptx78` requires CUDA driver version 11.8, while `ptx80` would require driver version 12.0).

Although `ptx*` is represented as a target feature, it is a compile-time property and it is not possible to build a crate that uses instructions not present in the PTX version specified at compile-time (either via `target-cpu` or `target-feature`).
For example, consider an unaligned barrier `barrier.sync`, which requires both `sm_70` and `ptx60`.
If one wants to support building for older devices (e.g., `-C target-cpu=sm_62`) while ensuring that this unaligned barrier is unreachable at run-time on such devices, the function that uses the barrier could carry these attributes:
```rust,ignore
#[cfg(target_feature = "ptx60")]
#[target_feature(enable = "sm_70")]
```

## Building Rust kernels

A `no_std` crate containing one or more functions with `extern "ptx-kernel"` can be compiled to PTX using a command like the following.

```console
$ cargo +nightly rustc --target=nvptx64-nvidia-cuda -Zbuild-std=core --crate-type=cdylib -- -Clinker-flavor=llbc -Ctarget-cpu=sm_89 -Zunstable-options
```
Comment on lines +42 to +46
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There are a few moving parts to complete kernels and how to use those on the host. That seems like too much for these docs, and would be better as a stand-alone repo or an example in cudarc. Let me know if you have other suggestions (or examples for other arches) that I should consider.

<!-- FIXME: fill this out

## Requirements
Expand Down
56 changes: 56 additions & 0 deletions tests/ui/check-cfg/target_feature.stderr
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,35 @@ LL | cfg!(target_feature = "_UNEXPECTED_VALUE");
`power9-altivec`
`power9-vector`
`prfchw`
`ptx32`
`ptx40`
`ptx41`
`ptx42`
`ptx43`
`ptx50`
`ptx60`
`ptx61`
`ptx62`
`ptx63`
`ptx64`
`ptx65`
`ptx70`
`ptx71`
`ptx72`
`ptx73`
`ptx74`
`ptx75`
`ptx76`
`ptx77`
`ptx78`
`ptx80`
`ptx81`
`ptx82`
`ptx83`
`ptx84`
`ptx85`
`ptx86`
`ptx87`
`quadword-atomics`
`rand`
`ras`
Expand All @@ -213,6 +242,33 @@ LL | cfg!(target_feature = "_UNEXPECTED_VALUE");
`simd128`
`sm3`
`sm4`
`sm_100`
`sm_100a`
`sm_101`
`sm_101a`
`sm_120`
`sm_120a`
`sm_20`
`sm_21`
`sm_30`
`sm_32`
`sm_35`
`sm_37`
`sm_50`
`sm_52`
`sm_53`
`sm_60`
`sm_61`
`sm_62`
`sm_70`
`sm_72`
`sm_75`
`sm_80`
`sm_86`
`sm_87`
`sm_89`
`sm_90`
`sm_90a`
`sme`
`sme-b16b16`
`sme-f16f16`
Expand Down
1 change: 1 addition & 0 deletions tests/ui/target-feature/gate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
// gate-test-arm_target_feature
// gate-test-hexagon_target_feature
// gate-test-mips_target_feature
// gate-test-nvptx_target_feature
// gate-test-wasm_target_feature
// gate-test-adx_target_feature
// gate-test-cmpxchg16b_target_feature
Expand Down
2 changes: 1 addition & 1 deletion tests/ui/target-feature/gate.stderr
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
error[E0658]: the target feature `x87` is currently unstable
--> $DIR/gate.rs:29:18
--> $DIR/gate.rs:30:18
|
LL | #[target_feature(enable = "x87")]
| ^^^^^^^^^^^^^^
Expand Down
28 changes: 28 additions & 0 deletions tests/ui/target-feature/implied-features-nvptx.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
//@ assembly-output: ptx-linker
//@ compile-flags: --crate-type cdylib -C target-cpu=sm_80 -Z unstable-options -Clinker-flavor=llbc
//@ only-nvptx64
//@ build-pass
// Checks which `target_feature` cfgs are set when building with `-C target-cpu=sm_80`:
// the features named directly, the features they imply, and the absence of newer ones.
#![no_std]
#![allow(dead_code)]

// Minimal panic handler, required because the crate is `no_std`.
#[panic_handler]
pub fn panic(_info: &core::panic::PanicInfo) -> ! {
loop {}
}

// -Ctarget-cpu=sm_80 directly enables sm_80 and ptx70
#[cfg(not(all(target_feature = "sm_80", target_feature = "ptx70")))]
compile_error!("direct target features not enabled");

// -Ctarget-cpu=sm_80 implies all earlier sm_* and ptx* features.
#[cfg(not(all(
target_feature = "sm_60",
target_feature = "sm_70",
target_feature = "ptx50",
target_feature = "ptx60",
)))]
compile_error!("implied target features not enabled");

// -Ctarget-cpu=sm_80 must not enable PTX versions newer than the ptx70 it requires.
#[cfg(target_feature = "ptx71")]
compile_error!("sm_80 requires only ptx70, but ptx71 enabled");
Loading