Skip to content

Commit 18d4928

Browse files
author
Jorge Aparicio
committed
PTX support
- `--emit=asm --target=nvptx64-nvidia-cuda` can be used to turn a crate into a PTX module (a `.s` file).
- Intrinsics like `__syncthreads` and `blockIdx.x` are exposed as `"platform-intrinsics"`.
- "cabi" has been implemented for the nvptx and nvptx64 architectures, i.e. `extern "C"` works.
- A new ABI, `"ptx-kernel"`, can be used to generate "global" functions. Example: `extern "ptx-kernel" fn kernel() { .. }`. All other functions are "device" functions.
1 parent b7e5148 commit 18d4928

File tree

15 files changed

+344
-3
lines changed

15 files changed

+344
-3
lines changed

src/bootstrap/native.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,7 @@ pub fn llvm(build: &Build, target: &str) {
8181
.profile(profile)
8282
.define("LLVM_ENABLE_ASSERTIONS", assertions)
8383
.define("LLVM_TARGETS_TO_BUILD",
84-
"X86;ARM;AArch64;Mips;PowerPC;SystemZ;JSBackend;MSP430;Sparc")
84+
"X86;ARM;AArch64;Mips;PowerPC;SystemZ;JSBackend;MSP430;Sparc;NVPTX")
8585
.define("LLVM_INCLUDE_EXAMPLES", "OFF")
8686
.define("LLVM_INCLUDE_TESTS", "OFF")
8787
.define("LLVM_INCLUDE_DOCS", "OFF")
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
{
2+
"intrinsic_prefix": "_",
3+
"llvm_prefix": "llvm.cuda.",
4+
"intrinsics": [
5+
{
6+
"intrinsic": "syncthreads",
7+
"width": ["0"],
8+
"llvm": "syncthreads",
9+
"ret": "V",
10+
"args": []
11+
}
12+
]
13+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
{
2+
"platform": "nvptx",
3+
"number_info": {
4+
"signed": {}
5+
},
6+
"width_info": {}
7+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
{
2+
"intrinsic_prefix": "_",
3+
"llvm_prefix": "llvm.nvvm.read.ptx.sreg.",
4+
"intrinsics": [
5+
{
6+
"intrinsic": "block_dim_x",
7+
"width": ["0"],
8+
"llvm": "ntid.x",
9+
"ret": "S32",
10+
"args": []
11+
},
12+
{
13+
"intrinsic": "block_dim_y",
14+
"width": ["0"],
15+
"llvm": "ntid.y",
16+
"ret": "S32",
17+
"args": []
18+
},
19+
{
20+
"intrinsic": "block_dim_z",
21+
"width": ["0"],
22+
"llvm": "ntid.z",
23+
"ret": "S32",
24+
"args": []
25+
},
26+
{
27+
"intrinsic": "block_idx_x",
28+
"width": ["0"],
29+
"llvm": "ctaid.x",
30+
"ret": "S32",
31+
"args": []
32+
},
33+
{
34+
"intrinsic": "block_idx_y",
35+
"width": ["0"],
36+
"llvm": "ctaid.y",
37+
"ret": "S32",
38+
"args": []
39+
},
40+
{
41+
"intrinsic": "block_idx_z",
42+
"width": ["0"],
43+
"llvm": "ctaid.z",
44+
"ret": "S32",
45+
"args": []
46+
},
47+
{
48+
"intrinsic": "grid_dim_x",
49+
"width": ["0"],
50+
"llvm": "nctaid.x",
51+
"ret": "S32",
52+
"args": []
53+
},
54+
{
55+
"intrinsic": "grid_dim_y",
56+
"width": ["0"],
57+
"llvm": "nctaid.y",
58+
"ret": "S32",
59+
"args": []
60+
},
61+
{
62+
"intrinsic": "grid_dim_z",
63+
"width": ["0"],
64+
"llvm": "nctaid.z",
65+
"ret": "S32",
66+
"args": []
67+
},
68+
{
69+
"intrinsic": "thread_idx_x",
70+
"width": ["0"],
71+
"llvm": "tid.x",
72+
"ret": "S32",
73+
"args": []
74+
},
75+
{
76+
"intrinsic": "thread_idx_y",
77+
"width": ["0"],
78+
"llvm": "tid.y",
79+
"ret": "S32",
80+
"args": []
81+
},
82+
{
83+
"intrinsic": "thread_idx_z",
84+
"width": ["0"],
85+
"llvm": "tid.z",
86+
"ret": "S32",
87+
"args": []
88+
}
89+
]
90+
}

src/librustc_llvm/build.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,7 @@ fn main() {
9696

9797
let optional_components =
9898
["x86", "arm", "aarch64", "mips", "powerpc", "pnacl", "systemz", "jsbackend", "msp430",
99-
"sparc"];
99+
"sparc", "nvptx"];
100100

101101
// FIXME: surely we don't need all these components, right? Stuff like mcjit
102102
// or interpreter the compiler itself never uses.

src/librustc_llvm/ffi.rs

+1
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ pub enum CallConv {
4242
X86StdcallCallConv = 64,
4343
X86FastcallCallConv = 65,
4444
ArmAapcsCallConv = 67,
45+
PtxKernel = 71,
4546
X86_64_SysV = 78,
4647
X86_64_Win64 = 79,
4748
X86_VectorCall = 80,

src/librustc_llvm/lib.rs

+5
Original file line numberDiff line numberDiff line change
@@ -377,6 +377,11 @@ pub fn initialize_available_targets() {
377377
LLVMInitializeSparcTargetMC,
378378
LLVMInitializeSparcAsmPrinter,
379379
LLVMInitializeSparcAsmParser);
380+
init_target!(llvm_component = "nvptx",
381+
LLVMInitializeNVPTXTargetInfo,
382+
LLVMInitializeNVPTXTarget,
383+
LLVMInitializeNVPTXTargetMC,
384+
LLVMInitializeNVPTXAsmPrinter);
380385
}
381386

382387
pub fn last_error() -> Option<String> {

src/librustc_platform_intrinsics/lib.rs

+3
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,7 @@ static VOID: Type = Type::Void;
9595
mod x86;
9696
mod arm;
9797
mod aarch64;
98+
mod nvptx;
9899

99100
impl Intrinsic {
100101
pub fn find(name: &str) -> Option<Intrinsic> {
@@ -104,6 +105,8 @@ impl Intrinsic {
104105
arm::find(name)
105106
} else if name.starts_with("aarch64_") {
106107
aarch64::find(name)
108+
} else if name.starts_with("nvptx_") {
109+
nvptx::find(name)
107110
} else {
108111
None
109112
}
+92
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
// Copyright 2015 The Rust Project Developers. See the COPYRIGHT
2+
// file at the top-level directory of this distribution and at
3+
// http://rust-lang.org/COPYRIGHT.
4+
//
5+
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6+
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7+
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8+
// option. This file may not be copied, modified, or distributed
9+
// except according to those terms.
10+
11+
// DO NOT EDIT: autogenerated by etc/platform-intrinsics/generator.py
12+
// ignore-tidy-linelength
13+
14+
#![allow(unused_imports)]
15+
16+
use {Intrinsic, Type};
17+
use IntrinsicDef::Named;
18+
19+
// The default inlining settings trigger a pathological behaviour in
20+
// LLVM, which makes compilation very slow. See #28273.
21+
#[inline(never)]
22+
pub fn find(name: &str) -> Option<Intrinsic> {
23+
if !name.starts_with("nvptx") { return None }
24+
Some(match &name["nvptx".len()..] {
25+
"_syncthreads" => Intrinsic {
26+
inputs: { static INPUTS: [&'static Type; 0] = []; &INPUTS },
27+
output: &::VOID,
28+
definition: Named("llvm.cuda.syncthreads")
29+
},
30+
"_block_dim_x" => Intrinsic {
31+
inputs: { static INPUTS: [&'static Type; 0] = []; &INPUTS },
32+
output: &::I32,
33+
definition: Named("llvm.nvvm.read.ptx.sreg.ntid.x")
34+
},
35+
"_block_dim_y" => Intrinsic {
36+
inputs: { static INPUTS: [&'static Type; 0] = []; &INPUTS },
37+
output: &::I32,
38+
definition: Named("llvm.nvvm.read.ptx.sreg.ntid.y")
39+
},
40+
"_block_dim_z" => Intrinsic {
41+
inputs: { static INPUTS: [&'static Type; 0] = []; &INPUTS },
42+
output: &::I32,
43+
definition: Named("llvm.nvvm.read.ptx.sreg.ntid.z")
44+
},
45+
"_block_idx_x" => Intrinsic {
46+
inputs: { static INPUTS: [&'static Type; 0] = []; &INPUTS },
47+
output: &::I32,
48+
definition: Named("llvm.nvvm.read.ptx.sreg.ctaid.x")
49+
},
50+
"_block_idx_y" => Intrinsic {
51+
inputs: { static INPUTS: [&'static Type; 0] = []; &INPUTS },
52+
output: &::I32,
53+
definition: Named("llvm.nvvm.read.ptx.sreg.ctaid.y")
54+
},
55+
"_block_idx_z" => Intrinsic {
56+
inputs: { static INPUTS: [&'static Type; 0] = []; &INPUTS },
57+
output: &::I32,
58+
definition: Named("llvm.nvvm.read.ptx.sreg.ctaid.z")
59+
},
60+
"_grid_dim_x" => Intrinsic {
61+
inputs: { static INPUTS: [&'static Type; 0] = []; &INPUTS },
62+
output: &::I32,
63+
definition: Named("llvm.nvvm.read.ptx.sreg.nctaid.x")
64+
},
65+
"_grid_dim_y" => Intrinsic {
66+
inputs: { static INPUTS: [&'static Type; 0] = []; &INPUTS },
67+
output: &::I32,
68+
definition: Named("llvm.nvvm.read.ptx.sreg.nctaid.y")
69+
},
70+
"_grid_dim_z" => Intrinsic {
71+
inputs: { static INPUTS: [&'static Type; 0] = []; &INPUTS },
72+
output: &::I32,
73+
definition: Named("llvm.nvvm.read.ptx.sreg.nctaid.z")
74+
},
75+
"_thread_idx_x" => Intrinsic {
76+
inputs: { static INPUTS: [&'static Type; 0] = []; &INPUTS },
77+
output: &::I32,
78+
definition: Named("llvm.nvvm.read.ptx.sreg.tid.x")
79+
},
80+
"_thread_idx_y" => Intrinsic {
81+
inputs: { static INPUTS: [&'static Type; 0] = []; &INPUTS },
82+
output: &::I32,
83+
definition: Named("llvm.nvvm.read.ptx.sreg.tid.y")
84+
},
85+
"_thread_idx_z" => Intrinsic {
86+
inputs: { static INPUTS: [&'static Type; 0] = []; &INPUTS },
87+
output: &::I32,
88+
definition: Named("llvm.nvvm.read.ptx.sreg.tid.z")
89+
},
90+
_ => return None,
91+
})
92+
}

src/librustc_trans/abi.rs

+5
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,8 @@ use cabi_mips64;
2525
use cabi_asmjs;
2626
use cabi_msp430;
2727
use cabi_sparc;
28+
use cabi_nvptx;
29+
use cabi_nvptx64;
2830
use machine::{llalign_of_min, llsize_of, llsize_of_alloc};
2931
use type_::Type;
3032
use type_of;
@@ -353,6 +355,7 @@ impl FnType {
353355
Win64 => llvm::X86_64_Win64,
354356
SysV64 => llvm::X86_64_SysV,
355357
Aapcs => llvm::ArmAapcsCallConv,
358+
PtxKernel => llvm::PtxKernel,
356359

357360
// These API constants ought to be more specific...
358361
Cdecl => llvm::CCallConv,
@@ -608,6 +611,8 @@ impl FnType {
608611
"wasm32" => cabi_asmjs::compute_abi_info(ccx, self),
609612
"msp430" => cabi_msp430::compute_abi_info(ccx, self),
610613
"sparc" => cabi_sparc::compute_abi_info(ccx, self),
614+
"nvptx" => cabi_nvptx::compute_abi_info(ccx, self),
615+
"nvptx64" => cabi_nvptx64::compute_abi_info(ccx, self),
611616
a => ccx.sess().fatal(&format!("unrecognized arch \"{}\" in target specification", a))
612617
}
613618

src/librustc_trans/cabi_nvptx.rs

+53
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
// Copyright 2016 The Rust Project Developers. See the COPYRIGHT
2+
// file at the top-level directory of this distribution and at
3+
// http://rust-lang.org/COPYRIGHT.
4+
//
5+
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6+
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7+
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8+
// option. This file may not be copied, modified, or distributed
9+
// except according to those terms.
10+
11+
// Reference: PTX Writer's Guide to Interoperability
12+
// http://docs.nvidia.com/cuda/ptx-writers-guide-to-interoperability
13+
14+
#![allow(non_upper_case_globals)]
15+
16+
use llvm::Struct;
17+
18+
use abi::{self, ArgType, FnType};
19+
use context::CrateContext;
20+
use type_::Type;
21+
22+
fn ty_size(ty: Type) -> usize {
23+
abi::ty_size(ty, 4)
24+
}
25+
26+
fn classify_ret_ty(ccx: &CrateContext, ret: &mut ArgType) {
27+
if ret.ty.kind() == Struct && ty_size(ret.ty) > 32 {
28+
ret.make_indirect(ccx);
29+
} else {
30+
ret.extend_integer_width_to(32);
31+
}
32+
}
33+
34+
fn classify_arg_ty(ccx: &CrateContext, arg: &mut ArgType) {
35+
if arg.ty.kind() == Struct && ty_size(arg.ty) > 32 {
36+
arg.make_indirect(ccx);
37+
} else {
38+
arg.extend_integer_width_to(32);
39+
}
40+
}
41+
42+
pub fn compute_abi_info(ccx: &CrateContext, fty: &mut FnType) {
43+
if !fty.ret.is_ignore() {
44+
classify_ret_ty(ccx, &mut fty.ret);
45+
}
46+
47+
for arg in &mut fty.args {
48+
if arg.is_ignore() {
49+
continue;
50+
}
51+
classify_arg_ty(ccx, arg);
52+
}
53+
}

src/librustc_trans/cabi_nvptx64.rs

+53
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
// Copyright 2016 The Rust Project Developers. See the COPYRIGHT
2+
// file at the top-level directory of this distribution and at
3+
// http://rust-lang.org/COPYRIGHT.
4+
//
5+
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6+
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7+
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8+
// option. This file may not be copied, modified, or distributed
9+
// except according to those terms.
10+
11+
// Reference: PTX Writer's Guide to Interoperability
12+
// http://docs.nvidia.com/cuda/ptx-writers-guide-to-interoperability
13+
14+
#![allow(non_upper_case_globals)]
15+
16+
use llvm::Struct;
17+
18+
use abi::{self, ArgType, FnType};
19+
use context::CrateContext;
20+
use type_::Type;
21+
22+
fn ty_size(ty: Type) -> usize {
23+
abi::ty_size(ty, 8)
24+
}
25+
26+
fn classify_ret_ty(ccx: &CrateContext, ret: &mut ArgType) {
27+
if ret.ty.kind() == Struct && ty_size(ret.ty) > 64 {
28+
ret.make_indirect(ccx);
29+
} else {
30+
ret.extend_integer_width_to(64);
31+
}
32+
}
33+
34+
fn classify_arg_ty(ccx: &CrateContext, arg: &mut ArgType) {
35+
if arg.ty.kind() == Struct && ty_size(arg.ty) > 64 {
36+
arg.make_indirect(ccx);
37+
} else {
38+
arg.extend_integer_width_to(64);
39+
}
40+
}
41+
42+
pub fn compute_abi_info(ccx: &CrateContext, fty: &mut FnType) {
43+
if !fty.ret.is_ignore() {
44+
classify_ret_ty(ccx, &mut fty.ret);
45+
}
46+
47+
for arg in &mut fty.args {
48+
if arg.is_ignore() {
49+
continue;
50+
}
51+
classify_arg_ty(ccx, arg);
52+
}
53+
}

0 commit comments

Comments
 (0)