Skip to content

Commit f185093

Browse files
committed
postpone device side generation
1 parent e79f1fa commit f185093

File tree

1 file changed

+113
-0
lines changed

1 file changed

+113
-0
lines changed
Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
1+
use std::ffi::{CString, c_uint};
2+
3+
use llvm::Linkage::*;
4+
use rustc_codegen_ssa::back::write::CodegenContext;
5+
6+
use crate::llvm::{self, Linkage};
7+
use crate::{LlvmCodegenBackend, SimpleCx};
8+
9+
fn add_unnamed_global_in_addrspace<'ll>(
10+
cx: &SimpleCx<'ll>,
11+
name: &str,
12+
initializer: &'ll llvm::Value,
13+
l: Linkage,
14+
addrspace: u32,
15+
) -> &'ll llvm::Value {
16+
let llglobal = add_global_in_addrspace(cx, name, initializer, l, addrspace);
17+
unsafe { llvm::LLVMSetUnnamedAddress(llglobal, llvm::UnnamedAddr::Global) };
18+
llglobal
19+
}
20+
21+
pub(crate) fn add_global_in_addrspace<'ll>(
22+
cx: &SimpleCx<'ll>,
23+
name: &str,
24+
initializer: &'ll llvm::Value,
25+
l: Linkage,
26+
addrspace: u32,
27+
) -> &'ll llvm::Value {
28+
let c_name = CString::new(name).unwrap();
29+
let llglobal: &'ll llvm::Value = llvm::add_global_in_addrspace(
30+
cx.llmod,
31+
cx.val_ty(initializer),
32+
&c_name,
33+
addrspace as c_uint,
34+
);
35+
llvm::set_global_constant(llglobal, true);
36+
llvm::set_linkage(llglobal, l);
37+
llvm::set_initializer(llglobal, initializer);
38+
llglobal
39+
}
40+
41+
#[allow(unused)]
42+
pub(crate) fn gen_asdf<'ll>(cgcx: &CodegenContext<LlvmCodegenBackend>, _old_cx: &SimpleCx<'ll>) {
43+
let llcx = unsafe { llvm::LLVMRustContextCreate(false) };
44+
let module_name = CString::new("offload.wrapper.module").unwrap();
45+
let llmod = unsafe { llvm::LLVMModuleCreateWithNameInContext(module_name.as_ptr(), llcx) };
46+
let cx = SimpleCx::new(llmod, llcx, cgcx.pointer_size);
47+
let initializer = cx.get_const_i32(0);
48+
add_unnamed_global_in_addrspace(&cx, "__omp_rtl_debug_kind", initializer, WeakODRLinkage, 1);
49+
add_unnamed_global_in_addrspace(
50+
&cx,
51+
"__omp_rtl_assume_teams_oversubscription",
52+
initializer,
53+
WeakODRLinkage,
54+
1,
55+
);
56+
add_unnamed_global_in_addrspace(
57+
&cx,
58+
"__omp_rtl_assume_threads_oversubscription",
59+
initializer,
60+
WeakODRLinkage,
61+
1,
62+
);
63+
add_unnamed_global_in_addrspace(
64+
&cx,
65+
"__omp_rtl_assume_no_thread_state",
66+
initializer,
67+
WeakODRLinkage,
68+
1,
69+
);
70+
add_unnamed_global_in_addrspace(
71+
&cx,
72+
"__oclc_ABI_version",
73+
cx.get_const_i32(500),
74+
WeakODRLinkage,
75+
4,
76+
);
77+
unsafe {
78+
llvm::LLVMPrintModuleToFile(
79+
llmod,
80+
CString::new("rustmagic-openmp-amdgcn-amd-amdhsa-gfx90a.ll").unwrap().as_ptr(),
81+
std::ptr::null_mut(),
82+
);
83+
84+
// Clean up
85+
llvm::LLVMDisposeModule(llmod);
86+
llvm::LLVMContextDispose(llcx);
87+
}
88+
// TODO: addressspace 1 or 4
89+
}
90+
// source_filename = "mem.cpp"
91+
// GPU: target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9"
92+
// CPU: target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
93+
// target triple = "amdgcn-amd-amdhsa"
94+
//
95+
// @__omp_rtl_debug_kind = weak_odr hidden local_unnamed_addr addrspace(1) constant i32 0
96+
// @__omp_rtl_assume_teams_oversubscription = weak_odr hidden local_unnamed_addr addrspace(1) constant i32 0
97+
// @__omp_rtl_assume_threads_oversubscription = weak_odr hidden local_unnamed_addr addrspace(1) constant i32 0
98+
// @__omp_rtl_assume_no_thread_state = weak_odr hidden local_unnamed_addr addrspace(1) constant i32 0
99+
// @__omp_rtl_assume_no_nested_parallelism = weak_odr hidden local_unnamed_addr addrspace(1) constant i32 0
100+
// @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 500
101+
//
102+
// !llvm.module.flags = !{!0, !1, !2, !3, !4}
103+
// !opencl.ocl.version = !{!5}
104+
// !llvm.ident = !{!6, !7}
105+
//
106+
// !0 = !{i32 1, !"amdhsa_code_object_version", i32 500}
107+
// !1 = !{i32 1, !"wchar_size", i32 4}
108+
// !2 = !{i32 7, !"openmp", i32 51}
109+
// !3 = !{i32 7, !"openmp-device", i32 51}
110+
// !4 = !{i32 8, !"PIC Level", i32 2}
111+
// !5 = !{i32 2, i32 0}
112+
// !6 = !{!"clang version 20.1.5-rust-1.89.0-nightly (https://github.com/rust-lang/llvm-project.git c1118fdbb3024157df7f4cfe765f2b0b4339e8a2)"}
113+
// !7 = !{!"AMD clang version 19.0.0git (https://github.com/RadeonOpenCompute/llvm-project roc-6.4.0 25133 c7fe45cf4b819c5991fe208aaa96edf142730f1d)"}

0 commit comments

Comments
 (0)