@@ -15,56 +15,24 @@ pub(crate) fn handle_gpu_code<'ll>(
15
15
_cgcx : & CodegenContext < LlvmCodegenBackend > ,
16
16
cx : & ' ll SimpleCx < ' _ > ,
17
17
) {
18
- let ( offload_entry_ty, at_one, begin, update, end, tgt_bin_desc, fn_ty) = gen_globals ( & cx) ;
19
-
20
18
let mut o_types = vec ! [ ] ;
21
19
let mut kernels = vec ! [ ] ;
20
+ let offload_entry_ty = add_tgt_offload_entry ( & cx) ;
22
21
for num in 0 ..9 {
23
22
let kernel = cx. get_function ( & format ! ( "kernel_{num}" ) ) ;
24
23
if let Some ( kernel) = kernel {
25
24
o_types. push ( gen_define_handling ( & cx, kernel, offload_entry_ty, num) ) ;
26
25
kernels. push ( kernel) ;
27
26
}
28
27
}
29
- gen_call_handling ( & cx, & kernels, at_one, begin, update, end, tgt_bin_desc, fn_ty, & o_types) ;
30
- }
31
28
32
- // The meaning of the __tgt_offload_entry (as per llvm docs) is
33
- // Type, Identifier, Description
34
- // void*, addr, Address of global symbol within device image (function or global)
35
- // char*, name, Name of the symbol
36
- // size_t, size, Size of the entry info (0 if it is a function)
37
- // int32_t, flags, Flags associated with the entry (see Target Region Entry Flags)
38
- // int32_t, reserved, Reserved, to be used by the runtime library.
39
- pub ( crate ) fn add_tgt_offload_entry < ' ll > ( cx : & ' ll SimpleCx < ' _ > ) -> & ' ll llvm:: Type {
40
- let offload_entry_ty = cx. type_named_struct ( "struct.__tgt_offload_entry" ) ;
41
- let tptr = cx. type_ptr ( ) ;
42
- let ti64 = cx. type_i64 ( ) ;
43
- let ti32 = cx. type_i32 ( ) ;
44
- let ti16 = cx. type_i16 ( ) ;
45
- let entry_elements = vec ! [ ti64, ti16, ti16, ti32, tptr, tptr, ti64, ti64, tptr] ;
46
- cx. set_struct_body ( offload_entry_ty, & entry_elements, false ) ;
47
- offload_entry_ty
29
+ gen_call_handling ( & cx, & kernels, & o_types) ;
48
30
}
49
31
50
- fn gen_globals < ' ll > (
51
- cx : & ' ll SimpleCx < ' _ > ,
52
- ) -> (
53
- & ' ll llvm:: Type ,
54
- & ' ll llvm:: Value ,
55
- & ' ll llvm:: Value ,
56
- & ' ll llvm:: Value ,
57
- & ' ll llvm:: Value ,
58
- & ' ll llvm:: Type ,
59
- & ' ll llvm:: Type ,
60
- ) {
61
- let offload_entry_ty = add_tgt_offload_entry ( & cx) ;
62
- let kernel_arguments_ty = cx. type_named_struct ( "struct.__tgt_kernel_arguments" ) ;
63
- let tptr = cx. type_ptr ( ) ;
64
- let ti64 = cx. type_i64 ( ) ;
65
- let ti32 = cx. type_i32 ( ) ;
66
- let tarr = cx. type_array ( ti32, 3 ) ;
67
-
32
+ // What is our @1 here? A magic global, used in our data_{begin/update/end}_mapper:
33
+ // @0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1
34
+ // @1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 22, ptr @0 }, align 8
35
+ fn generate_at_one < ' ll > ( cx : & ' ll SimpleCx < ' _ > ) -> & ' ll llvm:: Value {
68
36
// @0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1
69
37
let unknown_txt = ";unknown;unknown;0;0;;" ;
70
38
let c_entry_name = CString :: new ( unknown_txt) . unwrap ( ) ;
@@ -87,11 +55,33 @@ fn gen_globals<'ll>(
87
55
cx. set_struct_body ( struct_ident_ty, & struct_elems_ty, false ) ;
88
56
let at_one = add_unnamed_global ( & cx, & "" , initializer, PrivateLinkage ) ;
89
57
llvm:: set_alignment ( at_one, Align :: EIGHT ) ;
58
+ at_one
59
+ }
90
60
91
- // %struct.__tgt_bin_desc = type { i32, ptr, ptr, ptr }
92
- let tgt_bin_desc_ty = vec ! [ ti32, tptr, tptr, tptr] ;
93
- let tgt_bin_desc_name = cx. type_named_struct ( "struct.__tgt_bin_desc" ) ;
94
- cx. set_struct_body ( tgt_bin_desc_name, & tgt_bin_desc_ty, false ) ;
61
+ // The meaning of the __tgt_offload_entry (as per llvm docs) is
62
+ // Type, Identifier, Description
63
+ // void*, addr, Address of global symbol within device image (function or global)
64
+ // char*, name, Name of the symbol
65
+ // size_t, size, Size of the entry info (0 if it is a function)
66
+ // int32_t, flags, Flags associated with the entry (see Target Region Entry Flags)
67
+ // int32_t, reserved, Reserved, to be used by the runtime library.
68
+ pub ( crate ) fn add_tgt_offload_entry < ' ll > ( cx : & ' ll SimpleCx < ' _ > ) -> & ' ll llvm:: Type {
69
+ let offload_entry_ty = cx. type_named_struct ( "struct.__tgt_offload_entry" ) ;
70
+ let tptr = cx. type_ptr ( ) ;
71
+ let ti64 = cx. type_i64 ( ) ;
72
+ let ti32 = cx. type_i32 ( ) ;
73
+ let ti16 = cx. type_i16 ( ) ;
74
+ let entry_elements = vec ! [ ti64, ti16, ti16, ti32, tptr, tptr, ti64, ti64, tptr] ;
75
+ cx. set_struct_body ( offload_entry_ty, & entry_elements, false ) ;
76
+ offload_entry_ty
77
+ }
78
+
79
+ fn gen_tgt_kernel_global < ' ll > ( cx : & ' ll SimpleCx < ' _ > ) {
80
+ let kernel_arguments_ty = cx. type_named_struct ( "struct.__tgt_kernel_arguments" ) ;
81
+ let tptr = cx. type_ptr ( ) ;
82
+ let ti64 = cx. type_i64 ( ) ;
83
+ let ti32 = cx. type_i32 ( ) ;
84
+ let tarr = cx. type_array ( ti32, 3 ) ;
95
85
96
86
// For each kernel to run on the gpu, we will later generate one entry of this type.
97
87
// coppied from LLVM
@@ -111,47 +101,32 @@ fn gen_globals<'ll>(
111
101
112
102
cx. set_struct_body ( kernel_arguments_ty, & kernel_elements, false ) ;
113
103
// For now we don't handle kernels, so for now we just add a global dummy
114
- // to make sure that the __tgt_offload_entrr is defined and handled correctly.
104
+ // to make sure that the __tgt_offload_entry is defined and handled correctly.
115
105
cx. declare_global ( "my_struct_global2" , kernel_arguments_ty) ;
106
+ }
107
+
108
+ fn gen_tgt_data_mappers < ' ll > (
109
+ cx : & ' ll SimpleCx < ' _ > ,
110
+ ) -> ( & ' ll llvm:: Value , & ' ll llvm:: Value , & ' ll llvm:: Value , & ' ll llvm:: Type ) {
111
+ let tptr = cx. type_ptr ( ) ;
112
+ let ti64 = cx. type_i64 ( ) ;
113
+ let ti32 = cx. type_i32 ( ) ;
116
114
117
- // Move data to the gpu
118
- let mapper_begin = "__tgt_target_data_begin_mapper" ;
119
- // Update data on the gpu, currently not used.
120
- let mapper_update = String :: from ( "__tgt_target_data_update_mapper" ) ;
121
- // Move data from the GPU
122
- let mapper_end = String :: from ( "__tgt_target_data_end_mapper" ) ;
123
115
let args = vec ! [ tptr, ti64, ti32, tptr, tptr, tptr, tptr, tptr, tptr] ;
124
116
let mapper_fn_ty = cx. type_func ( & args, cx. type_void ( ) ) ;
125
- let foo = crate :: declare:: declare_simple_fn (
126
- & cx,
127
- & mapper_begin,
128
- llvm:: CallConv :: CCallConv ,
129
- llvm:: UnnamedAddr :: No ,
130
- llvm:: Visibility :: Default ,
131
- mapper_fn_ty,
132
- ) ;
133
- let bar = crate :: declare:: declare_simple_fn (
134
- & cx,
135
- & mapper_update,
136
- llvm:: CallConv :: CCallConv ,
137
- llvm:: UnnamedAddr :: No ,
138
- llvm:: Visibility :: Default ,
139
- mapper_fn_ty,
140
- ) ;
141
- let baz = crate :: declare:: declare_simple_fn (
142
- & cx,
143
- & mapper_end,
144
- llvm:: CallConv :: CCallConv ,
145
- llvm:: UnnamedAddr :: No ,
146
- llvm:: Visibility :: Default ,
147
- mapper_fn_ty,
148
- ) ;
117
+ let mapper_begin = "__tgt_target_data_begin_mapper" ;
118
+ let mapper_update = "__tgt_target_data_update_mapper" ;
119
+ let mapper_end = "__tgt_target_data_end_mapper" ;
120
+ let begin_mapper_decl = declare_offload_fn ( & cx, mapper_begin, mapper_fn_ty) ;
121
+ let update_mapper_decl = declare_offload_fn ( & cx, mapper_update, mapper_fn_ty) ;
122
+ let end_mapper_decl = declare_offload_fn ( & cx, mapper_end, mapper_fn_ty) ;
123
+
149
124
let nounwind = llvm:: AttributeKind :: NoUnwind . create_attr ( cx. llcx ) ;
150
- attributes:: apply_to_llfn ( foo , Function , & [ nounwind] ) ;
151
- attributes:: apply_to_llfn ( bar , Function , & [ nounwind] ) ;
152
- attributes:: apply_to_llfn ( baz , Function , & [ nounwind] ) ;
125
+ attributes:: apply_to_llfn ( begin_mapper_decl , Function , & [ nounwind] ) ;
126
+ attributes:: apply_to_llfn ( update_mapper_decl , Function , & [ nounwind] ) ;
127
+ attributes:: apply_to_llfn ( end_mapper_decl , Function , & [ nounwind] ) ;
153
128
154
- ( offload_entry_ty , at_one , foo , bar , baz , tgt_bin_desc_name , mapper_fn_ty)
129
+ ( begin_mapper_decl , update_mapper_decl , end_mapper_decl , mapper_fn_ty)
155
130
}
156
131
157
132
fn add_priv_unnamed_arr < ' ll > ( cx : & SimpleCx < ' ll > , name : & str , vals : & [ u64 ] ) -> & ' ll llvm:: Value {
@@ -223,10 +198,10 @@ fn gen_define_handling<'ll>(
223
198
224
199
let c_entry_name = CString :: new ( format ! ( "kernel_{num}" ) ) . unwrap ( ) ;
225
200
let c_val = c_entry_name. as_bytes_with_nul ( ) ;
226
- let foo = format ! ( ".offloading.entry_name.{num}" ) ;
201
+ let offload_entry_name = format ! ( ".offloading.entry_name.{num}" ) ;
227
202
228
203
let initializer = crate :: common:: bytes_in_context ( cx. llcx , c_val) ;
229
- let llglobal = add_unnamed_global ( & cx, & foo , initializer, InternalLinkage ) ;
204
+ let llglobal = add_unnamed_global ( & cx, & offload_entry_name , initializer, InternalLinkage ) ;
230
205
llvm:: set_alignment ( llglobal, Align :: ONE ) ;
231
206
let c_section_name = CString :: new ( ".llvm.rodata.offloading" ) . unwrap ( ) ;
232
207
llvm:: set_section ( llglobal, & c_section_name) ;
@@ -259,6 +234,21 @@ fn gen_define_handling<'ll>(
259
234
o_types
260
235
}
261
236
237
+ fn declare_offload_fn < ' ll > (
238
+ cx : & ' ll SimpleCx < ' _ > ,
239
+ name : & str ,
240
+ ty : & ' ll llvm:: Type ,
241
+ ) -> & ' ll llvm:: Value {
242
+ crate :: declare:: declare_simple_fn (
243
+ cx,
244
+ name,
245
+ llvm:: CallConv :: CCallConv ,
246
+ llvm:: UnnamedAddr :: No ,
247
+ llvm:: Visibility :: Default ,
248
+ ty,
249
+ )
250
+ }
251
+
262
252
// For each kernel *call*, we now use some of our previous declared globals to move data to and from
263
253
// the gpu. We don't have a proper frontend yet, so we assume that every call to a kernel function
264
254
// from main is intended to run on the GPU. For now, we only handle the data transfer part of it.
@@ -282,14 +272,18 @@ fn gen_define_handling<'ll>(
282
272
fn gen_call_handling < ' ll > (
283
273
cx : & ' ll SimpleCx < ' _ > ,
284
274
_kernels : & [ & ' ll llvm:: Value ] ,
285
- s_ident_t : & ' ll llvm:: Value ,
286
- begin : & ' ll llvm:: Value ,
287
- _update : & ' ll llvm:: Value ,
288
- end : & ' ll llvm:: Value ,
289
- tgt_bin_desc : & ' ll llvm:: Type ,
290
- fn_ty : & ' ll llvm:: Type ,
291
275
o_types : & [ & ' ll llvm:: Value ] ,
292
276
) {
277
+ // %struct.__tgt_bin_desc = type { i32, ptr, ptr, ptr }
278
+ let tptr = cx. type_ptr ( ) ;
279
+ let ti32 = cx. type_i32 ( ) ;
280
+ let tgt_bin_desc_ty = vec ! [ ti32, tptr, tptr, tptr] ;
281
+ let tgt_bin_desc = cx. type_named_struct ( "struct.__tgt_bin_desc" ) ;
282
+ cx. set_struct_body ( tgt_bin_desc, & tgt_bin_desc_ty, false ) ;
283
+
284
+ gen_tgt_kernel_global ( & cx) ;
285
+ let ( begin_mapper_decl, _, end_mapper_decl, fn_ty) = gen_tgt_data_mappers ( & cx) ;
286
+
293
287
let main_fn = cx. get_function ( "main" ) ;
294
288
if let Some ( main_fn) = main_fn {
295
289
let kernel_name = "kernel_1" ;
@@ -351,39 +345,17 @@ fn gen_call_handling<'ll>(
351
345
Align :: from_bytes ( 8 ) . unwrap ( ) ,
352
346
) ;
353
347
354
- let tptr = cx. type_ptr ( ) ;
355
- let mapper_fn_ty = cx. type_func ( & [ tptr] , cx. type_void ( ) ) ;
356
- let foo = crate :: declare:: declare_simple_fn (
357
- & cx,
358
- & "__tgt_register_lib" ,
359
- llvm:: CallConv :: CCallConv ,
360
- llvm:: UnnamedAddr :: No ,
361
- llvm:: Visibility :: Default ,
362
- mapper_fn_ty,
363
- ) ;
364
- let bar = crate :: declare:: declare_simple_fn (
365
- & cx,
366
- & "__tgt_unregister_lib" ,
367
- llvm:: CallConv :: CCallConv ,
368
- llvm:: UnnamedAddr :: No ,
369
- llvm:: Visibility :: Default ,
370
- mapper_fn_ty,
371
- ) ;
348
+ let mapper_fn_ty = cx. type_func ( & [ cx. type_ptr ( ) ] , cx. type_void ( ) ) ;
349
+ let register_lib_decl = declare_offload_fn ( & cx, "__tgt_register_lib" , mapper_fn_ty) ;
350
+ let unregister_lib_decl = declare_offload_fn ( & cx, "__tgt_unregister_lib" , mapper_fn_ty) ;
372
351
let init_ty = cx. type_func ( & [ ] , cx. type_void ( ) ) ;
373
- let baz = crate :: declare:: declare_simple_fn (
374
- & cx,
375
- & "__tgt_init_all_rtls" ,
376
- llvm:: CallConv :: CCallConv ,
377
- llvm:: UnnamedAddr :: No ,
378
- llvm:: Visibility :: Default ,
379
- init_ty,
380
- ) ;
381
-
382
- builder. call ( mapper_fn_ty, foo, & [ tgt_bin_desc_alloca] , None ) ;
383
- builder. call ( init_ty, baz, & [ ] , None ) ;
352
+ let init_rtls_decl = declare_offload_fn ( cx, "__tgt_init_all_rtls" , init_ty) ;
384
353
385
354
// call void @__tgt_register_lib(ptr noundef %6)
355
+ builder. call ( mapper_fn_ty, register_lib_decl, & [ tgt_bin_desc_alloca] , None ) ;
386
356
// call void @__tgt_init_all_rtls()
357
+ builder. call ( init_ty, init_rtls_decl, & [ ] , None ) ;
358
+
387
359
for i in 0 ..num_args {
388
360
let idx = cx. get_const_i32 ( i) ;
389
361
let gep1 = builder. inbounds_gep ( ty, a1, & [ i32_0, idx] ) ;
@@ -401,6 +373,7 @@ fn gen_call_handling<'ll>(
401
373
402
374
let nullptr = cx. const_null ( cx. type_ptr ( ) ) ;
403
375
let o_type = o_types[ 0 ] ;
376
+ let s_ident_t = generate_at_one ( & cx) ;
404
377
let args = vec ! [
405
378
s_ident_t,
406
379
cx. get_const_i64( u64 :: MAX ) ,
@@ -412,7 +385,7 @@ fn gen_call_handling<'ll>(
412
385
nullptr,
413
386
nullptr,
414
387
] ;
415
- builder. call ( fn_ty, begin , & args, None ) ;
388
+ builder. call ( fn_ty, begin_mapper_decl , & args, None ) ;
416
389
417
390
// Step 4)
418
391
unsafe { llvm:: LLVMRustPositionAfter ( builder. llbuilder , kernel_call) } ;
@@ -434,14 +407,13 @@ fn gen_call_handling<'ll>(
434
407
nullptr,
435
408
nullptr,
436
409
] ;
437
- builder. call ( fn_ty, end , & args, None ) ;
438
- builder. call ( mapper_fn_ty, bar , & [ tgt_bin_desc_alloca] , None ) ;
410
+ builder. call ( fn_ty, end_mapper_decl , & args, None ) ;
411
+ builder. call ( mapper_fn_ty, unregister_lib_decl , & [ tgt_bin_desc_alloca] , None ) ;
439
412
413
+ // With this we generated the following begin and end mappers. We could easily generate the
414
+ // update mapper in an update.
440
415
// call void @__tgt_target_data_begin_mapper(ptr @1, i64 -1, i32 3, ptr %27, ptr %28, ptr %29, ptr @.offload_maptypes, ptr null, ptr null)
441
416
// call void @__tgt_target_data_update_mapper(ptr @1, i64 -1, i32 2, ptr %46, ptr %47, ptr %48, ptr @.offload_maptypes.1, ptr null, ptr null)
442
417
// call void @__tgt_target_data_end_mapper(ptr @1, i64 -1, i32 3, ptr %49, ptr %50, ptr %51, ptr @.offload_maptypes, ptr null, ptr null)
443
- // What is @1? Random but fixed:
444
- // @0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1
445
- // @1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 22, ptr @0 }, align 8
446
418
}
447
419
}
0 commit comments