@@ -242,6 +242,7 @@ int target_data_begin(DeviceTy &Device, int32_t arg_num,
242
242
// Address of pointer on the host and device, respectively.
243
243
void *Pointer_HstPtrBegin, *Pointer_TgtPtrBegin;
244
244
bool IsNew, Pointer_IsNew;
245
+ bool IsHostPtr = false ;
245
246
bool IsImplicit = arg_types[i] & OMP_TGT_MAPTYPE_IMPLICIT;
246
247
// UpdateRef is based on MEMBER_OF instead of TARGET_PARAM because if we
247
248
// have reached this point via __tgt_target_data_begin and not __tgt_target
@@ -253,7 +254,7 @@ int target_data_begin(DeviceTy &Device, int32_t arg_num,
253
254
DP (" Has a pointer entry: \n " );
254
255
// base is address of pointer.
255
256
Pointer_TgtPtrBegin = Device.getOrAllocTgtPtr (HstPtrBase, HstPtrBase,
256
- sizeof (void *), Pointer_IsNew, IsImplicit, UpdateRef);
257
+ sizeof (void *), Pointer_IsNew, IsHostPtr, IsImplicit, UpdateRef);
257
258
if (!Pointer_TgtPtrBegin) {
258
259
DP (" Call to getOrAllocTgtPtr returned null pointer (device failure or "
259
260
" illegal mapping).\n " );
@@ -269,7 +270,7 @@ int target_data_begin(DeviceTy &Device, int32_t arg_num,
269
270
}
270
271
271
272
void *TgtPtrBegin = Device.getOrAllocTgtPtr (HstPtrBegin, HstPtrBase,
272
- data_size, IsNew, IsImplicit, UpdateRef);
273
+ data_size, IsNew, IsHostPtr, IsImplicit, UpdateRef);
273
274
if (!TgtPtrBegin && data_size) {
274
275
// If data_size==0, then the argument could be a zero-length pointer to
275
276
// NULL, so getOrAlloc() returning NULL is not an error.
@@ -289,19 +290,21 @@ int target_data_begin(DeviceTy &Device, int32_t arg_num,
289
290
290
291
if (arg_types[i] & OMP_TGT_MAPTYPE_TO) {
291
292
bool copy = false ;
292
- if (IsNew || (arg_types[i] & OMP_TGT_MAPTYPE_ALWAYS)) {
293
- copy = true ;
294
- } else if (arg_types[i] & OMP_TGT_MAPTYPE_MEMBER_OF) {
295
- // Copy data only if the "parent" struct has RefCount==1.
296
- int32_t parent_idx = member_of (arg_types[i]);
297
- long parent_rc = Device.getMapEntryRefCnt (args[parent_idx]);
298
- assert (parent_rc > 0 && " parent struct not found" );
299
- if (parent_rc == 1 ) {
293
+ if (!(Device.RTLRequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY)) {
294
+ if (IsNew || (arg_types[i] & OMP_TGT_MAPTYPE_ALWAYS)) {
300
295
copy = true ;
296
+ } else if (arg_types[i] & OMP_TGT_MAPTYPE_MEMBER_OF) {
297
+ // Copy data only if the "parent" struct has RefCount==1.
298
+ int32_t parent_idx = member_of (arg_types[i]);
299
+ long parent_rc = Device.getMapEntryRefCnt (args[parent_idx]);
300
+ assert (parent_rc > 0 && " parent struct not found" );
301
+ if (parent_rc == 1 ) {
302
+ copy = true ;
303
+ }
301
304
}
302
305
}
303
306
304
- if (copy) {
307
+ if (copy && !IsHostPtr ) {
305
308
DP (" Moving %" PRId64 " bytes (hst:" DPxMOD " ) -> (tgt:" DPxMOD " )\n " ,
306
309
data_size, DPxPTR (HstPtrBegin), DPxPTR (TgtPtrBegin));
307
310
int rt = Device.data_submit (TgtPtrBegin, HstPtrBegin, data_size);
@@ -312,7 +315,7 @@ int target_data_begin(DeviceTy &Device, int32_t arg_num,
312
315
}
313
316
}
314
317
315
- if (arg_types[i] & OMP_TGT_MAPTYPE_PTR_AND_OBJ) {
318
+ if (arg_types[i] & OMP_TGT_MAPTYPE_PTR_AND_OBJ && !IsHostPtr ) {
316
319
DP (" Update pointer (" DPxMOD " ) -> [" DPxMOD " ]\n " ,
317
320
DPxPTR (Pointer_TgtPtrBegin), DPxPTR (TgtPtrBegin));
318
321
uint64_t Delta = (uint64_t )HstPtrBegin - (uint64_t )HstPtrBase;
@@ -363,14 +366,14 @@ int target_data_end(DeviceTy &Device, int32_t arg_num, void **args_base,
363
366
}
364
367
}
365
368
366
- bool IsLast;
369
+ bool IsLast, IsHostPtr ;
367
370
bool UpdateRef = !(arg_types[i] & OMP_TGT_MAPTYPE_MEMBER_OF) ||
368
371
(arg_types[i] & OMP_TGT_MAPTYPE_PTR_AND_OBJ);
369
372
bool ForceDelete = arg_types[i] & OMP_TGT_MAPTYPE_DELETE;
370
373
371
374
// If PTR_AND_OBJ, HstPtrBegin is address of pointee
372
375
void *TgtPtrBegin = Device.getTgtPtrBegin (HstPtrBegin, data_size, IsLast,
373
- UpdateRef);
376
+ UpdateRef, IsHostPtr );
374
377
DP (" There are %" PRId64 " bytes allocated at target address " DPxMOD
375
378
" - is%s last\n " , data_size, DPxPTR (TgtPtrBegin),
376
379
(IsLast ? " " : " not" ));
@@ -387,18 +390,22 @@ int target_data_end(DeviceTy &Device, int32_t arg_num, void **args_base,
387
390
if (arg_types[i] & OMP_TGT_MAPTYPE_FROM) {
388
391
bool Always = arg_types[i] & OMP_TGT_MAPTYPE_ALWAYS;
389
392
bool CopyMember = false ;
390
- if ((arg_types[i] & OMP_TGT_MAPTYPE_MEMBER_OF) &&
391
- !(arg_types[i] & OMP_TGT_MAPTYPE_PTR_AND_OBJ)) {
392
- // Copy data only if the "parent" struct has RefCount==1.
393
- int32_t parent_idx = member_of (arg_types[i]);
394
- long parent_rc = Device.getMapEntryRefCnt (args[parent_idx]);
395
- assert (parent_rc > 0 && " parent struct not found" );
396
- if (parent_rc == 1 ) {
397
- CopyMember = true ;
393
+ if (!(Device.RTLRequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY)) {
394
+ if ((arg_types[i] & OMP_TGT_MAPTYPE_MEMBER_OF) &&
395
+ !(arg_types[i] & OMP_TGT_MAPTYPE_PTR_AND_OBJ)) {
396
+ // Copy data only if the "parent" struct has RefCount==1.
397
+ int32_t parent_idx = member_of (arg_types[i]);
398
+ long parent_rc = Device.getMapEntryRefCnt (args[parent_idx]);
399
+ assert (parent_rc > 0 && " parent struct not found" );
400
+ if (parent_rc == 1 ) {
401
+ CopyMember = true ;
402
+ }
398
403
}
399
404
}
400
405
401
- if (DelEntry || Always || CopyMember) {
406
+ if ((DelEntry || Always || CopyMember) &&
407
+ !(Device.RTLRequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY &&
408
+ TgtPtrBegin == HstPtrBegin)) {
402
409
DP (" Moving %" PRId64 " bytes (tgt:" DPxMOD " ) -> (hst:" DPxMOD " )\n " ,
403
410
data_size, DPxPTR (TgtPtrBegin), DPxPTR (HstPtrBegin));
404
411
int rt = Device.data_retrieve (HstPtrBegin, TgtPtrBegin, data_size);
@@ -471,14 +478,21 @@ int target_data_update(DeviceTy &Device, int32_t arg_num,
471
478
472
479
void *HstPtrBegin = args[i];
473
480
int64_t MapSize = arg_sizes[i];
474
- bool IsLast;
481
+ bool IsLast, IsHostPtr ;
475
482
void *TgtPtrBegin = Device.getTgtPtrBegin (HstPtrBegin, MapSize, IsLast,
476
- false );
483
+ false , IsHostPtr );
477
484
if (!TgtPtrBegin) {
478
485
DP (" hst data:" DPxMOD " not found, becomes a noop\n " , DPxPTR (HstPtrBegin));
479
486
continue ;
480
487
}
481
488
489
+ if (Device.RTLRequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY &&
490
+ TgtPtrBegin == HstPtrBegin) {
491
+ DP (" hst data:" DPxMOD " unified and shared, becomes a noop\n " ,
492
+ DPxPTR (HstPtrBegin));
493
+ continue ;
494
+ }
495
+
482
496
if (arg_types[i] & OMP_TGT_MAPTYPE_FROM) {
483
497
DP (" Moving %" PRId64 " bytes (tgt:" DPxMOD " ) -> (hst:" DPxMOD " )\n " ,
484
498
arg_sizes[i], DPxPTR (TgtPtrBegin), DPxPTR (HstPtrBegin));
@@ -514,6 +528,7 @@ int target_data_update(DeviceTy &Device, int32_t arg_num,
514
528
DP (" Copying data to device failed.\n " );
515
529
return OFFLOAD_FAIL;
516
530
}
531
+
517
532
uintptr_t lb = (uintptr_t ) HstPtrBegin;
518
533
uintptr_t ub = (uintptr_t ) HstPtrBegin + MapSize;
519
534
Device.ShadowMtx .lock ();
@@ -640,19 +655,26 @@ int target(int64_t device_id, void *host_ptr, int32_t arg_num,
640
655
void *HstPtrVal = args[i];
641
656
void *HstPtrBegin = args_base[i];
642
657
void *HstPtrBase = args[idx];
643
- bool IsLast; // unused.
658
+ bool IsLast, IsHostPtr ; // unused.
644
659
void *TgtPtrBase =
645
660
(void *)((intptr_t )tgt_args[tgtIdx] + tgt_offsets[tgtIdx]);
646
661
DP (" Parent lambda base " DPxMOD " \n " , DPxPTR (TgtPtrBase));
647
662
uint64_t Delta = (uint64_t )HstPtrBegin - (uint64_t )HstPtrBase;
648
663
void *TgtPtrBegin = (void *)((uintptr_t )TgtPtrBase + Delta);
649
664
void *Pointer_TgtPtrBegin =
650
- Device.getTgtPtrBegin (HstPtrVal, arg_sizes[i], IsLast, false );
665
+ Device.getTgtPtrBegin (HstPtrVal, arg_sizes[i], IsLast, false ,
666
+ IsHostPtr);
651
667
if (!Pointer_TgtPtrBegin) {
652
668
DP (" No lambda captured variable mapped (" DPxMOD " ) - ignored\n " ,
653
669
DPxPTR (HstPtrVal));
654
670
continue ;
655
671
}
672
+ if (Device.RTLRequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY &&
673
+ TgtPtrBegin == HstPtrBegin) {
674
+ DP (" Unified memory is active, no need to map lambda captured"
675
+ " variable (" DPxMOD " )\n " , DPxPTR (HstPtrVal));
676
+ continue ;
677
+ }
656
678
DP (" Update lambda reference (" DPxMOD " ) -> [" DPxMOD " ]\n " ,
657
679
DPxPTR (Pointer_TgtPtrBegin), DPxPTR (TgtPtrBegin));
658
680
int rt = Device.data_submit (TgtPtrBegin, &Pointer_TgtPtrBegin,
@@ -668,7 +690,7 @@ int target(int64_t device_id, void *host_ptr, int32_t arg_num,
668
690
void *HstPtrBase = args_base[i];
669
691
void *TgtPtrBegin;
670
692
ptrdiff_t TgtBaseOffset;
671
- bool IsLast; // unused.
693
+ bool IsLast, IsHostPtr ; // unused.
672
694
if (arg_types[i] & OMP_TGT_MAPTYPE_LITERAL) {
673
695
DP (" Forwarding first-private value " DPxMOD " to the target construct\n " ,
674
696
DPxPTR (HstPtrBase));
@@ -705,14 +727,14 @@ int target(int64_t device_id, void *host_ptr, int32_t arg_num,
705
727
}
706
728
} else if (arg_types[i] & OMP_TGT_MAPTYPE_PTR_AND_OBJ) {
707
729
TgtPtrBegin = Device.getTgtPtrBegin (HstPtrBase, sizeof (void *), IsLast,
708
- false );
730
+ false , IsHostPtr );
709
731
TgtBaseOffset = 0 ; // no offset for ptrs.
710
732
DP (" Obtained target argument " DPxMOD " from host pointer " DPxMOD " to "
711
733
" object " DPxMOD " \n " , DPxPTR (TgtPtrBegin), DPxPTR (HstPtrBase),
712
734
DPxPTR (HstPtrBase));
713
735
} else {
714
736
TgtPtrBegin = Device.getTgtPtrBegin (HstPtrBegin, arg_sizes[i], IsLast,
715
- false );
737
+ false , IsHostPtr );
716
738
TgtBaseOffset = (intptr_t )HstPtrBase - (intptr_t )HstPtrBegin;
717
739
#ifdef OMPTARGET_DEBUG
718
740
void *TgtPtrBase = (void *)((intptr_t )TgtPtrBegin + TgtBaseOffset);
0 commit comments