@@ -265,18 +265,9 @@ static void convertToParamAS(Use *OldUse, Value *Param, bool HasCvtaParam,
265
265
if (HasCvtaParam) {
266
266
auto GetParamAddrCastToGeneric =
267
267
[](Value *Addr, Instruction *OriginalUser) -> Value * {
268
- PointerType *ReturnTy =
269
- PointerType::get (OriginalUser->getContext (), ADDRESS_SPACE_GENERIC);
270
- Function *CvtToGen = Intrinsic::getOrInsertDeclaration (
271
- OriginalUser->getModule (), Intrinsic::nvvm_ptr_param_to_gen,
272
- {ReturnTy, PointerType::get (OriginalUser->getContext (),
273
- ADDRESS_SPACE_PARAM)});
274
-
275
- // Cast param address to generic address space
276
- Value *CvtToGenCall =
277
- CallInst::Create (CvtToGen, Addr, Addr->getName () + " .gen" ,
278
- OriginalUser->getIterator ());
279
- return CvtToGenCall;
268
+ IRBuilder<> IRB (OriginalUser);
269
+ Type *GenTy = IRB.getPtrTy (ADDRESS_SPACE_GENERIC);
270
+ return IRB.CreateAddrSpaceCast (Addr, GenTy, Addr->getName () + " .gen" );
280
271
};
281
272
auto *ParamInGenericAS =
282
273
GetParamAddrCastToGeneric (I.NewParam , I.OldInstruction );
@@ -515,33 +506,34 @@ void copyByValParam(Function &F, Argument &Arg) {
515
506
BasicBlock::iterator FirstInst = F.getEntryBlock ().begin ();
516
507
Type *StructType = Arg.getParamByValType ();
517
508
const DataLayout &DL = F.getDataLayout ();
518
- AllocaInst *AllocA = new AllocaInst (StructType, DL. getAllocaAddrSpace (),
519
- Arg.getName (), FirstInst );
509
+ IRBuilder<> IRB (&*FirstInst);
510
+ AllocaInst *AllocA = IRB. CreateAlloca (StructType, nullptr , Arg.getName ());
520
511
// Set the alignment to alignment of the byval parameter. This is because,
521
512
// later load/stores assume that alignment, and we are going to replace
522
513
// the use of the byval parameter with this alloca instruction.
523
- AllocA->setAlignment (F. getParamAlign (Arg. getArgNo ())
524
- .value_or (DL.getPrefTypeAlign (StructType)));
514
+ AllocA->setAlignment (
515
+ Arg. getParamAlign () .value_or (DL.getPrefTypeAlign (StructType)));
525
516
Arg.replaceAllUsesWith (AllocA);
526
517
527
- Value *ArgInParam = new AddrSpaceCastInst (
528
- &Arg, PointerType::get (Arg.getContext (), ADDRESS_SPACE_PARAM),
529
- Arg.getName (), FirstInst);
518
+ Value *ArgInParam =
519
+ IRB.CreateIntrinsic (Intrinsic::nvvm_internal_addrspace_wrap,
520
+ {IRB.getPtrTy (ADDRESS_SPACE_PARAM), Arg.getType ()},
521
+ &Arg, {}, Arg.getName ());
522
+
530
523
// Be sure to propagate alignment to this load; LLVM doesn't know that NVPTX
531
524
// addrspacecast preserves alignment. Since params are constant, this load
532
525
// is definitely not volatile.
533
526
const auto ArgSize = *AllocA->getAllocationSize (DL);
534
- IRBuilder<> IRB (&*FirstInst);
535
527
IRB.CreateMemCpy (AllocA, AllocA->getAlign (), ArgInParam, AllocA->getAlign (),
536
528
ArgSize);
537
529
}
538
530
} // namespace
539
531
540
532
static void handleByValParam (const NVPTXTargetMachine &TM, Argument *Arg) {
541
533
Function *Func = Arg->getParent ();
542
- bool HasCvtaParam =
543
- TM.getSubtargetImpl (*Func)->hasCvtaParam () && isKernelFunction (*Func );
544
- bool IsGridConstant = HasCvtaParam && isParamGridConstant (*Arg);
534
+ assert ( isKernelFunction (*Func));
535
+ const bool HasCvtaParam = TM.getSubtargetImpl (*Func)->hasCvtaParam ();
536
+ const bool IsGridConstant = HasCvtaParam && isParamGridConstant (*Arg);
545
537
const DataLayout &DL = Func->getDataLayout ();
546
538
BasicBlock::iterator FirstInst = Func->getEntryBlock ().begin ();
547
539
Type *StructType = Arg->getParamByValType ();
@@ -556,9 +548,11 @@ static void handleByValParam(const NVPTXTargetMachine &TM, Argument *Arg) {
556
548
// skip creation of a local copy of the argument.
557
549
SmallVector<Use *, 16 > UsesToUpdate (llvm::make_pointer_range (Arg->uses ()));
558
550
559
- Value *ArgInParamAS = new AddrSpaceCastInst (
560
- Arg, PointerType::get (StructType->getContext (), ADDRESS_SPACE_PARAM),
561
- Arg->getName (), FirstInst);
551
+ IRBuilder<> IRB (&*FirstInst);
552
+ Value *ArgInParamAS = IRB.CreateIntrinsic (
553
+ Intrinsic::nvvm_internal_addrspace_wrap,
554
+ {IRB.getPtrTy (ADDRESS_SPACE_PARAM), Arg->getType ()}, {Arg});
555
+
562
556
for (Use *U : UsesToUpdate)
563
557
convertToParamAS (U, ArgInParamAS, HasCvtaParam, IsGridConstant);
564
558
LLVM_DEBUG (dbgs () << " No need to copy or cast " << *Arg << " \n " );
@@ -576,30 +570,31 @@ static void handleByValParam(const NVPTXTargetMachine &TM, Argument *Arg) {
576
570
// However, we're still not allowed to write to it. If the user specified
577
571
// `__grid_constant__` for the argument, we'll consider escaped pointer as
578
572
// read-only.
579
- if (HasCvtaParam && ( ArgUseIsReadOnly || IsGridConstant )) {
573
+ if (IsGridConstant || ( HasCvtaParam && ArgUseIsReadOnly)) {
580
574
LLVM_DEBUG (dbgs () << " Using non-copy pointer to " << *Arg << " \n " );
581
575
// Replace all argument pointer uses (which might include a device function
582
576
// call) with a cast to the generic address space using cvta.param
583
577
// instruction, which avoids a local copy.
584
578
IRBuilder<> IRB (&Func->getEntryBlock ().front ());
585
579
586
- // Cast argument to param address space
587
- auto *CastToParam = cast<AddrSpaceCastInst>(IRB.CreateAddrSpaceCast (
588
- Arg, IRB.getPtrTy (ADDRESS_SPACE_PARAM), Arg->getName () + " .param" ));
580
+ // Cast argument to param address space. Because the backend will emit the
581
+ // argument already in the param address space, we need to use the noop
582
+ // intrinsic; this has the added benefit of preventing other optimizations
583
+ // from folding away this pair of addrspacecasts.
584
+ auto *ParamSpaceArg =
585
+ IRB.CreateIntrinsic (Intrinsic::nvvm_internal_addrspace_wrap,
586
+ {IRB.getPtrTy (ADDRESS_SPACE_PARAM), Arg->getType ()},
587
+ Arg, {}, Arg->getName () + " .param" );
589
588
590
- // Cast param address to generic address space. We do not use an
591
- // addrspacecast to generic here, because, LLVM considers `Arg` to be in the
592
- // generic address space, and a `generic -> param` cast followed by a `param
593
- // -> generic` cast will be folded away. The `param -> generic` intrinsic
594
- // will be correctly lowered to `cvta.param`.
595
- Value *CvtToGenCall = IRB.CreateIntrinsic (
596
- IRB.getPtrTy (ADDRESS_SPACE_GENERIC), Intrinsic::nvvm_ptr_param_to_gen,
597
- CastToParam, nullptr , CastToParam->getName () + " .gen" );
589
+ // Cast param address to generic address space.
590
+ Value *GenericArg = IRB.CreateAddrSpaceCast (
591
+ ParamSpaceArg, IRB.getPtrTy (ADDRESS_SPACE_GENERIC),
592
+ Arg->getName () + " .gen" );
598
593
599
- Arg->replaceAllUsesWith (CvtToGenCall );
594
+ Arg->replaceAllUsesWith (GenericArg );
600
595
601
596
// Do not replace Arg in the cast to param space
602
- CastToParam ->setOperand (0 , Arg);
597
+ ParamSpaceArg ->setOperand (0 , Arg);
603
598
} else
604
599
copyByValParam (*Func, *Arg);
605
600
}
@@ -713,12 +708,14 @@ static bool copyFunctionByValArgs(Function &F) {
713
708
LLVM_DEBUG (dbgs () << " Creating a copy of byval args of " << F.getName ()
714
709
<< " \n " );
715
710
bool Changed = false ;
716
- for (Argument &Arg : F.args ())
717
- if (Arg.getType ()->isPointerTy () && Arg.hasByValAttr () &&
718
- !(isParamGridConstant (Arg) && isKernelFunction (F))) {
719
- copyByValParam (F, Arg);
720
- Changed = true ;
721
- }
711
+ if (isKernelFunction (F)) {
712
+ for (Argument &Arg : F.args ())
713
+ if (Arg.getType ()->isPointerTy () && Arg.hasByValAttr () &&
714
+ !isParamGridConstant (Arg)) {
715
+ copyByValParam (F, Arg);
716
+ Changed = true ;
717
+ }
718
+ }
722
719
return Changed;
723
720
}
724
721
0 commit comments