@@ -260,6 +260,18 @@ class AMDGPUInformationCache : public InformationCache {
260260 return !HasAperture && (Access & ADDR_SPACE_CAST);
261261 }
262262
263+ bool checkConstForAddrSpaceCastFromPrivate (const Constant *C) {
264+ SmallPtrSet<const Constant *, 8 > Visited;
265+ uint8_t Access = getConstantAccess (C, Visited);
266+
267+ if (Access & ADDR_SPACE_CAST)
268+ if (const auto *CE = dyn_cast<ConstantExpr>(C))
269+ if (CE->getOperand (0 )->getType ()->getPointerAddressSpace () ==
270+ AMDGPUAS::PRIVATE_ADDRESS)
271+ return true ;
272+ return false ;
273+ }
274+
263275private:
264276 // / Used to determine if the Constant needs the queue pointer.
265277 DenseMap<const Constant *, uint8_t > ConstantStatus;
@@ -524,6 +536,9 @@ struct AAAMDAttributesFunction : public AAAMDAttributes {
524536 if (isAssumed (COMPLETION_ACTION) && funcRetrievesCompletionAction (A, COV))
525537 removeAssumedBits (COMPLETION_ACTION);
526538
539+ if (isAssumed (FLAT_SCRATCH_INIT) && needFlatScratchInit (A))
540+ removeAssumedBits (FLAT_SCRATCH_INIT);
541+
527542 return getAssumed () != OrigAssumed ? ChangeStatus::CHANGED
528543 : ChangeStatus::UNCHANGED;
529544 }
@@ -682,6 +697,65 @@ struct AAAMDAttributesFunction : public AAAMDAttributes {
682697 return !A.checkForAllCallLikeInstructions (DoesNotRetrieve, *this ,
683698 UsedAssumedInformation);
684699 }
700+
701+ // Returns true if FlatScratchInit is needed, i.e., no-flat-scratch-init is
702+ // not to be set.
703+ bool needFlatScratchInit (Attributor &A) {
704+ assert (isAssumed (FLAT_SCRATCH_INIT)); // only called if the bit is still set
705+
706+ // Check all AddrSpaceCast instructions. FlatScratchInit is needed if
707+ // there is a cast from PRIVATE_ADDRESS.
708+ auto AddrSpaceCastNotFromPrivate = [](Instruction &I) {
709+ return cast<AddrSpaceCastInst>(I).getSrcAddressSpace () !=
710+ AMDGPUAS::PRIVATE_ADDRESS;
711+ };
712+
713+ bool UsedAssumedInformation = false ;
714+ if (!A.checkForAllInstructions (AddrSpaceCastNotFromPrivate, *this ,
715+ {Instruction::AddrSpaceCast},
716+ UsedAssumedInformation))
717+ return true ;
718+
719+ // Check for addrSpaceCast from PRIVATE_ADDRESS in constant expressions
720+ auto &InfoCache = static_cast <AMDGPUInformationCache &>(A.getInfoCache ());
721+
722+ Function *F = getAssociatedFunction ();
723+ for (Instruction &I : instructions (F)) {
724+ for (const Use &U : I.operands ()) {
725+ if (const auto *C = dyn_cast<Constant>(U)) {
726+ if (InfoCache.checkConstForAddrSpaceCastFromPrivate (C))
727+ return true ;
728+ }
729+ }
730+ }
731+
732+ // Finally check callees.
733+
734+ // This is called on each callee; false means callee shouldn't have
735+ // no-flat-scratch-init.
736+ auto CheckForNoFlatScratchInit = [&](Instruction &I) {
737+ const auto &CB = cast<CallBase>(I);
738+ const Function *Callee = CB.getCalledFunction ();
739+
740+ // Callee == 0 for inline asm or indirect call with known callees.
741+ // In the latter case, updateImpl() already checked the callees and we
742+ // know their FLAT_SCRATCH_INIT bit is set.
743+ // If function has indirect call with unknown callees, the bit is
744+ // already removed in updateImpl() and execution won't reach here.
745+ if (!Callee)
746+ return true ;
747+
748+ return Callee->getIntrinsicID () !=
749+ Intrinsic::amdgcn_addrspacecast_nonnull;
750+ };
751+
752+ UsedAssumedInformation = false ;
753+ // If any callee is false (i.e. need FlatScratchInit),
754+ // checkForAllCallLikeInstructions returns false, in which case this
755+ // function returns true.
756+ return !A.checkForAllCallLikeInstructions (CheckForNoFlatScratchInit, *this ,
757+ UsedAssumedInformation);
758+ }
685759};
686760
687761AAAMDAttributes &AAAMDAttributes::createForPosition (const IRPosition &IRP,
0 commit comments