Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,7 @@ def hasLDU : Predicate<"Subtarget->hasLDU()">;
def hasPTXASUnreachableBug : Predicate<"Subtarget->hasPTXASUnreachableBug()">;
def noPTXASUnreachableBug : Predicate<"!Subtarget->hasPTXASUnreachableBug()">;
def hasOptEnabled : Predicate<"TM.getOptLevel() != CodeGenOptLevel::None">;
def hasAcceleratedFeatures : Predicate<"Subtarget->hasAAFeatures()">;
def hasArchAccelFeatures : Predicate<"Subtarget->hasArchAccelFeatures()">;

def doF32FTZ : Predicate<"useF32FTZ()">;
def doNoF32FTZ : Predicate<"!useF32FTZ()">;
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
Original file line number Diff line number Diff line change
Expand Up @@ -7596,7 +7596,7 @@ multiclass SET_MAXNREG<string Action, Intrinsic Intr> {
def : NVPTXInst<(outs), (ins i32imm:$reg_count),
"setmaxnreg." # Action # ".sync.aligned.u32 $reg_count;",
[(Intr timm:$reg_count)]>,
Requires<[hasAcceleratedFeatures, hasSM<90>, hasPTX<80>]>;
Requires<[hasArchAccelFeatures, hasSM<90>, hasPTX<80>]>;
}

defm INT_SET_MAXNREG_INC : SET_MAXNREG<"inc", int_nvvm_setmaxnreg_inc_sync_aligned_u32>;
Expand Down
6 changes: 3 additions & 3 deletions llvm/lib/Target/NVPTX/NVPTXSubtarget.h
Original file line number Diff line number Diff line change
Expand Up @@ -125,11 +125,11 @@ class NVPTXSubtarget : public NVPTXGenSubtargetInfo {
unsigned int getSmVersion() const { return getFullSmVersion() / 10; }
// GPUs with "a" suffix have include architecture-accelerated features that
// are supported on the specified architecture only, hence such targets do not
// follow the onion layer model. hasAAFeatures() allows distinguishing such
// GPU variants from the base GPU architecture.
// follow the onion layer model. hasArchAccelFeatures() allows
// distinguishing such GPU variants from the base GPU architecture.
// - 0 represents base GPU model,
// - non-zero value identifies particular architecture-accelerated variant.
bool hasAAFeatures() const { return getFullSmVersion() % 10; }
bool hasArchAccelFeatures() const { return getFullSmVersion() % 10; }

// If the user did not provide a target we default to the `sm_30` target.
std::string getTargetName() const {
Expand Down