Skip to content

Commit

Permalink
AMDGPU: Remove denormal subtarget features
Browse files Browse the repository at this point in the history
Switch to using the denormal-fp-math/denormal-fp-math-f32 attributes.
  • Loading branch information
arsenm committed Apr 2, 2020
1 parent ce2258c commit 5660bb6
Show file tree
Hide file tree
Showing 71 changed files with 448 additions and 422 deletions.
24 changes: 0 additions & 24 deletions llvm/lib/Target/AMDGPU/AMDGPU.td
Original file line number Diff line number Diff line change
Expand Up @@ -494,30 +494,6 @@ def FeatureNoDataDepHazard : SubtargetFeature<"no-data-dep-hazard",
// Subtarget Features (options and debugging)
//===------------------------------------------------------------===//

// Denormal handling for fp64 and fp16 is controlled by the same
// config register when fp16 supported.
// TODO: Do we need a separate f16 setting when not legal?
def FeatureFP64FP16Denormals : SubtargetFeature<"fp64-fp16-denormals",
"FP64FP16Denormals",
"true",
"Enable double and half precision denormal handling",
[FeatureFP64]
>;

def FeatureFP64Denormals : SubtargetFeature<"fp64-denormals",
"FP64FP16Denormals",
"true",
"Enable double and half precision denormal handling",
[FeatureFP64, FeatureFP64FP16Denormals]
>;

def FeatureFP16Denormals : SubtargetFeature<"fp16-denormals",
"FP64FP16Denormals",
"true",
"Enable half precision denormal handling",
[FeatureFP64FP16Denormals]
>;

def FeatureFPExceptions : SubtargetFeature<"fp-exceptions",
"FPExceptions",
"true",
Expand Down
5 changes: 4 additions & 1 deletion llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "AMDGPUTargetMachine.h"
#include "llvm/ADT/FloatingPointMode.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/ConstantFolding.h"
Expand Down Expand Up @@ -1387,7 +1388,9 @@ bool AMDGPUCodeGenPrepare::runOnFunction(Function &F) {
DT = DTWP ? &DTWP->getDomTree() : nullptr;

HasUnsafeFPMath = hasUnsafeFPMath(F);
HasFP32Denormals = ST->hasFP32Denormals(F);

AMDGPU::SIModeRegisterDefaults Mode(F);
HasFP32Denormals = Mode.allFP32Denormals();

bool MadeChange = false;

Expand Down
9 changes: 0 additions & 9 deletions llvm/lib/Target/AMDGPU/AMDGPUFeatures.td
Original file line number Diff line number Diff line change
Expand Up @@ -18,15 +18,6 @@ def FeatureFMA : SubtargetFeature<"fmaf",
"Enable single precision FMA (not as fast as mul+add, but fused)"
>;

// Some instructions do not support denormals despite this flag. Using
// fp32 denormals also causes instructions to run at the double
// precision rate for the device.
def FeatureFP32Denormals : SubtargetFeature<"fp32-denormals",
"FP32Denormals",
"true",
"Enable single precision denormal handling"
>;

class SubtargetFeatureLocalMemorySize <int Value> : SubtargetFeature<
"localmemorysize"#Value,
"LocalMemorySize",
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -402,7 +402,7 @@ bool AMDGPUDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
}
#endif
Subtarget = &MF.getSubtarget<GCNSubtarget>();
Mode = AMDGPU::SIModeRegisterDefaults(MF.getFunction(), *Subtarget);
Mode = AMDGPU::SIModeRegisterDefaults(MF.getFunction());
return SelectionDAGISel::runOnMachineFunction(MF);
}

Expand Down
14 changes: 8 additions & 6 deletions llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1644,9 +1644,10 @@ SDValue AMDGPUTargetLowering::LowerDIVREM24(SDValue Op, SelectionDAG &DAG,
const AMDGPUMachineFunction *MFI = MF.getInfo<AMDGPUMachineFunction>();

// float fr = mad(fqneg, fb, fa);
unsigned OpCode = MFI->getMode().allFP32Denormals() ?
(unsigned)AMDGPUISD::FMAD_FTZ :
(unsigned)ISD::FMAD;
unsigned OpCode = !MFI->getMode().allFP32Denormals() ?
(unsigned)ISD::FMAD :
(unsigned)AMDGPUISD::FMAD_FTZ;

SDValue fr = DAG.getNode(OpCode, DL, FltVT, fqneg, fb, fa);

// int iq = (int)fq;
Expand Down Expand Up @@ -1729,9 +1730,10 @@ void AMDGPUTargetLowering::LowerUDIVREM64(SDValue Op,
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();

// Compute denominator reciprocal.
unsigned FMAD = MFI->getMode().allFP32Denormals() ?
(unsigned)AMDGPUISD::FMAD_FTZ :
(unsigned)ISD::FMAD;
unsigned FMAD = !MFI->getMode().allFP32Denormals() ?
(unsigned)ISD::FMAD :
(unsigned)AMDGPUISD::FMAD_FTZ;


SDValue Cvt_Lo = DAG.getNode(ISD::UINT_TO_FP, DL, MVT::f32, RHS_Lo);
SDValue Cvt_Hi = DAG.getNode(ISD::UINT_TO_FP, DL, MVT::f32, RHS_Hi);
Expand Down
5 changes: 5 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -807,3 +807,8 @@ def fmaxnum_like_oneuse : PatFrags<(ops node:$src0, node:$src1),
[(fmaxnum_ieee_oneuse node:$src0, node:$src1),
(fmaxnum_oneuse node:$src0, node:$src1)]
>;

// Pattern fragment set that matches either the fmad node or the
// AMDGPUfmad_ftz node applied to the same three source operands, so a single
// selection pattern can cover both forms.
def any_fmad : PatFrags<(ops node:$src0, node:$src1, node:$src2),
[(fmad node:$src0, node:$src1, node:$src2),
(AMDGPUfmad_ftz node:$src0, node:$src1, node:$src2)]
>;
2 changes: 1 addition & 1 deletion llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ AMDGPUMachineFunction::AMDGPUMachineFunction(const MachineFunction &MF) :
LocalMemoryObjects(),
ExplicitKernArgSize(0),
LDSSize(0),
Mode(MF.getFunction(), MF.getSubtarget<GCNSubtarget>()),
Mode(MF.getFunction()),
IsEntryFunction(AMDGPU::isEntryFunctionCC(MF.getFunction().getCallingConv())),
NoSignedZerosFPMath(MF.getTarget().Options.NoSignedZerosFPMath),
MemoryBound(false),
Expand Down
21 changes: 0 additions & 21 deletions llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -59,13 +59,6 @@ R600Subtarget::initializeSubtargetDependencies(const Triple &TT,
FullFS += FS;
ParseSubtargetFeatures(GPU, FullFS);

// FIXME: I don't think Evergreen has any useful support for
// denormals, but should be checked. Should we issue a warning somewhere
// if someone tries to enable these?
if (getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
FP32Denormals = false;
}

HasMulU24 = getGeneration() >= EVERGREEN;
HasMulI24 = hasCaymanISA();

Expand All @@ -76,9 +69,6 @@ GCNSubtarget &
GCNSubtarget::initializeSubtargetDependencies(const Triple &TT,
StringRef GPU, StringRef FS) {
// Determine default and user-specified characteristics
// On SI+, we want FP64 denormals to be on by default. FP32 denormals can be
// enabled, but some instructions do not respect them and they run at the
// double precision rate, so don't enable by default.
//
// We want to be able to turn these off, but making this a subtarget feature
// for SI has the unhelpful behavior that it unsets everything else if you
Expand All @@ -93,15 +83,6 @@ GCNSubtarget::initializeSubtargetDependencies(const Triple &TT,
if (isAmdHsaOS()) // Turn on FlatForGlobal for HSA.
FullFS += "+flat-for-global,+unaligned-buffer-access,+trap-handler,";

// FIXME: I don't think Evergreen has any useful support for
// denormals, but should be checked. Should we issue a warning somewhere
// if someone tries to enable these?
if (getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
FullFS += "+fp64-fp16-denormals,+fp32-denormals,";
} else {
FullFS += "-fp32-denormals,";
}

FullFS += "+enable-prt-strict-null,"; // This is overridden by a disable in FS

// Disable mutually exclusive bits.
Expand Down Expand Up @@ -172,7 +153,6 @@ AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT) :
TargetTriple(TT),
Has16BitInsts(false),
HasMadMixInsts(false),
FP32Denormals(false),
FPExceptions(false),
HasSDWA(false),
HasVOP3PInsts(false),
Expand Down Expand Up @@ -200,7 +180,6 @@ GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
FastFMAF32(false),
HalfRate64Ops(false),

FP64FP16Denormals(false),
FlatForGlobal(false),
AutoWaitcntBeforeBarrier(false),
CodeObjectV3(false),
Expand Down
25 changes: 0 additions & 25 deletions llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,6 @@ class AMDGPUSubtarget {
protected:
bool Has16BitInsts;
bool HasMadMixInsts;
bool FP32Denormals;
bool FPExceptions;
bool HasSDWA;
bool HasVOP3PInsts;
Expand Down Expand Up @@ -149,15 +148,6 @@ class AMDGPUSubtarget {
return HasMadMixInsts;
}

bool hasFP32Denormals(const Function &F) const {
// FIXME: This should not be a property of the subtarget. This should be a
// property with a default set by the calling convention which can be
// overridden by attributes. For now, use the subtarget feature as a
// placeholder attribute. The function argument's only purpose is to
// discourage use without a function context until this is removed.
return FP32Denormals;
}

bool hasFPExceptions() const {
return FPExceptions;
}
Expand Down Expand Up @@ -304,7 +294,6 @@ class GCNSubtarget : public AMDGPUGenSubtargetInfo,
bool HalfRate64Ops;

// Dynamically set bits that enable features.
bool FP64FP16Denormals;
bool FlatForGlobal;
bool AutoWaitcntBeforeBarrier;
bool CodeObjectV3;
Expand Down Expand Up @@ -636,20 +625,6 @@ class GCNSubtarget : public AMDGPUGenSubtargetInfo,
unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount,
const Function &) const;

/// Alias for hasFP64FP16Denormals
bool hasFP16Denormals(const Function &F) const {
return FP64FP16Denormals;
}

/// Alias for hasFP64FP16Denormals
bool hasFP64Denormals(const Function &F) const {
return FP64FP16Denormals;
}

bool hasFP64FP16Denormals(const Function &F) const {
return FP64FP16Denormals;
}

bool supportsMinMaxDenormModes() const {
return getGeneration() >= AMDGPUSubtarget::GFX9;
}
Expand Down
4 changes: 2 additions & 2 deletions llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -941,8 +941,8 @@ bool GCNTTIImpl::areInlineCompatible(const Function *Caller,

// FIXME: dx10_clamp can just take the caller setting, but there seems to be
// no way to support merge for backend defined attributes.
AMDGPU::SIModeRegisterDefaults CallerMode(*Caller, *CallerST);
AMDGPU::SIModeRegisterDefaults CalleeMode(*Callee, *CalleeST);
AMDGPU::SIModeRegisterDefaults CallerMode(*Caller);
AMDGPU::SIModeRegisterDefaults CalleeMode(*Callee);
return CallerMode.isInlineCompatible(CalleeMode);
}

Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,7 @@ class GCNTTIImpl final : public BasicTTIImplBase<GCNTTIImpl> {
TLI(ST->getTargetLowering()),
CommonTTI(TM, F),
IsGraphicsShader(AMDGPU::isShader(F.getCallingConv())),
HasFP32Denormals(ST->hasFP32Denormals(F)) { }
HasFP32Denormals(AMDGPU::SIModeRegisterDefaults(F).allFP32Denormals()) {}

bool hasBranchDivergence() { return true; }
bool useGPUDivergenceAnalysis() const;
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/AMDGPU/R600Instructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -1006,7 +1006,7 @@ class MULADD_Common <bits<5> inst> : R600_3OP <

class MULADD_IEEE_Common <bits<5> inst> : R600_3OP <
inst, "MULADD_IEEE",
[(set f32:$dst, (fmad f32:$src0, f32:$src1, f32:$src2))]
[(set f32:$dst, (any_fmad f32:$src0, f32:$src1, f32:$src2))]
>;

class FMA_Common <bits<5> inst> : R600_3OP <
Expand Down
27 changes: 20 additions & 7 deletions llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1333,8 +1333,7 @@ bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset,
return true;
}

SIModeRegisterDefaults::SIModeRegisterDefaults(const Function &F,
const GCNSubtarget &ST) {
SIModeRegisterDefaults::SIModeRegisterDefaults(const Function &F) {
*this = getDefaultForCallingConv(F.getCallingConv());

StringRef IEEEAttr = F.getFnAttribute("amdgpu-ieee").getValueAsString();
Expand All @@ -1346,11 +1345,25 @@ SIModeRegisterDefaults::SIModeRegisterDefaults(const Function &F,
if (!DX10ClampAttr.empty())
DX10Clamp = DX10ClampAttr == "true";

// FIXME: Split this when denormal-fp-math is used
FP32InputDenormals = ST.hasFP32Denormals(F);
FP32OutputDenormals = FP32InputDenormals;
FP64FP16InputDenormals = ST.hasFP64FP16Denormals(F);
FP64FP16OutputDenormals = FP64FP16InputDenormals;
StringRef DenormF32Attr = F.getFnAttribute("denormal-fp-math-f32").getValueAsString();
if (!DenormF32Attr.empty()) {
DenormalMode DenormMode = parseDenormalFPAttribute(DenormF32Attr);
FP32InputDenormals = DenormMode.Input == DenormalMode::IEEE;
FP32OutputDenormals = DenormMode.Output == DenormalMode::IEEE;
}

StringRef DenormAttr = F.getFnAttribute("denormal-fp-math").getValueAsString();
if (!DenormAttr.empty()) {
DenormalMode DenormMode = parseDenormalFPAttribute(DenormAttr);

if (DenormF32Attr.empty()) {
FP32InputDenormals = DenormMode.Input == DenormalMode::IEEE;
FP32OutputDenormals = DenormMode.Output == DenormalMode::IEEE;
}

FP64FP16InputDenormals = DenormMode.Input == DenormalMode::IEEE;
FP64FP16OutputDenormals = DenormMode.Output == DenormalMode::IEEE;
}
}

namespace {
Expand Down
8 changes: 1 addition & 7 deletions llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -695,19 +695,13 @@ struct SIModeRegisterDefaults {
FP64FP16InputDenormals(true),
FP64FP16OutputDenormals(true) {}

// FIXME: Should not depend on the subtarget
SIModeRegisterDefaults(const Function &F, const GCNSubtarget &ST);
SIModeRegisterDefaults(const Function &F);

static SIModeRegisterDefaults getDefaultForCallingConv(CallingConv::ID CC) {
const bool IsCompute = AMDGPU::isCompute(CC);

SIModeRegisterDefaults Mode;
Mode.DX10Clamp = true;
Mode.IEEE = IsCompute;
Mode.FP32InputDenormals = true;
Mode.FP32OutputDenormals = true;
Mode.FP64FP16InputDenormals = true;
Mode.FP64FP16OutputDenormals = true;
return Mode;
}

Expand Down
4 changes: 2 additions & 2 deletions llvm/test/Analysis/CostModel/AMDGPU/fdiv.ll
Original file line number Diff line number Diff line change
Expand Up @@ -278,5 +278,5 @@ define amdgpu_kernel void @rcp_v2f16_f32_ftzdaz(<2 x half> addrspace(1)* %out, <
ret void
}

attributes #0 = { nounwind "target-features"="+fp32-denormals" }
attributes #1 = { nounwind "target-features"="-fp32-denormals" }
attributes #0 = { nounwind "denormal-fp-math-f32"="ieee,ieee" }
attributes #1 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -march=amdgcn -mcpu=tahiti -mattr=+fp32-denormals -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s
# RUN: llc -march=amdgcn -mcpu=tahiti -mattr=-fp32-denormals -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s
# RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=+fp32-denormals -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s
# RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-fp32-denormals -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s
# RUN: llc -march=amdgcn -mcpu=tahiti -denormal-fp-math-f32=ieee -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s
# RUN: llc -march=amdgcn -mcpu=tahiti -denormal-fp-math-f32=preserve-sign -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s
# RUN: llc -march=amdgcn -mcpu=gfx900 -denormal-fp-math-f32=ieee -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s
# RUN: llc -march=amdgcn -mcpu=gfx900 -denormal-fp-math-f32=preserve-sign -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s

---
name: fmad_ftz_s32_vvvv
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=1 -mtriple=amdgcn-amd-amdhsa -mattr=-fp32-denormals -denormal-fp-math-f32=preserve-sign < %s | FileCheck -check-prefixes=CHECK,GISEL %s
; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=0 -mtriple=amdgcn-amd-amdhsa -mattr=-fp32-denormals -denormal-fp-math-f32=preserve-sign < %s | FileCheck -check-prefixes=CHECK,CGP %s
; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=1 -mtriple=amdgcn-amd-amdhsa -denormal-fp-math-f32=preserve-sign < %s | FileCheck -check-prefixes=CHECK,GISEL %s
; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=0 -mtriple=amdgcn-amd-amdhsa -denormal-fp-math-f32=preserve-sign < %s | FileCheck -check-prefixes=CHECK,CGP %s

; The same 32-bit expansion is implemented in the legalizer and in AMDGPUCodeGenPrepare.

Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=1 -mtriple=amdgcn-amd-amdhsa -mattr=-fp32-denormals -denormal-fp-math-f32=preserve-sign < %s | FileCheck -check-prefixes=CHECK,GISEL %s
; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=0 -mtriple=amdgcn-amd-amdhsa -mattr=-fp32-denormals -denormal-fp-math-f32=preserve-sign < %s | FileCheck -check-prefixes=CHECK,CGP %s
; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=1 -mtriple=amdgcn-amd-amdhsa -denormal-fp-math-f32=preserve-sign < %s | FileCheck -check-prefixes=CHECK,GISEL %s
; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=0 -mtriple=amdgcn-amd-amdhsa -denormal-fp-math-f32=preserve-sign < %s | FileCheck -check-prefixes=CHECK,CGP %s

; The same 32-bit expansion is implemented in the legalizer and in AMDGPUCodeGenPrepare.

Expand Down
1 change: 0 additions & 1 deletion llvm/test/CodeGen/AMDGPU/amdgcn-ieee.ll
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,6 @@ define amdgpu_ps void @ps_ieee_mode_default() #0 {
ret void
}

; FIXME: Should have denormals off by default.
; GCN-LABEL: {{^}}ps_ieee_mode_on:
; GCN: {{buffer|global|flat}}_load_dword [[VAL0:v[0-9]+]]
; GCN-NEXT: {{buffer|global|flat}}_load_dword [[VAL1:v[0-9]+]]
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fdiv.ll
Original file line number Diff line number Diff line change
Expand Up @@ -338,8 +338,8 @@ define amdgpu_kernel void @fdiv_fpmath_f32_denormals(float addrspace(1)* %out, f
}

attributes #0 = { nounwind optnone noinline }
attributes #1 = { nounwind "target-features"="-fp32-denormals" }
attributes #2 = { nounwind "target-features"="+fp32-denormals" }
attributes #1 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
attributes #2 = { nounwind "denormal-fp-math-f32"="ieee,ieee" }

!0 = !{float 2.500000e+00}
!1 = !{float 5.000000e-01}
Expand Down
6 changes: 3 additions & 3 deletions llvm/test/CodeGen/AMDGPU/clamp-modifier.ll
Original file line number Diff line number Diff line change
Expand Up @@ -389,10 +389,10 @@ declare <2 x half> @llvm.amdgcn.cvt.pkrtz(float, float) #1

declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #1

attributes #0 = { nounwind "target-features"="-fp32-denormals" }
attributes #0 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
attributes #1 = { nounwind readnone }
attributes #2 = { nounwind "target-features"="+fp32-denormals" }
attributes #3 = { nounwind "target-features"="-fp64-fp16-denormals" }
; #2: f32 denormals enabled; the value is "<output mode>,<input mode>", comma-separated.
attributes #2 = { nounwind "denormal-fp-math-f32"="ieee,ieee" }
attributes #3 = { nounwind "denormal-fp-math-f32"="ieee,ieee" "denormal-fp-math"="preserve-sign,preserve-sign" }

!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!2, !3}
Expand Down
Loading

0 comments on commit 5660bb6

Please sign in to comment.