
Split and simplify fgMorphOneAsgBlockOp #76793


Merged
2 changes: 0 additions & 2 deletions src/coreclr/jit/compiler.h
@@ -2329,8 +2329,6 @@ class Compiler
GenTree* gtNewStructVal(ClassLayout* layout, GenTree* addr);
GenTree* gtNewBlockVal(GenTree* addr, unsigned size);

GenTree* gtNewCpObjNode(GenTree* dst, GenTree* src, CORINFO_CLASS_HANDLE structHnd, bool isVolatile);

GenTreeCall* gtNewCallNode(gtCallTypes callType,
CORINFO_METHOD_HANDLE handle,
var_types type,
36 changes: 0 additions & 36 deletions src/coreclr/jit/gentree.cpp
@@ -7578,42 +7578,6 @@ GenTree* Compiler::gtNewBlockVal(GenTree* addr, unsigned size)
return blkNode;
}

// Creates a new assignment node for a CpObj.
// Parameters (exactly the same as MSIL CpObj):
//
// dstAddr - The target to copy the struct to
// srcAddr - The source to copy the struct from
// structHnd - A class token that represents the type of object being copied. May be null
// if FEATURE_SIMD is enabled and the source has a SIMD type.
// isVolatile - Is this marked as volatile memory?

GenTree* Compiler::gtNewCpObjNode(GenTree* dstAddr, GenTree* srcAddr, CORINFO_CLASS_HANDLE structHnd, bool isVolatile)
{
ClassLayout* layout = typGetObjLayout(structHnd);
GenTree* lhs = gtNewStructVal(layout, dstAddr);
GenTree* src = gtNewStructVal(layout, srcAddr);

if (lhs->OperIs(GT_OBJ))
{
GenTreeObj* lhsObj = lhs->AsObj();
#if DEBUG
// Codegen for CpObj assumes that we cannot have a struct with GC pointers whose size is not a multiple
// of the register size. The EE currently does not allow this to ensure that GC pointers are aligned
// if the struct is stored in an array. Note that this restriction doesn't apply to stack-allocated objects:
// they are never stored in arrays. We should never get to this method with stack-allocated objects since they
// are never copied so we don't need to exclude them from the assert below.
// Let's assert it just to be safe.
ClassLayout* layout = lhsObj->GetLayout();
unsigned size = layout->GetSize();
assert((layout->GetGCPtrCount() == 0) || (roundUp(size, REGSIZE_BYTES) == size));
#endif
gtSetObjGcInfo(lhsObj);
}

GenTree* result = gtNewBlkOpNode(lhs, src, isVolatile, true);
return result;
}

//------------------------------------------------------------------------
// FixupInitBlkValue: Fixup the init value for an initBlk operation
//
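The DEBUG comment removed above encodes an invariant worth spelling out: the EE pads any struct containing GC pointers so that its size is a multiple of the register size, keeping GC refs aligned when such structs are stored in arrays. A standalone sketch of that invariant (hypothetical types, not JIT code; assumes a 64-bit target where REGSIZE_BYTES is 8):

    #include <cstddef>

    constexpr size_t REGSIZE_BYTES = 8; // assumption: 64-bit target

    constexpr size_t roundUp(size_t x, size_t m)
    {
        return (x + m - 1) & ~(m - 1); // m must be a power of two
    }

    struct WithGCRef
    {
        void* gcRef; // stands in for a GC pointer field
        int   i;     // 12 bytes of fields...
    };

    // ...padded to 16, so the removed assert's condition holds for this layout.
    static_assert(sizeof(WithGCRef) == roundUp(sizeof(WithGCRef), REGSIZE_BYTES),
                  "GC-ref-bearing struct sizes round up to REGSIZE_BYTES");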
8 changes: 8 additions & 0 deletions src/coreclr/jit/gentree.h
@@ -1812,6 +1812,8 @@ struct GenTree
// The returned pointer might be nullptr if the node is not binary, or if non-null op2 is not required.
inline GenTree* gtGetOp2IfPresent() const;

inline GenTree*& Data();

bool TryGetUse(GenTree* operand, GenTree*** pUse);

bool TryGetUse(GenTree* operand)
@@ -8926,6 +8928,12 @@ inline GenTree* GenTree::gtGetOp2IfPresent() const
return op2;
}

inline GenTree*& GenTree::Data()
{
assert(OperIsStore() && (OperIsIndir() || OperIsLocal()));
return OperIsLocalStore() ? AsLclVarCommon()->Data() : AsIndir()->Data();
}

inline GenTree* GenTree::gtEffectiveVal(bool commaOnly /* = false */)
{
GenTree* effectiveVal = this;
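The new Data() accessor returns a GenTree*& rather than a plain pointer so that callers can both read and replace a store's value operand through a single path, whether the node is a local store (STORE_LCL_VAR/STORE_LCL_FLD) or an indirect one. A standalone analogue of the reference-returning accessor pattern (hypothetical Node type, not the JIT's GenTree hierarchy):

    #include <cassert>

    struct Node
    {
        Node* data; // the store's value operand
    };

    // Returning a reference lets callers assign through the accessor.
    Node*& Data(Node& store)
    {
        return store.data;
    }

    int main()
    {
        Node value{nullptr};
        Node replacement{nullptr};
        Node store{&value};

        assert(Data(store) == &value); // read the operand
        Data(store) = &replacement;    // replace it in place
        assert(store.data == &replacement);
        return 0;
    }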
46 changes: 24 additions & 22 deletions src/coreclr/jit/importer.cpp
@@ -8627,9 +8627,11 @@ void Compiler::impImportBlockCode(BasicBlock* block)
case CEE_STIND_R8:
lclTyp = TYP_DOUBLE;
goto STIND;
STIND:

STIND:
op2 = impPopStack().val; // value to store

STIND_VALUE:
op1 = impPopStack().val; // address to store to

// you can indirect off of a TYP_I_IMPL (if we are in C) or a BYREF
@@ -10840,24 +10842,17 @@ void Compiler::impImportBlockCode(BasicBlock* block)

JITDUMP(" %08X", resolvedToken.token);

op2 = gtNewIconNode(0); // Value
op1 = impPopStack().val; // Dest

if (eeIsValueClass(resolvedToken.hClass))
{
op1 = gtNewStructVal(typGetObjLayout(resolvedToken.hClass), op1);
if (op1->OperIs(GT_OBJ))
{
gtSetObjGcInfo(op1->AsObj());
}
}
else
lclTyp = JITtype2varType(info.compCompHnd->asCorInfoType(resolvedToken.hClass));
if (lclTyp != TYP_STRUCT)
{
size = info.compCompHnd->getClassSize(resolvedToken.hClass);
assert(size == TARGET_POINTER_SIZE);
op1 = gtNewBlockVal(op1, size);
op2 = gtNewZeroConNode(genActualType(lclTyp));
goto STIND_VALUE;
}

op1 = impPopStack().val;
op1 = gtNewStructVal(typGetObjLayout(resolvedToken.hClass), op1);
op2 = gtNewIconNode(0);

op1 = gtNewBlkOpNode(op1, op2, (prefixFlags & PREFIX_VOLATILE) != 0, false);
goto SPILL_APPEND;

@@ -10888,7 +10883,6 @@ void Compiler::impImportBlockCode(BasicBlock* block)
op1->gtFlags |= GTF_BLK_VOLATILE;
}
}

goto SPILL_APPEND;

case CEE_CPBLK:
@@ -10915,11 +10909,10 @@ void Compiler::impImportBlockCode(BasicBlock* block)
op1->gtFlags |= GTF_BLK_VOLATILE;
}
}

goto SPILL_APPEND;

case CEE_CPOBJ:

{
assertImp(sz == sizeof(unsigned));

_impResolveToken(CORINFO_TOKENKIND_Class);
Expand All @@ -10939,10 +10932,19 @@ void Compiler::impImportBlockCode(BasicBlock* block)
goto STIND;
}

op2 = impPopStack().val; // Src
op1 = impPopStack().val; // Dest
op1 = gtNewCpObjNode(op1, op2, resolvedToken.hClass, ((prefixFlags & PREFIX_VOLATILE) != 0));
op2 = impPopStack().val; // Src addr
op1 = impPopStack().val; // Dest addr

ClassLayout* layout = typGetObjLayout(resolvedToken.hClass);
op1 = gtNewStructVal(layout, op1);
op2 = gtNewStructVal(layout, op2);
if (op1->OperIs(GT_OBJ))
{
gtSetObjGcInfo(op1->AsObj());
}
op1 = gtNewBlkOpNode(op1, op2, ((prefixFlags & PREFIX_VOLATILE) != 0), /* isCopyBlock */ true);
goto SPILL_APPEND;
}

case CEE_STOBJ:
{
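With gtNewCpObjNode gone, the cpobj import above builds the block-copy tree inline: both addresses are wrapped in layout-carrying struct values, the destination gets GC info, and the pair is joined by gtNewBlkOpNode as a copy block. A standalone sketch of what the resulting tree means at the source level (hypothetical struct, not JIT code):

    struct Pair
    {
        void* gcRef; // a GC-pointer field forces the layout-aware OBJ form
        long  val;
    };

    void CpObj(Pair* dst, const Pair* src)
    {
        // Imports roughly as ASG(OBJ(dst, layout), OBJ(src, layout)); the
        // layout's GC-pointer map is what lets codegen copy gcRef with the
        // required barriers instead of as raw bytes.
        *dst = *src;
    }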
132 changes: 87 additions & 45 deletions src/coreclr/jit/lower.cpp
@@ -3455,6 +3455,8 @@ void Lowering::LowerStoreLocCommon(GenTreeLclVarCommon* lclStore)
DISPTREERANGE(BlockRange(), lclStore);
JITDUMP("\n");

TryRetypingFloatingPointStoreToIntegerStore(lclStore);

GenTree* src = lclStore->gtGetOp1();
LclVarDsc* varDsc = comp->lvaGetDesc(lclStore);
const bool srcIsMultiReg = src->IsMultiRegNode();
@@ -7164,6 +7166,8 @@ void Lowering::LowerStoreIndirCommon(GenTreeStoreInd* ind)
{
assert(ind->TypeGet() != TYP_STRUCT);

TryRetypingFloatingPointStoreToIntegerStore(ind);

#if defined(TARGET_ARM64)
// Verify containment safety before creating an LEA that must be contained.
//
@@ -7192,51 +7196,6 @@ void Lowering::LowerStoreIndirCommon(GenTreeStoreInd* ind)
}
#endif

if (varTypeIsFloating(ind) && ind->Data()->IsCnsFltOrDbl())
{
// Optimize *x = DCON to *x = ICON which can be slightly faster and/or smaller.
GenTree* data = ind->Data();
double dblCns = data->AsDblCon()->DconValue();
ssize_t intCns = 0;
var_types type = TYP_UNKNOWN;
// XARCH: we can always contain the immediates.
// ARM64: zero can always be contained, other cases will use immediates from the data
// section and it is not a clear win to switch them to inline integers.
// ARM: FP constants are assembled from integral ones, so it is always profitable
// to directly use the integers as it avoids the int -> float conversion.
CLANG_FORMAT_COMMENT_ANCHOR;

#if defined(TARGET_XARCH) || defined(TARGET_ARM)
bool shouldSwitchToInteger = true;
#else // TARGET_ARM64
bool shouldSwitchToInteger = !data->IsCnsNonZeroFltOrDbl();
#endif

if (shouldSwitchToInteger)
{
if (ind->TypeIs(TYP_FLOAT))
{
float fltCns = static_cast<float>(dblCns); // should be a safe round-trip
intCns = static_cast<ssize_t>(*reinterpret_cast<INT32*>(&fltCns));
type = TYP_INT;
}
#ifdef TARGET_64BIT
else
{
assert(ind->TypeIs(TYP_DOUBLE));
intCns = static_cast<ssize_t>(*reinterpret_cast<INT64*>(&dblCns));
type = TYP_LONG;
}
#endif
}

if (type != TYP_UNKNOWN)
{
data->BashToConst(intCns, type);
ind->ChangeType(type);
}
}

LowerStoreIndir(ind);
}
}
@@ -7467,6 +7426,89 @@ bool Lowering::TryTransformStoreObjAsStoreInd(GenTreeBlk* blkNode)
return true;
}

//------------------------------------------------------------------------
// TryRetypingFloatingPointStoreToIntegerStore: Retype an FP memory store.
//
// On some targets, integer stores are cheaper and/or smaller than their
// floating-point counterparts, because, e. g., integer immediates can be
// encoded inline while FP ones need to be loaded from the data section.
//
// Arguments:
// store - The store node
//
void Lowering::TryRetypingFloatingPointStoreToIntegerStore(GenTree* store)
[Inline comment from the PR author]: The original retyping was done under MinOpts too, so this follows; there is of course an argument to be made that it should not.

{
assert(store->OperIsStore() && !store->OperIsAtomicOp());

if (!varTypeIsFloating(store))
{
return;
}

// We only want to transform memory stores, not definitions of candidate locals.
//
if (store->OperIs(GT_STORE_LCL_VAR) && !comp->lvaGetDesc(store->AsLclVar())->lvDoNotEnregister)
{
return;
}

GenTree* data = store->Data();
assert(store->TypeGet() == data->TypeGet());

// Optimize *x = DCON to *x = ICON which can be slightly faster and/or smaller.
//
if (data->IsCnsFltOrDbl())
{
double dblCns = data->AsDblCon()->DconValue();
ssize_t intCns = 0;
var_types type = TYP_UNKNOWN;
// XARCH: we can always contain the immediates.
// ARM64: zero can always be contained, other cases will use immediates from the data
// section and it is not a clear win to switch them to inline integers.
// ARM: FP constants are assembled from integral ones, so it is always profitable
// to directly use the integers as it avoids the int -> float conversion.
CLANG_FORMAT_COMMENT_ANCHOR;

#if defined(TARGET_XARCH) || defined(TARGET_ARM)
bool shouldSwitchToInteger = true;
#else // TARGET_ARM64 || TARGET_LOONGARCH64
bool shouldSwitchToInteger = FloatingPointUtils::isPositiveZero(dblCns);
#endif

if (shouldSwitchToInteger)
{
if (store->TypeIs(TYP_FLOAT))
{
float fltCns = static_cast<float>(dblCns);
intCns = *reinterpret_cast<INT32*>(&fltCns);
type = TYP_INT;
}
#ifdef TARGET_64BIT
else
{
assert(store->TypeIs(TYP_DOUBLE));
intCns = *reinterpret_cast<INT64*>(&dblCns);
type = TYP_LONG;
}
#endif
}

if (type != TYP_UNKNOWN)
{
data->BashToConst(intCns, type);

assert(!store->OperIsLocalStore() || comp->lvaGetDesc(store->AsLclVarCommon())->lvDoNotEnregister);
if (store->OperIs(GT_STORE_LCL_VAR))
{
store->SetOper(GT_STORE_LCL_FLD);
store->AsLclFld()->SetLclOffs(0);
store->AsLclFld()->SetLayout(nullptr);
}
store->ChangeType(type);
}
}
}

#ifdef FEATURE_SIMD
//----------------------------------------------------------------------------------------------
// Lowering::LowerSIMD: Perform containment analysis for a SIMD intrinsic node.
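A worked example of the retyping above (standalone sketch, not JIT code): a TYP_FLOAT store of the constant 1.5f becomes a TYP_INT store of its IEEE-754 bit pattern 0x3FC00000, which XARCH and ARM can encode as an inline immediate:

    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    int main()
    {
        // The transform rewrites "*p = 1.5f" (a TYP_FLOAT store of a DCON)
        // into "*(int32_t*)p = 0x3FC00000" (a TYP_INT store of an ICON).
        float fltCns = 1.5f;
        int32_t intCns;
        std::memcpy(&intCns, &fltCns, sizeof(intCns)); // same bit pattern
        std::printf("0x%X\n", static_cast<unsigned>(intCns)); // 0x3FC00000

        // Round-tripping the bits through an integer store leaves the
        // floating-point value unchanged.
        float check;
        std::memcpy(&check, &intCns, sizeof(check));
        return check == 1.5f ? 0 : 1;
    }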
2 changes: 2 additions & 0 deletions src/coreclr/jit/lower.h
@@ -326,6 +326,8 @@ class Lowering final : public Phase

bool TryTransformStoreObjAsStoreInd(GenTreeBlk* blkNode);

void TryRetypingFloatingPointStoreToIntegerStore(GenTree* store);

GenTree* LowerSwitch(GenTree* node);
bool TryLowerSwitchToBitTest(
BasicBlock* jumpTable[], unsigned jumpCount, unsigned targetCount, BasicBlock* bbSwitch, GenTree* switchValue);