Skip to content

JIT: Support bitwise field insertions for return registers #113178

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Merged
merged 12 commits into from
Mar 8, 2025
4 changes: 2 additions & 2 deletions src/coreclr/jit/compiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -6586,7 +6586,7 @@ class Compiler
#endif // FEATURE_SIMD
GenTree* fgMorphIndexAddr(GenTreeIndexAddr* tree);
GenTree* fgMorphExpandCast(GenTreeCast* tree);
GenTreeFieldList* fgMorphLclArgToFieldList(GenTreeLclVarCommon* lcl);
GenTreeFieldList* fgMorphLclToFieldList(GenTreeLclVar* lcl);
GenTreeCall* fgMorphArgs(GenTreeCall* call);

void fgMakeOutgoingStructArgCopy(GenTreeCall* call, CallArg* arg);
Expand Down Expand Up @@ -6661,7 +6661,7 @@ class Compiler
GenTree* fgMorphCopyBlock(GenTree* tree);
private:
GenTree* fgMorphSmpOp(GenTree* tree, MorphAddrContext* mac, bool* optAssertionPropDone = nullptr);
void fgTryReplaceStructLocalWithField(GenTree* tree);
void fgTryReplaceStructLocalWithFields(GenTree** use);
GenTree* fgMorphFinalizeIndir(GenTreeIndir* indir);
GenTree* fgOptimizeCast(GenTreeCast* cast);
GenTree* fgOptimizeCastOnStore(GenTree* store);
Expand Down
13 changes: 13 additions & 0 deletions src/coreclr/jit/gentree.h
Original file line number Diff line number Diff line change
Expand Up @@ -3095,6 +3095,19 @@ struct GenTreeOp : public GenTreeUnOp
// then sets the flag GTF_DIV_BY_CNS_OPT and GTF_DONT_CSE on the constant
void CheckDivideByConstOptimized(Compiler* comp);

// ReturnValueRef: Get a mutable reference to the operand that holds the
// value being returned, so the caller can update the use in place.
//
// Only valid on GT_RETURN, GT_RETFILT and GT_SWIFT_ERROR_RET nodes.
//
// Returns:
//   Reference to the return-value operand (gtOp1, except for
//   GT_SWIFT_ERROR_RET where it is gtOp2).
//
GenTree*& ReturnValueRef()
{
    assert(OperIs(GT_RETURN, GT_RETFILT, GT_SWIFT_ERROR_RET));
#ifdef SWIFT_SUPPORT
    // For GT_SWIFT_ERROR_RET the return value lives in gtOp2 rather than
    // gtOp1 (op1 presumably carries the Swift error value — see the
    // GT_SWIFT_ERROR_RET node's definition).
    if (OperIs(GT_SWIFT_ERROR_RET))
    {
        return gtOp2;
    }
#endif // SWIFT_SUPPORT

    return gtOp1;
}

GenTree* GetReturnValue() const
{
assert(OperIs(GT_RETURN, GT_RETFILT, GT_SWIFT_ERROR_RET));
Expand Down
32 changes: 0 additions & 32 deletions src/coreclr/jit/lclmorph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1240,38 +1240,6 @@ class LocalAddressVisitor final : public GenTreeVisitor<LocalAddressVisitor>
PopValue();
break;

case GT_RETURN:
if (TopValue(0).Node() != node)
{
assert(TopValue(1).Node() == node);
assert(TopValue(0).Node() == node->gtGetOp1());
GenTreeUnOp* ret = node->AsUnOp();
GenTree* retVal = ret->gtGetOp1();
if (retVal->OperIs(GT_LCL_VAR))
{
// TODO-1stClassStructs: this block is a temporary workaround to keep diffs small,
// having `doNotEnreg` affect block init and copy transformations that affect many methods.
// I have a change that introduces more precise and effective solution for that, but it would
// be merged separately.
GenTreeLclVar* lclVar = retVal->AsLclVar();
unsigned lclNum = lclVar->GetLclNum();
if (!m_compiler->compMethodReturnsMultiRegRetType() &&
!m_compiler->lvaIsImplicitByRefLocal(lclVar->GetLclNum()))
{
LclVarDsc* varDsc = m_compiler->lvaGetDesc(lclNum);
if (varDsc->lvFieldCnt > 1)
{
m_compiler->lvaSetVarDoNotEnregister(
lclNum DEBUGARG(DoNotEnregisterReason::BlockOpRet));
}
}
}

EscapeValue(TopValue(0), node);
PopValue();
}
break;

case GT_CALL:
while (TopValue(0).Node() != node)
{
Expand Down
253 changes: 211 additions & 42 deletions src/coreclr/jit/lower.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -538,13 +538,14 @@ GenTree* Lowering::LowerNode(GenTree* node)

case GT_CAST:
{
GenTree* nextNode = LowerCast(node);
#if defined(TARGET_XARCH)
if (nextNode != nullptr)
if (!TryRemoveCast(node->AsCast()))
{
return nextNode;
GenTree* nextNode = LowerCast(node);
if (nextNode != nullptr)
{
return nextNode;
}
}
#endif // TARGET_XARCH
}
break;

Expand Down Expand Up @@ -4752,20 +4753,7 @@ void Lowering::LowerRetFieldList(GenTreeOp* ret, GenTreeFieldList* fieldList)
return;
}

unsigned regIndex = 0;
for (GenTreeFieldList::Use& use : fieldList->Uses())
{
var_types regType = retDesc.GetReturnRegType(regIndex);
if (varTypeUsesIntReg(regType) != varTypeUsesIntReg(use.GetNode()))
{
GenTree* bitcast = comp->gtNewOperNode(GT_BITCAST, regType, use.GetNode());
BlockRange().InsertBefore(fieldList, bitcast);
use.SetNode(bitcast);
LowerNode(bitcast);
}

regIndex++;
}
LowerFieldListToFieldListOfRegisters(fieldList);
}

//----------------------------------------------------------------------------------------------
Expand All @@ -4777,54 +4765,196 @@ void Lowering::LowerRetFieldList(GenTreeOp* ret, GenTreeFieldList* fieldList)
// fieldList - The FIELD_LIST node
//
// Returns:
// True if the fields of the FIELD_LIST map cleanly to the ABI returned
// registers. Insertions of bitcasts may still be required.
// True if the fields of the FIELD_LIST are all direct insertions into the
// return registers.
//
bool Lowering::IsFieldListCompatibleWithReturn(GenTreeFieldList* fieldList)
{
JITDUMP("Checking if field list [%06u] is compatible with return ABI: ", Compiler::dspTreeID(fieldList));
const ReturnTypeDesc& retDesc = comp->compRetTypeDesc;
unsigned numRetRegs = retDesc.GetReturnRegCount();
unsigned regIndex = 0;

for (const GenTreeFieldList::Use& use : fieldList->Uses())
GenTreeFieldList::Use* use = fieldList->Uses().GetHead();
for (unsigned i = 0; i < numRetRegs; i++)
{
if (regIndex >= numRetRegs)
{
JITDUMP("it is not; too many fields\n");
return false;
}
unsigned regStart = retDesc.GetReturnFieldOffset(i);
var_types regType = retDesc.GetReturnRegType(i);
unsigned regEnd = regStart + genTypeSize(regType);

unsigned offset = retDesc.GetReturnFieldOffset(regIndex);
if (offset != use.GetOffset())
// TODO-CQ: Could just create a 0 for this.
if (use == nullptr)
{
JITDUMP("it is not; field %u register starts at offset %u, but field starts at offset %u\n", regIndex,
offset, use.GetOffset());
JITDUMP("it is not; register %u has no corresponding field\n", i);
return false;
}

var_types fieldType = genActualType(use.GetNode());
var_types regType = genActualType(retDesc.GetReturnRegType(regIndex));
if (genTypeSize(fieldType) != genTypeSize(regType))
do
{
JITDUMP("it is not; field %u register has type %s but field has type %s\n", regIndex, varTypeName(regType),
varTypeName(fieldType));
return false;
}
unsigned fieldStart = use->GetOffset();

regIndex++;
if (fieldStart < regStart)
{
// Not fully contained in a register.
// TODO-CQ: Could just remove these fields if they don't partially overlap with the next register.
JITDUMP("it is not; field [%06u] starts before register %u\n", Compiler::dspTreeID(use->GetNode()), i);
return false;
}

if (fieldStart >= regEnd)
{
break;
}

unsigned fieldEnd = fieldStart + genTypeSize(use->GetType());
if (fieldEnd > regEnd)
{
JITDUMP("it is not; field [%06u] ends after register %u\n", Compiler::dspTreeID(use->GetNode()), i);
return false;
}

// float -> float insertions are not yet supported
if (varTypeUsesFloatReg(use->GetNode()) && varTypeUsesFloatReg(regType) && (fieldStart != regStart))
{
JITDUMP("it is not; field [%06u] requires an insertion into register %u\n",
Compiler::dspTreeID(use->GetNode()), i);
return false;
}

use = use->GetNext();
} while (use != nullptr);
}

if (regIndex != numRetRegs)
if (use != nullptr)
{
JITDUMP("it is not; too few fields\n");
// TODO-CQ: Could just remove these fields.
JITDUMP("it is not; field [%06u] corresponds to no register\n", Compiler::dspTreeID(use->GetNode()));
return false;
}

JITDUMP("it is\n");
return true;
}

//----------------------------------------------------------------------------------------------
// LowerFieldListToFieldListOfRegisters:
//   Lower the specified field list into one that is compatible with the return
//   registers: afterwards the FIELD_LIST has exactly one entry per return
//   register. Multiple fields that land in the same register are combined by
//   shifting each to its bit offset within the register and OR'ing them
//   together; bitcasts are inserted where a field's register class does not
//   match the return register's.
//
// Arguments:
//   fieldList - The field list
//
// Assumptions:
//   IsFieldListCompatibleWithReturn(fieldList) already returned true, so every
//   field lies fully within some return register and the uses are sorted by
//   offset.
//
void Lowering::LowerFieldListToFieldListOfRegisters(GenTreeFieldList* fieldList)
{
    const ReturnTypeDesc& retDesc = comp->compRetTypeDesc;
    unsigned numRegs = retDesc.GetReturnRegCount();

    GenTreeFieldList::Use* use = fieldList->Uses().GetHead();
    // Sortedness lets us consume the uses in a single forward pass per register.
    assert(fieldList->Uses().IsSorted());

    for (unsigned i = 0; i < numRegs; i++)
    {
        // Byte range [regStart, regEnd) covered by return register i.
        unsigned regStart = retDesc.GetReturnFieldOffset(i);
        var_types regType = genActualType(retDesc.GetReturnRegType(i));
        unsigned regEnd = regStart + genTypeSize(regType);

        // 'regEntry' is the use that will survive as the single entry for this
        // register; subsequent fields in the same register are OR'ed into it.
        GenTreeFieldList::Use* regEntry = use;

        assert(use != nullptr);

        // Remember the node before the FIELD_LIST so the freshly inserted
        // nodes can be lowered as a range afterwards.
        GenTree* fieldListPrev = fieldList->gtPrev;

        do
        {
            unsigned fieldStart = use->GetOffset();

            assert(fieldStart >= regStart);

            if (fieldStart >= regEnd)
            {
                // This field belongs to the next register.
                break;
            }

            var_types fieldType = use->GetType();
            GenTree* value = use->GetNode();

            // Bit position of the field within the register (in bytes here,
            // scaled to bits for the shift below).
            unsigned insertOffset = fieldStart - regStart;
            GenTreeFieldList::Use* nextUse = use->GetNext();

            // First ensure the value does not have upper bits set that
            // interfere with the next field.
            if ((nextUse != nullptr) && (nextUse->GetOffset() < regEnd) &&
                (fieldStart + genTypeSize(genActualType(fieldType)) > nextUse->GetOffset()))
            {
                assert(varTypeIsSmall(fieldType));
                // This value may interfere with the next field. Ensure that doesn't happen.
                if (comp->fgCastNeeded(value, varTypeToUnsigned(fieldType)))
                {
                    value = comp->gtNewCastNode(TYP_INT, value, true, varTypeToUnsigned(fieldType));
                    BlockRange().InsertBefore(fieldList, value);
                }
            }

            // If this is a float -> int insertion, then we need the bitcast now.
            if (varTypeUsesFloatReg(value) && varTypeUsesIntReg(regType))
            {
                assert((genTypeSize(value) == 4) || (genTypeSize(value) == 8));
                var_types castType = genTypeSize(value) == 4 ? TYP_INT : TYP_LONG;
                value = comp->gtNewBitCastNode(castType, value);
                BlockRange().InsertBefore(fieldList, value);
            }

            // Widen to long if the field, once shifted into place, would not
            // fit in the value's current actual type.
            if (insertOffset + genTypeSize(fieldType) > genTypeSize(genActualType(value)))
            {
                value = comp->gtNewCastNode(TYP_LONG, value, true, TYP_LONG);
                BlockRange().InsertBefore(fieldList, value);
            }

            // Shift the field to its bit offset within the register.
            if (fieldStart != regStart)
            {
                GenTree* shiftAmount = comp->gtNewIconNode((ssize_t)insertOffset * BITS_PER_BYTE);
                value = comp->gtNewOperNode(GT_LSH, genActualType(value), value, shiftAmount);
                BlockRange().InsertBefore(fieldList, shiftAmount, value);
            }

            // Not the first field for this register: OR it into the
            // accumulated value, widening the accumulator first if needed.
            if (regEntry != use)
            {
                GenTree* prevValue = regEntry->GetNode();
                if (genActualType(value) != genActualType(regEntry->GetNode()))
                {
                    prevValue = comp->gtNewCastNode(TYP_LONG, prevValue, true, TYP_LONG);
                    BlockRange().InsertBefore(fieldList, prevValue);
                    regEntry->SetNode(prevValue);
                }

                value = comp->gtNewOperNode(GT_OR, genActualType(value), prevValue, value);
                BlockRange().InsertBefore(fieldList, value);

                // Remove this field from the FIELD_LIST.
                regEntry->SetNext(use->GetNext());
            }

            regEntry->SetNode(value);
            regEntry->SetType(genActualType(value));
            use = regEntry->GetNext();
        } while (use != nullptr);

        assert(regEntry != nullptr);
        // Final fix-up: if the accumulated value's register class still does
        // not match the return register, bitcast it.
        if (varTypeUsesIntReg(regEntry->GetNode()) != varTypeUsesIntReg(regType))
        {
            GenTree* bitCast = comp->gtNewBitCastNode(regType, regEntry->GetNode());
            BlockRange().InsertBefore(fieldList, bitCast);
            regEntry->SetNode(bitCast);
        }

        // Lower all nodes inserted for this register, if any.
        if (fieldListPrev->gtNext != fieldList)
        {
            LowerRange(fieldListPrev->gtNext, fieldList->gtPrev);
        }
    }

    // All fields must have been consumed by some register.
    assert(use == nullptr);
}

//----------------------------------------------------------------------------------------------
// LowerStoreLocCommon: platform independent part of local var or field store lowering.
//
Expand Down Expand Up @@ -8792,6 +8922,45 @@ void Lowering::ContainCheckRet(GenTreeUnOp* ret)
#endif // FEATURE_MULTIREG_RET
}

//------------------------------------------------------------------------
// TryRemoveCast:
//   Try to remove a cast node by folding it into its (constant) operand.
//
// Arguments:
//   node - Cast node
//
// Returns:
//   True if the cast was removed.
//
// Remarks:
//   Only non-overflow casts of constants are handled, and only when
//   optimizations are enabled.
//
bool Lowering::TryRemoveCast(GenTreeCast* node)
{
    if (comp->opts.OptimizationDisabled())
    {
        return false;
    }

    if (node->gtOverflow())
    {
        return false;
    }

    GenTree* op = node->CastOp();
    if (!op->OperIsConst())
    {
        return false;
    }

    // Delegate to constant folding rather than reimplementing every
    // CAST(constant) combination here. On success it morphs 'node' itself
    // into a constant in place.
    GenTree* folded = comp->gtFoldExprConst(node);
    assert(folded == node);
    if (folded->OperIs(GT_CAST))
    {
        // Folding did not happen; keep the cast as-is.
        return false;
    }

    // 'node' is now a constant, so the original operand is no longer consumed.
    op->SetUnusedValue();
    return true;
}

//------------------------------------------------------------------------
// TryRemoveBitCast:
// Try to remove a bitcast node by changing its operand.
Expand Down
Loading
Loading