Skip to content
This repository was archived by the owner on Jan 23, 2023. It is now read-only.

[ARM32/RyuJIT] Enable passing struct argument that use stack only #11541

Merged
merged 5 commits into from
May 22, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
85 changes: 62 additions & 23 deletions src/jit/codegenarmarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -580,18 +580,17 @@ void CodeGen::genPutArgStk(GenTreePutArgStk* treeNode)
{
noway_assert((source->OperGet() == GT_LCL_VAR) || (source->OperGet() == GT_OBJ));

NYI_ARM("genPutArgStk: GT_OBJ or GT_LCL_VAR source of struct type");

#ifdef _TARGET_ARM64_

var_types targetType = source->TypeGet();
noway_assert(varTypeIsStruct(targetType));

// We will copy this struct to the stack, possibly using a ldp instruction
// Setup loReg and hiReg from the internal registers that we reserved in lower.
// We will copy this struct to the stack, possibly using a ldp/ldr instruction
// in ARM64/ARM
// Setup loReg (and hiReg) from the internal registers that we reserved in lower.
//
regNumber loReg = treeNode->ExtractTempReg();
regNumber hiReg = treeNode->GetSingleTempReg();
regNumber loReg = treeNode->ExtractTempReg();
#ifdef _TARGET_ARM64_
regNumber hiReg = treeNode->GetSingleTempReg();
#endif // _TARGET_ARM64_
regNumber addrReg = REG_NA;

GenTreeLclVarCommon* varNode = nullptr;
Expand Down Expand Up @@ -627,8 +626,10 @@ void CodeGen::genPutArgStk(GenTreePutArgStk* treeNode)
// the xor ensures that only one of the two is setup, not both
assert((varNode != nullptr) ^ (addrNode != nullptr));

BYTE gcPtrs[MAX_ARG_REG_COUNT] = {}; // TYPE_GC_NONE = 0
unsigned gcPtrCount; // The count of GC pointers in the struct
BYTE gcPtrArray[MAX_ARG_REG_COUNT] = {}; // TYPE_GC_NONE = 0
BYTE* gcPtrs = gcPtrArray;

unsigned gcPtrCount; // The count of GC pointers in the struct
int structSize;
bool isHfa;

Expand All @@ -649,10 +650,15 @@ void CodeGen::genPutArgStk(GenTreePutArgStk* treeNode)

structSize = varDsc->lvSize(); // This yields the roundUp size, but that is fine
// as that is how much stack is allocated for this LclVar
isHfa = varDsc->lvIsHfa();
isHfa = varDsc->lvIsHfa();
#ifdef _TARGET_ARM64_
gcPtrCount = varDsc->lvStructGcCount;
for (unsigned i = 0; i < gcPtrCount; ++i)
gcPtrs[i] = varDsc->lvGcLayout[i];
#else // _TARGET_ARM_
gcPtrs = treeNode->gtGcPtrs;
gcPtrCount = treeNode->gtNumSlots;
#endif // _TARGET_ARM_
}

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you explain why this is ARM32-specific? Why does ARM64 do something different here?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

treeNode->gtGcPtrs and treeNode->gtNumSlots can only be used when FEATURE_PUT_STRUCT_ARG_STK is set, and ARM64 does not use this feature.

else // addrNode is used
{
Expand All @@ -662,13 +668,15 @@ void CodeGen::genPutArgStk(GenTreePutArgStk* treeNode)
genConsumeAddress(addrNode);
addrReg = addrNode->gtRegNum;

#ifdef _TARGET_ARM64_
// If addrReg equal to loReg, swap(loReg, hiReg)
// This reduces code complexity by only supporting one addrReg overwrite case
if (loReg == addrReg)
{
loReg = hiReg;
hiReg = addrReg;
}
#endif // _TARGET_ARM64_

CORINFO_CLASS_HANDLE objClass = source->gtObj.gtClass;

Expand All @@ -683,21 +691,24 @@ void CodeGen::genPutArgStk(GenTreePutArgStk* treeNode)
{
noway_assert(gcPtrCount == 0);
}
#ifdef _TARGET_ARM64_
else
{
noway_assert(structSize <= 2 * TARGET_POINTER_SIZE);
}

noway_assert(structSize <= MAX_PASS_MULTIREG_BYTES);

// For a >= 16-byte structSize we will generate a ldp and stp instruction each loop
// ldp x2, x3, [x0]
// stp x2, x3, [sp, #16]
#endif // _TARGET_ARM64_

int remainingSize = structSize;
unsigned structOffset = 0;
unsigned nextIndex = 0;

#ifdef _TARGET_ARM64_
// For a >= 16-byte structSize we will generate a ldp and stp instruction each loop
// ldp x2, x3, [x0]
// stp x2, x3, [sp, #16]

while (remainingSize >= 2 * TARGET_POINTER_SIZE)
{
var_types type0 = compiler->getJitGCType(gcPtrs[nextIndex + 0]);
Expand Down Expand Up @@ -730,20 +741,52 @@ void CodeGen::genPutArgStk(GenTreePutArgStk* treeNode)
structOffset += (2 * TARGET_POINTER_SIZE);
nextIndex += 2;
}
#else // _TARGET_ARM_
// For a >= 4 byte structSize we will generate a ldr and str instruction each loop
// ldr r2, [r0]
// str r2, [sp, #16]
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How about str r2, [sp, #4]?
We are going to increase the offset by TARGET_POINTER_SIZE for ARM32 here.

Copy link
Member

@hqueue hqueue May 18, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Or does the offset start from 16 and increase by 4 ?

Copy link
Author

@hseok-oh hseok-oh May 18, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think the exact offset value is important; it is just an example. This value isn't an offset within this struct argument. It is the stack offset of the argument among all of the method's outgoing arguments.

while (remainingSize >= TARGET_POINTER_SIZE)
{
var_types type = compiler->getJitGCType(gcPtrs[nextIndex]);

if (varNode != nullptr)
{
// Load from our varNumInp source
emit->emitIns_R_S(INS_ldr, emitTypeSize(type), loReg, varNumInp, 0);
}
else
{
// check for case of destroying the addrRegister while we still need it
assert(loReg != addrReg);
noway_assert(remainingSize == TARGET_POINTER_SIZE);

// Load from our address expression source
emit->emitIns_R_R_I(INS_ldr, emitTypeSize(type), loReg, addrReg, structOffset);
}

// Emit str instruction to store the register into the outgoing argument area
emit->emitIns_S_R(INS_str, emitTypeSize(type), loReg, varNumOut, argOffsetOut);
argOffsetOut += TARGET_POINTER_SIZE; // We stored 4-bytes of the struct
assert(argOffsetOut <= argOffsetMax); // We can't write beyound the outgoing area area

remainingSize -= TARGET_POINTER_SIZE; // We loaded 4-bytes of the struct
structOffset += TARGET_POINTER_SIZE;
nextIndex += 1;
}
#endif // _TARGET_ARM_

// For a 12-byte structSize we will generate two load instructions
// ldr x2, [x0]
// ldr w3, [x0, #8]
// str x2, [sp, #16]
// str w3, [sp, #24]

var_types nextType = compiler->getJitGCType(gcPtrs[nextIndex]);
emitAttr nextAttr = emitTypeSize(nextType);

while (remainingSize > 0)
{
if (remainingSize >= TARGET_POINTER_SIZE)
{
var_types nextType = compiler->getJitGCType(gcPtrs[nextIndex]);
emitAttr nextAttr = emitTypeSize(nextType);
remainingSize -= TARGET_POINTER_SIZE;

if (varNode != nullptr)
Expand All @@ -765,8 +808,6 @@ void CodeGen::genPutArgStk(GenTreePutArgStk* treeNode)

structOffset += TARGET_POINTER_SIZE;
nextIndex++;
nextType = compiler->getJitGCType(gcPtrs[nextIndex]);
nextAttr = emitTypeSize(nextType);
}
else // (remainingSize < TARGET_POINTER_SIZE)
{
Expand All @@ -777,7 +818,7 @@ void CodeGen::genPutArgStk(GenTreePutArgStk* treeNode)
assert(varNode == nullptr);

// the left over size is smaller than a pointer and thus can never be a GC type
assert(varTypeIsGC(nextType) == false);
assert(varTypeIsGC(compiler->getJitGCType(gcPtrs[nextIndex])) == false);

var_types loadType = TYP_UINT;
if (loadSize == 1)
Expand Down Expand Up @@ -807,8 +848,6 @@ void CodeGen::genPutArgStk(GenTreePutArgStk* treeNode)
assert(argOffsetOut <= argOffsetMax); // We can't write beyound the outgoing area area
}
}

#endif // _TARGET_ARM64_
}
}
}
Expand Down
1 change: 1 addition & 0 deletions src/jit/lsraarm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -717,6 +717,7 @@ void Lowering::TreeNodeInfoInit(GenTree* tree)
case GT_PINVOKE_PROLOG:
case GT_JCC:
case GT_MEMORYBARRIER:
case GT_OBJ:
info->dstCount = tree->IsValue() ? 1 : 0;
if (kind & (GTK_CONST | GTK_LEAF))
{
Expand Down
5 changes: 5 additions & 0 deletions src/jit/lsraarmarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -654,8 +654,13 @@ void Lowering::TreeNodeInfoInitPutArgStk(GenTreePutArgStk* argNode, fgArgTabEntr
}
else
{
#ifdef _TARGET_ARM64_
// We could use a ldp/stp sequence so we need two internal registers
argNode->gtLsraInfo.internalIntCount = 2;
#else // _TARGET_ARM_
// We could use a ldr/str sequence so we need an internal register
argNode->gtLsraInfo.internalIntCount = 1;
#endif // _TARGET_ARM_

if (putArgChild->OperGet() == GT_OBJ)
{
Expand Down