Skip to content

Commit 17ed091

Browse files
Enable more general BITCAST folding in local morph (#84144)
* Enable the bitcast transform more generally * Enable general BITCAST support in the backend Correctness fixes: 1) The hacky way in which contained indirections were handled will not work for targets where address containment has special rules that depend on the type being loaded. 2) Small-typed sources were not handled correctly. 3) Interference checks were missing. This change has some regressions attached to it because we now contain locals with optimizations disabled, and that can lead to using double stores instead of integer ones when the user is a store itself.
1 parent 388edb6 commit 17ed091

File tree

7 files changed

+70
-45
lines changed

7 files changed

+70
-45
lines changed

src/coreclr/jit/assertionprop.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5709,6 +5709,7 @@ Compiler::fgWalkResult Compiler::optVNConstantPropCurStmt(BasicBlock* block, Sta
57095709
case GT_RSZ:
57105710
case GT_NEG:
57115711
case GT_CAST:
5712+
case GT_BITCAST:
57125713
case GT_INTRINSIC:
57135714
#ifdef FEATURE_HW_INTRINSICS
57145715
case GT_HWINTRINSIC:

src/coreclr/jit/codegencommon.cpp

Lines changed: 5 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -9377,28 +9377,18 @@ void CodeGen::genBitCast(var_types targetType, regNumber targetReg, var_types sr
93779377
//
93789378
void CodeGen::genCodeForBitCast(GenTreeOp* treeNode)
93799379
{
9380+
assert(treeNode->TypeGet() == genActualType(treeNode));
93809381
regNumber targetReg = treeNode->GetRegNum();
93819382
var_types targetType = treeNode->TypeGet();
93829383
GenTree* op1 = treeNode->gtGetOp1();
93839384
genConsumeRegs(op1);
93849385

93859386
if (op1->isContained())
93869387
{
9387-
assert(op1->IsLocal() || op1->isIndir());
9388-
if (genIsRegCandidateLocal(op1))
9389-
{
9390-
unsigned lclNum = op1->AsLclVar()->GetLclNum();
9391-
GetEmitter()->emitIns_R_S(ins_Load(treeNode->TypeGet(), compiler->isSIMDTypeLocalAligned(lclNum)),
9392-
emitTypeSize(treeNode), targetReg, lclNum, 0);
9393-
}
9394-
else
9395-
{
9396-
op1->gtType = treeNode->TypeGet();
9397-
op1->SetRegNum(targetReg);
9398-
op1->ClearContained();
9399-
JITDUMP("Changing type of BITCAST source to load directly.\n");
9400-
genCodeForTreeNode(op1);
9401-
}
9388+
assert(op1->OperIs(GT_LCL_VAR));
9389+
unsigned lclNum = op1->AsLclVarCommon()->GetLclNum();
9390+
instruction loadIns = ins_Load(targetType, compiler->isSIMDTypeLocalAligned(lclNum));
9391+
GetEmitter()->emitIns_R_S(loadIns, emitTypeSize(targetType), targetReg, lclNum, 0);
94029392
}
94039393
else
94049394
{

src/coreclr/jit/compiler.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5937,6 +5937,7 @@ class Compiler
59375937
void fgTryReplaceStructLocalWithField(GenTree* tree);
59385938
GenTree* fgOptimizeCast(GenTreeCast* cast);
59395939
GenTree* fgOptimizeCastOnAssignment(GenTreeOp* asg);
5940+
GenTree* fgOptimizeBitCast(GenTreeUnOp* bitCast);
59405941
GenTree* fgOptimizeEqualityComparisonWithConst(GenTreeOp* cmp);
59415942
GenTree* fgOptimizeRelationalComparisonWithConst(GenTreeOp* cmp);
59425943
GenTree* fgOptimizeRelationalComparisonWithFullRangeConst(GenTreeOp* cmp);

src/coreclr/jit/lclmorph.cpp

Lines changed: 7 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1354,17 +1354,15 @@ class LocalAddressVisitor final : public GenTreeVisitor<LocalAddressVisitor>
13541354
}
13551355
#endif // FEATURE_HW_INTRINSICS
13561356

1357-
// Turn this into a narrow-cast if we can.
1358-
if (!isDef && varTypeIsIntegral(indir) && varTypeIsIntegral(varDsc))
1357+
if (!isDef)
13591358
{
1360-
return IndirTransform::NarrowCast;
1361-
}
1359+
if (varTypeIsIntegral(indir) && varTypeIsIntegral(varDsc))
1360+
{
1361+
return IndirTransform::NarrowCast;
1362+
}
13621363

1363-
// Turn this into a bitcast if we can.
1364-
if ((genTypeSize(indir) == genTypeSize(varDsc)) && (varTypeIsFloating(indir) || varTypeIsFloating(varDsc)))
1365-
{
1366-
// TODO-ADDR: enable this optimization for all users and all targets.
1367-
if (user->OperIs(GT_RETURN) && (genTypeSize(indir) <= TARGET_POINTER_SIZE))
1364+
if ((genTypeSize(indir) == genTypeSize(varDsc)) && (genTypeSize(indir) <= TARGET_POINTER_SIZE) &&
1365+
(varTypeIsFloating(indir) || varTypeIsFloating(varDsc)))
13681366
{
13691367
return IndirTransform::BitCast;
13701368
}

src/coreclr/jit/lower.cpp

Lines changed: 16 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -524,11 +524,15 @@ GenTree* Lowering::LowerNode(GenTree* node)
524524
LowerCast(node);
525525
break;
526526

527+
case GT_BITCAST:
528+
ContainCheckBitCast(node);
529+
break;
530+
527531
#if defined(TARGET_XARCH) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
528532
case GT_BOUNDS_CHECK:
529533
ContainCheckBoundsChk(node->AsBoundsChk());
530534
break;
531-
#endif // TARGET_XARCH
535+
#endif // defined(TARGET_XARCH) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
532536

533537
case GT_ARR_ELEM:
534538
noway_assert(!comp->opts.compJitEarlyExpandMDArrays);
@@ -7659,6 +7663,12 @@ void Lowering::ContainCheckNode(GenTree* node)
76597663
#endif
76607664
ContainCheckShiftRotate(node->AsOp());
76617665
break;
7666+
case GT_CAST:
7667+
ContainCheckCast(node->AsCast());
7668+
break;
7669+
case GT_BITCAST:
7670+
ContainCheckBitCast(node);
7671+
break;
76627672
case GT_ARR_OFFSET:
76637673
ContainCheckArrOffset(node->AsArrOffs());
76647674
break;
@@ -7801,32 +7811,17 @@ void Lowering::ContainCheckRet(GenTreeUnOp* ret)
78017811
void Lowering::ContainCheckBitCast(GenTree* node)
78027812
{
78037813
GenTree* const op1 = node->AsOp()->gtOp1;
7804-
if (op1->isMemoryOp())
7814+
if (op1->OperIs(GT_LCL_VAR) && (genTypeSize(op1) == genTypeSize(node)))
78057815
{
7806-
op1->SetContained();
7807-
}
7808-
else if (op1->OperIs(GT_LCL_VAR))
7809-
{
7810-
if (!m_lsra->willEnregisterLocalVars())
7811-
{
7812-
op1->SetContained();
7813-
}
7814-
const LclVarDsc* varDsc = comp->lvaGetDesc(op1->AsLclVar());
7815-
// TODO-Cleanup: we want to check if the local is already known not
7816-
// to be on reg, for example, because local enreg is disabled.
7817-
if (varDsc->lvDoNotEnregister)
7816+
if (IsContainableMemoryOp(op1) && IsSafeToContainMem(node, op1))
78187817
{
7819-
op1->SetContained();
7818+
MakeSrcContained(node, op1);
78207819
}
7821-
else
7820+
else if (IsSafeToMarkRegOptional(node, op1))
78227821
{
7823-
op1->SetRegOptional();
7822+
MakeSrcRegOptional(node, op1);
78247823
}
78257824
}
7826-
else if (op1->IsLocal())
7827-
{
7828-
op1->SetContained();
7829-
}
78307825
}
78317826

78327827
//------------------------------------------------------------------------

src/coreclr/jit/morph.cpp

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9491,6 +9491,16 @@ GenTree* Compiler::fgMorphSmpOp(GenTree* tree, MorphAddrContext* mac, bool* optA
94919491
op2 = tree->gtGetOp2IfPresent();
94929492
break;
94939493

9494+
case GT_BITCAST:
9495+
{
9496+
GenTree* optimizedTree = fgOptimizeBitCast(tree->AsUnOp());
9497+
if (optimizedTree != nullptr)
9498+
{
9499+
return optimizedTree;
9500+
}
9501+
}
9502+
break;
9503+
94949504
case GT_EQ:
94959505
case GT_NE:
94969506
// It is not safe to reorder/delete CSE's
@@ -10317,6 +10327,35 @@ GenTree* Compiler::fgOptimizeCastOnAssignment(GenTreeOp* asg)
1031710327
return asg;
1031810328
}
1031910329

10330+
//------------------------------------------------------------------------
10331+
// fgOptimizeBitCast: Optimizes the supplied BITCAST node.
10332+
//
10333+
// Retypes the source node and removes the cast if possible.
10334+
//
10335+
// Arguments:
10336+
// bitCast - the BITCAST node
10337+
//
10338+
// Return Value:
10339+
// The optimized tree or "nullptr" if no transformations were performed.
10340+
//
10341+
GenTree* Compiler::fgOptimizeBitCast(GenTreeUnOp* bitCast)
10342+
{
10343+
if (opts.OptimizationDisabled() || optValnumCSE_phase)
10344+
{
10345+
return nullptr;
10346+
}
10347+
10348+
GenTree* op1 = bitCast->gtGetOp1();
10349+
if (op1->OperIs(GT_IND, GT_LCL_FLD) && (genTypeSize(op1) == genTypeSize(bitCast)))
10350+
{
10351+
op1->ChangeType(bitCast->TypeGet());
10352+
op1->SetVNsFromNode(bitCast);
10353+
return op1;
10354+
}
10355+
10356+
return nullptr;
10357+
}
10358+
1032010359
//------------------------------------------------------------------------
1032110360
// fgOptimizeEqualityComparisonWithConst: optimizes various EQ/NE(OP, CONST) patterns.
1032210361
//

src/coreclr/jit/optcse.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3560,6 +3560,7 @@ bool Compiler::optIsCSEcandidate(GenTree* tree)
35603560
case GT_BSWAP:
35613561
case GT_BSWAP16:
35623562
case GT_CAST:
3563+
case GT_BITCAST:
35633564
return true; // CSE these Unary Operators
35643565

35653566
case GT_SUB:

0 commit comments

Comments
 (0)