Skip to content

[JIT] Enable conditional chaining for Intel APX #111072

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 11 commits into from
Apr 3, 2025
48 changes: 4 additions & 44 deletions src/coreclr/jit/codegen.h
Original file line number Diff line number Diff line change
Expand Up @@ -965,7 +965,7 @@ class CodeGen final : public CodeGenInterface
void genIntToFloatCast(GenTree* treeNode);
void genCkfinite(GenTree* treeNode);
void genCodeForCompare(GenTreeOp* tree);
#ifdef TARGET_ARM64
#if defined(TARGET_ARM64) || defined(TARGET_AMD64)
void genCodeForCCMP(GenTreeCCMP* ccmp);
#endif
void genCodeForSelect(GenTreeOp* select);
Expand Down Expand Up @@ -1706,53 +1706,13 @@ class CodeGen final : public CodeGenInterface
static insOpts ShiftOpToInsOpts(genTreeOps op);
#elif defined(TARGET_XARCH)
static instruction JumpKindToCmov(emitJumpKind condition);
static instruction JumpKindToCcmp(emitJumpKind condition);
static insOpts OptsFromCFlags(insCflags flags);
#endif

#if !defined(TARGET_LOONGARCH64) && !defined(TARGET_RISCV64)
// Maps a GenCondition code to a sequence of conditional jumps or other conditional instructions
// such as X86's SETcc. A sequence of instructions rather than just a single one is required for
// certain floating point conditions.
// For example, X86's UCOMISS sets ZF to indicate equality but it also sets it, together with PF,
// to indicate an unordered result. So for GenCondition::FEQ we first need to check if PF is 0
// and then jump if ZF is 1:
// JP fallThroughBlock
// JE jumpDestBlock
// fallThroughBlock:
// ...
// jumpDestBlock:
//
// This is very similar to the way shortcircuit evaluation of bool AND and OR operators works so
// in order to make the GenConditionDesc mapping tables easier to read, a bool expression-like
// pattern is used to encode the above:
// { EJ_jnp, GT_AND, EJ_je }
// { EJ_jp, GT_OR, EJ_jne }
//
// For more details check inst_JCC and inst_SETCC functions.
//
struct GenConditionDesc
{
    // First (or only) jump kind of the sequence; Get() asserts it is never EJ_NONE.
    emitJumpKind jumpKind1;
    // GT_NONE for a single jump kind, otherwise GT_AND or GT_OR combining the two jump kinds.
    genTreeOps oper;
    // Second jump kind; Get() asserts it is EJ_NONE exactly when 'oper' is GT_NONE.
    emitJumpKind jumpKind2;
    // Explicit padding member.
    char padTo4Bytes;

    // Looks up the descriptor for 'condition' in the mapping table and asserts
    // that the entry found there is well formed before handing it back.
    static const GenConditionDesc& Get(GenCondition condition)
    {
        const auto code = condition.GetCode();
        assert(code < ArrLen(map));

        const GenConditionDesc& entry = map[code];

        // Every usable entry starts with a real jump kind.
        assert(entry.jumpKind1 != EJ_NONE);
        // The combining operator is either absent or one of the two bool-like operators.
        assert((entry.oper == GT_OR) || (entry.oper == GT_AND) || (entry.oper == GT_NONE));
        // A second jump kind is present if and only if there is a combining operator.
        assert((entry.jumpKind2 == EJ_NONE) == (entry.oper == GT_NONE));

        return entry;
    }

private:
    // One entry per GenCondition code; defined per target in the codegen sources.
    static const GenConditionDesc map[32];
};

void inst_JCC(GenCondition condition, BasicBlock* target);
void inst_SETCC(GenCondition condition, var_types type, regNumber dstReg);

#if !defined(TARGET_LOONGARCH64) && !defined(TARGET_RISCV64)
void genCodeForJcc(GenTreeCC* tree);
void genCodeForSetcc(GenTreeCC* setcc);
void genCodeForJTrue(GenTreeOp* jtrue);
Expand Down
2 changes: 1 addition & 1 deletion src/coreclr/jit/codegenarmarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4162,7 +4162,7 @@ void CodeGen::genCreateAndStoreGCInfo(unsigned codeSize,
}

// clang-format off
const CodeGen::GenConditionDesc CodeGen::GenConditionDesc::map[32]
const GenConditionDesc GenConditionDesc::map[32]
{
{ }, // NONE
{ }, // 1
Expand Down
43 changes: 43 additions & 0 deletions src/coreclr/jit/codegeninterface.h
Original file line number Diff line number Diff line change
Expand Up @@ -828,4 +828,47 @@ class CodeGenInterface
#endif
};

#if !defined(TARGET_LOONGARCH64) && !defined(TARGET_RISCV64)
// Maps a GenCondition code to a sequence of conditional jumps or other conditional instructions
// such as X86's SETcc. A sequence of instructions rather than just a single one is required for
// certain floating point conditions.
// For example, X86's UCOMISS sets ZF to indicate equality but it also sets it, together with PF,
// to indicate an unordered result. So for GenCondition::FEQ we first need to check if PF is 0
// and then jump if ZF is 1:
// JP fallThroughBlock
// JE jumpDestBlock
// fallThroughBlock:
// ...
// jumpDestBlock:
//
// This is very similar to the way shortcircuit evaluation of bool AND and OR operators works so
// in order to make the GenConditionDesc mapping tables easier to read, a bool expression-like
// pattern is used to encode the above:
// { EJ_jnp, GT_AND, EJ_je }
// { EJ_jp, GT_OR, EJ_jne }
//
// For more details check inst_JCC and inst_SETCC functions.
//
struct GenConditionDesc
{
    // First (or only) jump kind of the sequence; Get() asserts it is never EJ_NONE.
    emitJumpKind jumpKind1;
    // GT_NONE for a single jump kind, otherwise GT_AND or GT_OR combining the two jump kinds.
    genTreeOps oper;
    // Second jump kind; Get() asserts it is EJ_NONE exactly when 'oper' is GT_NONE.
    emitJumpKind jumpKind2;
    // Explicit padding member.
    char padTo4Bytes;

    // Looks up the descriptor for 'condition' in the mapping table and asserts
    // that the entry found there is well formed before handing it back.
    static const GenConditionDesc& Get(GenCondition condition)
    {
        const auto code = condition.GetCode();
        assert(code < ArrLen(map));

        const GenConditionDesc& entry = map[code];

        // Every usable entry starts with a real jump kind.
        assert(entry.jumpKind1 != EJ_NONE);
        // The combining operator is either absent or one of the two bool-like operators.
        assert((entry.oper == GT_OR) || (entry.oper == GT_AND) || (entry.oper == GT_NONE));
        // A second jump kind is present if and only if there is a combining operator.
        assert((entry.jumpKind2 == EJ_NONE) == (entry.oper == GT_NONE));

        return entry;
    }

private:
    // One entry per GenCondition code; defined per target in the codegen sources.
    static const GenConditionDesc map[32];
};
#endif // !defined(TARGET_LOONGARCH64) && !defined(TARGET_RISCV64)

#endif // _CODEGEN_INTERFACE_H_
126 changes: 125 additions & 1 deletion src/coreclr/jit/codegenxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1573,6 +1573,46 @@ instruction CodeGen::JumpKindToCmov(emitJumpKind condition)
return s_table[condition];
}

//------------------------------------------------------------------------
// JumpKindToCcmp:
//   Look up the ccmp instruction that corresponds to an emitJumpKind.
//
// Arguments:
//   condition - the jump kind to translate; asserted to lie in [EJ_NONE, EJ_COUNT)
//
// Returns:
//   The matching INS_ccmp* instruction, or INS_none for the jump kinds that
//   have no ccmp entry in the table (EJ_NONE, EJ_jmp, EJ_jp and EJ_jnp).
//
instruction CodeGen::JumpKindToCcmp(emitJumpKind condition)
{
    // Indexed by emitJumpKind. The static asserts below pin each slot to its jump kind,
    // so a reordering of the enum is caught at compile time.
    static constexpr instruction s_ccmpByJumpKind[EJ_COUNT] = {
        INS_none,   // EJ_NONE
        INS_none,   // EJ_jmp
        INS_ccmpo,  // EJ_jo
        INS_ccmpno, // EJ_jno
        INS_ccmpb,  // EJ_jb
        INS_ccmpae, // EJ_jae
        INS_ccmpe,  // EJ_je
        INS_ccmpne, // EJ_jne
        INS_ccmpbe, // EJ_jbe
        INS_ccmpa,  // EJ_ja
        INS_ccmps,  // EJ_js
        INS_ccmpns, // EJ_jns
        INS_none,   // EJ_jp
        INS_none,   // EJ_jnp
        INS_ccmpl,  // EJ_jl
        INS_ccmpge, // EJ_jge
        INS_ccmple, // EJ_jle
        INS_ccmpg,  // EJ_jg
    };

    // Jump kinds without a ccmp counterpart.
    static_assert_no_msg(s_ccmpByJumpKind[EJ_NONE] == INS_none);
    static_assert_no_msg(s_ccmpByJumpKind[EJ_jmp] == INS_none);
    static_assert_no_msg(s_ccmpByJumpKind[EJ_jp] == INS_none);
    static_assert_no_msg(s_ccmpByJumpKind[EJ_jnp] == INS_none);

    // Overflow / sign / equality conditions.
    static_assert_no_msg(s_ccmpByJumpKind[EJ_jo] == INS_ccmpo);
    static_assert_no_msg(s_ccmpByJumpKind[EJ_jno] == INS_ccmpno);
    static_assert_no_msg(s_ccmpByJumpKind[EJ_js] == INS_ccmps);
    static_assert_no_msg(s_ccmpByJumpKind[EJ_jns] == INS_ccmpns);
    static_assert_no_msg(s_ccmpByJumpKind[EJ_je] == INS_ccmpe);
    static_assert_no_msg(s_ccmpByJumpKind[EJ_jne] == INS_ccmpne);

    // Below / above conditions.
    static_assert_no_msg(s_ccmpByJumpKind[EJ_jb] == INS_ccmpb);
    static_assert_no_msg(s_ccmpByJumpKind[EJ_jae] == INS_ccmpae);
    static_assert_no_msg(s_ccmpByJumpKind[EJ_jbe] == INS_ccmpbe);
    static_assert_no_msg(s_ccmpByJumpKind[EJ_ja] == INS_ccmpa);

    // Less / greater conditions.
    static_assert_no_msg(s_ccmpByJumpKind[EJ_jl] == INS_ccmpl);
    static_assert_no_msg(s_ccmpByJumpKind[EJ_jge] == INS_ccmpge);
    static_assert_no_msg(s_ccmpByJumpKind[EJ_jle] == INS_ccmple);
    static_assert_no_msg(s_ccmpByJumpKind[EJ_jg] == INS_ccmpg);

    assert((EJ_NONE <= condition) && (condition < EJ_COUNT));
    return s_ccmpByJumpKind[condition];
}

//------------------------------------------------------------------------
// genCodeForSelect: Produce code for a GT_SELECT/GT_SELECTCC node.
//
Expand Down Expand Up @@ -1669,7 +1709,7 @@ void CodeGen::genCodeForSelect(GenTreeOp* select)
}

// clang-format off
const CodeGen::GenConditionDesc CodeGen::GenConditionDesc::map[32]
const GenConditionDesc GenConditionDesc::map[32]
{
{ }, // NONE
{ }, // 1
Expand Down Expand Up @@ -2270,6 +2310,12 @@ void CodeGen::genCodeForTreeNode(GenTree* treeNode)
// Do nothing; these nodes are simply markers for debug info.
break;

#if defined(TARGET_AMD64)
case GT_CCMP:
genCodeForCCMP(treeNode->AsCCMP());
break;
#endif
Comment on lines +2313 to +2317
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What prevents this from being supported on x86?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

IIUC, APX is only available in Intel 64-bit mode.


default:
{
#ifdef DEBUG
Expand Down Expand Up @@ -8926,6 +8972,84 @@ void CodeGen::genEmitHelperCall(unsigned helper, int argSize, emitAttr retSize,
regSet.verifyRegistersUsed(killMask);
}

//-----------------------------------------------------------------------------------------
// OptsFromCFlags - Convert condition flags into appropriate insOpts.
//
// Arguments:
// flags - The condition flags to be converted.
//
// Return Value:
// An insOpts value encoding the condition flags.
//
// Notes:
// This function maps the condition flags (e.g., CF, ZF, SF, OF) to the appropriate
// instruction options used for setting the default flag values in extended EVEX
// encoding conditional instructions.
//
insOpts CodeGen::OptsFromCFlags(insCflags flags)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please add standard function header comments to this function, and the next one.

{
    // Collect, for every condition flag requested in 'flags', the matching
    // EVEX default-flag-value option bit. The four tests are independent.
    unsigned dfvBits = 0;

    if ((flags & INS_FLAGS_OF) != 0)
    {
        dfvBits |= INS_OPTS_EVEX_dfv_of;
    }

    if ((flags & INS_FLAGS_SF) != 0)
    {
        dfvBits |= INS_OPTS_EVEX_dfv_sf;
    }

    if ((flags & INS_FLAGS_ZF) != 0)
    {
        dfvBits |= INS_OPTS_EVEX_dfv_zf;
    }

    if ((flags & INS_FLAGS_CF) != 0)
    {
        dfvBits |= INS_OPTS_EVEX_dfv_cf;
    }

    return static_cast<insOpts>(dfvBits);
}

#ifdef TARGET_AMD64

//-----------------------------------------------------------------------------------------
// genCodeForCCMP - Generate code for a conditional compare (CCMP) node.
//
// Arguments:
//    ccmp - The GenTreeCCMP node representing the conditional compare.
//
// Return Value:
//    None.
//
// Notes:
//    The compare is emitted as a single ccmp instruction, which requires the promoted
//    EVEX encoding. The instruction variant is chosen from ccmp->gtCondition (the
//    condition established by the previous compare), and ccmp->gtFlagsVal is converted
//    into the instruction options that carry the flag values for the instruction.
//    Only integer operands are handled: op1 must be in a register and op2 is either a
//    register or a contained integer immediate.
//
void CodeGen::genCodeForCCMP(GenTreeCCMP* ccmp)
{
    emitter* emit = GetEmitter();
    assert(emit->UsePromotedEVEXEncoding());

    genConsumeOperands(ccmp);
    GenTree*  op1     = ccmp->gtGetOp1();
    GenTree*  op2     = ccmp->gtGetOp2();
    var_types op1Type = genActualType(op1->TypeGet());
    var_types op2Type = genActualType(op2->TypeGet());
    emitAttr  cmpSize = emitActualTypeSize(op1Type);
    regNumber srcReg1 = op1->GetRegNum();

    // No float support or swapping op1 and op2 to generate cmp reg, imm.
    assert(!varTypeIsFloating(op2Type));
    assert(!op1->isContainedIntOrIImmed());

    // For the ccmp flags, invert the condition of the compare.
    // For the condition, use the previous compare.
    const GenConditionDesc& condDesc = GenConditionDesc::Get(ccmp->gtCondition);

    // Only jumpKind1 is consumed below, so a compound condition (one that also needs
    // jumpKind2) cannot be represented by a single ccmp instruction.
    assert(condDesc.oper == GT_NONE);

    instruction ccmpIns = JumpKindToCcmp(condDesc.jumpKind1);

    // JumpKindToCcmp returns INS_none for jump kinds without a ccmp entry; emitting
    // that would produce a bogus instruction, so catch it here.
    assert(ccmpIns != INS_none);

    insOpts opts = OptsFromCFlags(ccmp->gtFlagsVal);

    if (op2->isContainedIntOrIImmed())
    {
        GenTreeIntConCommon* intConst = op2->AsIntConCommon();
        const auto           immVal   = intConst->IconValue();

        // The immediate is narrowed to 'int' for the emitter; make sure nothing is lost.
        assert(static_cast<int>(immVal) == immVal);

        emit->emitIns_R_I(ccmpIns, cmpSize, srcReg1, static_cast<int>(immVal), opts);
    }
    else
    {
        regNumber srcReg2 = op2->GetRegNum();
        emit->emitIns_R_R(ccmpIns, cmpSize, srcReg1, srcReg2, opts);
    }
}
#endif // TARGET_AMD64

#if defined(DEBUG) && defined(TARGET_AMD64)

/*****************************************************************************
Expand Down
2 changes: 1 addition & 1 deletion src/coreclr/jit/compiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -7013,7 +7013,7 @@ class Compiler
PhaseStatus optOptimizeBools();
PhaseStatus optRecognizeAndOptimizeSwitchJumps();
bool optSwitchConvert(BasicBlock* firstBlock, int testsCount, ssize_t* testValues, weight_t falseLikelihood, GenTree* nodeToTest);
bool optSwitchDetectAndConvert(BasicBlock* firstBlock);
bool optSwitchDetectAndConvert(BasicBlock* firstBlock, bool testingForConversion = false);

PhaseStatus optInvertLoops(); // Invert loops so they're entered at top and tested at bottom.
PhaseStatus optOptimizeFlow(); // Simplify flow graph and do tail duplication
Expand Down
8 changes: 8 additions & 0 deletions src/coreclr/jit/emitxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20116,6 +20116,14 @@ emitter::insFormat emitter::ExtractMemoryFormat(insFormat insFmt) const
return IF_NONE;
}

#ifdef TARGET_AMD64
//------------------------------------------------------------------------
// emitIns_valid_imm_for_ccmp: Check whether an immediate can be encoded as an
//    input operand to a ccmp instruction.
//
// Arguments:
//    imm - the immediate value to check
//
// Return Value:
//    true if 'imm' survives a round trip through INT32 unchanged, false otherwise.
//
/*static*/ bool emitter::emitIns_valid_imm_for_ccmp(INT64 imm)
{
    const INT32 narrowed = static_cast<INT32>(imm);
    return static_cast<INT64>(narrowed) == imm;
}
#endif

#if defined(DEBUG) || defined(LATE_DISASM)

//----------------------------------------------------------------------------------------
Expand Down
5 changes: 5 additions & 0 deletions src/coreclr/jit/emitxarch.h
Original file line number Diff line number Diff line change
Expand Up @@ -1327,4 +1327,9 @@ inline bool HasExtendedGPReg(const instrDesc* id) const;

inline bool HasMaskReg(const instrDesc* id) const;

#ifdef TARGET_AMD64
// true if this 'imm' can be encoded as an input operand to a ccmp instruction
static bool emitIns_valid_imm_for_ccmp(INT64 imm);
#endif // TARGET_AMD64

#endif // TARGET_XARCH
2 changes: 1 addition & 1 deletion src/coreclr/jit/gentree.h
Original file line number Diff line number Diff line change
Expand Up @@ -9417,7 +9417,7 @@ enum insCC : unsigned
};
#endif

#if defined(TARGET_ARM64)
#if defined(TARGET_ARM64) || defined(TARGET_AMD64)
struct GenTreeCCMP final : public GenTreeOpCC
{
insCflags gtFlagsVal;
Expand Down
9 changes: 7 additions & 2 deletions src/coreclr/jit/gtlist.h
Original file line number Diff line number Diff line change
Expand Up @@ -245,11 +245,16 @@ GTNODE(JCC , GenTreeCC ,0,0,GTK_LEAF|GTK_NOVALUE|DBK_NOTHI
GTNODE(SETCC , GenTreeCC ,0,0,GTK_LEAF|DBK_NOTHIR)
// Variant of SELECT that reuses flags computed by a previous node with the specified condition.
GTNODE(SELECTCC , GenTreeOpCC ,0,0,GTK_BINOP|DBK_NOTHIR)
#ifdef TARGET_ARM64
// The arm64 ccmp instruction. If the specified condition is true, compares two

#if defined(TARGET_ARM64) || defined(TARGET_AMD64)
// The arm64 and x86 ccmp instruction. If the specified condition is true, compares two
// operands and sets the condition flags according to the result. Otherwise
// sets the condition flags to the specified immediate value.
GTNODE(CCMP , GenTreeCCMP ,0,0,GTK_BINOP|GTK_NOVALUE|DBK_NOTHIR)
#endif


#ifdef TARGET_ARM64
// Maps to arm64 csinc/cinc instruction. Computes result = condition ? op1 : op2 + 1.
// If op2 is null, computes result = condition ? op1 + 1 : op1.
GTNODE(SELECT_INC , GenTreeOp ,0,0,GTK_BINOP|DBK_NOTHIR)
Expand Down
4 changes: 3 additions & 1 deletion src/coreclr/jit/gtstructs.h
Original file line number Diff line number Diff line change
Expand Up @@ -116,8 +116,10 @@ GTSTRUCT_1(AllocObj , GT_ALLOCOBJ)
GTSTRUCT_1(RuntimeLookup, GT_RUNTIMELOOKUP)
GTSTRUCT_1(ArrAddr , GT_ARR_ADDR)
GTSTRUCT_2(CC , GT_JCC, GT_SETCC)
#ifdef TARGET_ARM64
#if defined(TARGET_ARM64) || defined(TARGET_AMD64)
GTSTRUCT_1(CCMP , GT_CCMP)
#endif
#ifdef TARGET_ARM64
GTSTRUCT_N(OpCC , GT_SELECTCC, GT_SELECT_INCCC, GT_JCMP, GT_JTEST, GT_SELECT_INVCC, GT_SELECT_NEGCC)
#else
GTSTRUCT_3(OpCC , GT_SELECTCC, GT_JCMP, GT_JTEST)
Expand Down
1 change: 1 addition & 0 deletions src/coreclr/jit/jitconfigvalues.h
Original file line number Diff line number Diff line change
Expand Up @@ -446,6 +446,7 @@ RELEASE_CONFIG_INTEGER(EnableRiscV64Zbb, "EnableRiscV64Zbb",
RELEASE_CONFIG_INTEGER(EnableEmbeddedBroadcast, "EnableEmbeddedBroadcast", 1) // Allows embedded broadcasts to be disabled
RELEASE_CONFIG_INTEGER(EnableEmbeddedMasking, "EnableEmbeddedMasking", 1) // Allows embedded masking to be disabled
RELEASE_CONFIG_INTEGER(EnableApxNDD, "EnableApxNDD", 0) // Allows APX NDD feature to be disabled
RELEASE_CONFIG_INTEGER(EnableApxConditionalChaining, "EnableApxConditionalChaining", 0) // Allows APX conditional compare chaining
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we really need a release knob for this?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think at the moment, it's good to have as a knob until we are able to tune it on APX hardware (if needed).


// clang-format on

Expand Down
Loading
Loading