[RISC-V] Optimize comparisons #115039

Merged · 9 commits · May 27, 2025
266 changes: 85 additions & 181 deletions src/coreclr/jit/codegenriscv64.cpp
@@ -3123,6 +3123,7 @@ void CodeGen::genCodeForCompare(GenTreeOp* tree)
assert(!op2->isUsedFromMemory());

emitAttr cmpSize = EA_ATTR(genTypeSize(op1Type));
assert(cmpSize == EA_4BYTE || cmpSize == EA_8BYTE);

assert(genTypeSize(op1Type) == genTypeSize(op2Type));

@@ -3205,192 +3206,126 @@ void CodeGen::genCodeForCompare(GenTreeOp* tree)
{
ssize_t imm = op2->AsIntCon()->gtIconVal;

switch (cmpSize)
bool useAddSub = !(!tree->OperIs(GT_EQ, GT_NE) || (imm == -2048));
bool useShiftRight =
!isUnsigned && ((tree->OperIs(GT_LT) && (imm == 0)) || (tree->OperIs(GT_LE) && (imm == -1)));
bool useLoadImm = isUnsigned && ((tree->OperIs(GT_LT, GT_GE) && (imm == 0)) ||
(tree->OperIs(GT_LE, GT_GT) && (imm == -1)));

if (cmpSize == EA_4BYTE)
{
case EA_4BYTE:
if (!useAddSub && !useShiftRight && !useLoadImm)
{
regNumber tmpRegOp1 = internalRegisters.GetSingle(tree);
assert(regOp1 != tmpRegOp1);
if (isUnsigned)
{
imm = static_cast<uint32_t>(imm);
emit->emitIns_R_R_I(INS_slli, EA_8BYTE, tmpRegOp1, regOp1, 32);
emit->emitIns_R_R_I(INS_srli, EA_8BYTE, tmpRegOp1, tmpRegOp1, 32);
}
else
{
imm = static_cast<int32_t>(imm);
emit->emitIns_R_R(INS_sext_w, EA_8BYTE, tmpRegOp1, regOp1);
}
imm = static_cast<int32_t>(imm);
emit->emitIns_R_R(INS_sext_w, EA_8BYTE, tmpRegOp1, regOp1);
regOp1 = tmpRegOp1;
break;
}
case EA_8BYTE:
break;
default:
unreached();
}
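
The pre-comparison normalization of 32-bit operands now sign-extends with sext.w for both signed and unsigned compares, where the old code zero-extended with an slli/srli pair in the unsigned case. A minimal C++ sketch of why that is safe (the helper name and shape are illustrative, not part of the change):

#include <cstdint>

// x -> (int64_t)(int32_t)x is strictly monotonic under unsigned 64-bit order:
// every value with the top bit set maps above every value without it, and
// order is preserved inside each half, so sltu on two sign-extended words
// gives the same answer as a 32-bit unsigned compare.
static bool UnsignedLess32ViaSext(uint32_t a, uint32_t b)
{
    int64_t sa = (int32_t)a;            // sext.w t0, a
    int64_t sb = (int32_t)b;            // sext.w t1, b
    return (uint64_t)sa < (uint64_t)sb; // sltu dst, t0, t1  -- same result as (a < b)
}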

if (tree->OperIs(GT_LT))
{
if (!isUnsigned && emitter::isValidSimm12(imm))
{
emit->emitIns_R_R_I(INS_slti, EA_PTRSIZE, targetReg, regOp1, imm);
}
else if (isUnsigned && emitter::isValidUimm11(imm))
{
emit->emitIns_R_R_I(INS_sltiu, EA_PTRSIZE, targetReg, regOp1, imm);
}
else
{
emit->emitLoadImmediate(EA_PTRSIZE, REG_RA, imm);
emit->emitIns_R_R_R(isUnsigned ? INS_sltu : INS_slt, EA_PTRSIZE, targetReg, regOp1, REG_RA);
}
}
else if (tree->OperIs(GT_LE))
{
if (!isUnsigned && emitter::isValidSimm12(imm + 1))
{
emit->emitIns_R_R_I(INS_slti, EA_PTRSIZE, targetReg, regOp1, imm + 1);
}
else if (isUnsigned && emitter::isValidUimm11(imm + 1) && (imm != (~0)))
{
emit->emitIns_R_R_I(INS_sltiu, EA_PTRSIZE, targetReg, regOp1, imm + 1);
}
else
{
emit->emitLoadImmediate(EA_PTRSIZE, REG_RA, imm + 1);
emit->emitIns_R_R_R(isUnsigned ? INS_sltu : INS_slt, EA_PTRSIZE, targetReg, regOp1, REG_RA);
}
}
else if (tree->OperIs(GT_GT))
if (tree->OperIs(GT_EQ, GT_NE))
{
if (!isUnsigned && emitter::isValidSimm12(imm + 1))
if ((imm != 0) || (cmpSize == EA_4BYTE))
{
emit->emitIns_R_R_I(INS_slti, EA_PTRSIZE, targetReg, regOp1, imm + 1);
emit->emitIns_R_R_I(INS_xori, EA_PTRSIZE, targetReg, targetReg, 1);
instruction diff = INS_xori;
if (imm != -2048)
{
assert(useAddSub);
diff = (cmpSize == EA_4BYTE) ? INS_addiw : INS_addi;
imm = -imm;
}
emit->emitIns_R_R_I(diff, cmpSize, targetReg, regOp1, imm);
regOp1 = targetReg;
}
else if (isUnsigned && emitter::isValidUimm11(imm + 1) && (imm != (~0)))
assert(emitter::isValidSimm12(imm));

if (tree->OperIs(GT_EQ))
{
emit->emitIns_R_R_I(INS_sltiu, EA_PTRSIZE, targetReg, regOp1, imm + 1);
emit->emitIns_R_R_I(INS_xori, EA_PTRSIZE, targetReg, targetReg, 1);
emit->emitIns_R_R_I(INS_sltiu, EA_PTRSIZE, targetReg, regOp1, 1);
}
else
{
emit->emitLoadImmediate(EA_PTRSIZE, REG_RA, imm);
emit->emitIns_R_R_R(isUnsigned ? INS_sltu : INS_slt, EA_PTRSIZE, targetReg, REG_RA, regOp1);
assert(tree->OperIs(GT_NE));
emit->emitIns_R_R_R(INS_sltu, EA_PTRSIZE, targetReg, REG_ZERO, regOp1);
}
}
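
For what the EQ/NE-against-immediate path above emits, a small C++ model of the 32-bit case (the helper is illustrative only, not JIT code):

#include <cstdint>

static bool EqualsImm32(int32_t a, int32_t imm)
{
    // addiw t, a, -imm   -- or xori t, a, -2048 when imm == -2048, since 2048
    // does not fit in the signed 12-bit immediate field; either way t == 0
    // exactly when a == imm.
    int64_t t = (imm != -2048) ? (int64_t)a - imm : (int64_t)(a ^ imm);
    return (uint64_t)t < 1;  // GT_EQ: sltiu dst, t, 1   (GT_NE instead: sltu dst, zero, t)
}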
else if (tree->OperIs(GT_GE))
else
{
if (!isUnsigned && emitter::isValidSimm12(imm))
{
emit->emitIns_R_R_I(INS_slti, EA_PTRSIZE, targetReg, regOp1, imm);
}
else if (isUnsigned && emitter::isValidUimm11(imm))
{
emit->emitIns_R_R_I(INS_sltiu, EA_PTRSIZE, targetReg, regOp1, imm);
}
else
assert(tree->OperIs(GT_LT, GT_LE, GT_GT, GT_GE));
if (useLoadImm)
{
emit->emitLoadImmediate(EA_PTRSIZE, REG_RA, imm);
emit->emitIns_R_R_R(isUnsigned ? INS_sltu : INS_slt, EA_PTRSIZE, targetReg, regOp1, REG_RA);
// unsigned (a <= ~0), (a >= 0) / (a > ~0), (a < 0) is always true / false
imm = tree->OperIs(GT_GE, GT_LE) ? 1 : 0;
emit->emitIns_R_R_I(INS_addi, EA_PTRSIZE, targetReg, REG_ZERO, imm);
}
emit->emitIns_R_R_I(INS_xori, EA_PTRSIZE, targetReg, targetReg, 1);
}
else if (tree->OperIs(GT_NE))
{
if (!imm)
else if (useShiftRight)
{
emit->emitIns_R_R_R(INS_sltu, EA_PTRSIZE, targetReg, REG_R0, regOp1);
// signed (a < 0) or (a <= -1) is just the sign bit
instruction srli = (cmpSize == EA_4BYTE) ? INS_srliw : INS_srli;
emit->emitIns_R_R_I(srli, cmpSize, targetReg, regOp1, cmpSize * 8 - 1);
}
else if (emitter::isValidUimm12(imm))
else if ((tree->OperIs(GT_GT) && (imm == 0)) || (tree->OperIs(GT_GE) && (imm == 1)))
{
emit->emitIns_R_R_I(INS_xori, EA_PTRSIZE, targetReg, regOp1, imm);
emit->emitIns_R_R_R(INS_sltu, EA_PTRSIZE, targetReg, REG_R0, targetReg);
instruction slt = isUnsigned ? INS_sltu : INS_slt;
emit->emitIns_R_R_R(slt, EA_PTRSIZE, targetReg, REG_ZERO, regOp1);
}
else
{
emit->emitLoadImmediate(EA_PTRSIZE, REG_RA, imm);
emit->emitIns_R_R_R(INS_xor, EA_PTRSIZE, targetReg, regOp1, REG_RA);
emit->emitIns_R_R_R(INS_sltu, EA_PTRSIZE, targetReg, REG_R0, targetReg);
}
}
else if (tree->OperIs(GT_EQ))
{
if (!imm)
{
emit->emitIns_R_R_I(INS_sltiu, EA_PTRSIZE, targetReg, regOp1, 1);
}
else if (emitter::isValidUimm12(imm))
{
emit->emitIns_R_R_I(INS_xori, EA_PTRSIZE, targetReg, regOp1, imm);
emit->emitIns_R_R_I(INS_sltiu, EA_PTRSIZE, targetReg, targetReg, 1);
}
else
{
emit->emitLoadImmediate(EA_PTRSIZE, REG_RA, imm);
emit->emitIns_R_R_R(INS_xor, EA_PTRSIZE, targetReg, regOp1, REG_RA);
emit->emitIns_R_R_I(INS_sltiu, EA_PTRSIZE, targetReg, targetReg, 1);
instruction slti = isUnsigned ? INS_sltiu : INS_slti;
if (tree->OperIs(GT_LE, GT_GT))
imm += 1;
assert(emitter::isValidSimm12(imm));
assert(!isUnsigned || (imm != 0)); // should be handled in useLoadImm

emit->emitIns_R_R_I(slti, EA_PTRSIZE, targetReg, regOp1, imm);

if (tree->OperIs(GT_GT, GT_GE))
emit->emitIns_R_R_I(INS_xori, EA_PTRSIZE, targetReg, targetReg, 1);
}
}
}
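
The signed relational forms against an immediate all reduce to one slti plus, for GT/GE, an xori with 1; LE and GT bump the immediate first (a <= N is a < N+1). The unsigned variants use sltiu the same way, except that compares against 0 or ~0 fold to a constant (useLoadImm) and signed a < 0 / a <= -1 become a plain sign-bit shift (useShiftRight). A compact sketch of the signed mapping, with an illustrative helper name:

#include <cstdint>

static bool SignedCompareImm(int64_t a, int64_t imm, bool isLE, bool isGT, bool isGE)
{
    if (isLE || isGT)
        imm += 1;                      // a <= N == a < N+1 ;  a > N == !(a < N+1)
                                       // (containment guarantees the adjusted value still fits simm12)
    bool lt = a < imm;                 // slti dst, a, imm
    return (isGT || isGE) ? !lt : lt;  // xori dst, dst, 1 for GT/GE
}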
else
{
regNumber regOp2 = op2->GetRegNum();

if (cmpSize == EA_4BYTE)
if (tree->OperIs(GT_EQ, GT_NE))
{
regNumber tmpRegOp1 = REG_RA;
regNumber tmpRegOp2 = internalRegisters.GetSingle(tree);
assert(regOp1 != tmpRegOp2);
assert(regOp2 != tmpRegOp2);

if (isUnsigned)
instruction sub = (cmpSize == EA_4BYTE) ? INS_subw : INS_sub;
emit->emitIns_R_R_R(sub, EA_PTRSIZE, targetReg, regOp1, regOp2);
if (tree->OperIs(GT_EQ))
{
emit->emitIns_R_R_I(INS_slli, EA_8BYTE, tmpRegOp1, regOp1, 32);
emit->emitIns_R_R_I(INS_srli, EA_8BYTE, tmpRegOp1, tmpRegOp1, 32);

emit->emitIns_R_R_I(INS_slli, EA_8BYTE, tmpRegOp2, regOp2, 32);
emit->emitIns_R_R_I(INS_srli, EA_8BYTE, tmpRegOp2, tmpRegOp2, 32);
emit->emitIns_R_R_I(INS_sltiu, EA_PTRSIZE, targetReg, targetReg, 1);
}
else
{
emit->emitIns_R_R_I(INS_slliw, EA_8BYTE, tmpRegOp1, regOp1, 0);
emit->emitIns_R_R_I(INS_slliw, EA_8BYTE, tmpRegOp2, regOp2, 0);
assert(tree->OperIs(GT_NE));
emit->emitIns_R_R_R(INS_sltu, EA_PTRSIZE, targetReg, REG_ZERO, targetReg);
}

regOp1 = tmpRegOp1;
regOp2 = tmpRegOp2;
}

if (tree->OperIs(GT_LT))
{
emit->emitIns_R_R_R(isUnsigned ? INS_sltu : INS_slt, EA_8BYTE, targetReg, regOp1, regOp2);
}
else if (tree->OperIs(GT_LE))
{
emit->emitIns_R_R_R(isUnsigned ? INS_sltu : INS_slt, EA_8BYTE, targetReg, regOp2, regOp1);
emit->emitIns_R_R_I(INS_xori, EA_PTRSIZE, targetReg, targetReg, 1);
}
else if (tree->OperIs(GT_GT))
{
emit->emitIns_R_R_R(isUnsigned ? INS_sltu : INS_slt, EA_8BYTE, targetReg, regOp2, regOp1);
}
else if (tree->OperIs(GT_GE))
{
emit->emitIns_R_R_R(isUnsigned ? INS_sltu : INS_slt, EA_8BYTE, targetReg, regOp1, regOp2);
emit->emitIns_R_R_I(INS_xori, EA_PTRSIZE, targetReg, targetReg, 1);
}
else if (tree->OperIs(GT_NE))
{
emit->emitIns_R_R_R(INS_xor, EA_PTRSIZE, targetReg, regOp1, regOp2);
emit->emitIns_R_R_R(INS_sltu, EA_PTRSIZE, targetReg, REG_R0, targetReg);
}
else if (tree->OperIs(GT_EQ))
else
{
emit->emitIns_R_R_R(INS_xor, EA_PTRSIZE, targetReg, regOp1, regOp2);
emit->emitIns_R_R_I(INS_sltiu, EA_PTRSIZE, targetReg, targetReg, 1);
assert(tree->OperIs(GT_LT, GT_LE, GT_GT, GT_GE));
if (cmpSize == EA_4BYTE)
{
regNumber tmpRegOp1 = REG_RA;
regNumber tmpRegOp2 = internalRegisters.GetSingle(tree);
assert(regOp1 != tmpRegOp2);
assert(regOp2 != tmpRegOp2);
emit->emitIns_R_R(INS_sext_w, EA_8BYTE, tmpRegOp1, regOp1);
emit->emitIns_R_R(INS_sext_w, EA_8BYTE, tmpRegOp2, regOp2);
regOp1 = tmpRegOp1;
regOp2 = tmpRegOp2;
}

instruction slt = isUnsigned ? INS_sltu : INS_slt;
if (tree->OperIs(GT_LE, GT_GT))
std::swap(regOp1, regOp2);

emit->emitIns_R_R_R(slt, EA_8BYTE, targetReg, regOp1, regOp2);

if (tree->OperIs(GT_LE, GT_GE))
emit->emitIns_R_R_I(INS_xori, EA_PTRSIZE, targetReg, targetReg, 1);
}
}
}
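
Register-register compares follow the same shape: EQ/NE subtract (subw/sub) and test the difference against zero with sltiu/sltu, while the relational forms swap operands for LE/GT and negate with xori for LE/GE. A sketch of the relational part (illustrative helper, signed case):

#include <cstdint>
#include <utility>

static bool SignedCompareReg(int64_t a, int64_t b, bool isLE, bool isGT, bool isGE)
{
    if (isLE || isGT)
        std::swap(a, b);               // a <= b == !(b < a) ;  a > b == (b < a)
    bool lt = a < b;                   // slt dst, op1, op2
    return (isLE || isGE) ? !lt : lt;  // xori dst, dst, 1
}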
@@ -3452,19 +3387,8 @@ void CodeGen::genCodeForJumpCompare(GenTreeOpCC* tree)
{
regNumber tmpRegOp1 = rsGetRsvdReg();
assert(regOp1 != tmpRegOp1);
if (cond.IsUnsigned())
{
imm = static_cast<uint32_t>(imm);

assert(regOp1 != tmpRegOp1);
emit->emitIns_R_R_I(INS_slli, EA_8BYTE, tmpRegOp1, regOp1, 32);
emit->emitIns_R_R_I(INS_srli, EA_8BYTE, tmpRegOp1, tmpRegOp1, 32);
}
else
{
imm = static_cast<int32_t>(imm);
emit->emitIns_R_R(INS_sext_w, EA_8BYTE, tmpRegOp1, regOp1);
}
imm = static_cast<int32_t>(imm);
emit->emitIns_R_R(INS_sext_w, EA_8BYTE, tmpRegOp1, regOp1);
regOp1 = tmpRegOp1;
break;
}
@@ -3500,15 +3424,7 @@ void CodeGen::genCodeForJumpCompare(GenTreeOpCC* tree)
{
regNumber tmpRegOp1 = rsGetRsvdReg();
assert(regOp1 != tmpRegOp1);
if (cond.IsUnsigned())
{
emit->emitIns_R_R_I(INS_slli, EA_8BYTE, tmpRegOp1, regOp1, 32);
emit->emitIns_R_R_I(INS_srli, EA_8BYTE, tmpRegOp1, tmpRegOp1, 32);
}
else
{
emit->emitIns_R_R(INS_sext_w, EA_8BYTE, tmpRegOp1, regOp1);
}
emit->emitIns_R_R(INS_sext_w, EA_8BYTE, tmpRegOp1, regOp1);
regOp1 = tmpRegOp1;
}
}
@@ -3557,20 +3473,8 @@ void CodeGen::genCodeForJumpCompare(GenTreeOpCC* tree)
regNumber tmpRegOp2 = rsGetRsvdReg();
assert(regOp1 != tmpRegOp2);
assert(regOp2 != tmpRegOp2);

if (cond.IsUnsigned())
{
emit->emitIns_R_R_I(INS_slli, EA_8BYTE, tmpRegOp1, regOp1, 32);
emit->emitIns_R_R_I(INS_srli, EA_8BYTE, tmpRegOp1, tmpRegOp1, 32);
emit->emitIns_R_R_I(INS_slli, EA_8BYTE, tmpRegOp2, regOp2, 32);
emit->emitIns_R_R_I(INS_srli, EA_8BYTE, tmpRegOp2, tmpRegOp2, 32);
}
else
{
emit->emitIns_R_R_I(INS_slliw, EA_8BYTE, tmpRegOp1, regOp1, 0);
emit->emitIns_R_R_I(INS_slliw, EA_8BYTE, tmpRegOp2, regOp2, 0);
}

emit->emitIns_R_R(INS_sext_w, EA_8BYTE, tmpRegOp1, regOp1);
emit->emitIns_R_R(INS_sext_w, EA_8BYTE, tmpRegOp2, regOp2);
regOp1 = tmpRegOp1;
regOp2 = tmpRegOp2;
}
2 changes: 1 addition & 1 deletion src/coreclr/jit/emitriscv64.cpp
@@ -5492,7 +5492,7 @@ regNumber emitter::emitInsTernary(instruction ins, emitAttr attr, GenTree* dst,

// TODO-RISCV64-CQ: here sign-extend dst when deal with 32bit data is too conservative.
if (EA_SIZE(attr) == EA_4BYTE)
emitIns_R_R_I(INS_slliw, attr, dstReg, dstReg, 0);
emitIns_R_R(INS_sext_w, attr, dstReg, dstReg);
}
break;
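
Both forms sign-extend the low 32 bits into the full register: sext.w is the standard alias for addiw rd, rs, 0, and slliw rd, rs, 0 produces the same value through a zero-amount shift, so the replacement does not change the computed result. A one-line C++ equivalent (illustrative only):

#include <cstdint>

static int64_t SignExtendWord(int64_t x)
{
    return (int64_t)(int32_t)x; // sext.w rd, rs  (== addiw rd, rs, 0 == slliw rd, rs, 0)
}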

14 changes: 8 additions & 6 deletions src/coreclr/jit/lowerriscv64.cpp
@@ -64,13 +64,17 @@ bool Lowering::IsContainableImmed(GenTree* parentNode, GenTree* childNode) const

switch (parentNode->OperGet())
{
case GT_ADD:
case GT_EQ:
case GT_NE:
return emitter::isValidSimm12(-immVal) || (immVal == -2048);

case GT_LE: // a <= N -> a < N+1
case GT_GT: // a > N -> !(a <= N) -> !(a < N+1)
immVal += 1;
FALLTHROUGH;
case GT_LT:
case GT_LE:
case GT_GE:
case GT_GT:
case GT_ADD:
case GT_AND:
case GT_OR:
case GT_XOR:
@@ -85,9 +89,7 @@ bool Lowering::IsContainableImmed(GenTree* parentNode, GenTree* childNode) const
case GT_XCHG:
case GT_STORE_LCL_FLD:
case GT_STORE_LCL_VAR:
if (immVal == 0)
return true;
break;
return (immVal == 0);

default:
break;
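
Read together with the codegen changes above, the containment rule appears to be: for EQ/NE the negated immediate must fit addi's signed 12-bit field (or be exactly -2048, the xori fallback), and for LE/GT it is the incremented immediate that must fit, mirroring the a <= N -> a < N+1 rewrite; the fall-through cases presumably end in the existing isValidSimm12 check elided from this hunk. A sketch with an illustrative helper name:

#include <cstdint>

static bool IsContainableCompareImm(int64_t imm, bool isEqNe, bool isLeGt)
{
    auto fitsSimm12 = [](int64_t v) { return (v >= -2048) && (v <= 2047); };
    if (isEqNe)
        return fitsSimm12(-imm) || (imm == -2048); // addi(w) with -imm, or xori with -2048
    if (isLeGt)
        imm += 1;                                  // a <= N -> a < N+1 ;  a > N -> !(a < N+1)
    return fitsSimm12(imm);                        // LT/GE use the immediate as-is
}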
2 changes: 2 additions & 0 deletions src/coreclr/jit/lsrabuild.cpp
@@ -4611,6 +4611,8 @@ int LinearScan::BuildCmp(GenTree* tree)
assert(tree->OperIsCompare() || tree->OperIs(GT_CMP, GT_TEST, GT_BT));
#elif defined(TARGET_ARM64)
assert(tree->OperIsCompare() || tree->OperIs(GT_CMP, GT_TEST, GT_JCMP, GT_JTEST, GT_CCMP));
#elif defined(TARGET_RISCV64)
assert(tree->OperIsCmpCompare() || tree->OperIs(GT_JCMP));
#else
assert(tree->OperIsCompare() || tree->OperIs(GT_CMP, GT_TEST, GT_JCMP));
#endif