Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
05c2eda
[RISC-V] Group arrRef with index
fuad1502 Mar 28, 2025
d29beee
[RISC-V] Add Zba instructions
fuad1502 Mar 28, 2025
0c6d467
[RISC-V] Create Shxadd GenTree
fuad1502 Mar 28, 2025
1885a54
[RISC-V] Lower ADD(LSH) node to SH(X)ADD(.UW) node
fuad1502 Mar 28, 2025
ae27be3
[RISC-V] Utilize SH(X)ADD instruction for GT_INDEX_ADDR
fuad1502 Mar 28, 2025
406dd72
[RISC-V] Fix build error: correct format & add preprocessor directives
fuad1502 Mar 28, 2025
0da57a8
[RISC-V] Update conditions for transforming ADD(LSH) into SHXADD
fuad1502 Mar 29, 2025
796c050
[RISC-V] Update GT_SHXADD* register liveliness
fuad1502 Mar 29, 2025
97361e1
[RISC-V] Guard SHXADD instruction usage with extension check
fuad1502 Mar 29, 2025
074fb75
[RISC-V] Add description comments to SHXADD node and struct
fuad1502 Mar 29, 2025
fac820d
[RISC-V] Add more JIT dumps
fuad1502 Mar 29, 2025
b57b808
[RISC-V] Remove GenTreeShxadd and create separate nodes (SH1ADD, SH1A…
fuad1502 Apr 8, 2025
bf8418f
[RISC-V] Only use SH(X)ADD when ADD is expected, not ADDW.
fuad1502 Apr 8, 2025
e4fbeb3
[RISC-V] Support add.uw instruction.
fuad1502 Apr 8, 2025
0f8f015
[RISC-V] Utilize ADD.UW for zero extension
fuad1502 Apr 8, 2025
4751eac
[RISC-V] Support slli.uw instruction
fuad1502 Apr 9, 2025
6fe8a3e
[RISC-V] Refactor
fuad1502 Apr 9, 2025
8d42b94
[RISC-V] Fix missed optimization: contain slli.uw into sh(x)add.uw
fuad1502 Apr 9, 2025
876e58b
[RISC-V] Remove repeated directive condition
fuad1502 Apr 18, 2025
fb6ead6
Merge branch 'main' into riscv-jit-opt/utilize-shxadd
fuad1502 Apr 18, 2025
1522ba2
[RISC-V] Update comment to reflect changes
fuad1502 Apr 18, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions src/coreclr/jit/codegen.h
Original file line number Diff line number Diff line change
Expand Up @@ -841,6 +841,13 @@ class CodeGen final : public CodeGenInterface
int scale RISCV64_ARG(regNumber scaleTempReg));
#endif // TARGET_ARMARCH || TARGET_LOONGARCH64 || TARGET_RISCV64

#if defined(TARGET_RISCV64)
// Code generation for the GT_SH1ADD/GT_SH2ADD/GT_SH3ADD nodes and their _UW counterparts.
void genCodeForShxadd(GenTreeOp* tree);
// Code generation for a GT_ADD_UW node (emits INS_add_uw).
void genCodeForAddUw(GenTreeOp* tree);
// Code generation for a GT_SLLI_UW node (emits INS_slli_uw; op2 must be an integer constant).
void genCodeForSlliUw(GenTreeOp* tree);
// Maps a scale of 1, 2 or 3 to the matching INS_sh{1,2,3}add instruction, or to its _uw form when
// 'useUnsignedVariant' is true. Returns INS_none for any other scale.
instruction getShxaddVariant(int scale, bool useUnsignedVariant);
#endif // TARGET_RISCV64

#if defined(TARGET_ARMARCH)
void genCodeForMulLong(GenTreeOp* mul);
#endif // TARGET_ARMARCH
Expand Down
141 changes: 137 additions & 4 deletions src/coreclr/jit/codegenriscv64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2775,6 +2775,30 @@ instruction CodeGen::genGetInsForOper(GenTree* treeNode)
}
break;

case GT_SH1ADD:
ins = INS_sh1add;
break;

case GT_SH2ADD:
ins = INS_sh2add;
break;

case GT_SH3ADD:
ins = INS_sh3add;
break;

case GT_SH1ADD_UW:
ins = INS_sh1add_uw;
break;

case GT_SH2ADD_UW:
ins = INS_sh2add_uw;
break;

case GT_SH3ADD_UW:
ins = INS_sh3add_uw;
break;

case GT_XOR_NOT:
assert(compiler->compOpportunisticallyDependsOn(InstructionSet_Zbb));
assert(!isImmed(treeNode));
Expand Down Expand Up @@ -4584,6 +4608,23 @@ void CodeGen::genCodeForTreeNode(GenTree* treeNode)
// Do nothing; these nodes are simply markers for debug info.
break;

case GT_SH1ADD:
case GT_SH1ADD_UW:
case GT_SH2ADD:
case GT_SH2ADD_UW:
case GT_SH3ADD:
case GT_SH3ADD_UW:
genCodeForShxadd(treeNode->AsOp());
break;

case GT_ADD_UW:
genCodeForAddUw(treeNode->AsOp());
break;

case GT_SLLI_UW:
genCodeForSlliUw(treeNode->AsOp());
break;

default:
{
#ifdef DEBUG
Expand Down Expand Up @@ -5624,7 +5665,16 @@ void CodeGen::genCodeForIndexAddr(GenTreeIndexAddr* node)
// dest = base + (index << scale)
if (node->gtElemSize <= 64)
{
genScaledAdd(attr, node->GetRegNum(), base->GetRegNum(), index->GetRegNum(), scale, tempReg);
instruction shxaddIns = getShxaddVariant(scale, (genTypeSize(index) == 4));

if (compiler->compOpportunisticallyDependsOn(InstructionSet_Zba) && (shxaddIns != INS_none))
{
GetEmitter()->emitIns_R_R_R(shxaddIns, attr, node->GetRegNum(), index->GetRegNum(), base->GetRegNum());
}
else
{
genScaledAdd(attr, node->GetRegNum(), base->GetRegNum(), index->GetRegNum(), scale, tempReg);
}
}
else
{
Expand Down Expand Up @@ -6428,9 +6478,15 @@ void CodeGen::genIntToIntCast(GenTreeCast* cast)
}

case GenIntCastDesc::ZERO_EXTEND_INT:

emit->emitIns_R_R_I(INS_slli, EA_PTRSIZE, dstReg, srcReg, 32);
emit->emitIns_R_R_I(INS_srli, EA_PTRSIZE, dstReg, dstReg, 32);
if (compiler->compOpportunisticallyDependsOn(InstructionSet_Zba))
{
emit->emitIns_R_R_R(INS_add_uw, EA_PTRSIZE, dstReg, srcReg, REG_R0);
}
else
{
emit->emitIns_R_R_I(INS_slli, EA_PTRSIZE, dstReg, srcReg, 32);
emit->emitIns_R_R_I(INS_srli, EA_PTRSIZE, dstReg, dstReg, 32);
}
break;
case GenIntCastDesc::SIGN_EXTEND_INT:
emit->emitIns_R_R_I(INS_slliw, EA_4BYTE, dstReg, srcReg, 0);
Expand Down Expand Up @@ -6718,6 +6774,83 @@ void CodeGen::genLeaInstruction(GenTreeAddrMode* lea)
genProduceReg(lea);
}

//------------------------------------------------------------------------
// getShxaddVariant: Pick the SH(X)ADD instruction that corresponds to a scale.
//
// Arguments:
//    scale              - the shift amount, i.e. the X in SH(X)ADD; only 1, 2 and 3 map to an instruction
//    useUnsignedVariant - true to pick the "_uw" form of the instruction, false for the plain form
//
// Return Value:
//    INS_sh1add / INS_sh2add / INS_sh3add (or the matching "_uw" instruction) for a scale of
//    1 / 2 / 3, and INS_none for every other scale.
//
instruction CodeGen::getShxaddVariant(int scale, bool useUnsignedVariant)
{
    switch (scale)
    {
        case 1:
            return useUnsignedVariant ? INS_sh1add_uw : INS_sh1add;

        case 2:
            return useUnsignedVariant ? INS_sh2add_uw : INS_sh2add;

        case 3:
            return useUnsignedVariant ? INS_sh3add_uw : INS_sh3add;

        default:
            // No SH(X)ADD form exists for this scale; callers must fall back to another sequence.
            return INS_none;
    }
}

//------------------------------------------------------------------------
// genCodeForShxadd: Generate code for a GT_SH(X)ADD or GT_SH(X)ADD_UW node.
//
// Arguments:
//    tree - the binary node to generate code for; the instruction to emit is
//           obtained from genGetInsForOper
//
void CodeGen::genCodeForShxadd(GenTreeOp* tree)
{
    const instruction ins = genGetInsForOper(tree);

    // Only the six SH(X)ADD(.UW) instructions are expected here.
    assert((ins == INS_sh1add) || (ins == INS_sh1add_uw) || (ins == INS_sh2add) || (ins == INS_sh2add_uw) ||
           (ins == INS_sh3add) || (ins == INS_sh3add_uw));

    genConsumeOperands(tree);

    const emitAttr  size    = emitActualTypeSize(tree);
    const regNumber dstReg  = tree->GetRegNum();
    const regNumber src1Reg = tree->gtOp1->GetRegNum();
    const regNumber src2Reg = tree->gtOp2->GetRegNum();

    GetEmitter()->emitIns_R_R_R(ins, size, dstReg, src1Reg, src2Reg);

    genProduceReg(tree);
}

//------------------------------------------------------------------------
// genCodeForAddUw: Generate code for a GT_ADD_UW node.
//
// Arguments:
//    tree - the GT_ADD_UW node; both operands are consumed and INS_add_uw is
//           emitted with op1 and op2 as its two source registers
//
void CodeGen::genCodeForAddUw(GenTreeOp* tree)
{
    assert(tree->gtOper == GT_ADD_UW);

    genConsumeOperands(tree);

    const emitAttr  size    = emitActualTypeSize(tree);
    const regNumber dstReg  = tree->GetRegNum();
    const regNumber src1Reg = tree->gtOp1->GetRegNum();
    const regNumber src2Reg = tree->gtOp2->GetRegNum();

    GetEmitter()->emitIns_R_R_R(INS_add_uw, size, dstReg, src1Reg, src2Reg);

    genProduceReg(tree);
}

//------------------------------------------------------------------------
// genCodeForSlliUw: Generate code for a GT_SLLI_UW node.
//
// Arguments:
//    tree - the GT_SLLI_UW node; op1 supplies the source register and op2 must
//           be an integer constant holding the shift amount
//
// Notes:
//    The shift amount is passed to the emitter as the immediate of INS_slli_uw.
//    That instruction has a 6-bit shamt field on RV64, so the constant is
//    asserted to lie in [0, 63] before it is narrowed to 'unsigned'.
//
void CodeGen::genCodeForSlliUw(GenTreeOp* tree)
{
    assert(tree->gtOper == GT_SLLI_UW);

    GenTree* shiftBy = tree->gtOp2;

    assert(shiftBy->IsCnsIntOrI());

    // Validate the constant while it still has its full width: a negative or too-large value
    // would otherwise be silently truncated by the narrowing conversion below.
    assert((shiftBy->AsIntCon()->gtIconVal >= 0) && (shiftBy->AsIntCon()->gtIconVal < 64));

    genConsumeOperands(tree);

    emitAttr attr  = emitActualTypeSize(tree);
    unsigned shamt = static_cast<unsigned>(shiftBy->AsIntCon()->gtIconVal);

    GetEmitter()->emitIns_R_R_I(INS_slli_uw, attr, tree->GetRegNum(), tree->gtOp1->GetRegNum(), shamt);

    genProduceReg(tree);
}

//------------------------------------------------------------------------
// genEstablishFramePointer: Set up the frame pointer by adding an offset to the stack pointer.
//
Expand Down
112 changes: 87 additions & 25 deletions src/coreclr/jit/emitriscv64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -728,7 +728,7 @@ void emitter::emitIns_R_R_I(

if ((INS_addi <= ins && INS_srai >= ins) || (INS_addiw <= ins && INS_sraiw >= ins) ||
(INS_lb <= ins && INS_lhu >= ins) || INS_ld == ins || INS_lw == ins || INS_jalr == ins || INS_fld == ins ||
INS_flw == ins || INS_rori == ins || INS_roriw == ins)
INS_flw == ins || INS_slli_uw == ins || INS_rori == ins || INS_roriw == ins)
{
assert(isGeneralRegister(reg2));
code |= (reg1 & 0x1f) << 7; // rd
Expand Down Expand Up @@ -827,7 +827,7 @@ void emitter::emitIns_R_R_R(
(INS_addw <= ins && ins <= INS_sraw) || (INS_fadd_s <= ins && ins <= INS_fmax_s) ||
(INS_fadd_d <= ins && ins <= INS_fmax_d) || (INS_feq_s <= ins && ins <= INS_fle_s) ||
(INS_feq_d <= ins && ins <= INS_fle_d) || (INS_lr_w <= ins && ins <= INS_amomaxu_d) ||
(INS_rol <= ins && ins <= INS_maxu))
(INS_sh1add <= ins && ins <= INS_sh3add_uw) || (INS_rol <= ins && ins <= INS_maxu))
{
#ifdef DEBUG
switch (ins)
Expand Down Expand Up @@ -915,6 +915,14 @@ void emitter::emitIns_R_R_R(
case INS_amomaxu_w:
case INS_amomaxu_d:

case INS_sh1add:
case INS_sh2add:
case INS_sh3add:
case INS_add_uw:
case INS_sh1add_uw:
case INS_sh2add_uw:
case INS_sh3add_uw:

case INS_rol:
case INS_rolw:
case INS_ror:
Expand Down Expand Up @@ -3970,28 +3978,36 @@ void emitter::emitDispInsName(
emitDispImmediate(imm12, !willPrintLoadImmValue);
}
return;
case 0x1:
case 0x1: // SLLIW, SLLI.UW, CLZW, CTZW, & CPOPW
{
static constexpr unsigned kSlliwFunct7 = 0b0000000;
static constexpr unsigned kSlliUwFunct6 = 0b000010;

unsigned funct7 = (imm12 >> 5) & 0x7f;
unsigned shamt = imm12 & 0x1f; // 5 BITS for SHAMT in RISCV64
switch (funct7)
unsigned funct6 = (imm12 >> 6) & 0x3f;
// SLLIW's instruction code's upper 7 bits have to be equal to zero
if (funct7 == kSlliwFunct7)
{
case 0b0110000:
{
static const char* names[] = {"clzw ", "ctzw ", "cpopw"};
// shift amount is treated as funct additional opcode bits
if (shamt >= ARRAY_SIZE(names))
return emitDispIllegalInstruction(code);

printf("%s %s, %s\n", names[shamt], rd, rs1);
return;
}
case 0b0000000:
printf("slliw %s, %s, %d\n", rd, rs1, shamt);
return;

default:
printf("slliw %s, %s, %d\n", rd, rs1, imm12 & 0x1f); // 5 BITS for SHAMT in RISCV64
}
// SLLI.UW's instruction code's upper 6 bits have to be equal to 0b000010
else if (funct6 == kSlliUwFunct6)
{
printf("slli.uw %s, %s, %d\n", rd, rs1, imm12 & 0x3f); // 6 BITS for SHAMT in RISCV64
}
else if (funct7 == 0b0110000)
{
static const char* names[] = {"clzw ", "ctzw ", "cpopw"};
// shift amount is treated as funct additional opcode bits
unsigned shamt = imm12 & 0x1f; // 5 BITS for SHAMT in RISCV64
if (shamt >= ARRAY_SIZE(names))
return emitDispIllegalInstruction(code);

printf("%s %s, %s\n", names[shamt], rd, rs1);
}
else
{
emitDispIllegalInstruction(code);
}
}
return;
Expand Down Expand Up @@ -4113,6 +4129,20 @@ void emitter::emitDispInsName(
return emitDispIllegalInstruction(code);
}
return;
case 0b0010000:
switch (opcode3)
{
case 0x2: // SH1ADD
printf("sh1add %s, %s, %s\n", rd, rs1, rs2);
return;
case 0x4: // SH2ADD
printf("sh2add %s, %s, %s\n", rd, rs1, rs2);
return;
case 0x6: // SH3ADD
printf("sh3add %s, %s, %s\n", rd, rs1, rs2);
return;
}
return;
case 0b0110000:
switch (opcode3)
{
Expand Down Expand Up @@ -4201,6 +4231,22 @@ void emitter::emitDispInsName(
return emitDispIllegalInstruction(code);
}
return;
case 0b0010000:
switch (opcode3)
{
case 0x2: // SH1ADD.UW
printf("sh1add.uw %s, %s, %s\n", rd, rs1, rs2);
return;
case 0x4: // SH2ADD.UW
printf("sh2add.uw %s, %s, %s\n", rd, rs1, rs2);
return;
case 0x6: // SH3ADD.UW
printf("sh3add.uw %s, %s, %s\n", rd, rs1, rs2);
return;
default:
return emitDispIllegalInstruction(code);
}
return;
case 0b0110000:
switch (opcode3)
{
Expand All @@ -4215,12 +4261,28 @@ void emitter::emitDispInsName(
}
return;
case 0b0000100:
// Currently only zext.h for this opcode2.
// Note: zext.h is encoded as a pseudo for 'packw rd, rs1, zero' which is not in Zbb.
if (opcode3 != 0b100 || rs2Num != REG_ZERO)
return emitDispIllegalInstruction(code);
switch (opcode3)
{
case 0b000: // ZEXT.W & ADD.UW
if (rs2Num == REG_ZERO)
{
printf("zext.w %s, %s\n", rd, rs1);
}
else
{
printf("add.uw %s, %s, %s\n", rd, rs1, rs2);
}
return;
case 0b100: // ZEXT.H
// Note: zext.h is encoded as a pseudo for 'packw rd, rs1, zero' which is not in Zbb.
if (rs2Num != REG_ZERO)
return emitDispIllegalInstruction(code);

printf("zext.h %s, %s\n", rd, rs1);
printf("zext.h %s, %s\n", rd, rs1);
return;
default:
return emitDispIllegalInstruction(code);
}
return;

default:
Expand Down
4 changes: 3 additions & 1 deletion src/coreclr/jit/gentree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6526,6 +6526,9 @@ unsigned GenTree::GetScaledIndex()
case GT_MUL:
return AsOp()->gtOp2->GetScaleIndexMul();

#ifdef TARGET_RISCV64
case GT_SLLI_UW:
#endif
case GT_LSH:
return AsOp()->gtOp2->GetScaleIndexShf();

Expand Down Expand Up @@ -12839,7 +12842,6 @@ void Compiler::gtDispTree(GenTree* tree,
InsCflagsToString(tree->AsCCMP()->gtFlagsVal));
}
#endif

gtDispCommonEndLine(tree);

if (!topOnly)
Expand Down
Loading
Loading