Skip to content

Implement SVE AddCarryWidening (Upper/Lower) #116429

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 15 commits into from
Jun 19, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions src/coreclr/jit/hwintrinsiccodegenarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2685,6 +2685,15 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
break;
}

case NI_Sve2_AddCarryWideningLower:
case NI_Sve2_AddCarryWideningUpper:
// op3 is treated as the operand tied to the destination: copy it into targetReg
// first (unless it already lives there), then emit the instruction with targetReg
// as the first register operand and op1/op2 as the remaining register operands.
// NOTE(review): the managed API docs for these intrinsics describe the encoding as
// "ADCLB/ADCLT Ztied1, Zop2, Zop3" (op1 tied), whereas this sequence ties op3 --
// confirm the operand mapping is intentional.
// NOTE(review): the mov would clobber a source if targetReg aliased op1Reg or
// op2Reg; presumably register allocation rules that out -- confirm.
if (targetReg != op3Reg)
{
GetEmitter()->emitIns_Mov(INS_mov, emitTypeSize(node), targetReg, op3Reg, /* canSkip */ true);
}
GetEmitter()->emitInsSve_R_R_R(ins, emitSize, targetReg, op1Reg, op2Reg, opt);
break;

case NI_Sve2_BitwiseClearXor:
case NI_Sve2_Xor:
if (targetReg != op1Reg)
Expand Down
2 changes: 2 additions & 0 deletions src/coreclr/jit/hwintrinsiclistarm64sve.h
Original file line number Diff line number Diff line change
Expand Up @@ -318,6 +318,8 @@ HARDWARE_INTRINSIC(Sve2, AbsoluteDifferenceAddWideningLower,
HARDWARE_INTRINSIC(Sve2, AbsoluteDifferenceAddWideningUpper, -1, 3, {INS_invalid, INS_invalid, INS_sve_sabalt, INS_sve_uabalt, INS_sve_sabalt, INS_sve_uabalt, INS_sve_sabalt, INS_sve_uabalt, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasRMWSemantics)
HARDWARE_INTRINSIC(Sve2, AbsoluteDifferenceWideningLower, -1, 2, {INS_invalid, INS_invalid, INS_sve_sabdlb, INS_sve_uabdlb, INS_sve_sabdlb, INS_sve_uabdlb, INS_sve_sabdlb, INS_sve_uabdlb, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable)
HARDWARE_INTRINSIC(Sve2, AbsoluteDifferenceWideningUpper, -1, 2, {INS_invalid, INS_invalid, INS_sve_sabdlt, INS_sve_uabdlt, INS_sve_sabdlt, INS_sve_uabdlt, INS_sve_sabdlt, INS_sve_uabdlt, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable)
HARDWARE_INTRINSIC(Sve2, AddCarryWideningLower, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_adclb, INS_invalid, INS_sve_adclb, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasRMWSemantics|HW_Flag_SpecialCodeGen)
HARDWARE_INTRINSIC(Sve2, AddCarryWideningUpper, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_adclt, INS_invalid, INS_sve_adclt, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasRMWSemantics|HW_Flag_SpecialCodeGen)
HARDWARE_INTRINSIC(Sve2, BitwiseClearXor, -1, 3, {INS_sve_bcax, INS_sve_bcax, INS_sve_bcax, INS_sve_bcax, INS_sve_bcax, INS_sve_bcax, INS_sve_bcax, INS_sve_bcax, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_HasRMWSemantics)
HARDWARE_INTRINSIC(Sve2, BitwiseSelect, -1, 3, {INS_sve_bsl, INS_sve_bsl, INS_sve_bsl, INS_sve_bsl, INS_sve_bsl, INS_sve_bsl, INS_sve_bsl, INS_sve_bsl, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_HasRMWSemantics)
HARDWARE_INTRINSIC(Sve2, BitwiseSelectLeftInverted, -1, 3, {INS_sve_bsl1n, INS_sve_bsl1n, INS_sve_bsl1n, INS_sve_bsl1n, INS_sve_bsl1n, INS_sve_bsl1n, INS_sve_bsl1n, INS_sve_bsl1n, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_HasRMWSemantics)
Expand Down
2 changes: 2 additions & 0 deletions src/coreclr/jit/lsra.h
Original file line number Diff line number Diff line change
Expand Up @@ -1935,6 +1935,7 @@ class LinearScan : public LinearScanInterface
// 'tgtPrefUse' to that RefPosition.
RefPosition* tgtPrefUse = nullptr;
RefPosition* tgtPrefUse2 = nullptr;
RefPosition* tgtPrefUse3 = nullptr;

public:
// The following keep track of information about internal (temporary register) intervals
Expand All @@ -1957,6 +1958,7 @@ class LinearScan : public LinearScanInterface
{
tgtPrefUse = nullptr;
tgtPrefUse2 = nullptr;
tgtPrefUse3 = nullptr;
internalCount = 0;
setInternalRegsDelayFree = false;
pendingDelayFree = false;
Expand Down
20 changes: 19 additions & 1 deletion src/coreclr/jit/lsraarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1464,15 +1464,25 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou
{
assert(tgtPrefUse == nullptr);
assert(tgtPrefUse2 == nullptr);
assert(tgtPrefUse3 == nullptr);
tgtPrefUse = delayUse;
}
else
else if (opNum == 2)
{
assert(opNum == 2);
assert(tgtPrefUse == nullptr);
assert(tgtPrefUse2 == nullptr);
assert(tgtPrefUse3 == nullptr);
tgtPrefUse2 = delayUse;
}
else
{
assert(opNum == 3);
assert(tgtPrefUse == nullptr);
assert(tgtPrefUse2 == nullptr);
assert(tgtPrefUse3 == nullptr);
tgtPrefUse3 = delayUse;
}
}
}
else if (containedCselOp == operand)
Expand Down Expand Up @@ -2292,6 +2302,14 @@ GenTree* LinearScan::getDelayFreeOperand(GenTreeHWIntrinsic* intrinsicTree, bool
assert(delayFreeOp != nullptr);
break;

case NI_Sve2_AddCarryWideningLower:
case NI_Sve2_AddCarryWideningUpper:
// For these intrinsics the read-modify-write operand is the third one, so Op(3)
// is selected as delayFreeOp. The intrinsic must be flagged RMW and the operand
// must be present; both are asserted.
assert(isRMW);
delayFreeOp = intrinsicTree->Op(3);
assert(delayFreeOp != nullptr);
break;

default:
if (isRMW)
{
Expand Down
1 change: 1 addition & 0 deletions src/coreclr/jit/lsrabuild.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3062,6 +3062,7 @@ RefPosition* LinearScan::BuildDef(GenTree* tree, SingleTypeRegSet dstCandidates,
#ifndef TARGET_ARM
setTgtPref(interval, tgtPrefUse);
setTgtPref(interval, tgtPrefUse2);
setTgtPref(interval, tgtPrefUse3);
#endif // !TARGET_ARM

#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -230,6 +230,34 @@ internal Arm64() { }
/// </summary>
public static Vector<ulong> AbsoluteDifferenceWideningUpper(Vector<uint> left, Vector<uint> right) { throw new PlatformNotSupportedException(); }

// Add with carry long (bottom)

/// <summary>
/// svuint32_t svadclb[_u32](svuint32_t op1, svuint32_t op2, svuint32_t op3)
/// ADCLB Ztied1.S, Zop2.S, Zop3.S
/// </summary>
public static Vector<uint> AddCarryWideningLower(Vector<uint> op1, Vector<uint> op2, Vector<uint> op3) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svuint64_t svadclb[_u64](svuint64_t op1, svuint64_t op2, svuint64_t op3)
/// ADCLB Ztied1.D, Zop2.D, Zop3.D
/// </summary>
public static Vector<ulong> AddCarryWideningLower(Vector<ulong> op1, Vector<ulong> op2, Vector<ulong> op3) { throw new PlatformNotSupportedException(); }

// Add with carry long (top)

/// <summary>
/// svuint32_t svadclt[_u32](svuint32_t op1, svuint32_t op2, svuint32_t op3)
/// ADCLT Ztied1.S, Zop2.S, Zop3.S
/// </summary>
public static Vector<uint> AddCarryWideningUpper(Vector<uint> op1, Vector<uint> op2, Vector<uint> op3) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svuint64_t svadclt[_u64](svuint64_t op1, svuint64_t op2, svuint64_t op3)
/// ADCLT Ztied1.D, Zop2.D, Zop3.D
/// </summary>
public static Vector<ulong> AddCarryWideningUpper(Vector<ulong> op1, Vector<ulong> op2, Vector<ulong> op3) { throw new PlatformNotSupportedException(); }

// Bitwise clear and exclusive OR

/// <summary>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -230,6 +230,34 @@ internal Arm64() { }
/// </summary>
public static Vector<ulong> AbsoluteDifferenceWideningUpper(Vector<uint> left, Vector<uint> right) => AbsoluteDifferenceWideningUpper(left, right);

// Add with carry long (bottom)

/// <summary>
/// svuint32_t svadclb[_u32](svuint32_t op1, svuint32_t op2, svuint32_t op3)
/// ADCLB Ztied1.S, Zop2.S, Zop3.S
/// </summary>
public static Vector<uint> AddCarryWideningLower(Vector<uint> op1, Vector<uint> op2, Vector<uint> op3) => AddCarryWideningLower(op1, op2, op3);

/// <summary>
/// svuint64_t svadclb[_u64](svuint64_t op1, svuint64_t op2, svuint64_t op3)
/// ADCLB Ztied1.D, Zop2.D, Zop3.D
/// </summary>
public static Vector<ulong> AddCarryWideningLower(Vector<ulong> op1, Vector<ulong> op2, Vector<ulong> op3) => AddCarryWideningLower(op1, op2, op3);

// Add with carry long (top)

/// <summary>
/// svuint32_t svadclt[_u32](svuint32_t op1, svuint32_t op2, svuint32_t op3)
/// ADCLT Ztied1.S, Zop2.S, Zop3.S
/// </summary>
public static Vector<uint> AddCarryWideningUpper(Vector<uint> op1, Vector<uint> op2, Vector<uint> op3) => AddCarryWideningUpper(op1, op2, op3);

/// <summary>
/// svuint64_t svadclt[_u64](svuint64_t op1, svuint64_t op2, svuint64_t op3)
/// ADCLT Ztied1.D, Zop2.D, Zop3.D
/// </summary>
public static Vector<ulong> AddCarryWideningUpper(Vector<ulong> op1, Vector<ulong> op2, Vector<ulong> op3) => AddCarryWideningUpper(op1, op2, op3);

// Bitwise clear and exclusive OR

/// <summary>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6116,6 +6116,10 @@ internal Arm64() { }
public static System.Numerics.Vector<uint> AbsoluteDifferenceWideningUpper(System.Numerics.Vector<ushort> left, System.Numerics.Vector<ushort> right) { throw null; }
public static System.Numerics.Vector<ulong> AbsoluteDifferenceWideningUpper(System.Numerics.Vector<uint> left, System.Numerics.Vector<uint> right) { throw null; }

public static System.Numerics.Vector<uint> AddCarryWideningLower(System.Numerics.Vector<uint> op1, System.Numerics.Vector<uint> op2, System.Numerics.Vector<uint> op3) { throw null; }
public static System.Numerics.Vector<ulong> AddCarryWideningLower(System.Numerics.Vector<ulong> op1, System.Numerics.Vector<ulong> op2, System.Numerics.Vector<ulong> op3) { throw null; }
public static System.Numerics.Vector<uint> AddCarryWideningUpper(System.Numerics.Vector<uint> op1, System.Numerics.Vector<uint> op2, System.Numerics.Vector<uint> op3) { throw null; }
public static System.Numerics.Vector<ulong> AddCarryWideningUpper(System.Numerics.Vector<ulong> op1, System.Numerics.Vector<ulong> op2, System.Numerics.Vector<ulong> op3) { throw null; }
public static System.Numerics.Vector<byte> BitwiseClearXor(System.Numerics.Vector<byte> xor, System.Numerics.Vector<byte> value, System.Numerics.Vector<byte> mask) { throw null; }
public static System.Numerics.Vector<short> BitwiseClearXor(System.Numerics.Vector<short> xor, System.Numerics.Vector<short> value, System.Numerics.Vector<short> mask) { throw null; }
public static System.Numerics.Vector<int> BitwiseClearXor(System.Numerics.Vector<int> xor, System.Numerics.Vector<int> value, System.Numerics.Vector<int> mask) { throw null; }
Expand Down
Loading
Loading