-
Notifications
You must be signed in to change notification settings - Fork 5.1k
ARM64-SVE: Add Not, InsertIntoShiftedVector #103725
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from all commits
Commits
Show all changes
4 commits
Select commit
Hold shift + click to select a range
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2365,6 +2365,79 @@ internal Arm64() { } | |
public static unsafe ulong GetActiveElementCount(Vector<ulong> mask, Vector<ulong> from) { throw new PlatformNotSupportedException(); } | ||
|
||
|
||
/// Insert scalar into shifted vector | ||
|
||
/// <summary> | ||
/// svuint8_t svinsr[_n_u8](svuint8_t op1, uint8_t op2) | ||
/// INSR Ztied1.B, Wop2 | ||
/// INSR Ztied1.B, Bop2 | ||
/// </summary> | ||
public static unsafe Vector<byte> InsertIntoShiftedVector(Vector<byte> left, byte right) { throw new PlatformNotSupportedException(); } | ||
|
||
/// <summary> | ||
/// svfloat64_t svinsr[_n_f64](svfloat64_t op1, float64_t op2) | ||
/// INSR Ztied1.D, Xop2 | ||
/// INSR Ztied1.D, Dop2 | ||
/// </summary> | ||
public static unsafe Vector<double> InsertIntoShiftedVector(Vector<double> left, double right) { throw new PlatformNotSupportedException(); } | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This API, as per the docs is implementing both INSR - SIMD&FP and INSR - scalar, but it is not clear to me, how we decide which one to pick. @a74nh - any idea? |
||
|
||
/// <summary> | ||
/// svint16_t svinsr[_n_s16](svint16_t op1, int16_t op2) | ||
/// INSR Ztied1.H, Wop2 | ||
/// INSR Ztied1.H, Hop2 | ||
/// </summary> | ||
public static unsafe Vector<short> InsertIntoShiftedVector(Vector<short> left, short right) { throw new PlatformNotSupportedException(); } | ||
|
||
/// <summary> | ||
/// svint32_t svinsr[_n_s32](svint32_t op1, int32_t op2) | ||
/// INSR Ztied1.S, Wop2 | ||
/// INSR Ztied1.S, Sop2 | ||
/// </summary> | ||
public static unsafe Vector<int> InsertIntoShiftedVector(Vector<int> left, int right) { throw new PlatformNotSupportedException(); } | ||
|
||
/// <summary> | ||
/// svint64_t svinsr[_n_s64](svint64_t op1, int64_t op2) | ||
/// INSR Ztied1.D, Xop2 | ||
/// INSR Ztied1.D, Dop2 | ||
/// </summary> | ||
public static unsafe Vector<long> InsertIntoShiftedVector(Vector<long> left, long right) { throw new PlatformNotSupportedException(); } | ||
|
||
/// <summary> | ||
/// svint8_t svinsr[_n_s8](svint8_t op1, int8_t op2) | ||
/// INSR Ztied1.B, Wop2 | ||
/// INSR Ztied1.B, Bop2 | ||
/// </summary> | ||
public static unsafe Vector<sbyte> InsertIntoShiftedVector(Vector<sbyte> left, sbyte right) { throw new PlatformNotSupportedException(); } | ||
|
||
/// <summary> | ||
/// svfloat32_t svinsr[_n_f32](svfloat32_t op1, float32_t op2) | ||
/// INSR Ztied1.S, Wop2 | ||
/// INSR Ztied1.S, Sop2 | ||
/// </summary> | ||
public static unsafe Vector<float> InsertIntoShiftedVector(Vector<float> left, float right) { throw new PlatformNotSupportedException(); } | ||
|
||
/// <summary> | ||
/// svuint16_t svinsr[_n_u16](svuint16_t op1, uint16_t op2) | ||
/// INSR Ztied1.H, Wop2 | ||
/// INSR Ztied1.H, Hop2 | ||
/// </summary> | ||
public static unsafe Vector<ushort> InsertIntoShiftedVector(Vector<ushort> left, ushort right) { throw new PlatformNotSupportedException(); } | ||
|
||
/// <summary> | ||
/// svuint32_t svinsr[_n_u32](svuint32_t op1, uint32_t op2) | ||
/// INSR Ztied1.S, Wop2 | ||
/// INSR Ztied1.S, Sop2 | ||
/// </summary> | ||
public static unsafe Vector<uint> InsertIntoShiftedVector(Vector<uint> left, uint right) { throw new PlatformNotSupportedException(); } | ||
|
||
/// <summary> | ||
/// svuint64_t svinsr[_n_u64](svuint64_t op1, uint64_t op2) | ||
/// INSR Ztied1.D, Xop2 | ||
/// INSR Ztied1.D, Dop2 | ||
/// </summary> | ||
public static unsafe Vector<ulong> InsertIntoShiftedVector(Vector<ulong> left, ulong right) { throw new PlatformNotSupportedException(); } | ||
|
||
|
||
/// Count leading sign bits | ||
|
||
/// <summary> | ||
|
@@ -4034,6 +4107,96 @@ internal Arm64() { } | |
/// </summary> | ||
public static unsafe Vector<float> Negate(Vector<float> value) { throw new PlatformNotSupportedException(); } | ||
|
||
/// Bitwise invert | ||
|
||
/// <summary> | ||
/// svuint8_t svnot[_u8]_m(svuint8_t inactive, svbool_t pg, svuint8_t op) | ||
/// NOT Ztied.B, Pg/M, Zop.B | ||
/// svuint8_t svnot[_u8]_x(svbool_t pg, svuint8_t op) | ||
/// NOT Ztied.B, Pg/M, Ztied.B | ||
/// svuint8_t svnot[_u8]_z(svbool_t pg, svuint8_t op) | ||
/// svbool_t svnot[_b]_z(svbool_t pg, svbool_t op) | ||
/// EOR Presult.B, Pg/Z, Pop.B, Pg.B | ||
/// </summary> | ||
public static unsafe Vector<byte> Not(Vector<byte> value) { throw new PlatformNotSupportedException(); } | ||
|
||
/// <summary> | ||
/// svint16_t svnot[_s16]_m(svint16_t inactive, svbool_t pg, svint16_t op) | ||
/// NOT Ztied.H, Pg/M, Zop.H | ||
/// svint16_t svnot[_s16]_x(svbool_t pg, svint16_t op) | ||
/// NOT Ztied.H, Pg/M, Ztied.H | ||
/// svint16_t svnot[_s16]_z(svbool_t pg, svint16_t op) | ||
/// svbool_t svnot[_b]_z(svbool_t pg, svbool_t op) | ||
/// EOR Presult.B, Pg/Z, Pop.B, Pg.B | ||
/// </summary> | ||
public static unsafe Vector<short> Not(Vector<short> value) { throw new PlatformNotSupportedException(); } | ||
|
||
/// <summary> | ||
/// svint32_t svnot[_s32]_m(svint32_t inactive, svbool_t pg, svint32_t op) | ||
/// NOT Ztied.S, Pg/M, Zop.S | ||
/// svint32_t svnot[_s32]_x(svbool_t pg, svint32_t op) | ||
/// NOT Ztied.S, Pg/M, Ztied.S | ||
/// svint32_t svnot[_s32]_z(svbool_t pg, svint32_t op) | ||
/// svbool_t svnot[_b]_z(svbool_t pg, svbool_t op) | ||
/// EOR Presult.B, Pg/Z, Pop.B, Pg.B | ||
/// </summary> | ||
public static unsafe Vector<int> Not(Vector<int> value) { throw new PlatformNotSupportedException(); } | ||
|
||
/// <summary> | ||
/// svint64_t svnot[_s64]_m(svint64_t inactive, svbool_t pg, svint64_t op) | ||
/// NOT Ztied.D, Pg/M, Zop.D | ||
/// svint64_t svnot[_s64]_x(svbool_t pg, svint64_t op) | ||
/// NOT Ztied.D, Pg/M, Ztied.D | ||
/// svint64_t svnot[_s64]_z(svbool_t pg, svint64_t op) | ||
/// svbool_t svnot[_b]_z(svbool_t pg, svbool_t op) | ||
/// EOR Presult.B, Pg/Z, Pop.B, Pg.B | ||
/// </summary> | ||
public static unsafe Vector<long> Not(Vector<long> value) { throw new PlatformNotSupportedException(); } | ||
|
||
/// <summary> | ||
/// svint8_t svnot[_s8]_m(svint8_t inactive, svbool_t pg, svint8_t op) | ||
/// NOT Ztied.B, Pg/M, Zop.B | ||
/// svint8_t svnot[_s8]_x(svbool_t pg, svint8_t op) | ||
/// NOT Ztied.B, Pg/M, Ztied.B | ||
/// svint8_t svnot[_s8]_z(svbool_t pg, svint8_t op) | ||
/// svbool_t svnot[_b]_z(svbool_t pg, svbool_t op) | ||
/// EOR Presult.B, Pg/Z, Pop.B, Pg.B | ||
/// </summary> | ||
public static unsafe Vector<sbyte> Not(Vector<sbyte> value) { throw new PlatformNotSupportedException(); } | ||
|
||
/// <summary> | ||
/// svuint16_t svnot[_u16]_m(svuint16_t inactive, svbool_t pg, svuint16_t op) | ||
/// NOT Ztied.H, Pg/M, Zop.H | ||
/// svuint16_t svnot[_u16]_x(svbool_t pg, svuint16_t op) | ||
/// NOT Ztied.H, Pg/M, Ztied.H | ||
/// svuint16_t svnot[_u16]_z(svbool_t pg, svuint16_t op) | ||
/// svbool_t svnot[_b]_z(svbool_t pg, svbool_t op) | ||
/// EOR Presult.B, Pg/Z, Pop.B, Pg.B | ||
/// </summary> | ||
public static unsafe Vector<ushort> Not(Vector<ushort> value) { throw new PlatformNotSupportedException(); } | ||
|
||
/// <summary> | ||
/// svuint32_t svnot[_u32]_m(svuint32_t inactive, svbool_t pg, svuint32_t op) | ||
/// NOT Ztied.S, Pg/M, Zop.S | ||
/// svuint32_t svnot[_u32]_x(svbool_t pg, svuint32_t op) | ||
/// NOT Ztied.S, Pg/M, Ztied.S | ||
/// svuint32_t svnot[_u32]_z(svbool_t pg, svuint32_t op) | ||
/// svbool_t svnot[_b]_z(svbool_t pg, svbool_t op) | ||
/// EOR Presult.B, Pg/Z, Pop.B, Pg.B | ||
/// </summary> | ||
public static unsafe Vector<uint> Not(Vector<uint> value) { throw new PlatformNotSupportedException(); } | ||
|
||
/// <summary> | ||
/// svuint64_t svnot[_u64]_m(svuint64_t inactive, svbool_t pg, svuint64_t op) | ||
/// NOT Ztied.D, Pg/M, Zop.D | ||
/// svuint64_t svnot[_u64]_x(svbool_t pg, svuint64_t op) | ||
/// NOT Ztied.D, Pg/M, Ztied.D | ||
/// svuint64_t svnot[_u64]_z(svbool_t pg, svuint64_t op) | ||
/// svbool_t svnot[_b]_z(svbool_t pg, svbool_t op) | ||
/// EOR Presult.B, Pg/Z, Pop.B, Pg.B | ||
/// </summary> | ||
public static unsafe Vector<ulong> Not(Vector<ulong> value) { throw new PlatformNotSupportedException(); } | ||
|
||
/// Or : Bitwise inclusive OR | ||
|
||
/// <summary> | ||
|
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Does this work with both
op1Reg
being gpr or SIMD/FP?Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think so, though I'm struggling to get the JIT to use a gpr for
op1Reg
under the stress modes -- I'm only ever getting the "easy" case, where whenop1Reg
andtargetReg
differ,op1Reg
is already a vector register, so we're moving from a vector reg to a vector reg. Since the first argument inSve.InsertIntoShiftedVector<T>
is of typeVector<T>
, we'd expectop1Reg
to always be a vector register, right? I could add an assert here clarify this. (Though looking atemitIns_Mov
, it does have a path for emittingmov
instructions from a gpr to a vector register.)There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Actually I meant that for
op2Reg
. Sorry. So ideally, where you havedouble
orfloat
as 2nd argument, we should haveop2Reg
as SIMD/floating point and otherwise should be gpr. Can you verify that please? Forop1Reg
it should always be scalable register and we already assert that in emitter.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
No worries, I've added an assert to check this. Stress tests for unoptimized and optimized tests are still passing.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do you mind sharing small section of disassembly for both the categories?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Not at all. Here's a snippet from the
double
tests:And from the
uint
tests: