-
Notifications
You must be signed in to change notification settings - Fork 5.1k
Fix genPutArgStkFieldList for SIMD12 #88920
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Conversation
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Tagging subscribers to this area: @JulieLeeMSFT, @jakobbotsch
This was referenced Jul 14, 2023
jakobbotsch
reviewed
Jul 14, 2023
I verified that this fixes #88842 as well. Diff:
@@ -674,27 +674,31 @@ G_M13415_IG07:
;; size=10 bbWeight=1595.95 PerfScore 1462.95
G_M13415_IG08:
test edx, edx
- je SHORT G_M13415_IG11
- ;; size=4 bbWeight=99.75 PerfScore 124.68
+ je G_M13415_IG11
+ ;; size=8 bbWeight=99.75 PerfScore 124.68
G_M13415_IG09:
mov gword ptr [rbp-78H], rax
mov dword ptr [rbp-70H], edi
vmovaps xmmword ptr [rbp+C0H], xmm4
vmovups xmmword ptr [rbp+CCH], xmm2
cmp byte ptr [rbx], bl
- vmovups xmmword ptr [rsp], xmm4
- vmovups xmmword ptr [rsp+0CH], xmm2
+ vmovsd qword ptr [rsp], xmm4
+ vextractps dword ptr [rsp+08H], xmm4, 2
+ vmovsd qword ptr [rsp+0CH], xmm2
+ vextractps dword ptr [rsp+14H], xmm2, 2
mov rdi, rbx
call [System.Collections.Generic.Dictionary`2[MartinCostello.AdventOfCode.Puzzles.Y2021.Day22+Cuboid,long]:FindValue(MartinCostello.AdventOfCode.Puzzles.Y2021.Day22+Cuboid):byref:this]
test rax, rax
je SHORT G_M13415_IG12
- ;; size=50 bbWeight=36.60 PerfScore 494.10
+ ;; size=66 bbWeight=36.60 PerfScore 713.70
G_M13415_IG10:
mov rsi, qword ptr [rax]
vmovaps xmm4, xmmword ptr [rbp+C0H]
- vmovups xmmword ptr [rsp], xmm4
+ vmovsd qword ptr [rsp], xmm4
+ vextractps dword ptr [rsp+08H], xmm4, 2
vmovups xmm2, xmmword ptr [rbp+CCH]
- vmovups xmmword ptr [rsp+0CH], xmm2
+ vmovsd qword ptr [rsp+0CH], xmm2
+ vextractps dword ptr [rsp+14H], xmm2, 2
sub rsi, r13
mov rdi, rbx
mov edx, 1
@@ -705,7 +709,7 @@ G_M13415_IG10:
vmovups xmm0, xmmword ptr [rbp-58H]
vmovups xmm1, xmmword ptr [rbp-68H]
jmp SHORT G_M13415_IG13
- ;; size=69 bbWeight=585.60 PerfScore 14493.55
+ ;; size=85 bbWeight=585.60 PerfScore 18007.13
G_M13415_IG11:
mov ecx, dword ptr [rbp-6CH]
vmovups xmm0, xmmword ptr [rbp-58H]
@@ -714,9 +718,11 @@ G_M13415_IG11:
;; size=15 bbWeight=49.87 PerfScore 448.86
G_M13415_IG12:
vmovaps xmm4, xmmword ptr [rbp+C0H]
- vmovups xmmword ptr [rsp], xmm4
+ vmovsd qword ptr [rsp], xmm4
+ vextractps dword ptr [rsp+08H], xmm4, 2
vmovups xmm2, xmmword ptr [rbp+CCH]
- vmovups xmmword ptr [rsp+0CH], xmm2
+ vmovsd qword ptr [rsp+0CH], xmm2
+ vextractps dword ptr [rsp+14H], xmm2, 2
mov rsi, r13
neg rsi
mov rdi, rbx
@@ -727,7 +733,7 @@ G_M13415_IG12:
mov edi, dword ptr [rbp-70H]
vmovups xmm0, xmmword ptr [rbp-58H]
vmovups xmm1, xmmword ptr [rbp-68H]
- ;; size=67 bbWeight=585.60 PerfScore 12297.56
+ ;; size=83 bbWeight=585.60 PerfScore 15811.14
G_M13415_IG13:
mov r13d, 1
mov dword ptr [rbp-6CH], ecx
@@ -769,13 +775,15 @@ G_M13415_IG18:
vmovups xmmword ptr [rbp-38H], xmm0
vmovups xmmword ptr [rbp-48H], xmm1
cmp byte ptr [rbx], bl
- vmovups xmmword ptr [rsp], xmm0
- vmovups xmmword ptr [rsp+0CH], xmm1
+ vmovsd qword ptr [rsp], xmm0
+ vextractps dword ptr [rsp+08H], xmm0, 2
+ vmovsd qword ptr [rsp+0CH], xmm1
+ vextractps dword ptr [rsp+14H], xmm1, 2
mov rdi, rbx
call [System.Collections.Generic.Dictionary`2[MartinCostello.AdventOfCode.Puzzles.Y2021.Day22+Cuboid,long]:FindValue(MartinCostello.AdventOfCode.Puzzles.Y2021.Day22+Cuboid):byref:this]
test rax, rax
je G_M13415_IG25
- ;; size=41 bbWeight=0.19 PerfScore 2.15
+ ;; size=57 bbWeight=0.19 PerfScore 3.28
G_M13415_IG19:
mov rsi, qword ptr [rax]
jmp G_M13415_IG24
@@ -801,47 +809,55 @@ G_M13415_IG22:
vmovups xmmword ptr [rbp+A8H], xmm2
vmovups xmmword ptr [rbp+B4H], xmm3
mov qword ptr [rbp+A0H], rdx
- vmovups xmmword ptr [rsp], xmm2
- vmovups xmmword ptr [rsp+0CH], xmm3
+ vmovsd qword ptr [rsp], xmm2
+ vextractps dword ptr [rsp+08H], xmm2, 2
+ vmovsd qword ptr [rsp+0CH], xmm3
+ vextractps dword ptr [rsp+14H], xmm3, 2
mov rdi, r15
call [System.Collections.Generic.Dictionary`2[MartinCostello.AdventOfCode.Puzzles.Y2021.Day22+Cuboid,long]:FindValue(MartinCostello.AdventOfCode.Puzzles.Y2021.Day22+Cuboid):byref:this]
test rax, rax
je G_M13415_IG33
- ;; size=52 bbWeight=20.63 PerfScore 196.03
+ ;; size=68 bbWeight=20.63 PerfScore 319.83
G_M13415_IG23:
mov rsi, qword ptr [rax]
vmovups xmm2, xmmword ptr [rbp+A8H]
- vmovups xmmword ptr [rsp], xmm2
+ vmovsd qword ptr [rsp], xmm2
+ vextractps dword ptr [rsp+08H], xmm2, 2
vmovups xmm3, xmmword ptr [rbp+B4H]
- vmovups xmmword ptr [rsp+0CH], xmm3
+ vmovsd qword ptr [rsp+0CH], xmm3
+ vextractps dword ptr [rsp+14H], xmm3, 2
add rsi, qword ptr [rbp+A0H]
mov rdi, r15
mov edx, 1
call [System.Collections.Generic.Dictionary`2[MartinCostello.AdventOfCode.Puzzles.Y2021.Day22+Cuboid,long]:TryInsert(MartinCostello.AdventOfCode.Puzzles.Y2021.Day22+Cuboid,long,ubyte):bool:this]
mov eax, dword ptr [rbp-70H]
jmp G_M13415_IG34
- ;; size=59 bbWeight=330.15 PerfScore 6107.73
+ ;; size=75 bbWeight=330.15 PerfScore 8088.61
G_M13415_IG24:
vmovups xmm1, xmmword ptr [rbp-38H]
- vmovups xmmword ptr [rsp], xmm1
+ vmovsd qword ptr [rsp], xmm1
+ vextractps dword ptr [rsp+08H], xmm1, 2
vmovups xmm2, xmmword ptr [rbp-48H]
- vmovups xmmword ptr [rsp+0CH], xmm2
+ vmovsd qword ptr [rsp+0CH], xmm2
+ vextractps dword ptr [rsp+14H], xmm2, 2
inc rsi
mov rdi, rbx
mov edx, 1
call [System.Collections.Generic.Dictionary`2[MartinCostello.AdventOfCode.Puzzles.Y2021.Day22+Cuboid,long]:TryInsert(MartinCostello.AdventOfCode.Puzzles.Y2021.Day22+Cuboid,long,ubyte):bool:this]
jmp SHORT G_M13415_IG26
- ;; size=40 bbWeight=0.19 PerfScore 2.57
+ ;; size=56 bbWeight=0.19 PerfScore 3.70
G_M13415_IG25:
vmovups xmm1, xmmword ptr [rbp-38H]
- vmovups xmmword ptr [rsp], xmm1
+ vmovsd qword ptr [rsp], xmm1
+ vextractps dword ptr [rsp+08H], xmm1, 2
vmovups xmm2, xmmword ptr [rbp-48H]
- vmovups xmmword ptr [rsp+0CH], xmm2
+ vmovsd qword ptr [rsp+0CH], xmm2
+ vextractps dword ptr [rsp+14H], xmm2, 2
mov rdi, rbx
mov esi, 1
mov edx, 1
call [System.Collections.Generic.Dictionary`2[MartinCostello.AdventOfCode.Puzzles.Y2021.Day22+Cuboid,long]:TryInsert(MartinCostello.AdventOfCode.Puzzles.Y2021.Day22+Cuboid,long,ubyte):bool:this]
- ;; size=40 bbWeight=0.75 PerfScore 8.80
+ ;; size=56 bbWeight=0.75 PerfScore 13.29
G_M13415_IG26:
mov edi, dword ptr [rbx+38H]
sub edi, dword ptr [rbx+40H]
@@ -893,15 +909,17 @@ G_M13415_IG32:
;; size=22 bbWeight=0.23 PerfScore 1.33
G_M13415_IG33:
vmovups xmm2, xmmword ptr [rbp+A8H]
- vmovups xmmword ptr [rsp], xmm2
+ vmovsd qword ptr [rsp], xmm2
+ vextractps dword ptr [rsp+08H], xmm2, 2
vmovups xmm3, xmmword ptr [rbp+B4H]
- vmovups xmmword ptr [rsp+0CH], xmm3
+ vmovsd qword ptr [rsp+0CH], xmm3
+ vextractps dword ptr [rsp+14H], xmm3, 2
mov rdi, r15
mov rsi, qword ptr [rbp+A0H]
mov edx, 1
call [System.Collections.Generic.Dictionary`2[MartinCostello.AdventOfCode.Puzzles.Y2021.Day22+Cuboid,long]:TryInsert(MartinCostello.AdventOfCode.Puzzles.Y2021.Day22+Cuboid,long,ubyte):bool:this]
mov eax, dword ptr [rbp-70H]
- ;; size=51 bbWeight=330.15 PerfScore 4456.99
+ ;; size=67 bbWeight=330.15 PerfScore 6437.88
G_M13415_IG34:
cmp r14d, dword ptr [rbx+44H]
jne G_M13415_IG47
@@ -1031,6 +1049,11 @@ G_M13415_IG48:
int3
;; size=6 bbWeight=0 PerfScore 0.00
-; Total bytes of code 1573, prolog size 84, PerfScore 71319.51, instruction count 350, allocated bytes for code 1573 (MethodHash=e316cb98) for method MartinCostello.AdventOfCode.Puzzles.Y2021.Day22:<Reboot>g__Reboot|8_1(System.Collections.Generic.List`1[System.ValueTuple`2[MartinCostello.AdventOfCode.Puzzles.Y2021.Day22+Cuboid,bool]]):long (Tier1-OSR)
+; Total bytes of code 1721, prolog size 84, PerfScore 82673.40, instruction count 368, allocated bytes for code 1721 (MethodHash=e316cb98) for method MartinCostello.AdventOfCode.Puzzles.Y2021.Day22:<Reboot>g__Reboot|8_1(System.Collections.Generic.List`1[System.ValueTuple`2[MartinCostello.AdventOfCode.Puzzles.Y2021.Day22+Cuboid,bool]]):long (Tier1-OSR)
jakobbotsch
reviewed
Jul 15, 2023
jakobbotsch
approved these changes
Jul 15, 2023
This was referenced Jul 15, 2023
SPMI is failing everywhere; the real diffs were +24 in a few cases, e.g.:
@@ -163,7 +163,9 @@ G_M13885_IG02: ; bbWeight=1, gcrefRegs=380000 {x19 x20 x21}, byrefRegs=00
fmul v0.4s, v1.4s, v0.s[0]
fsub v0.4s, v16.4s, v0.4s
str q0, [fp, #0x20] // [V50 tmp41]
- str q0, [sp] // [V09 OutArgs]
+ str d0, [sp] // [V09 OutArgs]
+ mov w1, v0.s[2]
+ str w1, [sp, #0x08] // [V09 OutArgs+0x08]
ldp s0, s1, [fp, #0xA0] // [V24 tmp15], [V24 tmp15+0x04]
ldr s2, [fp, #0xA8] // [V24 tmp15+0x08]
ldp s3, s4, [fp, #0xC0] // [V06 loc2], [V06 loc2+0x04]
@@ -189,7 +191,7 @@ G_M13885_IG02: ; bbWeight=1, gcrefRegs=380000 {x19 x20 x21}, byrefRegs=00
cmp w22, #5
mov v8.d[1], v9.d[0]
blt G_M13885_IG05
- ;; size=248 bbWeight=1 PerfScore 109.00
+ ;; size=256 bbWeight=1 PerfScore 111.00
Because previously we only ran that logic on osx-arm64.
Sign up for free
to subscribe to this conversation on GitHub.
Already have an account?
Sign in.
Labels
area-CodeGen-coreclr
CLR JIT compiler in src/coreclr/src/jit and related components such as SuperPMI
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Fixes #79118
Fixes #88842