Skip to content

Fix genPutArgStkFieldList for SIMD12 #88920

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into dotnet:main from fix-simd12-store
Jul 15, 2023
Merged

Conversation

EgorBo
Copy link
Member

@EgorBo EgorBo commented Jul 14, 2023

Fixes #79118
Fixes #88842

@ghost ghost added the area-CodeGen-coreclr CLR JIT compiler in src/coreclr/src/jit and related components such as SuperPMI label Jul 14, 2023
@ghost ghost assigned EgorBo Jul 14, 2023
@ghost
Copy link

ghost commented Jul 14, 2023

Tagging subscribers to this area: @JulieLeeMSFT, @jakobbotsch
See info in area-owners.md if you want to be subscribed.

Issue Details

Fixes #88842
and, presumably, fixes #88842

Author: EgorBo
Assignees: -
Labels:

area-CodeGen-coreclr

Milestone: -

@filipnavara
Copy link
Member

filipnavara commented Jul 14, 2023

Fixes #88842 and, presumably, fixes #88842

Is this a joke on a.SequenceEqual(a) == false (aka issue #65292)?

Seriously though, I assume you meant to link two different issues.

@EgorBo
Copy link
Member Author

EgorBo commented Jul 14, 2023

Fixes #88842 and, presumably, fixes #88842

Is this a joke on a.SequenceEqual(a) == false (aka issue #65292)?

Seriously though, I assume you meant to link two different issues.

It took me a while to understand what you meant 😄 Yeah, fixed.

@jakobbotsch
Copy link
Member

I verified that this fixes #88842 as well, diff:

@@ -674,27 +674,31 @@ G_M13415_IG07:
 						;; size=10 bbWeight=1595.95 PerfScore 1462.95
 G_M13415_IG08:
        test     edx, edx
-       je       SHORT G_M13415_IG11
-						;; size=4 bbWeight=99.75 PerfScore 124.68
+       je       G_M13415_IG11
+						;; size=8 bbWeight=99.75 PerfScore 124.68
 G_M13415_IG09:
        mov      gword ptr [rbp-78H], rax
        mov      dword ptr [rbp-70H], edi
        vmovaps  xmmword ptr [rbp+C0H], xmm4
        vmovups  xmmword ptr [rbp+CCH], xmm2
        cmp      byte  ptr [rbx], bl
-       vmovups  xmmword ptr [rsp], xmm4
-       vmovups  xmmword ptr [rsp+0CH], xmm2
+       vmovsd   qword ptr [rsp], xmm4
+       vextractps dword ptr [rsp+08H], xmm4, 2
+       vmovsd   qword ptr [rsp+0CH], xmm2
+       vextractps dword ptr [rsp+14H], xmm2, 2
        mov      rdi, rbx
        call     [System.Collections.Generic.Dictionary`2[MartinCostello.AdventOfCode.Puzzles.Y2021.Day22+Cuboid,long]:FindValue(MartinCostello.AdventOfCode.Puzzles.Y2021.Day22+Cuboid):byref:this]
        test     rax, rax
        je       SHORT G_M13415_IG12
-						;; size=50 bbWeight=36.60 PerfScore 494.10
+						;; size=66 bbWeight=36.60 PerfScore 713.70
 G_M13415_IG10:
        mov      rsi, qword ptr [rax]
        vmovaps  xmm4, xmmword ptr [rbp+C0H]
-       vmovups  xmmword ptr [rsp], xmm4
+       vmovsd   qword ptr [rsp], xmm4
+       vextractps dword ptr [rsp+08H], xmm4, 2
        vmovups  xmm2, xmmword ptr [rbp+CCH]
-       vmovups  xmmword ptr [rsp+0CH], xmm2
+       vmovsd   qword ptr [rsp+0CH], xmm2
+       vextractps dword ptr [rsp+14H], xmm2, 2
        sub      rsi, r13
        mov      rdi, rbx
        mov      edx, 1
@@ -705,7 +709,7 @@ G_M13415_IG10:
        vmovups  xmm0, xmmword ptr [rbp-58H]
        vmovups  xmm1, xmmword ptr [rbp-68H]
        jmp      SHORT G_M13415_IG13
-						;; size=69 bbWeight=585.60 PerfScore 14493.55
+						;; size=85 bbWeight=585.60 PerfScore 18007.13
 G_M13415_IG11:
        mov      ecx, dword ptr [rbp-6CH]
        vmovups  xmm0, xmmword ptr [rbp-58H]
@@ -714,9 +718,11 @@ G_M13415_IG11:
 						;; size=15 bbWeight=49.87 PerfScore 448.86
 G_M13415_IG12:
        vmovaps  xmm4, xmmword ptr [rbp+C0H]
-       vmovups  xmmword ptr [rsp], xmm4
+       vmovsd   qword ptr [rsp], xmm4
+       vextractps dword ptr [rsp+08H], xmm4, 2
        vmovups  xmm2, xmmword ptr [rbp+CCH]
-       vmovups  xmmword ptr [rsp+0CH], xmm2
+       vmovsd   qword ptr [rsp+0CH], xmm2
+       vextractps dword ptr [rsp+14H], xmm2, 2
        mov      rsi, r13
        neg      rsi
        mov      rdi, rbx
@@ -727,7 +733,7 @@ G_M13415_IG12:
        mov      edi, dword ptr [rbp-70H]
        vmovups  xmm0, xmmword ptr [rbp-58H]
        vmovups  xmm1, xmmword ptr [rbp-68H]
-						;; size=67 bbWeight=585.60 PerfScore 12297.56
+						;; size=83 bbWeight=585.60 PerfScore 15811.14
 G_M13415_IG13:
        mov      r13d, 1
        mov      dword ptr [rbp-6CH], ecx
@@ -769,13 +775,15 @@ G_M13415_IG18:
        vmovups  xmmword ptr [rbp-38H], xmm0
        vmovups  xmmword ptr [rbp-48H], xmm1
        cmp      byte  ptr [rbx], bl
-       vmovups  xmmword ptr [rsp], xmm0
-       vmovups  xmmword ptr [rsp+0CH], xmm1
+       vmovsd   qword ptr [rsp], xmm0
+       vextractps dword ptr [rsp+08H], xmm0, 2
+       vmovsd   qword ptr [rsp+0CH], xmm1
+       vextractps dword ptr [rsp+14H], xmm1, 2
        mov      rdi, rbx
        call     [System.Collections.Generic.Dictionary`2[MartinCostello.AdventOfCode.Puzzles.Y2021.Day22+Cuboid,long]:FindValue(MartinCostello.AdventOfCode.Puzzles.Y2021.Day22+Cuboid):byref:this]
        test     rax, rax
        je       G_M13415_IG25
-						;; size=41 bbWeight=0.19 PerfScore 2.15
+						;; size=57 bbWeight=0.19 PerfScore 3.28
 G_M13415_IG19:
        mov      rsi, qword ptr [rax]
        jmp      G_M13415_IG24
@@ -801,47 +809,55 @@ G_M13415_IG22:
        vmovups  xmmword ptr [rbp+A8H], xmm2
        vmovups  xmmword ptr [rbp+B4H], xmm3
        mov      qword ptr [rbp+A0H], rdx
-       vmovups  xmmword ptr [rsp], xmm2
-       vmovups  xmmword ptr [rsp+0CH], xmm3
+       vmovsd   qword ptr [rsp], xmm2
+       vextractps dword ptr [rsp+08H], xmm2, 2
+       vmovsd   qword ptr [rsp+0CH], xmm3
+       vextractps dword ptr [rsp+14H], xmm3, 2
        mov      rdi, r15
        call     [System.Collections.Generic.Dictionary`2[MartinCostello.AdventOfCode.Puzzles.Y2021.Day22+Cuboid,long]:FindValue(MartinCostello.AdventOfCode.Puzzles.Y2021.Day22+Cuboid):byref:this]
        test     rax, rax
        je       G_M13415_IG33
-						;; size=52 bbWeight=20.63 PerfScore 196.03
+						;; size=68 bbWeight=20.63 PerfScore 319.83
 G_M13415_IG23:
        mov      rsi, qword ptr [rax]
        vmovups  xmm2, xmmword ptr [rbp+A8H]
-       vmovups  xmmword ptr [rsp], xmm2
+       vmovsd   qword ptr [rsp], xmm2
+       vextractps dword ptr [rsp+08H], xmm2, 2
        vmovups  xmm3, xmmword ptr [rbp+B4H]
-       vmovups  xmmword ptr [rsp+0CH], xmm3
+       vmovsd   qword ptr [rsp+0CH], xmm3
+       vextractps dword ptr [rsp+14H], xmm3, 2
        add      rsi, qword ptr [rbp+A0H]
        mov      rdi, r15
        mov      edx, 1
        call     [System.Collections.Generic.Dictionary`2[MartinCostello.AdventOfCode.Puzzles.Y2021.Day22+Cuboid,long]:TryInsert(MartinCostello.AdventOfCode.Puzzles.Y2021.Day22+Cuboid,long,ubyte):bool:this]
        mov      eax, dword ptr [rbp-70H]
        jmp      G_M13415_IG34
-						;; size=59 bbWeight=330.15 PerfScore 6107.73
+						;; size=75 bbWeight=330.15 PerfScore 8088.61
 G_M13415_IG24:
        vmovups  xmm1, xmmword ptr [rbp-38H]
-       vmovups  xmmword ptr [rsp], xmm1
+       vmovsd   qword ptr [rsp], xmm1
+       vextractps dword ptr [rsp+08H], xmm1, 2
        vmovups  xmm2, xmmword ptr [rbp-48H]
-       vmovups  xmmword ptr [rsp+0CH], xmm2
+       vmovsd   qword ptr [rsp+0CH], xmm2
+       vextractps dword ptr [rsp+14H], xmm2, 2
        inc      rsi
        mov      rdi, rbx
        mov      edx, 1
        call     [System.Collections.Generic.Dictionary`2[MartinCostello.AdventOfCode.Puzzles.Y2021.Day22+Cuboid,long]:TryInsert(MartinCostello.AdventOfCode.Puzzles.Y2021.Day22+Cuboid,long,ubyte):bool:this]
        jmp      SHORT G_M13415_IG26
-						;; size=40 bbWeight=0.19 PerfScore 2.57
+						;; size=56 bbWeight=0.19 PerfScore 3.70
 G_M13415_IG25:
        vmovups  xmm1, xmmword ptr [rbp-38H]
-       vmovups  xmmword ptr [rsp], xmm1
+       vmovsd   qword ptr [rsp], xmm1
+       vextractps dword ptr [rsp+08H], xmm1, 2
        vmovups  xmm2, xmmword ptr [rbp-48H]
-       vmovups  xmmword ptr [rsp+0CH], xmm2
+       vmovsd   qword ptr [rsp+0CH], xmm2
+       vextractps dword ptr [rsp+14H], xmm2, 2
        mov      rdi, rbx
        mov      esi, 1
        mov      edx, 1
        call     [System.Collections.Generic.Dictionary`2[MartinCostello.AdventOfCode.Puzzles.Y2021.Day22+Cuboid,long]:TryInsert(MartinCostello.AdventOfCode.Puzzles.Y2021.Day22+Cuboid,long,ubyte):bool:this]
-						;; size=40 bbWeight=0.75 PerfScore 8.80
+						;; size=56 bbWeight=0.75 PerfScore 13.29
 G_M13415_IG26:
        mov      edi, dword ptr [rbx+38H]
        sub      edi, dword ptr [rbx+40H]
@@ -893,15 +909,17 @@ G_M13415_IG32:
 						;; size=22 bbWeight=0.23 PerfScore 1.33
 G_M13415_IG33:
        vmovups  xmm2, xmmword ptr [rbp+A8H]
-       vmovups  xmmword ptr [rsp], xmm2
+       vmovsd   qword ptr [rsp], xmm2
+       vextractps dword ptr [rsp+08H], xmm2, 2
        vmovups  xmm3, xmmword ptr [rbp+B4H]
-       vmovups  xmmword ptr [rsp+0CH], xmm3
+       vmovsd   qword ptr [rsp+0CH], xmm3
+       vextractps dword ptr [rsp+14H], xmm3, 2
        mov      rdi, r15
        mov      rsi, qword ptr [rbp+A0H]
        mov      edx, 1
        call     [System.Collections.Generic.Dictionary`2[MartinCostello.AdventOfCode.Puzzles.Y2021.Day22+Cuboid,long]:TryInsert(MartinCostello.AdventOfCode.Puzzles.Y2021.Day22+Cuboid,long,ubyte):bool:this]
        mov      eax, dword ptr [rbp-70H]
-						;; size=51 bbWeight=330.15 PerfScore 4456.99
+						;; size=67 bbWeight=330.15 PerfScore 6437.88
 G_M13415_IG34:
        cmp      r14d, dword ptr [rbx+44H]
        jne      G_M13415_IG47
@@ -1031,6 +1049,11 @@ G_M13415_IG48:
        int3     
 						;; size=6 bbWeight=0 PerfScore 0.00
 
-; Total bytes of code 1573, prolog size 84, PerfScore 71319.51, instruction count 350, allocated bytes for code 1573 (MethodHash=e316cb98) for method MartinCostello.AdventOfCode.Puzzles.Y2021.Day22:<Reboot>g__Reboot|8_1(System.Collections.Generic.List`1[System.ValueTuple`2[MartinCostello.AdventOfCode.Puzzles.Y2021.Day22+Cuboid,bool]]):long (Tier1-OSR)
+; Total bytes of code 1721, prolog size 84, PerfScore 82673.40, instruction count 368, allocated bytes for code 1721 (MethodHash=e316cb98) for method MartinCostello.AdventOfCode.Puzzles.Y2021.Day22:<Reboot>g__Reboot|8_1(System.Collections.Generic.List`1[System.ValueTuple`2[MartinCostello.AdventOfCode.Puzzles.Y2021.Day22+Cuboid,bool]]):long (Tier1-OSR)

@EgorBo
Copy link
Member Author

EgorBo commented Jul 15, 2023

SPMI is failing everywhere; the real diffs were +24 in a few cases, e.g.:

@@ -163,7 +163,9 @@ G_M13885_IG02:        ; bbWeight=1, gcrefRegs=380000 {x19 x20 x21}, byrefRegs=00
             fmul    v0.4s, v1.4s, v0.s[0]
             fsub    v0.4s, v16.4s, v0.4s
             str     q0, [fp, #0x20]	// [V50 tmp41]
-            str     q0, [sp]	// [V09 OutArgs]
+            str     d0, [sp]	// [V09 OutArgs]
+            mov     w1, v0.s[2]
+            str     w1, [sp, #0x08]	// [V09 OutArgs+0x08]
             ldp     s0, s1, [fp, #0xA0]	// [V24 tmp15], [V24 tmp15+0x04]
             ldr     s2, [fp, #0xA8]	// [V24 tmp15+0x08]
             ldp     s3, s4, [fp, #0xC0]	// [V06 loc2], [V06 loc2+0x04]
@@ -189,7 +191,7 @@ G_M13885_IG02:        ; bbWeight=1, gcrefRegs=380000 {x19 x20 x21}, byrefRegs=00
             cmp     w22, #5
             mov     v8.d[1], v9.d[0]
             blt     G_M13885_IG05
-						;; size=248 bbWeight=1 PerfScore 109.00
+						;; size=256 bbWeight=1 PerfScore 111.00

This is because previously we only ran that logic on osx-arm64.

@EgorBo EgorBo merged commit b898a54 into dotnet:main Jul 15, 2023
@EgorBo EgorBo deleted the fix-simd12-store branch July 15, 2023 19:26
@ghost ghost locked as resolved and limited conversation to collaborators Aug 15, 2023
Sign up for free to subscribe to this conversation on GitHub. Already have an account? Sign in.
Labels
area-CodeGen-coreclr CLR JIT compiler in src/coreclr/src/jit and related components such as SuperPMI
Projects
None yet
3 participants