Skip to content

[Perf] Windows/x64: 11 Improvements on 12/28/2022 6:20:56 PM #11284

Closed
@performanceautofiler

Description

@performanceautofiler

Run Information

Architecture x64
OS Windows 10.0.18362
Baseline 8c58fc2347820ce48e09605d8adddb993df9ebb5
Compare 1d15f2140f7eb30a976c66290491ec89cd628da0
Diff Diff

Improvements in System.Numerics.Tests.Perf_Matrix4x4

| Benchmark | Baseline | Test | Test/Base | Test Quality | Edge Detector | Baseline IR | Compare IR | IR Ratio | Baseline ETL | Compare ETL |
| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |
| CreateLookAtBenchmark - Duration of single invocation | 31.56 ns | 29.75 ns | 0.94 | 0.14 | False | 149.18093115744372 | 138.9753481735635 | 0.9315892258836395 | Trace | Trace |
| CreateBillboardBenchmark - Duration of single invocation | 23.04 ns | 20.53 ns | 0.89 | 0.11 | False | 116.78119720330676 | 106.01327812553706 | 0.907794068431893 | Trace | Trace |
| CreateWorldBenchmark - Duration of single invocation | 23.81 ns | 20.35 ns | 0.85 | 0.09 | False | 119.7388710165708 | 110.01858998207872 | 0.9188210064787827 | Trace | Trace |
| CreateConstrainedBillboardBenchmark - Duration of single invocation | 30.75 ns | 29.19 ns | 0.95 | 0.01 | False | | | | Trace | Trace |

graph
graph
graph
graph
Test Report

Repro

git clone https://github.com/dotnet/performance.git
py .\performance\scripts\benchmarks_ci.py -f net8.0 --filter 'System.Numerics.Tests.Perf_Matrix4x4*'

Payloads

Baseline
Compare

Histogram

System.Numerics.Tests.Perf_Matrix4x4.CreateLookAtBenchmark


Description of detection logic

IsImprovementBase: Marked as improvement because the compare was 5% less than the baseline, and the value was not too small.
IsImprovementCheck: Marked as improvement because the three check build points were 0.05 less than the baseline.
IsRegressionBase: Marked as not a regression because the compare was not 5% greater than the baseline, or the value was too small.
IsImprovementBase: Marked as improvement because the compare was 5% less than the baseline, and the value was not too small.
IsImprovementCheck: Marked as improvement because the three check build points were 0.05 less than the baseline.
IsImprovementWindowed: Marked as improvement because 29.745970644173973 < 30.579009425098956.
IsChangePoint: Marked as a change because one of 12/28/2022 5:13:50 PM, 1/3/2023 6:29:42 AM falls between 12/23/2022 9:40:21 PM and 1/3/2023 6:29:42 AM.
IsImprovementStdDev: Marked as improvement because 8.03377540262315 (T) = (0 -29.786236112966343) / Math.Sqrt((0.6142171521206813 / (299)) + (1.2209179767904108 / (14))) is greater than 1.9676211333067681 = MathNet.Numerics.Distributions.StudentT.InvCDF(0, 1, (299) + (14) - 2, .975) and 0.07457292595103529 = (32.18647578857234 - 29.786236112966343) / 32.18647578857234 is greater than 0.05.
IsChangeEdgeDetector: Marked as not a regression because Edge Detector said so.

### Baseline Jit Disasm

```assembly
; System.Numerics.Tests.Perf_Matrix4x4.CreateLookAtBenchmark()
       push      rsi
       sub       rsp,50
       vzeroupper
       mov       rsi,rdx
       vmovupd   xmm0,[7FFB23F432E0]
       vmovupd   xmm1,[7FFB23F432F0]
       vmovapd   [rsp+40],xmm0
       vxorps    xmm0,xmm0,xmm0
       vmovapd   [rsp+30],xmm0
       vmovapd   [rsp+20],xmm1
       mov       rcx,rsi
       lea       rdx,[rsp+40]
       lea       r8,[rsp+30]
       lea       r9,[rsp+20]
       call      qword ptr [7FFB2468B1F8]; System.Numerics.Matrix4x4.CreateLookAt(System.Numerics.Vector3, System.Numerics.Vector3, System.Numerics.Vector3)
       mov       rax,rsi
       add       rsp,50
       pop       rsi
       ret
; Total bytes of code 82
; System.Numerics.Matrix4x4.CreateLookAt(System.Numerics.Vector3, System.Numerics.Vector3, System.Numerics.Vector3)
       push      rdi
       push      rsi
       sub       rsp,0C8
       vzeroupper
       vmovaps   [rsp+0B0],xmm6
       vmovaps   [rsp+0A0],xmm7
       vmovaps   [rsp+90],xmm8
       vmovaps   [rsp+80],xmm9
       vmovaps   [rsp+70],xmm10
       vmovaps   [rsp+60],xmm11
       mov       rdi,rcx
       mov       rsi,rdx
       vmovss    xmm0,dword ptr [rsi+8]
       vmovsd    xmm1,qword ptr [rsi]
       vshufps   xmm1,xmm1,xmm0,44
       vmovss    xmm0,dword ptr [r8+8]
       vmovsd    xmm2,qword ptr [r8]
       vshufps   xmm2,xmm2,xmm0,44
       vsubps    xmm0,xmm1,xmm2
       vdpps     xmm1,xmm0,xmm0,71
       vsqrtss   xmm1,xmm1,xmm1
       vbroadcastss xmm1,xmm1
       vdivps    xmm0,xmm0,xmm1
       vpslldq   xmm0,xmm0,4
       vpsrldq   xmm6,xmm0,4
       vmovss    xmm0,dword ptr [r9+8]
       vmovsd    xmm1,qword ptr [r9]
       vshufps   xmm1,xmm1,xmm0,44
       vmovshdup xmm0,xmm1
       vunpckhps xmm7,xmm6,xmm6
       vmulss    xmm2,xmm0,xmm7
       vunpckhps xmm3,xmm1,xmm1
       vmovshdup xmm8,xmm6
       vmulss    xmm4,xmm3,xmm8
       vsubss    xmm2,xmm2,xmm4
       vmovaps   xmm9,xmm6
       vmulss    xmm3,xmm3,xmm9
       vmulss    xmm4,xmm1,xmm7
       vsubss    xmm3,xmm3,xmm4
       vmulss    xmm1,xmm1,xmm8
       vmulss    xmm0,xmm0,xmm9
       vsubss    xmm0,xmm1,xmm0
       vxorps    xmm1,xmm1,xmm1
       vmovss    xmm1,xmm1,xmm0
       vpslldq   xmm1,xmm1,4
       vmovss    xmm1,xmm1,xmm3
       vpslldq   xmm1,xmm1,4
       vmovss    xmm1,xmm1,xmm2
       vmovaps   xmm0,xmm1
       vdpps     xmm1,xmm0,xmm0,71
       vsqrtss   xmm1,xmm1,xmm1
       vbroadcastss xmm1,xmm1
       vdivps    xmm0,xmm0,xmm1
       vpslldq   xmm0,xmm0,4
       vpsrldq   xmm10,xmm0,4
       vunpckhps xmm0,xmm10,xmm10
       vmulss    xmm0,xmm8,xmm0
       vmovshdup xmm1,xmm10
       vmulss    xmm1,xmm7,xmm1
       vsubss    xmm0,xmm0,xmm1
       vmovaps   xmm1,xmm10
       vmulss    xmm1,xmm7,xmm1
       vunpckhps xmm2,xmm10,xmm10
       vmulss    xmm2,xmm9,xmm2
       vsubss    xmm1,xmm1,xmm2
       vmovshdup xmm2,xmm10
       vmulss    xmm2,xmm9,xmm2
       vmovaps   xmm3,xmm10
       vmulss    xmm3,xmm8,xmm3
       vsubss    xmm2,xmm2,xmm3
       vxorps    xmm3,xmm3,xmm3
       vmovss    xmm3,xmm3,xmm2
       vpslldq   xmm3,xmm3,4
       vmovss    xmm3,xmm3,xmm1
       vpslldq   xmm3,xmm3,4
       vmovss    xmm3,xmm3,xmm0
       vmovaps   xmm11,xmm3
       lea       rcx,[rsp+20]
       call      qword ptr [7FFB2468B030]; System.Numerics.Matrix4x4.get_Identity()
       vmovaps   xmm0,xmm10
       vmovss    dword ptr [rsp+20],xmm0
       vmovaps   xmm0,xmm11
       vmovss    dword ptr [rsp+24],xmm0
       vmovss    dword ptr [rsp+28],xmm9
       vmovshdup xmm0,xmm10
       vmovss    dword ptr [rsp+30],xmm0
       vmovshdup xmm0,xmm11
       vmovss    dword ptr [rsp+34],xmm0
       vmovss    dword ptr [rsp+38],xmm8
       vunpckhps xmm0,xmm10,xmm10
       vmovss    dword ptr [rsp+40],xmm0
       vunpckhps xmm0,xmm11,xmm11
       vmovss    dword ptr [rsp+44],xmm0
       vmovss    dword ptr [rsp+48],xmm7
       vmovss    xmm0,dword ptr [rsi+8]
       vmovsd    xmm1,qword ptr [rsi]
       vshufps   xmm1,xmm1,xmm0,44
       vdpps     xmm0,xmm10,xmm1,71
       vxorps    xmm0,xmm0,[7FFB23F43640]
       vmovss    dword ptr [rsp+50],xmm0
       vmovss    xmm0,dword ptr [rsi+8]
       vmovsd    xmm1,qword ptr [rsi]
       vshufps   xmm1,xmm1,xmm0,44
       vdpps     xmm0,xmm11,xmm1,71
       vxorps    xmm0,xmm0,[7FFB23F43640]
       vmovss    dword ptr [rsp+54],xmm0
       vmovss    xmm0,dword ptr [rsi+8]
       vmovsd    xmm1,qword ptr [rsi]
       vshufps   xmm1,xmm1,xmm0,44
       vdpps     xmm0,xmm6,xmm1,71
       vxorps    xmm0,xmm0,[7FFB23F43640]
       vmovss    dword ptr [rsp+58],xmm0
       vmovdqu   ymm0,ymmword ptr [rsp+20]
       vmovdqu   ymmword ptr [rdi],ymm0
       vmovdqu   ymm0,ymmword ptr [rsp+40]
       vmovdqu   ymmword ptr [rdi+20],ymm0
       mov       rax,rdi
       vmovaps   xmm6,[rsp+0B0]
       vmovaps   xmm7,[rsp+0A0]
       vmovaps   xmm8,[rsp+90]
       vmovaps   xmm9,[rsp+80]
       vmovaps   xmm10,[rsp+70]
       vmovaps   xmm11,[rsp+60]
       add       rsp,0C8
       pop       rsi
       pop       rdi
       ret
; Total bytes of code 639

```

### Compare Jit Disasm

```assembly

; System.Numerics.Tests.Perf_Matrix4x4.CreateLookAtBenchmark()
       push      rsi
       sub       rsp,50
       vzeroupper
       mov       rsi,rdx
       vmovupd   xmm0,[7FFC51CB32A0]
       vmovapd   [rsp+40],xmm0
       vxorps    xmm0,xmm0,xmm0
       vmovapd   [rsp+30],xmm0
       vmovupd   xmm0,[7FFC51CB32B0]
       vmovapd   [rsp+20],xmm0
       mov       rcx,rsi
       lea       rdx,[rsp+40]
       lea       r8,[rsp+30]
       lea       r9,[rsp+20]
       call      qword ptr [7FFC523FB1F8]; System.Numerics.Matrix4x4.CreateLookAt(System.Numerics.Vector3, System.Numerics.Vector3, System.Numerics.Vector3)
       mov       rax,rsi
       add       rsp,50
       pop       rsi
       ret
; Total bytes of code 82
; System.Numerics.Matrix4x4.CreateLookAt(System.Numerics.Vector3, System.Numerics.Vector3, System.Numerics.Vector3)
       push      rdi
       push      rsi
       sub       rsp,0C8
       vzeroupper
       vmovaps   [rsp+0B0],xmm6
       vmovaps   [rsp+0A0],xmm7
       vmovaps   [rsp+90],xmm8
       vmovaps   [rsp+80],xmm9
       vmovaps   [rsp+70],xmm10
       vmovaps   [rsp+60],xmm11
       mov       rdi,rcx
       mov       rsi,rdx
       vmovss    xmm0,dword ptr [rsi+8]
       vmovsd    xmm1,qword ptr [rsi]
       vshufps   xmm1,xmm1,xmm0,44
       vmovss    xmm0,dword ptr [r8+8]
       vmovsd    xmm2,qword ptr [r8]
       vshufps   xmm2,xmm2,xmm0,44
       vsubps    xmm0,xmm1,xmm2
       vdpps     xmm1,xmm0,xmm0,71
       vsqrtss   xmm1,xmm1,xmm1
       vbroadcastss xmm1,xmm1
       vdivps    xmm0,xmm0,xmm1
       vpslldq   xmm0,xmm0,4
       vpsrldq   xmm6,xmm0,4
       vmovss    xmm0,dword ptr [r9+8]
       vmovsd    xmm1,qword ptr [r9]
       vshufps   xmm1,xmm1,xmm0,44
       vmovshdup xmm0,xmm1
       vunpckhps xmm7,xmm6,xmm6
       vmulss    xmm2,xmm0,xmm7
       vunpckhps xmm3,xmm1,xmm1
       vmovshdup xmm8,xmm6
       vmulss    xmm4,xmm3,xmm8
       vsubss    xmm2,xmm2,xmm4
       vmovaps   xmm9,xmm6
       vmulss    xmm3,xmm3,xmm9
       vmulss    xmm4,xmm1,xmm7
       vsubss    xmm3,xmm3,xmm4
       vinsertps xmm2,xmm2,xmm3,10
       vmulss    xmm1,xmm1,xmm8
       vmulss    xmm0,xmm0,xmm9
       vsubss    xmm0,xmm1,xmm0
       vinsertps xmm0,xmm2,xmm0,28
       vdpps     xmm1,xmm0,xmm0,71
       vsqrtss   xmm1,xmm1,xmm1
       vbroadcastss xmm1,xmm1
       vdivps    xmm0,xmm0,xmm1
       vpslldq   xmm0,xmm0,4
       vpsrldq   xmm10,xmm0,4
       vunpckhps xmm0,xmm10,xmm10
       vmulss    xmm0,xmm8,xmm0
       vmovshdup xmm1,xmm10
       vmulss    xmm1,xmm7,xmm1
       vsubss    xmm0,xmm0,xmm1
       vmovaps   xmm1,xmm10
       vmulss    xmm1,xmm7,xmm1
       vunpckhps xmm2,xmm10,xmm10
       vmulss    xmm2,xmm9,xmm2
       vsubss    xmm1,xmm1,xmm2
       vinsertps xmm0,xmm0,xmm1,10
       vmovshdup xmm1,xmm10
       vmulss    xmm1,xmm9,xmm1
       vmovaps   xmm2,xmm10
       vmulss    xmm2,xmm8,xmm2
       vsubss    xmm1,xmm1,xmm2
       vinsertps xmm11,xmm0,xmm1,28
       lea       rcx,[rsp+20]
       call      qword ptr [7FFC523FB030]; System.Numerics.Matrix4x4.get_Identity()
       vmovaps   xmm0,xmm10
       vmovss    dword ptr [rsp+20],xmm0
       vmovaps   xmm0,xmm11
       vmovss    dword ptr [rsp+24],xmm0
       vmovss    dword ptr [rsp+28],xmm9
       vmovshdup xmm0,xmm10
       vmovss    dword ptr [rsp+30],xmm0
       vmovshdup xmm0,xmm11
       vmovss    dword ptr [rsp+34],xmm0
       vmovss    dword ptr [rsp+38],xmm8
       vunpckhps xmm0,xmm10,xmm10
       vmovss    dword ptr [rsp+40],xmm0
       vunpckhps xmm0,xmm11,xmm11
       vmovss    dword ptr [rsp+44],xmm0
       vmovss    dword ptr [rsp+48],xmm7
       vmovss    xmm0,dword ptr [rsi+8]
       vmovsd    xmm1,qword ptr [rsi]
       vshufps   xmm1,xmm1,xmm0,44
       vdpps     xmm0,xmm10,xmm1,71
       vxorps    xmm0,xmm0,[7FFC51CB35E0]
       vmovss    dword ptr [rsp+50],xmm0
       vmovss    xmm0,dword ptr [rsi+8]
       vmovsd    xmm1,qword ptr [rsi]
       vshufps   xmm1,xmm1,xmm0,44
       vdpps     xmm0,xmm11,xmm1,71
       vxorps    xmm0,xmm0,[7FFC51CB35E0]
       vmovss    dword ptr [rsp+54],xmm0
       vmovss    xmm0,dword ptr [rsi+8]
       vmovsd    xmm1,qword ptr [rsi]
       vshufps   xmm1,xmm1,xmm0,44
       vdpps     xmm0,xmm6,xmm1,71
       vxorps    xmm0,xmm0,[7FFC51CB35E0]
       vmovss    dword ptr [rsp+58],xmm0
       vmovdqu   ymm0,ymmword ptr [rsp+20]
       vmovdqu   ymmword ptr [rdi],ymm0
       vmovdqu   ymm0,ymmword ptr [rsp+40]
       vmovdqu   ymmword ptr [rdi+20],ymm0
       mov       rax,rdi
       vmovaps   xmm6,[rsp+0B0]
       vmovaps   xmm7,[rsp+0A0]
       vmovaps   xmm8,[rsp+90]
       vmovaps   xmm9,[rsp+80]
       vmovaps   xmm10,[rsp+70]
       vmovaps   xmm11,[rsp+60]
       add       rsp,0C8
       pop       rsi
       pop       rdi
       ret
; Total bytes of code 603
```

System.Numerics.Tests.Perf_Matrix4x4.CreateBillboardBenchmark


Description of detection logic

IsImprovementBase: Marked as improvement because the compare was 5% less than the baseline, and the value was not too small.
IsImprovementCheck: Marked as improvement because the three check build points were 0.05 less than the baseline.
IsRegressionBase: Marked as not a regression because the compare was not 5% greater than the baseline, or the value was too small.
IsImprovementBase: Marked as improvement because the compare was 5% less than the baseline, and the value was not too small.
IsImprovementCheck: Marked as improvement because the three check build points were 0.05 less than the baseline.
IsImprovementWindowed: Marked as improvement because 20.532278443711558 < 21.753609662882017.
IsChangePoint: Marked as a change because one of 12/28/2022 5:13:50 PM, 1/3/2023 6:29:42 AM falls between 12/23/2022 9:40:21 PM and 1/3/2023 6:29:42 AM.
IsImprovementStdDev: Marked as improvement because 2.1484834712218572 (T) = (0 -21.392544886476852) / Math.Sqrt((0.49130840670645715 / (299)) + (7.969156026645255 / (14))) is greater than 1.9676211333067681 = MathNet.Numerics.Distributions.StudentT.InvCDF(0, 1, (299) + (14) - 2, .975) and 0.07052986970187088 = (23.01584977197186 - 21.392544886476852) / 23.01584977197186 is greater than 0.05.
IsChangeEdgeDetector: Marked as not a regression because Edge Detector said so.

### Baseline Jit Disasm

```assembly
; System.Numerics.Tests.Perf_Matrix4x4.CreateBillboardBenchmark()
       push      rsi
       sub       rsp,70
       vzeroupper
       mov       rsi,rdx
       vmovupd   xmm0,[7FFE0B2C3360]
       vmovupd   xmm1,[7FFE0B2C3370]
       vmovupd   xmm2,[7FFE0B2C3380]
       vxorps    xmm3,xmm3,xmm3
       vmovapd   [rsp+60],xmm3
       vmovapd   [rsp+50],xmm0
       vmovapd   [rsp+40],xmm1
       vmovapd   [rsp+30],xmm2
       mov       rcx,rsi
       lea       rdx,[rsp+60]
       lea       r8,[rsp+50]
       lea       r9,[rsp+40]
       lea       rax,[rsp+30]
       mov       [rsp+20],rax
       call      qword ptr [7FFE0BA0B180]; System.Numerics.Matrix4x4.CreateBillboard(System.Numerics.Vector3, System.Numerics.Vector3, System.Numerics.Vector3, System.Numerics.Vector3)
       mov       rax,rsi
       add       rsp,70
       pop       rsi
       ret
; Total bytes of code 106
; System.Numerics.Matrix4x4.CreateBillboard(System.Numerics.Vector3, System.Numerics.Vector3, System.Numerics.Vector3, System.Numerics.Vector3)
       sub       rsp,88
       vzeroupper
       vmovaps   [rsp+70],xmm6
       vmovaps   [rsp+60],xmm7
       vmovaps   [rsp+50],xmm8
       vmovaps   [rsp+40],xmm9
       vmovss    xmm0,dword ptr [rdx+8]
       vmovsd    xmm1,qword ptr [rdx]
       vshufps   xmm1,xmm1,xmm0,44
       vmovss    xmm0,dword ptr [r8+8]
       vmovsd    xmm2,qword ptr [r8]
       vshufps   xmm2,xmm2,xmm0,44
       vsubps    xmm0,xmm1,xmm2
       vdpps     xmm2,xmm0,xmm0,71
       vmovss    xmm3,dword ptr [7FFE0B2C3600]
       vucomiss  xmm3,xmm2
       jbe       short M01_L00
       mov       rax,[rsp+0B0]
       vmovss    xmm0,dword ptr [rax+8]
       vmovsd    xmm2,qword ptr [rax]
       vshufps   xmm2,xmm2,xmm0,44
       vxorps    xmm0,xmm0,xmm0
       vsubps    xmm0,xmm0,xmm2
       jmp       short M01_L01
M01_L00:
       vsqrtss   xmm2,xmm2,xmm2
       vmovss    xmm3,dword ptr [7FFE0B2C3604]
       vdivss    xmm2,xmm3,xmm2
       vbroadcastss xmm2,xmm2
       vmulps    xmm0,xmm0,xmm2
M01_L01:
       vmovss    xmm2,dword ptr [r9+8]
       vmovsd    xmm3,qword ptr [r9]
       vshufps   xmm3,xmm3,xmm2,44
       vmovshdup xmm2,xmm3
       vunpckhps xmm4,xmm0,xmm0
       vmulss    xmm5,xmm2,xmm4
       vunpckhps xmm6,xmm3,xmm3
       vmovshdup xmm7,xmm0
       vmulss    xmm8,xmm6,xmm7
       vsubss    xmm5,xmm5,xmm8
       vmovaps   xmm8,xmm0
       vmulss    xmm6,xmm6,xmm8
       vmulss    xmm9,xmm3,xmm4
       vsubss    xmm6,xmm6,xmm9
       vmulss    xmm3,xmm3,xmm7
       vmulss    xmm2,xmm2,xmm8
       vsubss    xmm2,xmm3,xmm2
       vxorps    xmm3,xmm3,xmm3
       vmovss    xmm3,xmm3,xmm2
       vpslldq   xmm3,xmm3,4
       vmovss    xmm3,xmm3,xmm6
       vpslldq   xmm3,xmm3,4
       vmovss    xmm3,xmm3,xmm5
       vmovaps   xmm2,xmm3
       vdpps     xmm3,xmm2,xmm2,71
       vsqrtss   xmm3,xmm3,xmm3
       vbroadcastss xmm3,xmm3
       vdivps    xmm2,xmm2,xmm3
       vpslldq   xmm2,xmm2,4
       vpsrldq   xmm2,xmm2,4
       vunpckhps xmm3,xmm2,xmm2
       vmulss    xmm5,xmm7,xmm3
       vmovshdup xmm6,xmm2
       vmulss    xmm9,xmm4,xmm6
       vsubss    xmm5,xmm5,xmm9
       vmovaps   xmm9,xmm2
       vmulss    xmm4,xmm4,xmm9
       vmulss    xmm3,xmm8,xmm3
       vsubss    xmm3,xmm4,xmm3
       vmulss    xmm4,xmm8,xmm6
       vmulss    xmm6,xmm7,xmm9
       vsubss    xmm4,xmm4,xmm6
       vxorps    xmm6,xmm6,xmm6
       vmovss    xmm6,xmm6,xmm4
       vpslldq   xmm6,xmm6,4
       vmovss    xmm6,xmm6,xmm3
       vpslldq   xmm6,xmm6,4
       vmovss    xmm6,xmm6,xmm5
       vmovaps   xmm3,xmm6
       vmovsd    qword ptr [rsp],xmm2
       vpshufd   xmm4,xmm2,2
       vmovss    dword ptr [rsp+8],xmm4
       xor       eax,eax
       mov       [rsp+0C],eax
       vmovsd    qword ptr [rsp+10],xmm3
       vpshufd   xmm2,xmm3,2
       vmovss    dword ptr [rsp+18],xmm2
       mov       [rsp+1C],eax
       vmovsd    qword ptr [rsp+20],xmm0
       vpshufd   xmm2,xmm0,2
       vmovss    dword ptr [rsp+28],xmm2
       mov       [rsp+2C],eax
       vmovsd    qword ptr [rsp+30],xmm1
       vpshufd   xmm0,xmm1,2
       vmovss    dword ptr [rsp+38],xmm0
       mov       dword ptr [rsp+3C],3F800000
       vmovdqu   ymm0,ymmword ptr [rsp]
       vmovdqu   ymmword ptr [rcx],ymm0
       vmovdqu   ymm0,ymmword ptr [rsp+20]
       vmovdqu   ymmword ptr [rcx+20],ymm0
       mov       rax,rcx
       vmovaps   xmm6,[rsp+70]
       vmovaps   xmm7,[rsp+60]
       vmovaps   xmm8,[rsp+50]
       vmovaps   xmm9,[rsp+40]
       add       rsp,88
       ret
; Total bytes of code 505

```

### Compare Jit Disasm

```assembly

; System.Numerics.Tests.Perf_Matrix4x4.CreateBillboardBenchmark()
       push      rsi
       sub       rsp,70
       vzeroupper
       mov       rsi,rdx
       vxorps    xmm0,xmm0,xmm0
       vmovapd   [rsp+60],xmm0
       vmovupd   xmm0,[7FFE2B6F3320]
       vmovapd   [rsp+50],xmm0
       vmovupd   xmm0,[7FFE2B6F3330]
       vmovapd   [rsp+40],xmm0
       vmovupd   xmm0,[7FFE2B6F3320]
       vmovapd   [rsp+30],xmm0
       mov       rcx,rsi
       lea       rdx,[rsp+60]
       lea       r8,[rsp+50]
       lea       r9,[rsp+40]
       lea       rax,[rsp+30]
       mov       [rsp+20],rax
       call      qword ptr [7FFE2BE3B180]; System.Numerics.Matrix4x4.CreateBillboard(System.Numerics.Vector3, System.Numerics.Vector3, System.Numerics.Vector3, System.Numerics.Vector3)
       mov       rax,rsi
       add       rsp,70
       pop       rsi
       ret
; Total bytes of code 106
; System.Numerics.Matrix4x4.CreateBillboard(System.Numerics.Vector3, System.Numerics.Vector3, System.Numerics.Vector3, System.Numerics.Vector3)
       sub       rsp,88
       vzeroupper
       vmovaps   [rsp+70],xmm6
       vmovaps   [rsp+60],xmm7
       vmovaps   [rsp+50],xmm8
       vmovaps   [rsp+40],xmm9
       vmovss    xmm0,dword ptr [rdx+8]
       vmovsd    xmm1,qword ptr [rdx]
       vshufps   xmm1,xmm1,xmm0,44
       vmovss    xmm0,dword ptr [r8+8]
       vmovsd    xmm2,qword ptr [r8]
       vshufps   xmm2,xmm2,xmm0,44
       vsubps    xmm0,xmm1,xmm2
       vdpps     xmm2,xmm0,xmm0,71
       vmovss    xmm3,dword ptr [7FFE2B6F3588]
       vucomiss  xmm3,xmm2
       jbe       short M01_L00
       mov       rax,[rsp+0B0]
       vmovss    xmm0,dword ptr [rax+8]
       vmovsd    xmm2,qword ptr [rax]
       vshufps   xmm2,xmm2,xmm0,44
       vxorps    xmm0,xmm0,xmm0
       vsubps    xmm0,xmm0,xmm2
       jmp       short M01_L01
M01_L00:
       vsqrtss   xmm2,xmm2,xmm2
       vmovss    xmm3,dword ptr [7FFE2B6F358C]
       vdivss    xmm2,xmm3,xmm2
       vbroadcastss xmm2,xmm2
       vmulps    xmm0,xmm0,xmm2
M01_L01:
       vmovss    xmm2,dword ptr [r9+8]
       vmovsd    xmm3,qword ptr [r9]
       vshufps   xmm3,xmm3,xmm2,44
       vmovshdup xmm2,xmm3
       vunpckhps xmm4,xmm0,xmm0
       vmulss    xmm5,xmm2,xmm4
       vunpckhps xmm6,xmm3,xmm3
       vmovshdup xmm7,xmm0
       vmulss    xmm8,xmm6,xmm7
       vsubss    xmm5,xmm5,xmm8
       vmovaps   xmm8,xmm0
       vmulss    xmm6,xmm6,xmm8
       vmulss    xmm9,xmm3,xmm4
       vsubss    xmm6,xmm6,xmm9
       vinsertps xmm5,xmm5,xmm6,10
       vmulss    xmm3,xmm3,xmm7
       vmulss    xmm2,xmm2,xmm8
       vsubss    xmm2,xmm3,xmm2
       vinsertps xmm2,xmm5,xmm2,28
       vdpps     xmm3,xmm2,xmm2,71
       vsqrtss   xmm3,xmm3,xmm3
       vbroadcastss xmm3,xmm3
       vdivps    xmm2,xmm2,xmm3
       vpslldq   xmm2,xmm2,4
       vpsrldq   xmm2,xmm2,4
       vunpckhps xmm3,xmm2,xmm2
       vmulss    xmm5,xmm7,xmm3
       vmovshdup xmm6,xmm2
       vmulss    xmm9,xmm4,xmm6
       vsubss    xmm5,xmm5,xmm9
       vmovaps   xmm9,xmm2
       vmulss    xmm4,xmm4,xmm9
       vmulss    xmm3,xmm8,xmm3
       vsubss    xmm3,xmm4,xmm3
       vinsertps xmm3,xmm5,xmm3,10
       vmulss    xmm4,xmm8,xmm6
       vmulss    xmm5,xmm7,xmm9
       vsubss    xmm4,xmm4,xmm5
       vinsertps xmm3,xmm3,xmm4,28
       vmovsd    qword ptr [rsp],xmm2
       vpshufd   xmm4,xmm2,2
       vmovss    dword ptr [rsp+8],xmm4
       xor       eax,eax
       mov       [rsp+0C],eax
       vmovsd    qword ptr [rsp+10],xmm3
       vpshufd   xmm2,xmm3,2
       vmovss    dword ptr [rsp+18],xmm2
       mov       [rsp+1C],eax
       vmovsd    qword ptr [rsp+20],xmm0
       vpshufd   xmm2,xmm0,2
       vmovss    dword ptr [rsp+28],xmm2
       mov       [rsp+2C],eax
       vmovsd    qword ptr [rsp+30],xmm1
       vpshufd   xmm0,xmm1,2
       vmovss    dword ptr [rsp+38],xmm0
       mov       dword ptr [rsp+3C],3F800000
       vmovdqu   ymm0,ymmword ptr [rsp]
       vmovdqu   ymmword ptr [rcx],ymm0
       vmovdqu   ymm0,ymmword ptr [rsp+20]
       vmovdqu   ymmword ptr [rcx+20],ymm0
       mov       rax,rcx
       vmovaps   xmm6,[rsp+70]
       vmovaps   xmm7,[rsp+60]
       vmovaps   xmm8,[rsp+50]
       vmovaps   xmm9,[rsp+40]
       add       rsp,88
       ret
; Total bytes of code 469
```

System.Numerics.Tests.Perf_Matrix4x4.CreateWorldBenchmark


Description of detection logic

IsImprovementBase: Marked as improvement because the compare was 5% less than the baseline, and the value was not too small.
IsImprovementCheck: Marked as improvement because the three check build points were 0.05 less than the baseline.
IsRegressionBase: Marked as not a regression because the compare was not 5% greater than the baseline, or the value was too small.
IsImprovementBase: Marked as improvement because the compare was 5% less than the baseline, and the value was not too small.
IsImprovementCheck: Marked as improvement because the three check build points were 0.05 less than the baseline.
IsImprovementWindowed: Marked as improvement because 20.347633209236008 < 22.398269254881626.
IsChangePoint: Marked as a change because one of 12/28/2022 5:13:50 PM, 1/3/2023 6:29:42 AM falls between 12/23/2022 9:40:21 PM and 1/3/2023 6:29:42 AM.
IsImprovementStdDev: Marked as improvement because 24.12215320316142 (T) = (0 -20.914799837469435) / Math.Sqrt((0.4263786384621453 / (299)) + (0.15510467445082196 / (14))) is greater than 1.9676211333067681 = MathNet.Numerics.Distributions.StudentT.InvCDF(0, 1, (299) + (14) - 2, .975) and 0.11424016351104417 = (23.612269348735833 - 20.914799837469435) / 23.612269348735833 is greater than 0.05.
IsChangeEdgeDetector: Marked as not a regression because Edge Detector said so.

### Baseline Jit Disasm

```assembly
; System.Numerics.Tests.Perf_Matrix4x4.CreateWorldBenchmark()
       push      rsi
       sub       rsp,50
       vzeroupper
       mov       rsi,rdx
       vmovupd   xmm0,[7FFAA94532E0]
       vmovupd   xmm1,[7FFAA94532F0]
       vxorps    xmm2,xmm2,xmm2
       vmovapd   [rsp+40],xmm2
       vmovapd   [rsp+30],xmm0
       vmovapd   [rsp+20],xmm1
       mov       rcx,rsi
       lea       rdx,[rsp+40]
       lea       r8,[rsp+30]
       lea       r9,[rsp+20]
       call      qword ptr [7FFAA9B9B408]; System.Numerics.Matrix4x4.CreateWorld(System.Numerics.Vector3, System.Numerics.Vector3, System.Numerics.Vector3)
       mov       rax,rsi
       add       rsp,50
       pop       rsi
       ret
; Total bytes of code 82
; System.Numerics.Matrix4x4.CreateWorld(System.Numerics.Vector3, System.Numerics.Vector3, System.Numerics.Vector3)
       push      rdi
       push      rsi
       sub       rsp,98
       vzeroupper
       vmovaps   [rsp+80],xmm6
       vmovaps   [rsp+70],xmm7
       vmovaps   [rsp+60],xmm8
       mov       rdi,rcx
       mov       rsi,rdx
       vmovss    xmm0,dword ptr [r8+8]
       vmovsd    xmm1,qword ptr [r8]
       vshufps   xmm1,xmm1,xmm0,44
       vxorps    xmm0,xmm0,xmm0
       vsubps    xmm0,xmm0,xmm1
       vdpps     xmm1,xmm0,xmm0,71
       vsqrtss   xmm1,xmm1,xmm1
       vbroadcastss xmm1,xmm1
       vdivps    xmm0,xmm0,xmm1
       vpslldq   xmm0,xmm0,4
       vpsrldq   xmm6,xmm0,4
       vmovss    xmm0,dword ptr [r9+8]
       vmovsd    xmm1,qword ptr [r9]
       vshufps   xmm1,xmm1,xmm0,44
       vmovshdup xmm0,xmm1
       vunpckhps xmm2,xmm6,xmm6
       vmulss    xmm3,xmm0,xmm2
       vunpckhps xmm4,xmm1,xmm1
       vmovshdup xmm5,xmm6
       vmulss    xmm7,xmm4,xmm5
       vsubss    xmm3,xmm3,xmm7
       vmovaps   xmm7,xmm6
       vmulss    xmm4,xmm4,xmm7
       vmulss    xmm8,xmm1,xmm2
       vsubss    xmm4,xmm4,xmm8
       vmulss    xmm1,xmm1,xmm5
       vmulss    xmm0,xmm0,xmm7
       vsubss    xmm0,xmm1,xmm0
       vxorps    xmm1,xmm1,xmm1
       vmovss    xmm1,xmm1,xmm0
       vpslldq   xmm1,xmm1,4
       vmovss    xmm1,xmm1,xmm4
       vpslldq   xmm1,xmm1,4
       vmovss    xmm1,xmm1,xmm3
       vmovaps   xmm0,xmm1
       vdpps     xmm1,xmm0,xmm0,71
       vsqrtss   xmm1,xmm1,xmm1
       vbroadcastss xmm1,xmm1
       vdivps    xmm0,xmm0,xmm1
       vpslldq   xmm0,xmm0,4
       vpsrldq   xmm8,xmm0,4
       vunpckhps xmm0,xmm8,xmm8
       vmulss    xmm1,xmm5,xmm0
       vmovshdup xmm3,xmm8
       vmulss    xmm4,xmm2,xmm3
       vsubss    xmm1,xmm1,xmm4
       vmovaps   xmm4,xmm8
       vmulss    xmm2,xmm2,xmm4
       vmulss    xmm0,xmm7,xmm0
       vsubss    xmm0,xmm2,xmm0
       vmulss    xmm2,xmm7,xmm3
       vmulss    xmm3,xmm5,xmm4
       vsubss    xmm2,xmm2,xmm3
       vxorps    xmm3,xmm3,xmm3
       vmovss    xmm3,xmm3,xmm2
       vpslldq   xmm3,xmm3,4
       vmovss    xmm3,xmm3,xmm0
       vpslldq   xmm3,xmm3,4
       vmovss    xmm3,xmm3,xmm1
       vmovaps   xmm7,xmm3
       lea       rcx,[rsp+20]
       call      qword ptr [7FFAA9B9B030]; System.Numerics.Matrix4x4.get_Identity()
       vmovsd    qword ptr [rsp+20],xmm8
       vpshufd   xmm0,xmm8,2
       vmovss    dword ptr [rsp+28],xmm0
       vmovsd    qword ptr [rsp+30],xmm7
       vpshufd   xmm0,xmm7,2
       vmovss    dword ptr [rsp+38],xmm0
       vmovsd    qword ptr [rsp+40],xmm6
       vpshufd   xmm0,xmm6,2
       vmovss    dword ptr [rsp+48],xmm0
       vmovss    xmm0,dword ptr [rsi+8]
       vmovsd    xmm1,qword ptr [rsi]
       vshufps   xmm1,xmm1,xmm0,44
       vmovsd    qword ptr [rsp+50],xmm1
       vpshufd   xmm0,xmm1,2
       vmovss    dword ptr [rsp+58],xmm0
       vmovdqu   ymm0,ymmword ptr [rsp+20]
       vmovdqu   ymmword ptr [rdi],ymm0
       vmovdqu   ymm0,ymmword ptr [rsp+40]
       vmovdqu   ymmword ptr [rdi+20],ymm0
       mov       rax,rdi
       vmovaps   xmm6,[rsp+80]
       vmovaps   xmm7,[rsp+70]
       vmovaps   xmm8,[rsp+60]
       add       rsp,98
       pop       rsi
       pop       rdi
       ret
; Total bytes of code 454

```

### Compare Jit Disasm

```assembly

; System.Numerics.Tests.Perf_Matrix4x4.CreateWorldBenchmark()
       push      rsi
       sub       rsp,50
       vzeroupper
       mov       rsi,rdx
       vxorps    xmm0,xmm0,xmm0
       vmovapd   [rsp+40],xmm0
       vmovupd   xmm0,[7FF93C0332A0]
       vmovapd   [rsp+30],xmm0
       vmovupd   xmm0,[7FF93C0332B0]
       vmovapd   [rsp+20],xmm0
       mov       rcx,rsi
       lea       rdx,[rsp+40]
       lea       r8,[rsp+30]
       lea       r9,[rsp+20]
       call      qword ptr [7FF93C77B408]; System.Numerics.Matrix4x4.CreateWorld(System.Numerics.Vector3, System.Numerics.Vector3, System.Numerics.Vector3)
       mov       rax,rsi
       add       rsp,50
       pop       rsi
       ret
; Total bytes of code 82
; System.Numerics.Matrix4x4.CreateWorld(System.Numerics.Vector3, System.Numerics.Vector3, System.Numerics.Vector3)
       push      rdi
       push      rsi
       sub       rsp,98
       vzeroupper
       vmovaps   [rsp+80],xmm6
       vmovaps   [rsp+70],xmm7
       vmovaps   [rsp+60],xmm8
       mov       rdi,rcx
       mov       rsi,rdx
       vmovss    xmm0,dword ptr [r8+8]
       vmovsd    xmm1,qword ptr [r8]
       vshufps   xmm1,xmm1,xmm0,44
       vxorps    xmm0,xmm0,xmm0
       vsubps    xmm0,xmm0,xmm1
       vdpps     xmm1,xmm0,xmm0,71
       vsqrtss   xmm1,xmm1,xmm1
       vbroadcastss xmm1,xmm1
       vdivps    xmm0,xmm0,xmm1
       vpslldq   xmm0,xmm0,4
       vpsrldq   xmm6,xmm0,4
       vmovss    xmm0,dword ptr [r9+8]
       vmovsd    xmm1,qword ptr [r9]
       vshufps   xmm1,xmm1,xmm0,44
       vmovshdup xmm0,xmm1
       vunpckhps xmm2,xmm6,xmm6
       vmulss    xmm3,xmm0,xmm2
       vunpckhps xmm4,xmm1,xmm1
       vmovshdup xmm5,xmm6
       vmulss    xmm7,xmm4,xmm5
       vsubss    xmm3,xmm3,xmm7
       vmovaps   xmm7,xmm6
       vmulss    xmm4,xmm4,xmm7
       vmulss    xmm8,xmm1,xmm2
       vsubss    xmm4,xmm4,xmm8
       vinsertps xmm3,xmm3,xmm4,10
       vmulss    xmm1,xmm1,xmm5
       vmulss    xmm0,xmm0,xmm7
       vsubss    xmm0,xmm1,xmm0
       vinsertps xmm0,xmm3,xmm0,28
       vdpps     xmm1,xmm0,xmm0,71
       vsqrtss   xmm1,xmm1,xmm1
       vbroadcastss xmm1,xmm1
       vdivps    xmm0,xmm0,xmm1
       vpslldq   xmm0,xmm0,4
       vpsrldq   xmm8,xmm0,4
       vunpckhps xmm0,xmm8,xmm8
       vmulss    xmm1,xmm5,xmm0
       vmovshdup xmm3,xmm8
       vmulss    xmm4,xmm2,xmm3
       vsubss    xmm1,xmm1,xmm4
       vmovaps   xmm4,xmm8
       vmulss    xmm2,xmm2,xmm4
       vmulss    xmm0,xmm7,xmm0
       vsubss    xmm0,xmm2,xmm0
       vinsertps xmm0,xmm1,xmm0,10
       vmulss    xmm1,xmm7,xmm3
       vmulss    xmm2,xmm5,xmm4
       vsubss    xmm1,xmm1,xmm2
       vinsertps xmm7,xmm0,xmm1,28
       lea       rcx,[rsp+20]
       call      qword ptr [7FF93C77B030]; System.Numerics.Matrix4x4.get_Identity()
       vmovsd    qword ptr [rsp+20],xmm8
       vpshufd   xmm0,xmm8,2
       vmovss    dword ptr [rsp+28],xmm0
       vmovsd    qword ptr [rsp+30],xmm7
       vpshufd   xmm0,xmm7,2
       vmovss    dword ptr [rsp+38],xmm0
       vmovsd    qword ptr [rsp+40],xmm6
       vpshufd   xmm0,xmm6,2
       vmovss    dword ptr [rsp+48],xmm0
       vmovss    xmm0,dword ptr [rsi+8]
       vmovsd    xmm1,qword ptr [rsi]
       vshufps   xmm1,xmm1,xmm0,44
       vmovsd    qword ptr [rsp+50],xmm1
       vpshufd   xmm0,xmm1,2
       vmovss    dword ptr [rsp+58],xmm0
       vmovdqu   ymm0,ymmword ptr [rsp+20]
       vmovdqu   ymmword ptr [rdi],ymm0
       vmovdqu   ymm0,ymmword ptr [rsp+40]
       vmovdqu   ymmword ptr [rdi+20],ymm0
       mov       rax,rdi
       vmovaps   xmm6,[rsp+80]
       vmovaps   xmm7,[rsp+70]
       vmovaps   xmm8,[rsp+60]
       add       rsp,98
       pop       rsi
       pop       rdi
       ret
; Total bytes of code 418

System.Numerics.Tests.Perf_Matrix4x4.CreateConstrainedBillboardBenchmark


Description of detection logic

IsImprovementBase: Marked as improvement because the compare was 5% less than the baseline, and the value was not too small.
IsImprovementCheck: Marked as improvement because the three check build points were 0.05 less than the baseline.
IsRegressionBase: Marked as not a regression because the compare was not 5% greater than the baseline, or the value was too small.
IsImprovementBase: Marked as improvement because the compare was 5% less than the baseline, and the value was not too small.
IsImprovementCheck: Marked as improvement because the three check build points were 0.05 less than the baseline.
IsImprovementWindowed: Marked as improvement because 29.194004052835457 < 29.32247219066154.
IsChangePoint: Marked as a change because one of 10/31/2022 10:23:25 PM, 12/28/2022 5:13:50 PM, 1/3/2023 6:29:42 AM falls between 12/23/2022 9:40:21 PM and 1/3/2023 6:29:42 AM.
IsImprovementStdDev: Marked as improvement because 14.512061347326483 (T) = (0 -29.236423889734663) / Math.Sqrt((3.323298949304289 / (299)) + (0.06780201896632884 / (14))) is greater than 1.9676211333067681 = MathNet.Numerics.Distributions.StudentT.InvCDF(0, 1, (299) + (14) - 2, .975) and 0.05900355398482968 = (31.069643263310823 - 29.236423889734663) / 31.069643263310823 is greater than 0.05.
IsChangeEdgeDetector: Marked not as a regression because Edge Detector said so.

Baseline Jit Disasm

; System.Numerics.Tests.Perf_Matrix4x4.CreateConstrainedBillboardBenchmark()
; Baseline JIT output for the benchmark method. It materializes five 16-byte
; stack temporaries and passes their addresses to
; Matrix4x4.CreateConstrainedBillboard. Numeric literals in this listing are
; hexadecimal (e.g. 80, 7FFD3CDD33A0).
       push      rsi
       sub       rsp,80
       vzeroupper
       ; Keep the incoming rdx across the call: it is passed on in rcx and
       ; returned in rax (presumably the caller-supplied result buffer -- confirm
       ; against the calling convention in use).
       mov       rsi,rdx
       ; Load four 16-byte constants from absolute addresses and zero xmm4.
       vmovupd   xmm0,[7FFD3CDD33A0]
       vmovupd   xmm1,[7FFD3CDD33B0]
       vmovupd   xmm2,[7FFD3CDD33C0]
       vmovupd   xmm3,[7FFD3CDD33D0]
       vxorps    xmm4,xmm4,xmm4
       ; Spill the five values to stack slots [rsp+30]..[rsp+70]; the first
       ; slot ([rsp+70]) holds all zeroes.
       vmovapd   [rsp+70],xmm4
       vmovapd   [rsp+60],xmm0
       vmovapd   [rsp+50],xmm1
       vmovapd   [rsp+40],xmm2
       vmovapd   [rsp+30],xmm3
       ; Argument setup: rcx = saved rdx, then the addresses of the five slots
       ; in rdx, r8, r9 and the two outgoing stack slots [rsp+20] / [rsp+28].
       mov       rcx,rsi
       lea       rdx,[rsp+70]
       lea       r8,[rsp+60]
       lea       r9,[rsp+50]
       lea       rax,[rsp+40]
       mov       [rsp+20],rax
       lea       rax,[rsp+30]
       mov       [rsp+28],rax
       call      qword ptr [7FFD3D51B198]; System.Numerics.Matrix4x4.CreateConstrainedBillboard(System.Numerics.Vector3, System.Numerics.Vector3, System.Numerics.Vector3, System.Numerics.Vector3, System.Numerics.Vector3)
       ; Return the pointer that was received in rdx on entry.
       mov       rax,rsi
       add       rsp,80
       pop       rsi
       ret
; Total bytes of code 136
; System.Numerics.Matrix4x4.CreateConstrainedBillboard(System.Numerics.Vector3, System.Numerics.Vector3, System.Numerics.Vector3, System.Numerics.Vector3, System.Numerics.Vector3)
; Baseline JIT output. Register / stack roles as used below:
;   rcx        = destination of the 64-byte result (written at M01_L05, returned in rax)
;   rdx/r8/r9  = pointers to the 1st / 2nd / 3rd Vector3 arguments
;   [rsp+0B0]  = pointer to the 4th Vector3 argument
;   [rsp+0B8]  = pointer to the 5th Vector3 argument
; Each Vector3 is read as an 8-byte load (offset 0) plus a 4-byte load
; (offset 8) and merged by vshufps 44 into lanes (x, y, z, 0), where x, y, z
; denote the floats at offsets 0, 4, 8. Immediates are hexadecimal.
; The values of the constants loaded from absolute addresses are not visible
; in this listing.
       sub       rsp,88
       vzeroupper
       ; xmm6-xmm9 are used as scratch below; save them so they can be
       ; restored before returning.
       vmovaps   [rsp+70],xmm6
       vmovaps   [rsp+60],xmm7
       vmovaps   [rsp+50],xmm8
       vmovaps   [rsp+40],xmm9
       ; xmm1 = 1st Vector3 as (x, y, z, 0); stays live until the final store.
       vmovss    xmm0,dword ptr [rdx+8]
       vmovsd    xmm1,qword ptr [rdx]
       vshufps   xmm1,xmm1,xmm0,44
       ; xmm2 = 2nd Vector3 as (x, y, z, 0).
       vmovss    xmm0,dword ptr [r8+8]
       vmovsd    xmm2,qword ptr [r8]
       vshufps   xmm2,xmm2,xmm0,44
       ; xmm0 = arg1 - arg2; xmm2[0] = dot(xmm0, xmm0) over lanes 0-2.
       vsubps    xmm0,xmm1,xmm2
       vdpps     xmm2,xmm0,xmm0,71
       ; Compare a scalar constant with the squared length. The branch is taken
       ; when constant <= squared length (or the compare is unordered).
       vmovss    xmm3,dword ptr [7FFD3CDD3840]
       vucomiss  xmm3,xmm2
       jbe       short M01_L00
       ; Squared length below the constant: xmm0 = 0 - (4th Vector3).
       mov       rax,[rsp+0B0]
       vmovss    xmm0,dword ptr [rax+8]
       vmovsd    xmm2,qword ptr [rax]
       vshufps   xmm2,xmm2,xmm0,44
       vxorps    xmm0,xmm0,xmm0
       vsubps    xmm0,xmm0,xmm2
       jmp       short M01_L01
M01_L00:
       ; xmm0 *= c / sqrt(squared length), with c loaded from [7FFD3CDD3830]
       ; (presumably 1.0f, which would make this a normalize -- confirm).
       vsqrtss   xmm2,xmm2,xmm2
       vmovss    xmm3,dword ptr [7FFD3CDD3830]
       vdivss    xmm2,xmm3,xmm2
       vbroadcastss xmm2,xmm2
       vmulps    xmm0,xmm0,xmm2
M01_L01:
       ; xmm3 = 3rd Vector3 as (x, y, z, 0); xmm2 keeps a copy for the final store.
       vmovss    xmm2,dword ptr [r9+8]
       vmovsd    xmm3,qword ptr [r9]
       vshufps   xmm3,xmm3,xmm2,44
       vmovaps   xmm2,xmm3
       ; xmm4[0] = dot(arg3, xmm0) ANDed with the mask at [7FFD3CDD3850]
       ; (presumably an absolute-value mask -- confirm), then compared with the
       ; scalar threshold at [7FFD3CDD3860] kept in xmm5.
       vdpps     xmm4,xmm3,xmm0,71
       vandps    xmm4,xmm4,[7FFD3CDD3850]
       vmovss    xmm5,dword ptr [7FFD3CDD3860]
       vucomiss  xmm4,xmm5
       ; Masked dot <= threshold (or unordered): use xmm0 directly at M01_L04.
       jbe       near ptr M01_L04
       ; Otherwise try the 5th Vector3 (loaded into xmm4) with the same test.
       mov       rax,[rsp+0B8]
       vmovss    xmm0,dword ptr [rax+8]
       vmovsd    xmm4,qword ptr [rax]
       vshufps   xmm4,xmm4,xmm0,44
       vdpps     xmm0,xmm3,xmm4,71
       vandps    xmm0,xmm0,[7FFD3CDD3850]
       vucomiss  xmm0,xmm5
       jbe       short M01_L03
       ; 5th Vector3 rejected too: choose one of two constant vectors based on
       ; the masked z lane of arg3 compared with the same threshold.
       vunpckhps xmm0,xmm3,xmm3
       vandps    xmm0,xmm0,[7FFD3CDD3850]
       vucomiss  xmm0,xmm5
       ja        short M01_L02
       vmovupd   xmm4,[7FFD3CDD3820]
       jmp       short M01_L03
M01_L02:
       vmovupd   xmm4,[7FFD3CDD3830]
M01_L03:
       ; First cross product, a = arg3 (xmm3), b = xmm4, done with scalar ops.
       ; xmm0 = a.y, xmm7 = a.z, xmm5 = b.z, xmm8 = b.y (each in lane 0).
       vmovshdup xmm0,xmm3
       vunpckhps xmm5,xmm4,xmm4
       vmulss    xmm6,xmm0,xmm5
       vunpckhps xmm7,xmm3,xmm3
       vmovshdup xmm8,xmm4
       vmulss    xmm9,xmm7,xmm8
       ; xmm6 = a.y*b.z - a.z*b.y
       vsubss    xmm6,xmm6,xmm9
       vmulss    xmm9,xmm7,xmm4
       vmulss    xmm5,xmm3,xmm5
       ; xmm5 = a.z*b.x - a.x*b.z
       vsubss    xmm5,xmm9,xmm5
       vmulss    xmm8,xmm3,xmm8
       vmulss    xmm4,xmm0,xmm4
       ; xmm4 = a.x*b.y - a.y*b.x
       vsubss    xmm4,xmm8,xmm4
       ; Assemble (xmm6, xmm5, xmm4, 0) in xmm8: write lane 0, shift left by
       ; 4 bytes, repeat.
       vxorps    xmm8,xmm8,xmm8
       vmovss    xmm8,xmm8,xmm4
       vpslldq   xmm8,xmm8,4
       vmovss    xmm8,xmm8,xmm5
       vpslldq   xmm8,xmm8,4
       vmovss    xmm8,xmm8,xmm6
       vmovaps   xmm4,xmm8
       ; Divide by sqrt(dot(v, v)) to get unit length; the shift-left /
       ; shift-right pair by 4 bytes then clears lane 3.
       vdpps     xmm5,xmm4,xmm4,71
       vsqrtss   xmm5,xmm5,xmm5
       vbroadcastss xmm5,xmm5
       vdivps    xmm4,xmm4,xmm5
       vpslldq   xmm4,xmm4,4
       vpsrldq   xmm4,xmm4,4
       ; Second cross product, a = xmm4 (result above), b = arg3
       ; (xmm3 = b, xmm0 = b.y, xmm7 = b.z are still live).
       vmovshdup xmm5,xmm4
       vmulss    xmm6,xmm5,xmm7
       vunpckhps xmm8,xmm4,xmm4
       vmulss    xmm9,xmm8,xmm0
       ; xmm6 = a.y*b.z - a.z*b.y
       vsubss    xmm6,xmm6,xmm9
       vmulss    xmm8,xmm8,xmm3
       vmovaps   xmm9,xmm4
       vmulss    xmm7,xmm9,xmm7
       ; xmm7 = a.z*b.x - a.x*b.z
       vsubss    xmm7,xmm8,xmm7
       vmulss    xmm0,xmm9,xmm0
       vmulss    xmm3,xmm5,xmm3
       ; xmm0 = a.x*b.y - a.y*b.x
       vsubss    xmm0,xmm0,xmm3
       ; Assemble (xmm6, xmm7, xmm0, 0) in xmm3 using the same shift sequence.
       vxorps    xmm3,xmm3,xmm3
       vmovss    xmm3,xmm3,xmm0
       vpslldq   xmm3,xmm3,4
       vmovss    xmm3,xmm3,xmm7
       vpslldq   xmm3,xmm3,4
       vmovss    xmm3,xmm3,xmm6
       vmovaps   xmm0,xmm3
       ; Scale to unit length and clear lane 3; result stays in xmm0.
       vdpps     xmm3,xmm0,xmm0,71
       vsqrtss   xmm3,xmm3,xmm3
       vbroadcastss xmm3,xmm3
       vdivps    xmm0,xmm0,xmm3
       vpslldq   xmm0,xmm0,4
       vpsrldq   xmm0,xmm0,4
       jmp       near ptr M01_L05
M01_L04:
       ; Same two cross products, but starting from b = xmm0 (the vector
       ; computed before M01_L01). First: a = arg3 (xmm3), b = xmm0.
       ; xmm4 = a.y, xmm7 = a.z, xmm5 = b.z, xmm8 = b.y.
       vmovshdup xmm4,xmm3
       vunpckhps xmm5,xmm0,xmm0
       vmulss    xmm6,xmm4,xmm5
       vunpckhps xmm7,xmm3,xmm3
       vmovshdup xmm8,xmm0
       vmulss    xmm9,xmm7,xmm8
       ; xmm6 = a.y*b.z - a.z*b.y
       vsubss    xmm6,xmm6,xmm9
       vmulss    xmm9,xmm7,xmm0
       vmulss    xmm5,xmm3,xmm5
       ; xmm5 = a.z*b.x - a.x*b.z
       vsubss    xmm5,xmm9,xmm5
       vmulss    xmm8,xmm3,xmm8
       vmulss    xmm0,xmm4,xmm0
       ; xmm0 = a.x*b.y - a.y*b.x
       vsubss    xmm0,xmm8,xmm0
       ; Assemble (xmm6, xmm5, xmm0, 0) in xmm8.
       vxorps    xmm8,xmm8,xmm8
       vmovss    xmm8,xmm8,xmm0
       vpslldq   xmm8,xmm8,4
       vmovss    xmm8,xmm8,xmm5
       vpslldq   xmm8,xmm8,4
       vmovss    xmm8,xmm8,xmm6
       vmovaps   xmm0,xmm8
       ; Scale to unit length and clear lane 3.
       vdpps     xmm5,xmm0,xmm0,71
       vsqrtss   xmm5,xmm5,xmm5
       vbroadcastss xmm5,xmm5
       vdivps    xmm0,xmm0,xmm5
       vpslldq   xmm0,xmm0,4
       vpsrldq   xmm0,xmm0,4
       ; Second: a = xmm0 (result above), b = arg3 (xmm3, xmm4 = b.y, xmm7 = b.z).
       vmovshdup xmm5,xmm0
       vmulss    xmm6,xmm5,xmm7
       vunpckhps xmm8,xmm0,xmm0
       vmulss    xmm9,xmm8,xmm4
       ; xmm6 = a.y*b.z - a.z*b.y
       vsubss    xmm6,xmm6,xmm9
       vmulss    xmm8,xmm8,xmm3
       vmovaps   xmm9,xmm0
       vmulss    xmm7,xmm9,xmm7
       ; xmm7 = a.z*b.x - a.x*b.z
       vsubss    xmm7,xmm8,xmm7
       vmulss    xmm4,xmm9,xmm4
       vmulss    xmm3,xmm5,xmm3
       ; xmm3 = a.x*b.y - a.y*b.x
       vsubss    xmm3,xmm4,xmm3
       ; Assemble (xmm6, xmm7, xmm3, 0) in xmm4.
       vxorps    xmm4,xmm4,xmm4
       vmovss    xmm4,xmm4,xmm3
       vpslldq   xmm4,xmm4,4
       vmovss    xmm4,xmm4,xmm7
       vpslldq   xmm4,xmm4,4
       vmovss    xmm4,xmm4,xmm6
       vmovaps   xmm3,xmm4
       ; Scale to unit length and clear lane 3.
       vdpps     xmm4,xmm3,xmm3,71
       vsqrtss   xmm4,xmm4,xmm4
       vbroadcastss xmm4,xmm4
       vdivps    xmm3,xmm3,xmm4
       vpslldq   xmm3,xmm3,4
       vpsrldq   xmm3,xmm3,4
       ; Move results into the registers M01_L05 expects:
       ; xmm4 = first cross product, xmm0 = second cross product.
       vmovaps   xmm4,xmm0
       vmovaps   xmm0,xmm3
M01_L05:
       ; Build the 64-byte result in [rsp]..[rsp+3F] as four 16-byte groups of
       ; three floats plus a fourth element:
       ;   [rsp+00] = xmm4 (first cross product),   4th element = 0
       ;   [rsp+10] = xmm2 (copy of 3rd Vector3),   4th element = 0
       ;   [rsp+20] = xmm0 (second cross product),  4th element = 0
       ;   [rsp+30] = xmm1 (1st Vector3),           4th element = 3F800000 (1.0f)
       ; vpshufd ...,2 moves lane 2 (z) into lane 0 for the 4-byte store.
       vmovsd    qword ptr [rsp],xmm4
       vpshufd   xmm3,xmm4,2
       vmovss    dword ptr [rsp+8],xmm3
       xor       eax,eax
       mov       [rsp+0C],eax
       vmovsd    qword ptr [rsp+10],xmm2
       vpshufd   xmm3,xmm2,2
       vmovss    dword ptr [rsp+18],xmm3
       mov       [rsp+1C],eax
       vmovsd    qword ptr [rsp+20],xmm0
       vpshufd   xmm2,xmm0,2
       vmovss    dword ptr [rsp+28],xmm2
       mov       [rsp+2C],eax
       vmovsd    qword ptr [rsp+30],xmm1
       vpshufd   xmm0,xmm1,2
       vmovss    dword ptr [rsp+38],xmm0
       mov       dword ptr [rsp+3C],3F800000
       ; Copy the 64 bytes to the destination in rcx with two 32-byte moves.
       vmovdqu   ymm0,ymmword ptr [rsp]
       vmovdqu   ymmword ptr [rcx],ymm0
       vmovdqu   ymm0,ymmword ptr [rsp+20]
       vmovdqu   ymmword ptr [rcx+20],ymm0
       ; Return the destination pointer and restore the saved xmm registers.
       mov       rax,rcx
       vmovaps   xmm6,[rsp+70]
       vmovaps   xmm7,[rsp+60]
       vmovaps   xmm8,[rsp+50]
       vmovaps   xmm9,[rsp+40]
       add       rsp,88
       ret
; Total bytes of code 884

Compare Jit Disasm

; System.Numerics.Tests.Perf_Matrix4x4.CreateConstrainedBillboardBenchmark()
; Compare JIT output for the benchmark method. It materializes five 16-byte
; stack temporaries and passes their addresses to
; Matrix4x4.CreateConstrainedBillboard. Numeric literals in this listing are
; hexadecimal (e.g. 80, 7FFA676C3360).
       push      rsi
       sub       rsp,80
       vzeroupper
       ; Keep the incoming rdx across the call: it is passed on in rcx and
       ; returned in rax (presumably the caller-supplied result buffer -- confirm
       ; against the calling convention in use).
       mov       rsi,rdx
       ; Fill the five stack slots one at a time through xmm0.
       ; [rsp+70] holds all zeroes.
       vxorps    xmm0,xmm0,xmm0
       vmovapd   [rsp+70],xmm0
       vmovupd   xmm0,[7FFA676C3360]
       vmovapd   [rsp+60],xmm0
       vmovupd   xmm0,[7FFA676C3370]
       vmovapd   [rsp+50],xmm0
       vmovupd   xmm0,[7FFA676C3380]
       vmovapd   [rsp+40],xmm0
       ; The last slot is loaded from the same address (7FFA676C3360) as the
       ; slot at [rsp+60].
       vmovupd   xmm0,[7FFA676C3360]
       vmovapd   [rsp+30],xmm0
       ; Argument setup: rcx = saved rdx, then the addresses of the five slots
       ; in rdx, r8, r9 and the two outgoing stack slots [rsp+20] / [rsp+28].
       mov       rcx,rsi
       lea       rdx,[rsp+70]
       lea       r8,[rsp+60]
       lea       r9,[rsp+50]
       lea       rax,[rsp+40]
       mov       [rsp+20],rax
       lea       rax,[rsp+30]
       mov       [rsp+28],rax
       call      qword ptr [7FFA67E0B198]; System.Numerics.Matrix4x4.CreateConstrainedBillboard(System.Numerics.Vector3, System.Numerics.Vector3, System.Numerics.Vector3, System.Numerics.Vector3, System.Numerics.Vector3)
       ; Return the pointer that was received in rdx on entry.
       mov       rax,rsi
       add       rsp,80
       pop       rsi
       ret
; Total bytes of code 136
; System.Numerics.Matrix4x4.CreateConstrainedBillboard(System.Numerics.Vector3, System.Numerics.Vector3, System.Numerics.Vector3, System.Numerics.Vector3, System.Numerics.Vector3)
; Compare JIT output. Register / stack roles as used below:
;   rcx        = destination of the 64-byte result (written at M01_L05, returned in rax)
;   rdx/r8/r9  = pointers to the 1st / 2nd / 3rd Vector3 arguments
;   [rsp+0B0]  = pointer to the 4th Vector3 argument
;   [rsp+0B8]  = pointer to the 5th Vector3 argument
; Each Vector3 is read as an 8-byte load (offset 0) plus a 4-byte load
; (offset 8) and merged by vshufps 44 into lanes (x, y, z, 0), where x, y, z
; denote the floats at offsets 0, 4, 8. Immediates are hexadecimal.
; The values of the constants loaded from absolute addresses are not visible
; in this listing.
       sub       rsp,88
       vzeroupper
       ; xmm6-xmm9 are used as scratch below; save them so they can be
       ; restored before returning.
       vmovaps   [rsp+70],xmm6
       vmovaps   [rsp+60],xmm7
       vmovaps   [rsp+50],xmm8
       vmovaps   [rsp+40],xmm9
       ; xmm1 = 1st Vector3 as (x, y, z, 0); stays live until the final store.
       vmovss    xmm0,dword ptr [rdx+8]
       vmovsd    xmm1,qword ptr [rdx]
       vshufps   xmm1,xmm1,xmm0,44
       ; xmm2 = 2nd Vector3 as (x, y, z, 0).
       vmovss    xmm0,dword ptr [r8+8]
       vmovsd    xmm2,qword ptr [r8]
       vshufps   xmm2,xmm2,xmm0,44
       ; xmm0 = arg1 - arg2; xmm2[0] = dot(xmm0, xmm0) over lanes 0-2.
       vsubps    xmm0,xmm1,xmm2
       vdpps     xmm2,xmm0,xmm0,71
       ; Compare a scalar constant with the squared length. The branch is taken
       ; when constant <= squared length (or the compare is unordered).
       vmovss    xmm3,dword ptr [7FFA676C3780]
       vucomiss  xmm3,xmm2
       jbe       short M01_L00
       ; Squared length below the constant: xmm0 = 0 - (4th Vector3).
       mov       rax,[rsp+0B0]
       vmovss    xmm0,dword ptr [rax+8]
       vmovsd    xmm2,qword ptr [rax]
       vshufps   xmm2,xmm2,xmm0,44
       vxorps    xmm0,xmm0,xmm0
       vsubps    xmm0,xmm0,xmm2
       jmp       short M01_L01
M01_L00:
       ; xmm0 *= c / sqrt(squared length), with c loaded from [7FFA676C3784]
       ; (presumably 1.0f, which would make this a normalize -- confirm).
       vsqrtss   xmm2,xmm2,xmm2
       vmovss    xmm3,dword ptr [7FFA676C3784]
       vdivss    xmm2,xmm3,xmm2
       vbroadcastss xmm2,xmm2
       vmulps    xmm0,xmm0,xmm2
M01_L01:
       ; xmm3 = 3rd Vector3 as (x, y, z, 0); xmm2 keeps a copy for the final store.
       vmovss    xmm2,dword ptr [r9+8]
       vmovsd    xmm3,qword ptr [r9]
       vshufps   xmm3,xmm3,xmm2,44
       vmovaps   xmm2,xmm3
       ; xmm4[0] = dot(arg3, xmm0) ANDed with the mask at [7FFA676C3790]
       ; (presumably an absolute-value mask -- confirm), then compared with the
       ; scalar threshold at [7FFA676C37A0] kept in xmm5.
       vdpps     xmm4,xmm3,xmm0,71
       vandps    xmm4,xmm4,[7FFA676C3790]
       vmovss    xmm5,dword ptr [7FFA676C37A0]
       vucomiss  xmm4,xmm5
       ; Masked dot <= threshold (or unordered): use xmm0 directly at M01_L04.
       jbe       near ptr M01_L04
       ; Otherwise try the 5th Vector3 (loaded into xmm4) with the same test.
       mov       rax,[rsp+0B8]
       vmovss    xmm0,dword ptr [rax+8]
       vmovsd    xmm4,qword ptr [rax]
       vshufps   xmm4,xmm4,xmm0,44
       vdpps     xmm0,xmm3,xmm4,71
       vandps    xmm0,xmm0,[7FFA676C3790]
       vucomiss  xmm0,xmm5
       jbe       short M01_L03
       ; 5th Vector3 rejected too: choose one of two constant vectors based on
       ; the masked z lane of arg3 compared with the same threshold.
       vunpckhps xmm0,xmm3,xmm3
       vandps    xmm0,xmm0,[7FFA676C3790]
       vucomiss  xmm0,xmm5
       ja        short M01_L02
       vmovupd   xmm4,[7FFA676C37B0]
       jmp       short M01_L03
M01_L02:
       vmovupd   xmm4,[7FFA676C37C0]
M01_L03:
       ; First cross product, a = arg3 (xmm3), b = xmm4, done with scalar ops.
       ; xmm0 = a.y, xmm7 = a.z, xmm5 = b.z, xmm8 = b.y (each in lane 0).
       vmovshdup xmm0,xmm3
       vunpckhps xmm5,xmm4,xmm4
       vmulss    xmm6,xmm0,xmm5
       vunpckhps xmm7,xmm3,xmm3
       vmovshdup xmm8,xmm4
       vmulss    xmm9,xmm7,xmm8
       ; xmm6 = a.y*b.z - a.z*b.y
       vsubss    xmm6,xmm6,xmm9
       vmulss    xmm9,xmm7,xmm4
       vmulss    xmm5,xmm3,xmm5
       ; xmm5 = a.z*b.x - a.x*b.z
       vsubss    xmm5,xmm9,xmm5
       ; vinsertps 10: take xmm6 and place lane 0 of xmm5 into lane 1.
       vinsertps xmm5,xmm6,xmm5,10
       vmulss    xmm6,xmm3,xmm8
       vmulss    xmm4,xmm0,xmm4
       ; xmm4 = a.x*b.y - a.y*b.x
       vsubss    xmm4,xmm6,xmm4
       ; vinsertps 28: place lane 0 of xmm4 into lane 2 and zero lane 3.
       vinsertps xmm4,xmm5,xmm4,28
       ; Divide by sqrt(dot(v, v)) to get unit length; the shift-left /
       ; shift-right pair by 4 bytes then clears lane 3.
       vdpps     xmm5,xmm4,xmm4,71
       vsqrtss   xmm5,xmm5,xmm5
       vbroadcastss xmm5,xmm5
       vdivps    xmm4,xmm4,xmm5
       vpslldq   xmm4,xmm4,4
       vpsrldq   xmm4,xmm4,4
       ; Second cross product, a = xmm4 (result above), b = arg3
       ; (xmm3 = b, xmm0 = b.y, xmm7 = b.z are still live).
       vmovshdup xmm5,xmm4
       vmulss    xmm6,xmm5,xmm7
       vunpckhps xmm8,xmm4,xmm4
       vmulss    xmm9,xmm8,xmm0
       ; xmm6 = a.y*b.z - a.z*b.y
       vsubss    xmm6,xmm6,xmm9
       vmulss    xmm8,xmm8,xmm3
       vmovaps   xmm9,xmm4
       vmulss    xmm7,xmm9,xmm7
       ; xmm7 = a.z*b.x - a.x*b.z
       vsubss    xmm7,xmm8,xmm7
       ; Place lane 0 of xmm7 into lane 1 of the result.
       vinsertps xmm6,xmm6,xmm7,10
       vmulss    xmm0,xmm9,xmm0
       vmulss    xmm3,xmm5,xmm3
       ; xmm0 = a.x*b.y - a.y*b.x
       vsubss    xmm0,xmm0,xmm3
       ; Place lane 0 of xmm0 into lane 2 and zero lane 3.
       vinsertps xmm0,xmm6,xmm0,28
       ; Scale to unit length and clear lane 3; result stays in xmm0.
       vdpps     xmm3,xmm0,xmm0,71
       vsqrtss   xmm3,xmm3,xmm3
       vbroadcastss xmm3,xmm3
       vdivps    xmm0,xmm0,xmm3
       vpslldq   xmm0,xmm0,4
       vpsrldq   xmm0,xmm0,4
       jmp       near ptr M01_L05
M01_L04:
       ; Same two cross products, but starting from b = xmm0 (the vector
       ; computed before M01_L01). First: a = arg3 (xmm3), b = xmm0.
       ; xmm4 = a.y, xmm7 = a.z, xmm5 = b.z, xmm8 = b.y.
       vmovshdup xmm4,xmm3
       vunpckhps xmm5,xmm0,xmm0
       vmulss    xmm6,xmm4,xmm5
       vunpckhps xmm7,xmm3,xmm3
       vmovshdup xmm8,xmm0
       vmulss    xmm9,xmm7,xmm8
       ; xmm6 = a.y*b.z - a.z*b.y
       vsubss    xmm6,xmm6,xmm9
       vmulss    xmm9,xmm7,xmm0
       vmulss    xmm5,xmm3,xmm5
       ; xmm5 = a.z*b.x - a.x*b.z
       vsubss    xmm5,xmm9,xmm5
       ; Place lane 0 of xmm5 into lane 1 of the result.
       vinsertps xmm5,xmm6,xmm5,10
       vmulss    xmm6,xmm3,xmm8
       vmulss    xmm0,xmm4,xmm0
       ; xmm0 = a.x*b.y - a.y*b.x
       vsubss    xmm0,xmm6,xmm0
       ; Place lane 0 of xmm0 into lane 2 and zero lane 3.
       vinsertps xmm0,xmm5,xmm0,28
       ; Scale to unit length and clear lane 3.
       vdpps     xmm5,xmm0,xmm0,71
       vsqrtss   xmm5,xmm5,xmm5
       vbroadcastss xmm5,xmm5
       vdivps    xmm0,xmm0,xmm5
       vpslldq   xmm0,xmm0,4
       vpsrldq   xmm0,xmm0,4
       ; Second: a = xmm0 (result above), b = arg3 (xmm3, xmm4 = b.y, xmm7 = b.z).
       vmovshdup xmm5,xmm0
       vmulss    xmm6,xmm5,xmm7
       vunpckhps xmm8,xmm0,xmm0
       vmulss    xmm9,xmm8,xmm4
       ; xmm6 = a.y*b.z - a.z*b.y
       vsubss    xmm6,xmm6,xmm9
       vmulss    xmm8,xmm8,xmm3
       vmovaps   xmm9,xmm0
       vmulss    xmm7,xmm9,xmm7
       ; xmm7 = a.z*b.x - a.x*b.z
       vsubss    xmm7,xmm8,xmm7
       ; Place lane 0 of xmm7 into lane 1 of the result.
       vinsertps xmm6,xmm6,xmm7,10
       vmulss    xmm4,xmm9,xmm4
       vmulss    xmm3,xmm5,xmm3
       ; xmm3 = a.x*b.y - a.y*b.x
       vsubss    xmm3,xmm4,xmm3
       ; Place lane 0 of xmm3 into lane 2 and zero lane 3.
       vinsertps xmm3,xmm6,xmm3,28
       ; Scale to unit length and clear lane 3.
       vdpps     xmm4,xmm3,xmm3,71
       vsqrtss   xmm4,xmm4,xmm4
       vbroadcastss xmm4,xmm4
       vdivps    xmm3,xmm3,xmm4
       vpslldq   xmm3,xmm3,4
       vpsrldq   xmm3,xmm3,4
       ; Move results into the registers M01_L05 expects:
       ; xmm4 = first cross product, xmm0 = second cross product.
       vmovaps   xmm4,xmm0
       vmovaps   xmm0,xmm3
M01_L05:
       ; Build the 64-byte result in [rsp]..[rsp+3F] as four 16-byte groups of
       ; three floats plus a fourth element:
       ;   [rsp+00] = xmm4 (first cross product),   4th element = 0
       ;   [rsp+10] = xmm2 (copy of 3rd Vector3),   4th element = 0
       ;   [rsp+20] = xmm0 (second cross product),  4th element = 0
       ;   [rsp+30] = xmm1 (1st Vector3),           4th element = 3F800000 (1.0f)
       ; vpshufd ...,2 moves lane 2 (z) into lane 0 for the 4-byte store.
       vmovsd    qword ptr [rsp],xmm4
       vpshufd   xmm3,xmm4,2
       vmovss    dword ptr [rsp+8],xmm3
       xor       eax,eax
       mov       [rsp+0C],eax
       vmovsd    qword ptr [rsp+10],xmm2
       vpshufd   xmm3,xmm2,2
       vmovss    dword ptr [rsp+18],xmm3
       mov       [rsp+1C],eax
       vmovsd    qword ptr [rsp+20],xmm0
       vpshufd   xmm2,xmm0,2
       vmovss    dword ptr [rsp+28],xmm2
       mov       [rsp+2C],eax
       vmovsd    qword ptr [rsp+30],xmm1
       vpshufd   xmm0,xmm1,2
       vmovss    dword ptr [rsp+38],xmm0
       mov       dword ptr [rsp+3C],3F800000
       ; Copy the 64 bytes to the destination in rcx with two 32-byte moves.
       vmovdqu   ymm0,ymmword ptr [rsp]
       vmovdqu   ymmword ptr [rcx],ymm0
       vmovdqu   ymm0,ymmword ptr [rsp+20]
       vmovdqu   ymmword ptr [rcx+20],ymm0
       ; Return the destination pointer and restore the saved xmm registers.
       mov       rax,rcx
       vmovaps   xmm6,[rsp+70]
       vmovaps   xmm7,[rsp+60]
       vmovaps   xmm8,[rsp+50]
       vmovaps   xmm9,[rsp+40]
       add       rsp,88
       ret
; Total bytes of code 804

Docs

Profiling workflow for dotnet/runtime repository
Benchmarking workflow for dotnet/runtime repository

Metadata

Metadata

Assignees

Type

No type

Projects

No projects

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions