Closed
Description
Run Information
Architecture | x64 |
---|---|
OS | Windows 10.0.18362 |
Baseline | 8c58fc2347820ce48e09605d8adddb993df9ebb5 |
Compare | 1d15f2140f7eb30a976c66290491ec89cd628da0 |
Diff | Diff |
Improvements in System.Numerics.Tests.Perf_Matrix4x4
Benchmark | Baseline | Test | Test/Base | Test Quality | Edge Detector | Baseline IR | Compare IR | IR Ratio | Baseline ETL | Compare ETL |
---|---|---|---|---|---|---|---|---|---|---|
CreateLookAtBenchmark - Duration of single invocation | 31.56 ns | 29.75 ns | 0.94 | 0.14 | False | 149.18093115744372 | 138.9753481735635 | 0.9315892258836395 | Trace | Trace |
CreateBillboardBenchmark - Duration of single invocation | 23.04 ns | 20.53 ns | 0.89 | 0.11 | False | 116.78119720330676 | 106.01327812553706 | 0.907794068431893 | Trace | Trace |
CreateWorldBenchmark - Duration of single invocation | 23.81 ns | 20.35 ns | 0.85 | 0.09 | False | 119.7388710165708 | 110.01858998207872 | 0.9188210064787827 | Trace | Trace |
CreateConstrainedBillboardBenchmark - Duration of single invocation | 30.75 ns | 29.19 ns | 0.95 | 0.01 | False | | | | Trace | Trace |
Repro
git clone https://github.com/dotnet/performance.git
py .\performance\scripts\benchmarks_ci.py -f net8.0 --filter 'System.Numerics.Tests.Perf_Matrix4x4*'
Payloads
Histogram
System.Numerics.Tests.Perf_Matrix4x4.CreateLookAtBenchmark
Description of detection logic
```
IsImprovementBase: Marked as improvement because the compare was 5% less than the baseline, and the value was not too small.
IsImprovementCheck: Marked as improvement because the three check build points were 0.05 less than the baseline.
IsRegressionBase: Marked as not a regression because the compare was not 5% greater than the baseline, or the value was too small.
IsImprovementBase: Marked as improvement because the compare was 5% less than the baseline, and the value was not too small.
IsImprovementCheck: Marked as improvement because the three check build points were 0.05 less than the baseline.
IsImprovementWindowed: Marked as improvement because 29.745970644173973 < 30.579009425098956.
IsChangePoint: Marked as a change because one of 12/28/2022 5:13:50 PM, 1/3/2023 6:29:42 AM falls between 12/23/2022 9:40:21 PM and 1/3/2023 6:29:42 AM.
IsImprovementStdDev: Marked as improvement because 8.03377540262315 (T) = (0 -29.786236112966343) / Math.Sqrt((0.6142171521206813 / (299)) + (1.2209179767904108 / (14))) is greater than 1.9676211333067681 = MathNet.Numerics.Distributions.StudentT.InvCDF(0, 1, (299) + (14) - 2, .975) and 0.07457292595103529 = (32.18647578857234 - 29.786236112966343) / 32.18647578857234 is greater than 0.05.
IsChangeEdgeDetector: Marked not as a regression because Edge Detector said so.
```
### Baseline Jit Disasm
```assembly
; System.Numerics.Tests.Perf_Matrix4x4.CreateLookAtBenchmark()
push rsi
sub rsp,50
vzeroupper
mov rsi,rdx
vmovupd xmm0,[7FFB23F432E0]
vmovupd xmm1,[7FFB23F432F0]
vmovapd [rsp+40],xmm0
vxorps xmm0,xmm0,xmm0
vmovapd [rsp+30],xmm0
vmovapd [rsp+20],xmm1
mov rcx,rsi
lea rdx,[rsp+40]
lea r8,[rsp+30]
lea r9,[rsp+20]
call qword ptr [7FFB2468B1F8]; System.Numerics.Matrix4x4.CreateLookAt(System.Numerics.Vector3, System.Numerics.Vector3, System.Numerics.Vector3)
mov rax,rsi
add rsp,50
pop rsi
ret
; Total bytes of code 82
; System.Numerics.Matrix4x4.CreateLookAt(System.Numerics.Vector3, System.Numerics.Vector3, System.Numerics.Vector3)
push rdi
push rsi
sub rsp,0C8
vzeroupper
vmovaps [rsp+0B0],xmm6
vmovaps [rsp+0A0],xmm7
vmovaps [rsp+90],xmm8
vmovaps [rsp+80],xmm9
vmovaps [rsp+70],xmm10
vmovaps [rsp+60],xmm11
mov rdi,rcx
mov rsi,rdx
vmovss xmm0,dword ptr [rsi+8]
vmovsd xmm1,qword ptr [rsi]
vshufps xmm1,xmm1,xmm0,44
vmovss xmm0,dword ptr [r8+8]
vmovsd xmm2,qword ptr [r8]
vshufps xmm2,xmm2,xmm0,44
vsubps xmm0,xmm1,xmm2
vdpps xmm1,xmm0,xmm0,71
vsqrtss xmm1,xmm1,xmm1
vbroadcastss xmm1,xmm1
vdivps xmm0,xmm0,xmm1
vpslldq xmm0,xmm0,4
vpsrldq xmm6,xmm0,4
vmovss xmm0,dword ptr [r9+8]
vmovsd xmm1,qword ptr [r9]
vshufps xmm1,xmm1,xmm0,44
vmovshdup xmm0,xmm1
vunpckhps xmm7,xmm6,xmm6
vmulss xmm2,xmm0,xmm7
vunpckhps xmm3,xmm1,xmm1
vmovshdup xmm8,xmm6
vmulss xmm4,xmm3,xmm8
vsubss xmm2,xmm2,xmm4
vmovaps xmm9,xmm6
vmulss xmm3,xmm3,xmm9
vmulss xmm4,xmm1,xmm7
vsubss xmm3,xmm3,xmm4
vmulss xmm1,xmm1,xmm8
vmulss xmm0,xmm0,xmm9
vsubss xmm0,xmm1,xmm0
vxorps xmm1,xmm1,xmm1
vmovss xmm1,xmm1,xmm0
vpslldq xmm1,xmm1,4
vmovss xmm1,xmm1,xmm3
vpslldq xmm1,xmm1,4
vmovss xmm1,xmm1,xmm2
vmovaps xmm0,xmm1
vdpps xmm1,xmm0,xmm0,71
vsqrtss xmm1,xmm1,xmm1
vbroadcastss xmm1,xmm1
vdivps xmm0,xmm0,xmm1
vpslldq xmm0,xmm0,4
vpsrldq xmm10,xmm0,4
vunpckhps xmm0,xmm10,xmm10
vmulss xmm0,xmm8,xmm0
vmovshdup xmm1,xmm10
vmulss xmm1,xmm7,xmm1
vsubss xmm0,xmm0,xmm1
vmovaps xmm1,xmm10
vmulss xmm1,xmm7,xmm1
vunpckhps xmm2,xmm10,xmm10
vmulss xmm2,xmm9,xmm2
vsubss xmm1,xmm1,xmm2
vmovshdup xmm2,xmm10
vmulss xmm2,xmm9,xmm2
vmovaps xmm3,xmm10
vmulss xmm3,xmm8,xmm3
vsubss xmm2,xmm2,xmm3
vxorps xmm3,xmm3,xmm3
vmovss xmm3,xmm3,xmm2
vpslldq xmm3,xmm3,4
vmovss xmm3,xmm3,xmm1
vpslldq xmm3,xmm3,4
vmovss xmm3,xmm3,xmm0
vmovaps xmm11,xmm3
lea rcx,[rsp+20]
call qword ptr [7FFB2468B030]; System.Numerics.Matrix4x4.get_Identity()
vmovaps xmm0,xmm10
vmovss dword ptr [rsp+20],xmm0
vmovaps xmm0,xmm11
vmovss dword ptr [rsp+24],xmm0
vmovss dword ptr [rsp+28],xmm9
vmovshdup xmm0,xmm10
vmovss dword ptr [rsp+30],xmm0
vmovshdup xmm0,xmm11
vmovss dword ptr [rsp+34],xmm0
vmovss dword ptr [rsp+38],xmm8
vunpckhps xmm0,xmm10,xmm10
vmovss dword ptr [rsp+40],xmm0
vunpckhps xmm0,xmm11,xmm11
vmovss dword ptr [rsp+44],xmm0
vmovss dword ptr [rsp+48],xmm7
vmovss xmm0,dword ptr [rsi+8]
vmovsd xmm1,qword ptr [rsi]
vshufps xmm1,xmm1,xmm0,44
vdpps xmm0,xmm10,xmm1,71
vxorps xmm0,xmm0,[7FFB23F43640]
vmovss dword ptr [rsp+50],xmm0
vmovss xmm0,dword ptr [rsi+8]
vmovsd xmm1,qword ptr [rsi]
vshufps xmm1,xmm1,xmm0,44
vdpps xmm0,xmm11,xmm1,71
vxorps xmm0,xmm0,[7FFB23F43640]
vmovss dword ptr [rsp+54],xmm0
vmovss xmm0,dword ptr [rsi+8]
vmovsd xmm1,qword ptr [rsi]
vshufps xmm1,xmm1,xmm0,44
vdpps xmm0,xmm6,xmm1,71
vxorps xmm0,xmm0,[7FFB23F43640]
vmovss dword ptr [rsp+58],xmm0
vmovdqu ymm0,ymmword ptr [rsp+20]
vmovdqu ymmword ptr [rdi],ymm0
vmovdqu ymm0,ymmword ptr [rsp+40]
vmovdqu ymmword ptr [rdi+20],ymm0
mov rax,rdi
vmovaps xmm6,[rsp+0B0]
vmovaps xmm7,[rsp+0A0]
vmovaps xmm8,[rsp+90]
vmovaps xmm9,[rsp+80]
vmovaps xmm10,[rsp+70]
vmovaps xmm11,[rsp+60]
add rsp,0C8
pop rsi
pop rdi
ret
; Total bytes of code 639
```
### Compare Jit Disasm
```assembly
; System.Numerics.Tests.Perf_Matrix4x4.CreateLookAtBenchmark()
push rsi
sub rsp,50
vzeroupper
mov rsi,rdx
vmovupd xmm0,[7FFC51CB32A0]
vmovapd [rsp+40],xmm0
vxorps xmm0,xmm0,xmm0
vmovapd [rsp+30],xmm0
vmovupd xmm0,[7FFC51CB32B0]
vmovapd [rsp+20],xmm0
mov rcx,rsi
lea rdx,[rsp+40]
lea r8,[rsp+30]
lea r9,[rsp+20]
call qword ptr [7FFC523FB1F8]; System.Numerics.Matrix4x4.CreateLookAt(System.Numerics.Vector3, System.Numerics.Vector3, System.Numerics.Vector3)
mov rax,rsi
add rsp,50
pop rsi
ret
; Total bytes of code 82
; System.Numerics.Matrix4x4.CreateLookAt(System.Numerics.Vector3, System.Numerics.Vector3, System.Numerics.Vector3)
push rdi
push rsi
sub rsp,0C8
vzeroupper
vmovaps [rsp+0B0],xmm6
vmovaps [rsp+0A0],xmm7
vmovaps [rsp+90],xmm8
vmovaps [rsp+80],xmm9
vmovaps [rsp+70],xmm10
vmovaps [rsp+60],xmm11
mov rdi,rcx
mov rsi,rdx
vmovss xmm0,dword ptr [rsi+8]
vmovsd xmm1,qword ptr [rsi]
vshufps xmm1,xmm1,xmm0,44
vmovss xmm0,dword ptr [r8+8]
vmovsd xmm2,qword ptr [r8]
vshufps xmm2,xmm2,xmm0,44
vsubps xmm0,xmm1,xmm2
vdpps xmm1,xmm0,xmm0,71
vsqrtss xmm1,xmm1,xmm1
vbroadcastss xmm1,xmm1
vdivps xmm0,xmm0,xmm1
vpslldq xmm0,xmm0,4
vpsrldq xmm6,xmm0,4
vmovss xmm0,dword ptr [r9+8]
vmovsd xmm1,qword ptr [r9]
vshufps xmm1,xmm1,xmm0,44
vmovshdup xmm0,xmm1
vunpckhps xmm7,xmm6,xmm6
vmulss xmm2,xmm0,xmm7
vunpckhps xmm3,xmm1,xmm1
vmovshdup xmm8,xmm6
vmulss xmm4,xmm3,xmm8
vsubss xmm2,xmm2,xmm4
vmovaps xmm9,xmm6
vmulss xmm3,xmm3,xmm9
vmulss xmm4,xmm1,xmm7
vsubss xmm3,xmm3,xmm4
vinsertps xmm2,xmm2,xmm3,10
vmulss xmm1,xmm1,xmm8
vmulss xmm0,xmm0,xmm9
vsubss xmm0,xmm1,xmm0
vinsertps xmm0,xmm2,xmm0,28
vdpps xmm1,xmm0,xmm0,71
vsqrtss xmm1,xmm1,xmm1
vbroadcastss xmm1,xmm1
vdivps xmm0,xmm0,xmm1
vpslldq xmm0,xmm0,4
vpsrldq xmm10,xmm0,4
vunpckhps xmm0,xmm10,xmm10
vmulss xmm0,xmm8,xmm0
vmovshdup xmm1,xmm10
vmulss xmm1,xmm7,xmm1
vsubss xmm0,xmm0,xmm1
vmovaps xmm1,xmm10
vmulss xmm1,xmm7,xmm1
vunpckhps xmm2,xmm10,xmm10
vmulss xmm2,xmm9,xmm2
vsubss xmm1,xmm1,xmm2
vinsertps xmm0,xmm0,xmm1,10
vmovshdup xmm1,xmm10
vmulss xmm1,xmm9,xmm1
vmovaps xmm2,xmm10
vmulss xmm2,xmm8,xmm2
vsubss xmm1,xmm1,xmm2
vinsertps xmm11,xmm0,xmm1,28
lea rcx,[rsp+20]
call qword ptr [7FFC523FB030]; System.Numerics.Matrix4x4.get_Identity()
vmovaps xmm0,xmm10
vmovss dword ptr [rsp+20],xmm0
vmovaps xmm0,xmm11
vmovss dword ptr [rsp+24],xmm0
vmovss dword ptr [rsp+28],xmm9
vmovshdup xmm0,xmm10
vmovss dword ptr [rsp+30],xmm0
vmovshdup xmm0,xmm11
vmovss dword ptr [rsp+34],xmm0
vmovss dword ptr [rsp+38],xmm8
vunpckhps xmm0,xmm10,xmm10
vmovss dword ptr [rsp+40],xmm0
vunpckhps xmm0,xmm11,xmm11
vmovss dword ptr [rsp+44],xmm0
vmovss dword ptr [rsp+48],xmm7
vmovss xmm0,dword ptr [rsi+8]
vmovsd xmm1,qword ptr [rsi]
vshufps xmm1,xmm1,xmm0,44
vdpps xmm0,xmm10,xmm1,71
vxorps xmm0,xmm0,[7FFC51CB35E0]
vmovss dword ptr [rsp+50],xmm0
vmovss xmm0,dword ptr [rsi+8]
vmovsd xmm1,qword ptr [rsi]
vshufps xmm1,xmm1,xmm0,44
vdpps xmm0,xmm11,xmm1,71
vxorps xmm0,xmm0,[7FFC51CB35E0]
vmovss dword ptr [rsp+54],xmm0
vmovss xmm0,dword ptr [rsi+8]
vmovsd xmm1,qword ptr [rsi]
vshufps xmm1,xmm1,xmm0,44
vdpps xmm0,xmm6,xmm1,71
vxorps xmm0,xmm0,[7FFC51CB35E0]
vmovss dword ptr [rsp+58],xmm0
vmovdqu ymm0,ymmword ptr [rsp+20]
vmovdqu ymmword ptr [rdi],ymm0
vmovdqu ymm0,ymmword ptr [rsp+40]
vmovdqu ymmword ptr [rdi+20],ymm0
mov rax,rdi
vmovaps xmm6,[rsp+0B0]
vmovaps xmm7,[rsp+0A0]
vmovaps xmm8,[rsp+90]
vmovaps xmm9,[rsp+80]
vmovaps xmm10,[rsp+70]
vmovaps xmm11,[rsp+60]
add rsp,0C8
pop rsi
pop rdi
ret
; Total bytes of code 603
```
System.Numerics.Tests.Perf_Matrix4x4.CreateBillboardBenchmark
Description of detection logic
```
IsImprovementBase: Marked as improvement because the compare was 5% less than the baseline, and the value was not too small.
IsImprovementCheck: Marked as improvement because the three check build points were 0.05 less than the baseline.
IsRegressionBase: Marked as not a regression because the compare was not 5% greater than the baseline, or the value was too small.
IsImprovementBase: Marked as improvement because the compare was 5% less than the baseline, and the value was not too small.
IsImprovementCheck: Marked as improvement because the three check build points were 0.05 less than the baseline.
IsImprovementWindowed: Marked as improvement because 20.532278443711558 < 21.753609662882017.
IsChangePoint: Marked as a change because one of 12/28/2022 5:13:50 PM, 1/3/2023 6:29:42 AM falls between 12/23/2022 9:40:21 PM and 1/3/2023 6:29:42 AM.
IsImprovementStdDev: Marked as improvement because 2.1484834712218572 (T) = (0 -21.392544886476852) / Math.Sqrt((0.49130840670645715 / (299)) + (7.969156026645255 / (14))) is greater than 1.9676211333067681 = MathNet.Numerics.Distributions.StudentT.InvCDF(0, 1, (299) + (14) - 2, .975) and 0.07052986970187088 = (23.01584977197186 - 21.392544886476852) / 23.01584977197186 is greater than 0.05.
IsChangeEdgeDetector: Marked not as a regression because Edge Detector said so.
```
### Baseline Jit Disasm
```assembly
; System.Numerics.Tests.Perf_Matrix4x4.CreateBillboardBenchmark()
push rsi
sub rsp,70
vzeroupper
mov rsi,rdx
vmovupd xmm0,[7FFE0B2C3360]
vmovupd xmm1,[7FFE0B2C3370]
vmovupd xmm2,[7FFE0B2C3380]
vxorps xmm3,xmm3,xmm3
vmovapd [rsp+60],xmm3
vmovapd [rsp+50],xmm0
vmovapd [rsp+40],xmm1
vmovapd [rsp+30],xmm2
mov rcx,rsi
lea rdx,[rsp+60]
lea r8,[rsp+50]
lea r9,[rsp+40]
lea rax,[rsp+30]
mov [rsp+20],rax
call qword ptr [7FFE0BA0B180]; System.Numerics.Matrix4x4.CreateBillboard(System.Numerics.Vector3, System.Numerics.Vector3, System.Numerics.Vector3, System.Numerics.Vector3)
mov rax,rsi
add rsp,70
pop rsi
ret
; Total bytes of code 106
; System.Numerics.Matrix4x4.CreateBillboard(System.Numerics.Vector3, System.Numerics.Vector3, System.Numerics.Vector3, System.Numerics.Vector3)
sub rsp,88
vzeroupper
vmovaps [rsp+70],xmm6
vmovaps [rsp+60],xmm7
vmovaps [rsp+50],xmm8
vmovaps [rsp+40],xmm9
vmovss xmm0,dword ptr [rdx+8]
vmovsd xmm1,qword ptr [rdx]
vshufps xmm1,xmm1,xmm0,44
vmovss xmm0,dword ptr [r8+8]
vmovsd xmm2,qword ptr [r8]
vshufps xmm2,xmm2,xmm0,44
vsubps xmm0,xmm1,xmm2
vdpps xmm2,xmm0,xmm0,71
vmovss xmm3,dword ptr [7FFE0B2C3600]
vucomiss xmm3,xmm2
jbe short M01_L00
mov rax,[rsp+0B0]
vmovss xmm0,dword ptr [rax+8]
vmovsd xmm2,qword ptr [rax]
vshufps xmm2,xmm2,xmm0,44
vxorps xmm0,xmm0,xmm0
vsubps xmm0,xmm0,xmm2
jmp short M01_L01
M01_L00:
vsqrtss xmm2,xmm2,xmm2
vmovss xmm3,dword ptr [7FFE0B2C3604]
vdivss xmm2,xmm3,xmm2
vbroadcastss xmm2,xmm2
vmulps xmm0,xmm0,xmm2
M01_L01:
vmovss xmm2,dword ptr [r9+8]
vmovsd xmm3,qword ptr [r9]
vshufps xmm3,xmm3,xmm2,44
vmovshdup xmm2,xmm3
vunpckhps xmm4,xmm0,xmm0
vmulss xmm5,xmm2,xmm4
vunpckhps xmm6,xmm3,xmm3
vmovshdup xmm7,xmm0
vmulss xmm8,xmm6,xmm7
vsubss xmm5,xmm5,xmm8
vmovaps xmm8,xmm0
vmulss xmm6,xmm6,xmm8
vmulss xmm9,xmm3,xmm4
vsubss xmm6,xmm6,xmm9
vmulss xmm3,xmm3,xmm7
vmulss xmm2,xmm2,xmm8
vsubss xmm2,xmm3,xmm2
vxorps xmm3,xmm3,xmm3
vmovss xmm3,xmm3,xmm2
vpslldq xmm3,xmm3,4
vmovss xmm3,xmm3,xmm6
vpslldq xmm3,xmm3,4
vmovss xmm3,xmm3,xmm5
vmovaps xmm2,xmm3
vdpps xmm3,xmm2,xmm2,71
vsqrtss xmm3,xmm3,xmm3
vbroadcastss xmm3,xmm3
vdivps xmm2,xmm2,xmm3
vpslldq xmm2,xmm2,4
vpsrldq xmm2,xmm2,4
vunpckhps xmm3,xmm2,xmm2
vmulss xmm5,xmm7,xmm3
vmovshdup xmm6,xmm2
vmulss xmm9,xmm4,xmm6
vsubss xmm5,xmm5,xmm9
vmovaps xmm9,xmm2
vmulss xmm4,xmm4,xmm9
vmulss xmm3,xmm8,xmm3
vsubss xmm3,xmm4,xmm3
vmulss xmm4,xmm8,xmm6
vmulss xmm6,xmm7,xmm9
vsubss xmm4,xmm4,xmm6
vxorps xmm6,xmm6,xmm6
vmovss xmm6,xmm6,xmm4
vpslldq xmm6,xmm6,4
vmovss xmm6,xmm6,xmm3
vpslldq xmm6,xmm6,4
vmovss xmm6,xmm6,xmm5
vmovaps xmm3,xmm6
vmovsd qword ptr [rsp],xmm2
vpshufd xmm4,xmm2,2
vmovss dword ptr [rsp+8],xmm4
xor eax,eax
mov [rsp+0C],eax
vmovsd qword ptr [rsp+10],xmm3
vpshufd xmm2,xmm3,2
vmovss dword ptr [rsp+18],xmm2
mov [rsp+1C],eax
vmovsd qword ptr [rsp+20],xmm0
vpshufd xmm2,xmm0,2
vmovss dword ptr [rsp+28],xmm2
mov [rsp+2C],eax
vmovsd qword ptr [rsp+30],xmm1
vpshufd xmm0,xmm1,2
vmovss dword ptr [rsp+38],xmm0
mov dword ptr [rsp+3C],3F800000
vmovdqu ymm0,ymmword ptr [rsp]
vmovdqu ymmword ptr [rcx],ymm0
vmovdqu ymm0,ymmword ptr [rsp+20]
vmovdqu ymmword ptr [rcx+20],ymm0
mov rax,rcx
vmovaps xmm6,[rsp+70]
vmovaps xmm7,[rsp+60]
vmovaps xmm8,[rsp+50]
vmovaps xmm9,[rsp+40]
add rsp,88
ret
; Total bytes of code 505
```
### Compare Jit Disasm
```assembly
; System.Numerics.Tests.Perf_Matrix4x4.CreateBillboardBenchmark()
push rsi
sub rsp,70
vzeroupper
mov rsi,rdx
vxorps xmm0,xmm0,xmm0
vmovapd [rsp+60],xmm0
vmovupd xmm0,[7FFE2B6F3320]
vmovapd [rsp+50],xmm0
vmovupd xmm0,[7FFE2B6F3330]
vmovapd [rsp+40],xmm0
vmovupd xmm0,[7FFE2B6F3320]
vmovapd [rsp+30],xmm0
mov rcx,rsi
lea rdx,[rsp+60]
lea r8,[rsp+50]
lea r9,[rsp+40]
lea rax,[rsp+30]
mov [rsp+20],rax
call qword ptr [7FFE2BE3B180]; System.Numerics.Matrix4x4.CreateBillboard(System.Numerics.Vector3, System.Numerics.Vector3, System.Numerics.Vector3, System.Numerics.Vector3)
mov rax,rsi
add rsp,70
pop rsi
ret
; Total bytes of code 106
; System.Numerics.Matrix4x4.CreateBillboard(System.Numerics.Vector3, System.Numerics.Vector3, System.Numerics.Vector3, System.Numerics.Vector3)
sub rsp,88
vzeroupper
vmovaps [rsp+70],xmm6
vmovaps [rsp+60],xmm7
vmovaps [rsp+50],xmm8
vmovaps [rsp+40],xmm9
vmovss xmm0,dword ptr [rdx+8]
vmovsd xmm1,qword ptr [rdx]
vshufps xmm1,xmm1,xmm0,44
vmovss xmm0,dword ptr [r8+8]
vmovsd xmm2,qword ptr [r8]
vshufps xmm2,xmm2,xmm0,44
vsubps xmm0,xmm1,xmm2
vdpps xmm2,xmm0,xmm0,71
vmovss xmm3,dword ptr [7FFE2B6F3588]
vucomiss xmm3,xmm2
jbe short M01_L00
mov rax,[rsp+0B0]
vmovss xmm0,dword ptr [rax+8]
vmovsd xmm2,qword ptr [rax]
vshufps xmm2,xmm2,xmm0,44
vxorps xmm0,xmm0,xmm0
vsubps xmm0,xmm0,xmm2
jmp short M01_L01
M01_L00:
vsqrtss xmm2,xmm2,xmm2
vmovss xmm3,dword ptr [7FFE2B6F358C]
vdivss xmm2,xmm3,xmm2
vbroadcastss xmm2,xmm2
vmulps xmm0,xmm0,xmm2
M01_L01:
vmovss xmm2,dword ptr [r9+8]
vmovsd xmm3,qword ptr [r9]
vshufps xmm3,xmm3,xmm2,44
vmovshdup xmm2,xmm3
vunpckhps xmm4,xmm0,xmm0
vmulss xmm5,xmm2,xmm4
vunpckhps xmm6,xmm3,xmm3
vmovshdup xmm7,xmm0
vmulss xmm8,xmm6,xmm7
vsubss xmm5,xmm5,xmm8
vmovaps xmm8,xmm0
vmulss xmm6,xmm6,xmm8
vmulss xmm9,xmm3,xmm4
vsubss xmm6,xmm6,xmm9
vinsertps xmm5,xmm5,xmm6,10
vmulss xmm3,xmm3,xmm7
vmulss xmm2,xmm2,xmm8
vsubss xmm2,xmm3,xmm2
vinsertps xmm2,xmm5,xmm2,28
vdpps xmm3,xmm2,xmm2,71
vsqrtss xmm3,xmm3,xmm3
vbroadcastss xmm3,xmm3
vdivps xmm2,xmm2,xmm3
vpslldq xmm2,xmm2,4
vpsrldq xmm2,xmm2,4
vunpckhps xmm3,xmm2,xmm2
vmulss xmm5,xmm7,xmm3
vmovshdup xmm6,xmm2
vmulss xmm9,xmm4,xmm6
vsubss xmm5,xmm5,xmm9
vmovaps xmm9,xmm2
vmulss xmm4,xmm4,xmm9
vmulss xmm3,xmm8,xmm3
vsubss xmm3,xmm4,xmm3
vinsertps xmm3,xmm5,xmm3,10
vmulss xmm4,xmm8,xmm6
vmulss xmm5,xmm7,xmm9
vsubss xmm4,xmm4,xmm5
vinsertps xmm3,xmm3,xmm4,28
vmovsd qword ptr [rsp],xmm2
vpshufd xmm4,xmm2,2
vmovss dword ptr [rsp+8],xmm4
xor eax,eax
mov [rsp+0C],eax
vmovsd qword ptr [rsp+10],xmm3
vpshufd xmm2,xmm3,2
vmovss dword ptr [rsp+18],xmm2
mov [rsp+1C],eax
vmovsd qword ptr [rsp+20],xmm0
vpshufd xmm2,xmm0,2
vmovss dword ptr [rsp+28],xmm2
mov [rsp+2C],eax
vmovsd qword ptr [rsp+30],xmm1
vpshufd xmm0,xmm1,2
vmovss dword ptr [rsp+38],xmm0
mov dword ptr [rsp+3C],3F800000
vmovdqu ymm0,ymmword ptr [rsp]
vmovdqu ymmword ptr [rcx],ymm0
vmovdqu ymm0,ymmword ptr [rsp+20]
vmovdqu ymmword ptr [rcx+20],ymm0
mov rax,rcx
vmovaps xmm6,[rsp+70]
vmovaps xmm7,[rsp+60]
vmovaps xmm8,[rsp+50]
vmovaps xmm9,[rsp+40]
add rsp,88
ret
; Total bytes of code 469
```
System.Numerics.Tests.Perf_Matrix4x4.CreateWorldBenchmark
Description of detection logic
```
IsImprovementBase: Marked as improvement because the compare was 5% less than the baseline, and the value was not too small.
IsImprovementCheck: Marked as improvement because the three check build points were 0.05 less than the baseline.
IsRegressionBase: Marked as not a regression because the compare was not 5% greater than the baseline, or the value was too small.
IsImprovementBase: Marked as improvement because the compare was 5% less than the baseline, and the value was not too small.
IsImprovementCheck: Marked as improvement because the three check build points were 0.05 less than the baseline.
IsImprovementWindowed: Marked as improvement because 20.347633209236008 < 22.398269254881626.
IsChangePoint: Marked as a change because one of 12/28/2022 5:13:50 PM, 1/3/2023 6:29:42 AM falls between 12/23/2022 9:40:21 PM and 1/3/2023 6:29:42 AM.
IsImprovementStdDev: Marked as improvement because 24.12215320316142 (T) = (0 -20.914799837469435) / Math.Sqrt((0.4263786384621453 / (299)) + (0.15510467445082196 / (14))) is greater than 1.9676211333067681 = MathNet.Numerics.Distributions.StudentT.InvCDF(0, 1, (299) + (14) - 2, .975) and 0.11424016351104417 = (23.612269348735833 - 20.914799837469435) / 23.612269348735833 is greater than 0.05.
IsChangeEdgeDetector: Marked not as a regression because Edge Detector said so.
```
### Baseline Jit Disasm
```assembly
; System.Numerics.Tests.Perf_Matrix4x4.CreateWorldBenchmark()
push rsi
sub rsp,50
vzeroupper
mov rsi,rdx
vmovupd xmm0,[7FFAA94532E0]
vmovupd xmm1,[7FFAA94532F0]
vxorps xmm2,xmm2,xmm2
vmovapd [rsp+40],xmm2
vmovapd [rsp+30],xmm0
vmovapd [rsp+20],xmm1
mov rcx,rsi
lea rdx,[rsp+40]
lea r8,[rsp+30]
lea r9,[rsp+20]
call qword ptr [7FFAA9B9B408]; System.Numerics.Matrix4x4.CreateWorld(System.Numerics.Vector3, System.Numerics.Vector3, System.Numerics.Vector3)
mov rax,rsi
add rsp,50
pop rsi
ret
; Total bytes of code 82
; System.Numerics.Matrix4x4.CreateWorld(System.Numerics.Vector3, System.Numerics.Vector3, System.Numerics.Vector3)
push rdi
push rsi
sub rsp,98
vzeroupper
vmovaps [rsp+80],xmm6
vmovaps [rsp+70],xmm7
vmovaps [rsp+60],xmm8
mov rdi,rcx
mov rsi,rdx
vmovss xmm0,dword ptr [r8+8]
vmovsd xmm1,qword ptr [r8]
vshufps xmm1,xmm1,xmm0,44
vxorps xmm0,xmm0,xmm0
vsubps xmm0,xmm0,xmm1
vdpps xmm1,xmm0,xmm0,71
vsqrtss xmm1,xmm1,xmm1
vbroadcastss xmm1,xmm1
vdivps xmm0,xmm0,xmm1
vpslldq xmm0,xmm0,4
vpsrldq xmm6,xmm0,4
vmovss xmm0,dword ptr [r9+8]
vmovsd xmm1,qword ptr [r9]
vshufps xmm1,xmm1,xmm0,44
vmovshdup xmm0,xmm1
vunpckhps xmm2,xmm6,xmm6
vmulss xmm3,xmm0,xmm2
vunpckhps xmm4,xmm1,xmm1
vmovshdup xmm5,xmm6
vmulss xmm7,xmm4,xmm5
vsubss xmm3,xmm3,xmm7
vmovaps xmm7,xmm6
vmulss xmm4,xmm4,xmm7
vmulss xmm8,xmm1,xmm2
vsubss xmm4,xmm4,xmm8
vmulss xmm1,xmm1,xmm5
vmulss xmm0,xmm0,xmm7
vsubss xmm0,xmm1,xmm0
vxorps xmm1,xmm1,xmm1
vmovss xmm1,xmm1,xmm0
vpslldq xmm1,xmm1,4
vmovss xmm1,xmm1,xmm4
vpslldq xmm1,xmm1,4
vmovss xmm1,xmm1,xmm3
vmovaps xmm0,xmm1
vdpps xmm1,xmm0,xmm0,71
vsqrtss xmm1,xmm1,xmm1
vbroadcastss xmm1,xmm1
vdivps xmm0,xmm0,xmm1
vpslldq xmm0,xmm0,4
vpsrldq xmm8,xmm0,4
vunpckhps xmm0,xmm8,xmm8
vmulss xmm1,xmm5,xmm0
vmovshdup xmm3,xmm8
vmulss xmm4,xmm2,xmm3
vsubss xmm1,xmm1,xmm4
vmovaps xmm4,xmm8
vmulss xmm2,xmm2,xmm4
vmulss xmm0,xmm7,xmm0
vsubss xmm0,xmm2,xmm0
vmulss xmm2,xmm7,xmm3
vmulss xmm3,xmm5,xmm4
vsubss xmm2,xmm2,xmm3
vxorps xmm3,xmm3,xmm3
vmovss xmm3,xmm3,xmm2
vpslldq xmm3,xmm3,4
vmovss xmm3,xmm3,xmm0
vpslldq xmm3,xmm3,4
vmovss xmm3,xmm3,xmm1
vmovaps xmm7,xmm3
lea rcx,[rsp+20]
call qword ptr [7FFAA9B9B030]; System.Numerics.Matrix4x4.get_Identity()
vmovsd qword ptr [rsp+20],xmm8
vpshufd xmm0,xmm8,2
vmovss dword ptr [rsp+28],xmm0
vmovsd qword ptr [rsp+30],xmm7
vpshufd xmm0,xmm7,2
vmovss dword ptr [rsp+38],xmm0
vmovsd qword ptr [rsp+40],xmm6
vpshufd xmm0,xmm6,2
vmovss dword ptr [rsp+48],xmm0
vmovss xmm0,dword ptr [rsi+8]
vmovsd xmm1,qword ptr [rsi]
vshufps xmm1,xmm1,xmm0,44
vmovsd qword ptr [rsp+50],xmm1
vpshufd xmm0,xmm1,2
vmovss dword ptr [rsp+58],xmm0
vmovdqu ymm0,ymmword ptr [rsp+20]
vmovdqu ymmword ptr [rdi],ymm0
vmovdqu ymm0,ymmword ptr [rsp+40]
vmovdqu ymmword ptr [rdi+20],ymm0
mov rax,rdi
vmovaps xmm6,[rsp+80]
vmovaps xmm7,[rsp+70]
vmovaps xmm8,[rsp+60]
add rsp,98
pop rsi
pop rdi
ret
; Total bytes of code 454
```
### Compare Jit Disasm
```assembly
; System.Numerics.Tests.Perf_Matrix4x4.CreateWorldBenchmark()
push rsi
sub rsp,50
vzeroupper
mov rsi,rdx
vxorps xmm0,xmm0,xmm0
vmovapd [rsp+40],xmm0
vmovupd xmm0,[7FF93C0332A0]
vmovapd [rsp+30],xmm0
vmovupd xmm0,[7FF93C0332B0]
vmovapd [rsp+20],xmm0
mov rcx,rsi
lea rdx,[rsp+40]
lea r8,[rsp+30]
lea r9,[rsp+20]
call qword ptr [7FF93C77B408]; System.Numerics.Matrix4x4.CreateWorld(System.Numerics.Vector3, System.Numerics.Vector3, System.Numerics.Vector3)
mov rax,rsi
add rsp,50
pop rsi
ret
; Total bytes of code 82
; System.Numerics.Matrix4x4.CreateWorld(System.Numerics.Vector3, System.Numerics.Vector3, System.Numerics.Vector3)
push rdi
push rsi
sub rsp,98
vzeroupper
vmovaps [rsp+80],xmm6
vmovaps [rsp+70],xmm7
vmovaps [rsp+60],xmm8
mov rdi,rcx
mov rsi,rdx
vmovss xmm0,dword ptr [r8+8]
vmovsd xmm1,qword ptr [r8]
vshufps xmm1,xmm1,xmm0,44
vxorps xmm0,xmm0,xmm0
vsubps xmm0,xmm0,xmm1
vdpps xmm1,xmm0,xmm0,71
vsqrtss xmm1,xmm1,xmm1
vbroadcastss xmm1,xmm1
vdivps xmm0,xmm0,xmm1
vpslldq xmm0,xmm0,4
vpsrldq xmm6,xmm0,4
vmovss xmm0,dword ptr [r9+8]
vmovsd xmm1,qword ptr [r9]
vshufps xmm1,xmm1,xmm0,44
vmovshdup xmm0,xmm1
vunpckhps xmm2,xmm6,xmm6
vmulss xmm3,xmm0,xmm2
vunpckhps xmm4,xmm1,xmm1
vmovshdup xmm5,xmm6
vmulss xmm7,xmm4,xmm5
vsubss xmm3,xmm3,xmm7
vmovaps xmm7,xmm6
vmulss xmm4,xmm4,xmm7
vmulss xmm8,xmm1,xmm2
vsubss xmm4,xmm4,xmm8
vinsertps xmm3,xmm3,xmm4,10
vmulss xmm1,xmm1,xmm5
vmulss xmm0,xmm0,xmm7
vsubss xmm0,xmm1,xmm0
vinsertps xmm0,xmm3,xmm0,28
vdpps xmm1,xmm0,xmm0,71
vsqrtss xmm1,xmm1,xmm1
vbroadcastss xmm1,xmm1
vdivps xmm0,xmm0,xmm1
vpslldq xmm0,xmm0,4
vpsrldq xmm8,xmm0,4
vunpckhps xmm0,xmm8,xmm8
vmulss xmm1,xmm5,xmm0
vmovshdup xmm3,xmm8
vmulss xmm4,xmm2,xmm3
vsubss xmm1,xmm1,xmm4
vmovaps xmm4,xmm8
vmulss xmm2,xmm2,xmm4
vmulss xmm0,xmm7,xmm0
vsubss xmm0,xmm2,xmm0
vinsertps xmm0,xmm1,xmm0,10
vmulss xmm1,xmm7,xmm3
vmulss xmm2,xmm5,xmm4
vsubss xmm1,xmm1,xmm2
vinsertps xmm7,xmm0,xmm1,28
lea rcx,[rsp+20]
call qword ptr [7FF93C77B030]; System.Numerics.Matrix4x4.get_Identity()
vmovsd qword ptr [rsp+20],xmm8
vpshufd xmm0,xmm8,2
vmovss dword ptr [rsp+28],xmm0
vmovsd qword ptr [rsp+30],xmm7
vpshufd xmm0,xmm7,2
vmovss dword ptr [rsp+38],xmm0
vmovsd qword ptr [rsp+40],xmm6
vpshufd xmm0,xmm6,2
vmovss dword ptr [rsp+48],xmm0
vmovss xmm0,dword ptr [rsi+8]
vmovsd xmm1,qword ptr [rsi]
vshufps xmm1,xmm1,xmm0,44
vmovsd qword ptr [rsp+50],xmm1
vpshufd xmm0,xmm1,2
vmovss dword ptr [rsp+58],xmm0
vmovdqu ymm0,ymmword ptr [rsp+20]
vmovdqu ymmword ptr [rdi],ymm0
vmovdqu ymm0,ymmword ptr [rsp+40]
vmovdqu ymmword ptr [rdi+20],ymm0
mov rax,rdi
vmovaps xmm6,[rsp+80]
vmovaps xmm7,[rsp+70]
vmovaps xmm8,[rsp+60]
add rsp,98
pop rsi
pop rdi
ret
; Total bytes of code 418
```
System.Numerics.Tests.Perf_Matrix4x4.CreateConstrainedBillboardBenchmark
Description of detection logic
```
IsImprovementBase: Marked as improvement because the compare was 5% less than the baseline, and the value was not too small.
IsImprovementCheck: Marked as improvement because the three check build points were 0.05 less than the baseline.
IsRegressionBase: Marked as not a regression because the compare was not 5% greater than the baseline, or the value was too small.
IsImprovementBase: Marked as improvement because the compare was 5% less than the baseline, and the value was not too small.
IsImprovementCheck: Marked as improvement because the three check build points were 0.05 less than the baseline.
IsImprovementWindowed: Marked as improvement because 29.194004052835457 < 29.32247219066154.
IsChangePoint: Marked as a change because one of 10/31/2022 10:23:25 PM, 12/28/2022 5:13:50 PM, 1/3/2023 6:29:42 AM falls between 12/23/2022 9:40:21 PM and 1/3/2023 6:29:42 AM.
IsImprovementStdDev: Marked as improvement because 14.512061347326483 (T) = (0 -29.236423889734663) / Math.Sqrt((3.323298949304289 / (299)) + (0.06780201896632884 / (14))) is greater than 1.9676211333067681 = MathNet.Numerics.Distributions.StudentT.InvCDF(0, 1, (299) + (14) - 2, .975) and 0.05900355398482968 = (31.069643263310823 - 29.236423889734663) / 31.069643263310823 is greater than 0.05.
IsChangeEdgeDetector: Marked not as a regression because Edge Detector said so.
```
### Baseline Jit Disasm
```assembly
; System.Numerics.Tests.Perf_Matrix4x4.CreateConstrainedBillboardBenchmark()
push rsi
sub rsp,80
vzeroupper
mov rsi,rdx
vmovupd xmm0,[7FFD3CDD33A0]
vmovupd xmm1,[7FFD3CDD33B0]
vmovupd xmm2,[7FFD3CDD33C0]
vmovupd xmm3,[7FFD3CDD33D0]
vxorps xmm4,xmm4,xmm4
vmovapd [rsp+70],xmm4
vmovapd [rsp+60],xmm0
vmovapd [rsp+50],xmm1
vmovapd [rsp+40],xmm2
vmovapd [rsp+30],xmm3
mov rcx,rsi
lea rdx,[rsp+70]
lea r8,[rsp+60]
lea r9,[rsp+50]
lea rax,[rsp+40]
mov [rsp+20],rax
lea rax,[rsp+30]
mov [rsp+28],rax
call qword ptr [7FFD3D51B198]; System.Numerics.Matrix4x4.CreateConstrainedBillboard(System.Numerics.Vector3, System.Numerics.Vector3, System.Numerics.Vector3, System.Numerics.Vector3, System.Numerics.Vector3)
mov rax,rsi
add rsp,80
pop rsi
ret
; Total bytes of code 136
; System.Numerics.Matrix4x4.CreateConstrainedBillboard(System.Numerics.Vector3, System.Numerics.Vector3, System.Numerics.Vector3, System.Numerics.Vector3, System.Numerics.Vector3)
sub rsp,88
vzeroupper
vmovaps [rsp+70],xmm6
vmovaps [rsp+60],xmm7
vmovaps [rsp+50],xmm8
vmovaps [rsp+40],xmm9
vmovss xmm0,dword ptr [rdx+8]
vmovsd xmm1,qword ptr [rdx]
vshufps xmm1,xmm1,xmm0,44
vmovss xmm0,dword ptr [r8+8]
vmovsd xmm2,qword ptr [r8]
vshufps xmm2,xmm2,xmm0,44
vsubps xmm0,xmm1,xmm2
vdpps xmm2,xmm0,xmm0,71
vmovss xmm3,dword ptr [7FFD3CDD3840]
vucomiss xmm3,xmm2
jbe short M01_L00
mov rax,[rsp+0B0]
vmovss xmm0,dword ptr [rax+8]
vmovsd xmm2,qword ptr [rax]
vshufps xmm2,xmm2,xmm0,44
vxorps xmm0,xmm0,xmm0
vsubps xmm0,xmm0,xmm2
jmp short M01_L01
M01_L00:
vsqrtss xmm2,xmm2,xmm2
vmovss xmm3,dword ptr [7FFD3CDD3830]
vdivss xmm2,xmm3,xmm2
vbroadcastss xmm2,xmm2
vmulps xmm0,xmm0,xmm2
M01_L01:
vmovss xmm2,dword ptr [r9+8]
vmovsd xmm3,qword ptr [r9]
vshufps xmm3,xmm3,xmm2,44
vmovaps xmm2,xmm3
vdpps xmm4,xmm3,xmm0,71
vandps xmm4,xmm4,[7FFD3CDD3850]
vmovss xmm5,dword ptr [7FFD3CDD3860]
vucomiss xmm4,xmm5
jbe near ptr M01_L04
mov rax,[rsp+0B8]
vmovss xmm0,dword ptr [rax+8]
vmovsd xmm4,qword ptr [rax]
vshufps xmm4,xmm4,xmm0,44
vdpps xmm0,xmm3,xmm4,71
vandps xmm0,xmm0,[7FFD3CDD3850]
vucomiss xmm0,xmm5
jbe short M01_L03
vunpckhps xmm0,xmm3,xmm3
vandps xmm0,xmm0,[7FFD3CDD3850]
vucomiss xmm0,xmm5
ja short M01_L02
vmovupd xmm4,[7FFD3CDD3820]
jmp short M01_L03
M01_L02:
vmovupd xmm4,[7FFD3CDD3830]
M01_L03:
vmovshdup xmm0,xmm3
vunpckhps xmm5,xmm4,xmm4
vmulss xmm6,xmm0,xmm5
vunpckhps xmm7,xmm3,xmm3
vmovshdup xmm8,xmm4
vmulss xmm9,xmm7,xmm8
vsubss xmm6,xmm6,xmm9
vmulss xmm9,xmm7,xmm4
vmulss xmm5,xmm3,xmm5
vsubss xmm5,xmm9,xmm5
vmulss xmm8,xmm3,xmm8
vmulss xmm4,xmm0,xmm4
vsubss xmm4,xmm8,xmm4
vxorps xmm8,xmm8,xmm8
vmovss xmm8,xmm8,xmm4
vpslldq xmm8,xmm8,4
vmovss xmm8,xmm8,xmm5
vpslldq xmm8,xmm8,4
vmovss xmm8,xmm8,xmm6
vmovaps xmm4,xmm8
vdpps xmm5,xmm4,xmm4,71
vsqrtss xmm5,xmm5,xmm5
vbroadcastss xmm5,xmm5
vdivps xmm4,xmm4,xmm5
vpslldq xmm4,xmm4,4
vpsrldq xmm4,xmm4,4
vmovshdup xmm5,xmm4
vmulss xmm6,xmm5,xmm7
vunpckhps xmm8,xmm4,xmm4
vmulss xmm9,xmm8,xmm0
vsubss xmm6,xmm6,xmm9
vmulss xmm8,xmm8,xmm3
vmovaps xmm9,xmm4
vmulss xmm7,xmm9,xmm7
vsubss xmm7,xmm8,xmm7
vmulss xmm0,xmm9,xmm0
vmulss xmm3,xmm5,xmm3
vsubss xmm0,xmm0,xmm3
vxorps xmm3,xmm3,xmm3
vmovss xmm3,xmm3,xmm0
vpslldq xmm3,xmm3,4
vmovss xmm3,xmm3,xmm7
vpslldq xmm3,xmm3,4
vmovss xmm3,xmm3,xmm6
vmovaps xmm0,xmm3
vdpps xmm3,xmm0,xmm0,71
vsqrtss xmm3,xmm3,xmm3
vbroadcastss xmm3,xmm3
vdivps xmm0,xmm0,xmm3
vpslldq xmm0,xmm0,4
vpsrldq xmm0,xmm0,4
jmp near ptr M01_L05
M01_L04:
vmovshdup xmm4,xmm3
vunpckhps xmm5,xmm0,xmm0
vmulss xmm6,xmm4,xmm5
vunpckhps xmm7,xmm3,xmm3
vmovshdup xmm8,xmm0
vmulss xmm9,xmm7,xmm8
vsubss xmm6,xmm6,xmm9
vmulss xmm9,xmm7,xmm0
vmulss xmm5,xmm3,xmm5
vsubss xmm5,xmm9,xmm5
vmulss xmm8,xmm3,xmm8
vmulss xmm0,xmm4,xmm0
vsubss xmm0,xmm8,xmm0
vxorps xmm8,xmm8,xmm8
vmovss xmm8,xmm8,xmm0
vpslldq xmm8,xmm8,4
vmovss xmm8,xmm8,xmm5
vpslldq xmm8,xmm8,4
vmovss xmm8,xmm8,xmm6
vmovaps xmm0,xmm8
vdpps xmm5,xmm0,xmm0,71
vsqrtss xmm5,xmm5,xmm5
vbroadcastss xmm5,xmm5
vdivps xmm0,xmm0,xmm5
vpslldq xmm0,xmm0,4
vpsrldq xmm0,xmm0,4
vmovshdup xmm5,xmm0
vmulss xmm6,xmm5,xmm7
vunpckhps xmm8,xmm0,xmm0
vmulss xmm9,xmm8,xmm4
vsubss xmm6,xmm6,xmm9
vmulss xmm8,xmm8,xmm3
vmovaps xmm9,xmm0
vmulss xmm7,xmm9,xmm7
vsubss xmm7,xmm8,xmm7
vmulss xmm4,xmm9,xmm4
vmulss xmm3,xmm5,xmm3
vsubss xmm3,xmm4,xmm3
vxorps xmm4,xmm4,xmm4
vmovss xmm4,xmm4,xmm3
vpslldq xmm4,xmm4,4
vmovss xmm4,xmm4,xmm7
vpslldq xmm4,xmm4,4
vmovss xmm4,xmm4,xmm6
vmovaps xmm3,xmm4
vdpps xmm4,xmm3,xmm3,71
vsqrtss xmm4,xmm4,xmm4
vbroadcastss xmm4,xmm4
vdivps xmm3,xmm3,xmm4
vpslldq xmm3,xmm3,4
vpsrldq xmm3,xmm3,4
vmovaps xmm4,xmm0
vmovaps xmm0,xmm3
M01_L05:
vmovsd qword ptr [rsp],xmm4
vpshufd xmm3,xmm4,2
vmovss dword ptr [rsp+8],xmm3
xor eax,eax
mov [rsp+0C],eax
vmovsd qword ptr [rsp+10],xmm2
vpshufd xmm3,xmm2,2
vmovss dword ptr [rsp+18],xmm3
mov [rsp+1C],eax
vmovsd qword ptr [rsp+20],xmm0
vpshufd xmm2,xmm0,2
vmovss dword ptr [rsp+28],xmm2
mov [rsp+2C],eax
vmovsd qword ptr [rsp+30],xmm1
vpshufd xmm0,xmm1,2
vmovss dword ptr [rsp+38],xmm0
mov dword ptr [rsp+3C],3F800000
vmovdqu ymm0,ymmword ptr [rsp]
vmovdqu ymmword ptr [rcx],ymm0
vmovdqu ymm0,ymmword ptr [rsp+20]
vmovdqu ymmword ptr [rcx+20],ymm0
mov rax,rcx
vmovaps xmm6,[rsp+70]
vmovaps xmm7,[rsp+60]
vmovaps xmm8,[rsp+50]
vmovaps xmm9,[rsp+40]
add rsp,88
ret
; Total bytes of code 884
```
### Compare Jit Disasm
```assembly
; System.Numerics.Tests.Perf_Matrix4x4.CreateConstrainedBillboardBenchmark()
; JIT listing (compare build) of the benchmark wrapper: it materialises five
; 16-byte vector temporaries on the stack and calls
; Matrix4x4.CreateConstrainedBillboard with a pointer to each of them.
; Numbers in this listing read as hexadecimal (the 16-byte aligned vmovapd
; stores to [rsp+70], [rsp+50], ... only line up that way).
; rsi is saved/restored here because it is used to hold a value across the call.
push rsi
sub rsp,80
vzeroupper
; Keep the incoming rdx in rsi; it is handed to the callee as rcx below and
; returned to our caller in rax at the end.
mov rsi,rdx
; Temporary #1 at [rsp+70]: all zero bits.
vxorps xmm0,xmm0,xmm0
vmovapd [rsp+70],xmm0
; Temporaries #2..#5 are copied from constant data whose values are not shown
; in the listing. Note that #2 and #5 are loaded from the same address.
vmovupd xmm0,[7FFA676C3360]
vmovapd [rsp+60],xmm0
vmovupd xmm0,[7FFA676C3370]
vmovapd [rsp+50],xmm0
vmovupd xmm0,[7FFA676C3380]
vmovapd [rsp+40],xmm0
vmovupd xmm0,[7FFA676C3360]
vmovapd [rsp+30],xmm0
; Argument setup: rcx = saved pointer, rdx/r8/r9 = addresses of temporaries
; #1..#3, and the addresses of #4 and #5 go to the stack slots [rsp+20] and
; [rsp+28], immediately above the 32 bytes at [rsp+0..1F].
mov rcx,rsi
lea rdx,[rsp+70]
lea r8,[rsp+60]
lea r9,[rsp+50]
lea rax,[rsp+40]
mov [rsp+20],rax
lea rax,[rsp+30]
mov [rsp+28],rax
call qword ptr [7FFA67E0B198]; System.Numerics.Matrix4x4.CreateConstrainedBillboard(System.Numerics.Vector3, System.Numerics.Vector3, System.Numerics.Vector3, System.Numerics.Vector3, System.Numerics.Vector3)
; Return the pointer that was received in rdx on entry.
mov rax,rsi
add rsp,80
pop rsi
ret
; Total bytes of code 136
; System.Numerics.Matrix4x4.CreateConstrainedBillboard(System.Numerics.Vector3, System.Numerics.Vector3, System.Numerics.Vector3, System.Numerics.Vector3, System.Numerics.Vector3)
; JIT listing (compare build). Numbers read as hexadecimal (e.g. 0B0, 0C).
; Inputs as used by this code:
;   rcx        - destination pointer; 64 bytes are written to it and it is
;                returned in rax
;   rdx,r8,r9  - pointers to the 1st, 2nd and 3rd three-float vectors
;   [rsp+0B0], [rsp+0B8] (after the prologue) - pointers to the 4th and 5th
;                vectors, i.e. the two stack-passed arguments
; Each vector is loaded as (x, y, z, 0): vmovss fetches z (upper lanes zeroed),
; vmovsd fetches x,y, and vshufps imm 44 packs them into one register.
; vdpps imm 71 multiplies lanes 0..2, sums them and writes the sum to lane 0.
; Constants loaded from absolute addresses are not visible in the listing, so
; their values are only described as "constant at <address>" below.
sub rsp,88
vzeroupper
; Preserve xmm6-xmm9, which this function overwrites and restores before ret.
vmovaps [rsp+70],xmm6
vmovaps [rsp+60],xmm7
vmovaps [rsp+50],xmm8
vmovaps [rsp+40],xmm9
; xmm1 = vector #1 (kept until the final store).
vmovss xmm0,dword ptr [rdx+8]
vmovsd xmm1,qword ptr [rdx]
vshufps xmm1,xmm1,xmm0,44
; xmm2 = vector #2.
vmovss xmm0,dword ptr [r8+8]
vmovsd xmm2,qword ptr [r8]
vshufps xmm2,xmm2,xmm0,44
; xmm0 = vector #1 - vector #2; xmm2 = its squared length (lane 0).
vsubps xmm0,xmm1,xmm2
vdpps xmm2,xmm0,xmm0,71
; Branch to M01_L00 unless constant@...3780 is strictly greater than the
; squared length (jbe is also taken for an unordered/NaN compare).
vmovss xmm3,dword ptr [7FFA676C3780]
vucomiss xmm3,xmm2
jbe short M01_L00
; Squared length below the threshold: use the negated vector #4 instead
; (xmm0 = 0 - vector #4).
mov rax,[rsp+0B0]
vmovss xmm0,dword ptr [rax+8]
vmovsd xmm2,qword ptr [rax]
vshufps xmm2,xmm2,xmm0,44
vxorps xmm0,xmm0,xmm0
vsubps xmm0,xmm0,xmm2
jmp short M01_L01
M01_L00:
; Scale the difference by constant@...3784 / sqrt(squared length), broadcast
; to all lanes (presumably 1.0f, i.e. a normalisation - value not shown).
vsqrtss xmm2,xmm2,xmm2
vmovss xmm3,dword ptr [7FFA676C3784]
vdivss xmm2,xmm3,xmm2
vbroadcastss xmm2,xmm2
vmulps xmm0,xmm0,xmm2
M01_L01:
; xmm3 = vector #3; xmm2 keeps a copy for the final store.
vmovss xmm2,dword ptr [r9+8]
vmovsd xmm3,qword ptr [r9]
vshufps xmm3,xmm3,xmm2,44
vmovaps xmm2,xmm3
; xmm4 = dot(vector #3, xmm0) ANDed with the mask at ...3790 (presumably an
; absolute-value mask - TODO confirm); xmm5 = threshold constant@...37A0.
vdpps xmm4,xmm3,xmm0,71
vandps xmm4,xmm4,[7FFA676C3790]
vmovss xmm5,dword ptr [7FFA676C37A0]
vucomiss xmm4,xmm5
; Masked dot product <= threshold (or unordered): take the M01_L04 path,
; which builds the basis from xmm0 directly.
jbe near ptr M01_L04
; Otherwise try vector #5 as the second cross-product operand (xmm4).
mov rax,[rsp+0B8]
vmovss xmm0,dword ptr [rax+8]
vmovsd xmm4,qword ptr [rax]
vshufps xmm4,xmm4,xmm0,44
vdpps xmm0,xmm3,xmm4,71
vandps xmm0,xmm0,[7FFA676C3790]
vucomiss xmm0,xmm5
; Vector #5 is acceptable when its masked dot with vector #3 is <= threshold.
jbe short M01_L03
; Otherwise pick one of two constant vectors depending on whether the masked
; z component of vector #3 (lane 0 after vunpckhps) exceeds the threshold.
vunpckhps xmm0,xmm3,xmm3
vandps xmm0,xmm0,[7FFA676C3790]
vucomiss xmm0,xmm5
ja short M01_L02
vmovupd xmm4,[7FFA676C37B0]
jmp short M01_L03
M01_L02:
vmovupd xmm4,[7FFA676C37C0]
M01_L03:
; First cross product, a = vector #3 (xmm3), b = xmm4, computed lane by lane:
; xmm0 = a.y, xmm7 = a.z, xmm8 = b.y, xmm5 = b.z (each in lane 0).
vmovshdup xmm0,xmm3
vunpckhps xmm5,xmm4,xmm4
vmulss xmm6,xmm0,xmm5
vunpckhps xmm7,xmm3,xmm3
vmovshdup xmm8,xmm4
vmulss xmm9,xmm7,xmm8
; xmm6 = a.y*b.z - a.z*b.y  (x component)
vsubss xmm6,xmm6,xmm9
vmulss xmm9,xmm7,xmm4
vmulss xmm5,xmm3,xmm5
; xmm5 = a.z*b.x - a.x*b.z  (y component)
vsubss xmm5,xmm9,xmm5
; vinsertps imm 10: copy lane 0 of the source into lane 1 -> (x, y, ..).
vinsertps xmm5,xmm6,xmm5,10
vmulss xmm6,xmm3,xmm8
vmulss xmm4,xmm0,xmm4
; xmm4 = a.x*b.y - a.y*b.x  (z component)
vsubss xmm4,xmm6,xmm4
; vinsertps imm 28: copy lane 0 into lane 2 and zero lane 3 -> (x, y, z, 0).
vinsertps xmm4,xmm5,xmm4,28
; Divide by the vector's length (sqrt of the 3-lane dot, broadcast).
vdpps xmm5,xmm4,xmm4,71
vsqrtss xmm5,xmm5,xmm5
vbroadcastss xmm5,xmm5
vdivps xmm4,xmm4,xmm5
; Byte-shift left then right by 4: clears lane 3, leaving lanes 0..2 in place.
vpslldq xmm4,xmm4,4
vpsrldq xmm4,xmm4,4
; Second cross product, a = xmm4 (result above), b = vector #3
; (xmm3 = b, xmm0 = b.y, xmm7 = b.z from above).
vmovshdup xmm5,xmm4
vmulss xmm6,xmm5,xmm7
vunpckhps xmm8,xmm4,xmm4
vmulss xmm9,xmm8,xmm0
; xmm6 = a.y*b.z - a.z*b.y  (x component)
vsubss xmm6,xmm6,xmm9
vmulss xmm8,xmm8,xmm3
vmovaps xmm9,xmm4
vmulss xmm7,xmm9,xmm7
; xmm7 = a.z*b.x - a.x*b.z  (y component)
vsubss xmm7,xmm8,xmm7
vinsertps xmm6,xmm6,xmm7,10
vmulss xmm0,xmm9,xmm0
vmulss xmm3,xmm5,xmm3
; xmm0 = a.x*b.y - a.y*b.x  (z component)
vsubss xmm0,xmm0,xmm3
vinsertps xmm0,xmm6,xmm0,28
; Divide by length and clear lane 3, as above.
vdpps xmm3,xmm0,xmm0,71
vsqrtss xmm3,xmm3,xmm3
vbroadcastss xmm3,xmm3
vdivps xmm0,xmm0,xmm3
vpslldq xmm0,xmm0,4
vpsrldq xmm0,xmm0,4
; Here: xmm4 = first result, xmm2 = vector #3, xmm0 = second result.
jmp near ptr M01_L05
M01_L04:
; Same two-step construction, but the first cross product uses xmm0 (the
; vector computed before M01_L01) as b: a = vector #3 (xmm3), b = xmm0.
; xmm4 = a.y, xmm7 = a.z, xmm8 = b.y, xmm5 = b.z (each in lane 0).
vmovshdup xmm4,xmm3
vunpckhps xmm5,xmm0,xmm0
vmulss xmm6,xmm4,xmm5
vunpckhps xmm7,xmm3,xmm3
vmovshdup xmm8,xmm0
vmulss xmm9,xmm7,xmm8
; x component
vsubss xmm6,xmm6,xmm9
vmulss xmm9,xmm7,xmm0
vmulss xmm5,xmm3,xmm5
; y component
vsubss xmm5,xmm9,xmm5
vinsertps xmm5,xmm6,xmm5,10
vmulss xmm6,xmm3,xmm8
vmulss xmm0,xmm4,xmm0
; z component
vsubss xmm0,xmm6,xmm0
vinsertps xmm0,xmm5,xmm0,28
; Divide by length and clear lane 3.
vdpps xmm5,xmm0,xmm0,71
vsqrtss xmm5,xmm5,xmm5
vbroadcastss xmm5,xmm5
vdivps xmm0,xmm0,xmm5
vpslldq xmm0,xmm0,4
vpsrldq xmm0,xmm0,4
; Second cross product: a = xmm0 (result above), b = vector #3
; (xmm3 = b, xmm4 = b.y, xmm7 = b.z).
vmovshdup xmm5,xmm0
vmulss xmm6,xmm5,xmm7
vunpckhps xmm8,xmm0,xmm0
vmulss xmm9,xmm8,xmm4
; x component
vsubss xmm6,xmm6,xmm9
vmulss xmm8,xmm8,xmm3
vmovaps xmm9,xmm0
vmulss xmm7,xmm9,xmm7
; y component
vsubss xmm7,xmm8,xmm7
vinsertps xmm6,xmm6,xmm7,10
vmulss xmm4,xmm9,xmm4
vmulss xmm3,xmm5,xmm3
; z component
vsubss xmm3,xmm4,xmm3
vinsertps xmm3,xmm6,xmm3,28
; Divide by length and clear lane 3.
vdpps xmm4,xmm3,xmm3,71
vsqrtss xmm4,xmm4,xmm4
vbroadcastss xmm4,xmm4
vdivps xmm3,xmm3,xmm4
vpslldq xmm3,xmm3,4
vpsrldq xmm3,xmm3,4
; Move results into the registers M01_L05 expects:
; xmm4 = first result, xmm0 = second result.
vmovaps xmm4,xmm0
vmovaps xmm0,xmm3
M01_L05:
; Assemble the 64-byte result in four 16-byte groups at [rsp+0..3F].
; For each group: vmovsd stores x,y; vpshufd imm 2 brings z to lane 0 for the
; vmovss store; the fourth dword is written separately.
; Group 0: xmm4 (x, y, z), then 0.
vmovsd qword ptr [rsp],xmm4
vpshufd xmm3,xmm4,2
vmovss dword ptr [rsp+8],xmm3
xor eax,eax
mov [rsp+0C],eax
; Group 1: xmm2 (vector #3), then 0.
vmovsd qword ptr [rsp+10],xmm2
vpshufd xmm3,xmm2,2
vmovss dword ptr [rsp+18],xmm3
mov [rsp+1C],eax
; Group 2: xmm0, then 0.
vmovsd qword ptr [rsp+20],xmm0
vpshufd xmm2,xmm0,2
vmovss dword ptr [rsp+28],xmm2
mov [rsp+2C],eax
; Group 3: xmm1 (vector #1), then 3F800000 (the IEEE-754 bit pattern of 1.0f).
vmovsd qword ptr [rsp+30],xmm1
vpshufd xmm0,xmm1,2
vmovss dword ptr [rsp+38],xmm0
mov dword ptr [rsp+3C],3F800000
; Copy the 64 bytes to the destination in two 32-byte moves; return rcx in rax.
vmovdqu ymm0,ymmword ptr [rsp]
vmovdqu ymmword ptr [rcx],ymm0
vmovdqu ymm0,ymmword ptr [rsp+20]
vmovdqu ymmword ptr [rcx+20],ymm0
mov rax,rcx
; Restore the xmm registers saved in the prologue and release the frame.
vmovaps xmm6,[rsp+70]
vmovaps xmm7,[rsp+60]
vmovaps xmm8,[rsp+50]
vmovaps xmm9,[rsp+40]
add rsp,88
ret
; Total bytes of code 804
Docs
Profiling workflow for dotnet/runtime repository
Benchmarking workflow for dotnet/runtime repository