Closed
Description
While creating this example code I realized that the problem occurs in circumstances similar to #32414, but I don't know whether the two have the same cause.
Given the following code:
SharpLab link
// Read-only ref struct whose only field is a ReadOnlySpan<byte>.
public readonly ref struct Wrapper
{
// The wrapped span; every member below reads from it.
private readonly ReadOnlySpan<byte> _buffer;
// Length of the wrapped span.
public int Length => _buffer.Length;
// Element access that goes through the span's own indexer.
public byte this[int i] => _buffer[i];
// Reads the byte i elements past the span's first element using
// MemoryMarshal.GetReference + Unsafe.Add instead of the span indexer;
// this method itself performs no range check on i.
public byte GetUnsafe(int i) => Unsafe.Add(ref MemoryMarshal.GetReference(_buffer), i);
}
// Returns the sum of the bytes at offsets 0, 1, 2 and 3 of p, each read via
// Wrapper.GetUnsafe. AggressiveInlining asks the JIT to inline this method
// into its callers where possible.
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static int SumFirst4(Wrapper p)
{
// Four independent reads from the same wrapper, added left to right.
return p.GetUnsafe(0) + p.GetUnsafe(1) + p.GetUnsafe(2) + p.GetUnsafe(3);
}
// Passes p straight through to SumFirst4 and returns its result.
public static int SumFirst4Caller(Wrapper p) => SumFirst4(p);
`SumFirst4` produces this. There's some weird stuff going on with register allocation, but nothing too bad.
G_M29037_IG01:
;; bbWeight=1 PerfScore 0.00
G_M29037_IG02:
mov rax, bword ptr [rcx]
mov rdx, rax
movzx rdx, byte ptr [rdx]
mov rcx, rax
movzx rcx, byte ptr [rcx+1]
add edx, ecx
mov rcx, rax
movzx rcx, byte ptr [rcx+2]
add edx, ecx
movzx rax, byte ptr [rax+3]
add eax, edx
;; bbWeight=1 PerfScore 11.50
G_M29037_IG03:
ret
;; bbWeight=1 PerfScore 1.00
; Total bytes of code 34
`SumFirst4Caller` produces this, copying the span to the stack and re-dereferencing the pointer for every element accessed.
G_M45217_IG01:
sub rsp, 24
vzeroupper
xor rax, rax
mov qword ptr [rsp+08H], rax
;; bbWeight=1 PerfScore 2.50
G_M45217_IG02:
vmovdqu xmm0, xmmword ptr [rcx]
vmovdqu xmmword ptr [rsp+08H], xmm0
;; bbWeight=1 PerfScore 3.00
G_M45217_IG03:
lea rax, bword ptr [rsp+08H]
mov rax, bword ptr [rax]
movzx rax, byte ptr [rax]
lea rdx, bword ptr [rsp+08H]
mov rdx, bword ptr [rdx]
movzx rdx, byte ptr [rdx+1]
add eax, edx
lea rdx, bword ptr [rsp+08H]
mov rdx, bword ptr [rdx]
movzx rdx, byte ptr [rdx+2]
add eax, edx
lea rdx, bword ptr [rsp+08H]
mov rdx, bword ptr [rdx]
movzx rdx, byte ptr [rdx+3]
add eax, edx
;; bbWeight=1 PerfScore 18.75
G_M45217_IG04:
add rsp, 24
ret
;; bbWeight=1 PerfScore 1.25
; Total bytes of code 82
For reference, adding this extension and swapping `Wrapper` for `ReadOnlySpan<byte>` gives the following code for both `SumFirst4` and `SumFirst4Caller`.
// Extension-method counterpart of Wrapper.GetUnsafe that operates directly on
// a ReadOnlySpan<byte>: returns the byte i elements past the span's first
// element. This method itself performs no range check on i.
public static byte GetUnsafe(this ReadOnlySpan<byte> span, int i)
{
// Take a ref to element 0, then offset that ref by i elements.
return Unsafe.Add(ref MemoryMarshal.GetReference(span), i);
}
G_M57413_IG01:
;; bbWeight=1 PerfScore 0.00
G_M57413_IG02:
mov rax, bword ptr [rcx]
movzx rdx, byte ptr [rax]
movzx rcx, byte ptr [rax+1]
add edx, ecx
movzx rcx, byte ptr [rax+2]
add edx, ecx
movzx rax, byte ptr [rax+3]
add eax, edx
;; bbWeight=1 PerfScore 10.75
G_M57413_IG03:
ret
;; bbWeight=1 PerfScore 1.00
; Total bytes of code 25
category:cq
theme:structs
skill-level:expert
cost:large
impact:large