Skip to content

Commit

Permalink
unroll byref struct copies (dotnet#86820)
Browse files Browse the repository at this point in the history
If a struct contains a byref, then it is known to be on the stack/regs (not in the heap), so GC write barriers are not required.  This adds that case to each of the per-architecture lower*.cpp implementations and makes those implementations more similar to one another.  I didn't actually factor them into shared code (especially given a few subtle differences, such as the call to `getUnrollThreshold`).

This partially handles dotnet#80086.  It improves the generated code for common cases, but since the unrolling strategy is not always applied, the underlying correctness issue is not completely fixed.  The next step is to apply the full fix for that issue and see how bad the regressions are; this change will reduce their impact.

Example:

``` C#
static Span<int> Copy1(Span<int> s) => s;
```
``` asm
G_M44162_IG01:  ;; offset=0000H
       vzeroupper 
						;; size=3 bbWeight=1 PerfScore 1.00
G_M44162_IG02:  ;; offset=0003H
       vmovdqu  xmm0, xmmword ptr [rdx]
       vmovdqu  xmmword ptr [rcx], xmm0
						;; size=8 bbWeight=1 PerfScore 6.00
G_M44162_IG03:  ;; offset=000BH
       mov      rax, rcx
						;; size=3 bbWeight=1 PerfScore 0.25
G_M44162_IG04:  ;; offset=000EH
       ret      
						;; size=1 bbWeight=1 PerfScore 1.00

; Total bytes of code 15, prolog size 3, PerfScore 9.75, instruction count 5, allocated bytes for code 15 (MethodHash=4d5b537d) for method
```

Platform      | Overall | MinOpts | FullOpts
--------------|---------|---------|---------
linux arm64   |  -5,232 |  -3,260 |   -1,972
linux x64     |  -1,142 |    -750 |     -392
osx arm64     |  -5,732 |  -3,276 |   -2,456
windows arm64 |  -4,416 |  -2,580 |   -1,836
windows x64   |  -8,993 |  -5,772 |   -3,221
linux arm     | -13,518 |  -9,530 |   -3,988
windows x86   |       0 |       0 |        0
  • Loading branch information
markples authored Jul 18, 2023
1 parent 7358e1b commit 8724933
Show file tree
Hide file tree
Showing 8 changed files with 169 additions and 32 deletions.
19 changes: 19 additions & 0 deletions src/coreclr/jit/layout.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -416,6 +416,25 @@ void ClassLayout::InitializeGCPtrs(Compiler* compiler)
INDEBUG(m_gcPtrsInitialized = true;)
}

//------------------------------------------------------------------------
// HasGCByRef: does the layout contain at least one GC ByRef
//
// Return value:
// true if at least one GC ByRef, false otherwise.
bool ClassLayout::HasGCByRef() const
{
unsigned slots = GetSlotCount();
for (unsigned i = 0; i < slots; i++)
{
if (IsGCByRef(i))
{
return true;
}
}

return false;
}

//------------------------------------------------------------------------
// AreCompatible: check if 2 layouts are the same for copying.
//
Expand Down
12 changes: 12 additions & 0 deletions src/coreclr/jit/layout.h
Original file line number Diff line number Diff line change
Expand Up @@ -184,11 +184,23 @@ class ClassLayout
return m_gcPtrCount != 0;
}

bool HasGCByRef() const;

bool IsGCPtr(unsigned slot) const
{
return GetGCPtr(slot) != TYPE_GC_NONE;
}

bool IsGCRef(unsigned slot) const
{
return GetGCPtr(slot) == TYPE_GC_REF;
}

bool IsGCByRef(unsigned slot) const
{
return GetGCPtr(slot) == TYPE_GC_BYREF;
}

var_types GetGCPtrType(unsigned slot) const
{
switch (GetGCPtr(slot))
Expand Down
22 changes: 15 additions & 7 deletions src/coreclr/jit/lowerarmarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -552,8 +552,6 @@ void Lowering::LowerBlockStore(GenTreeBlk* blkNode)
GenTree* src = blkNode->Data();
unsigned size = blkNode->Size();

const bool isDstAddrLocal = dstAddr->OperIs(GT_LCL_ADDR);

if (blkNode->OperIsInitBlkOp())
{
if (src->OperIs(GT_INIT_VAL))
Expand Down Expand Up @@ -617,12 +615,22 @@ void Lowering::LowerBlockStore(GenTreeBlk* blkNode)
comp->lvaSetVarDoNotEnregister(srcLclNum DEBUGARG(DoNotEnregisterReason::BlockOp));
}

bool doCpObj = !blkNode->OperIs(GT_STORE_DYN_BLK) && blkNode->GetLayout()->HasGCPtr();
unsigned copyBlockUnrollLimit = comp->getUnrollThreshold(Compiler::UnrollKind::Memcpy);
if (doCpObj && isDstAddrLocal && (size <= copyBlockUnrollLimit))
ClassLayout* layout = blkNode->GetLayout();
bool doCpObj = !blkNode->OperIs(GT_STORE_DYN_BLK) && layout->HasGCPtr();
unsigned copyBlockUnrollLimit = comp->getUnrollThreshold(Compiler::UnrollKind::Memcpy);

if (doCpObj && (size <= copyBlockUnrollLimit))
{
doCpObj = false;
blkNode->gtBlkOpGcUnsafe = true;
// No write barriers are needed on the stack.
// If the layout contains a byref, then we know it must live on the stack.
if (dstAddr->OperIs(GT_LCL_ADDR) || layout->HasGCByRef())
{
// If the size is small enough to unroll then we need to mark the block as non-interruptible
// to actually allow unrolling. The generated code does not report GC references loaded in the
// temporary register(s) used for copying.
doCpObj = false;
blkNode->gtBlkOpGcUnsafe = true;
}
}

if (doCpObj)
Expand Down
23 changes: 15 additions & 8 deletions src/coreclr/jit/lowerloongarch64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -343,15 +343,22 @@ void Lowering::LowerBlockStore(GenTreeBlk* blkNode)
comp->lvaSetVarDoNotEnregister(srcLclNum DEBUGARG(DoNotEnregisterReason::BlockOp));
}

bool doCpObj = !blkNode->OperIs(GT_STORE_DYN_BLK) && blkNode->GetLayout()->HasGCPtr();
unsigned copyBlockUnrollLimit = comp->getUnrollThreshold(Compiler::UnrollKind::Memcpy);
if (doCpObj && dstAddr->OperIs(GT_LCL_ADDR) && (size <= copyBlockUnrollLimit))
ClassLayout* layout = blkNode->GetLayout();
bool doCpObj = !blkNode->OperIs(GT_STORE_DYN_BLK) && layout->HasGCPtr();
unsigned copyBlockUnrollLimit = comp->getUnrollThreshold(Compiler::UnrollKind::Memcpy);

if (doCpObj && (size <= copyBlockUnrollLimit))
{
// If the size is small enough to unroll then we need to mark the block as non-interruptible
// to actually allow unrolling. The generated code does not report GC references loaded in the
// temporary register(s) used for copying.
doCpObj = false;
blkNode->gtBlkOpGcUnsafe = true;
// No write barriers are needed on the stack.
// If the layout contains a byref, then we know it must live on the stack.
if (dstAddr->OperIs(GT_LCL_ADDR) || layout->HasGCByRef())
{
// If the size is small enough to unroll then we need to mark the block as non-interruptible
// to actually allow unrolling. The generated code does not report GC references loaded in the
// temporary register(s) used for copying.
doCpObj = false;
blkNode->gtBlkOpGcUnsafe = true;
}
}

// CopyObj or CopyBlk
Expand Down
21 changes: 14 additions & 7 deletions src/coreclr/jit/lowerriscv64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -297,14 +297,21 @@ void Lowering::LowerBlockStore(GenTreeBlk* blkNode)
comp->lvaSetVarDoNotEnregister(srcLclNum DEBUGARG(DoNotEnregisterReason::BlockOp));
}

bool doCpObj = !blkNode->OperIs(GT_STORE_DYN_BLK) && blkNode->GetLayout()->HasGCPtr();
if (doCpObj && dstAddr->OperIs(GT_LCL_ADDR) && (size <= CPBLK_UNROLL_LIMIT))
ClassLayout* layout = blkNode->GetLayout();
bool doCpObj = !blkNode->OperIs(GT_STORE_DYN_BLK) && layout->HasGCPtr();

if (doCpObj && (size <= CPBLK_UNROLL_LIMIT))
{
// If the size is small enough to unroll then we need to mark the block as non-interruptible
// to actually allow unrolling. The generated code does not report GC references loaded in the
// temporary register(s) used for copying.
doCpObj = false;
blkNode->gtBlkOpGcUnsafe = true;
// No write barriers are needed on the stack.
// If the layout contains a byref, then we know it must live on the stack.
if (dstAddr->OperIs(GT_LCL_ADDR) || layout->HasGCByRef())
{
// If the size is small enough to unroll then we need to mark the block as non-interruptible
// to actually allow unrolling. The generated code does not report GC references loaded in the
// temporary register(s) used for copying.
doCpObj = false;
blkNode->gtBlkOpGcUnsafe = true;
}
}

// CopyObj or CopyBlk
Expand Down
25 changes: 15 additions & 10 deletions src/coreclr/jit/lowerxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -394,18 +394,23 @@ void Lowering::LowerBlockStore(GenTreeBlk* blkNode)
comp->lvaSetVarDoNotEnregister(srcLclNum DEBUGARG(DoNotEnregisterReason::StoreBlkSrc));
}

ClassLayout* layout = blkNode->GetLayout();
bool doCpObj = !blkNode->OperIs(GT_STORE_DYN_BLK) && layout->HasGCPtr();
ClassLayout* layout = blkNode->GetLayout();
bool doCpObj = !blkNode->OperIs(GT_STORE_DYN_BLK) && layout->HasGCPtr();
unsigned copyBlockUnrollLimit = comp->getUnrollThreshold(Compiler::UnrollKind::Memcpy, false);

#ifndef JIT32_GCENCODER
if (doCpObj && dstAddr->OperIs(GT_LCL_ADDR) &&
(size <= comp->getUnrollThreshold(Compiler::UnrollKind::Memcpy, false)))
{
// If the size is small enough to unroll then we need to mark the block as non-interruptible
// to actually allow unrolling. The generated code does not report GC references loaded in the
// temporary register(s) used for copying. This is not supported for the JIT32_GCENCODER.
doCpObj = false;
blkNode->gtBlkOpGcUnsafe = true;
if (doCpObj && (size <= copyBlockUnrollLimit))
{
// No write barriers are needed on the stack.
// If the layout contains a byref, then we know it must live on the stack.
if (dstAddr->OperIs(GT_LCL_ADDR) || layout->HasGCByRef())
{
// If the size is small enough to unroll then we need to mark the block as non-interruptible
// to actually allow unrolling. The generated code does not report GC references loaded in the
// temporary register(s) used for copying. This is not supported for the JIT32_GCENCODER.
doCpObj = false;
blkNode->gtBlkOpGcUnsafe = true;
}
}
#endif

Expand Down
70 changes: 70 additions & 0 deletions src/tests/JIT/opt/Misc/Runtime_80086/Runtime_80086.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

using System;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using Xunit;

#pragma warning disable CS8500

namespace Runtime_80086
{
// Each method below produces a copy of a Span<int> (a struct containing a
// managed byref) through a distinct code pattern. They are marked NoInlining
// so the JIT compiles each copy pattern in isolation. The runtime assertion
// only verifies the copies are not corrupted; the interesting property —
// unrolled struct copies without GC write barriers — is presumably observed
// in the generated code rather than asserted here (TODO confirm against the
// associated JIT change).
public static unsafe class Test
{
// Re-materializes the span from its reference/length parts via MemoryMarshal.
[MethodImpl(MethodImplOptions.NoInlining)]
public static Span<int> Marshal1(Span<int> a)
{
return MemoryMarshal.CreateSpan(ref MemoryMarshal.GetReference(a), a.Length);
}

// Same as Marshal1, but reads the source span through an unmanaged pointer.
[MethodImpl(MethodImplOptions.NoInlining)]
public static Span<int> Marshal2(Span<int>* a)
{
return MemoryMarshal.CreateSpan(ref MemoryMarshal.GetReference(*a), a->Length);
}

// Plain by-value copy: parameter straight to return value.
[MethodImpl(MethodImplOptions.NoInlining)]
static Span<int> Copy1(Span<int> s) => s;

// Copy that flows through a ref local before being returned.
[MethodImpl(MethodImplOptions.NoInlining)]
static Span<int> Copy2(Span<int> a)
{
ref Span<int> ra = ref a;
return ra;
}

// Copy produced by dereferencing an unmanaged pointer to the span.
[MethodImpl(MethodImplOptions.NoInlining)]
static Span<int> Copy3(Span<int>* a)
{
return *a;
}

// Copy read out of a (scoped) byref parameter.
[MethodImpl(MethodImplOptions.NoInlining)]
static Span<int> Copy4(scoped ref Span<int> a)
{
return a;
}

// Reinterprets a ReadOnlySpan<int> as a Span<int> by pointer punning.
[MethodImpl(MethodImplOptions.NoInlining)]
static Span<int> Copy5(ReadOnlySpan<int> a)
{
// Example is used to check code generation but shouldn't be used elsewhere
return *(Span<int>*)&a;
}

// Threads a single-element span through every copy helper and verifies the
// payload survives, i.e. none of the copy patterns corrupted the span.
[Fact]
public static void TestEntryPoint()
{
Span<int> s = new int[1] { 13 };
s = Marshal1(s);
s = Marshal2(&s);
s = Copy1(s);
s = Copy2(s);
s = Copy3(&s);
s = Copy4(ref s);
s = Copy5(s);
Assert.Equal(13, s[0]);
}
}
}
9 changes: 9 additions & 0 deletions src/tests/JIT/opt/Misc/Runtime_80086/Runtime_80086.csproj
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<AllowUnsafeBlocks>True</AllowUnsafeBlocks>
<Optimize>True</Optimize>
</PropertyGroup>
<ItemGroup>
<Compile Include="$(MSBuildProjectName).cs" />
</ItemGroup>
</Project>

0 comments on commit 8724933

Please sign in to comment.