Skip to content

[NativeAOT] implement Interlocked.ReadMemoryBarrier intrinsic #86842

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 10 commits into from
May 31, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -208,25 +208,7 @@ public static long Read(ref long location) =>
CompareExchange(ref location, 0, 0);
#endregion

#region MemoryBarrier
/// <summary>
/// Synchronizes memory access as follows:
/// The processor that executes the current thread cannot reorder instructions in such a way that memory accesses before
/// the call to <see cref="MemoryBarrier"/> execute after memory accesses that follow the call to <see cref="MemoryBarrier"/>.
/// </summary>
[Intrinsic]
[MethodImpl(MethodImplOptions.InternalCall)]
public static extern void MemoryBarrier();

/// <summary>
/// Synchronizes memory access as follows:
/// The processor that executes the current thread cannot reorder instructions in such a way that memory reads before
/// the call to <see cref="ReadMemoryBarrier"/> execute after memory accesses that follow the call to <see cref="ReadMemoryBarrier"/>.
/// </summary>
[Intrinsic]
[MethodImpl(MethodImplOptions.InternalCall)]
internal static extern void ReadMemoryBarrier();

#region MemoryBarrierProcessWide
[LibraryImport(RuntimeHelpers.QCall, EntryPoint = "Interlocked_MemoryBarrierProcessWide")]
private static partial void _MemoryBarrierProcessWide();

Expand Down
8 changes: 0 additions & 8 deletions src/coreclr/nativeaot/Runtime/portable.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -462,14 +462,6 @@ COOP_PINVOKE_HELPER(int64_t, RhpLockCmpXchg64, (int64_t * location, int64_t valu
return PalInterlockedCompareExchange64(location, value, comparand);
}

#endif // USE_PORTABLE_HELPERS

COOP_PINVOKE_HELPER(void, RhpMemoryBarrier, ())
{
PalMemoryBarrier();
}

#if defined(USE_PORTABLE_HELPERS)
EXTERN_C NATIVEAOT_API void* __cdecl RhAllocateThunksMapping()
{
return NULL;
Expand Down
13 changes: 8 additions & 5 deletions src/coreclr/nativeaot/Runtime/windows/PalRedhawkInline.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

#if defined(HOST_ARM64)
#include <arm64intr.h>
#endif

// Implementation of Redhawk PAL inline functions

EXTERN_C long __cdecl _InterlockedIncrement(long volatile *);
Expand Down Expand Up @@ -121,7 +125,6 @@ EXTERN_C void __faststorefence();
#pragma intrinsic(__faststorefence)
#define PalMemoryBarrier() __faststorefence()


#elif defined(HOST_ARM)

EXTERN_C void __yield(void);
Expand All @@ -130,11 +133,11 @@ EXTERN_C void __dmb(unsigned int _Type);
#pragma intrinsic(__dmb)
FORCEINLINE void PalYieldProcessor()
{
__dmb(0xA /* _ARM_BARRIER_ISHST */);
__dmb(_ARM_BARRIER_ISHST);
__yield();
}

#define PalMemoryBarrier() __dmb(0xF /* _ARM_BARRIER_SY */)
#define PalMemoryBarrier() __dmb(_ARM_BARRIER_ISH)

#elif defined(HOST_ARM64)

Expand All @@ -144,11 +147,11 @@ EXTERN_C void __dmb(unsigned int _Type);
#pragma intrinsic(__dmb)
FORCEINLINE void PalYieldProcessor()
{
__dmb(0xA /* _ARM64_BARRIER_ISHST */);
__dmb(_ARM64_BARRIER_ISHST);
__yield();
}

#define PalMemoryBarrier() __dmb(0xF /* _ARM64_BARRIER_SY */)
#define PalMemoryBarrier() __dmb(_ARM64_BARRIER_ISH)

#else
#error Unsupported architecture
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -791,10 +791,6 @@ internal static unsafe partial void RhEventPipeInternal_WriteEventData(
[RuntimeImport(RuntimeLibrary, "RhpCheckedXchg")]
internal static extern object InterlockedExchange([NotNullIfNotNull(nameof(value))] ref object? location1, object? value);

[MethodImplAttribute(MethodImplOptions.InternalCall)]
[RuntimeImport(RuntimeLibrary, "RhpMemoryBarrier")]
internal static extern void MemoryBarrier();

[Intrinsic]
[MethodImplAttribute(MethodImplOptions.InternalCall)]
[RuntimeImport(RuntimeLibrary, "acos")]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -196,14 +196,6 @@ private static long ExchangeAdd(ref long location1, long value)

#endregion

#region MemoryBarrier
[Intrinsic]
public static void MemoryBarrier()
{
RuntimeImports.MemoryBarrier();
}
#endregion

#region Read
public static long Read(ref long location)
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -103,10 +103,6 @@ internal static unsafe Array RhNewArray(EETypePtr pEEType, int length)
[RuntimeImport(RuntimeLibrary, "RhpLockCmpXchg64")]
internal static extern long InterlockedCompareExchange(ref long location1, long value, long comparand);

[MethodImplAttribute(MethodImplOptions.InternalCall)]
[RuntimeImport(RuntimeLibrary, "RhpMemoryBarrier")]
internal static extern void MemoryBarrier();

// Moves memory from smem to dmem. Size must be a positive value.
// This copy uses an intrinsic to be safe for copying arbitrary bits of
// heap memory
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,6 @@ public static long CompareExchange(ref long location1, long value, long comparan
}

[Intrinsic]
public static void MemoryBarrier()
{
RuntimeImports.MemoryBarrier();
}
public static void MemoryBarrier() => MemoryBarrier();
}
}
18 changes: 0 additions & 18 deletions src/coreclr/vm/comutilnative.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1637,24 +1637,6 @@ FCIMPL2_IV(INT64,COMInterlocked::ExchangeAdd64, INT64 *location, INT64 value)
}
FCIMPLEND

FCIMPL0(void, COMInterlocked::FCMemoryBarrier)
{
FCALL_CONTRACT;

MemoryBarrier();
FC_GC_POLL();
}
FCIMPLEND

FCIMPL0(void, COMInterlocked::FCMemoryBarrierLoad)
{
FCALL_CONTRACT;

VolatileLoadBarrier();
FC_GC_POLL();
}
FCIMPLEND

#include <optdefault.h>

extern "C" void QCALLTYPE Interlocked_MemoryBarrierProcessWide()
Expand Down
3 changes: 0 additions & 3 deletions src/coreclr/vm/comutilnative.h
Original file line number Diff line number Diff line change
Expand Up @@ -235,9 +235,6 @@ class COMInterlocked
static FCDECL3(LPVOID, CompareExchangeObject, LPVOID* location, LPVOID value, LPVOID comparand);
static FCDECL2(INT32, ExchangeAdd32, INT32 *location, INT32 value);
static FCDECL2_IV(INT64, ExchangeAdd64, INT64 *location, INT64 value);

static FCDECL0(void, FCMemoryBarrier);
static FCDECL0(void, FCMemoryBarrierLoad);
};

extern "C" void QCALLTYPE Interlocked_MemoryBarrierProcessWide();
Expand Down
2 changes: 0 additions & 2 deletions src/coreclr/vm/ecalllist.h
Original file line number Diff line number Diff line change
Expand Up @@ -519,8 +519,6 @@ FCFuncStart(gInterlockedFuncs)
FCFuncElementSig("CompareExchange", &gsig_SM_RefObj_Obj_Obj_RetObj, COMInterlocked::CompareExchangeObject)
FCFuncElementSig("ExchangeAdd", &gsig_SM_RefInt_Int_RetInt, COMInterlocked::ExchangeAdd32)
FCFuncElementSig("ExchangeAdd", &gsig_SM_RefLong_Long_RetLong, COMInterlocked::ExchangeAdd64)
FCFuncElement("MemoryBarrier", COMInterlocked::FCMemoryBarrier)
FCFuncElement("ReadMemoryBarrier", COMInterlocked::FCMemoryBarrierLoad)
FCFuncEnd()

FCFuncStart(gJitInfoFuncs)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -152,32 +152,16 @@ internal static CastResult TryGet(nuint source, nuint target)
// we must read in this order: version -> [entry parts] -> version
// if version is odd or changes, the entry is inconsistent and thus ignored
uint version = Volatile.Read(ref pEntry._version);

#if CORECLR
// in CoreCLR we do ordinary reads of the entry parts and
// Interlocked.ReadMemoryBarrier() before reading the version
nuint entrySource = pEntry._source;
#else
// must read this before reading the version again
nuint entrySource = Volatile.Read(ref pEntry._source);
#endif

// mask the lower version bit to make it even.
// This way we can check if version is odd or changing in just one compare.
version &= unchecked((uint)~1);

if (entrySource == source)
{

#if CORECLR
// in CoreCLR we do ordinary reads of the entry parts and
// Interlocked.ReadMemoryBarrier() before reading the version
nuint entryTargetAndResult = pEntry._targetAndResult;
#else
// must read this before reading the version again
nuint entryTargetAndResult = Volatile.Read(ref pEntry._targetAndResult);
#endif

// target never has its lower bit set.
// a matching entryTargetAndResult would the have same bits, except for the lowest one, which is the result.
entryTargetAndResult ^= target;
Expand All @@ -189,10 +173,7 @@ internal static CastResult TryGet(nuint source, nuint target)
// - use acquires for both _source and _targetAndResults or
// - issue a load barrier before reading _version
// benchmarks on available hardware (Jan 2020) show that use of a read barrier is cheaper.

#if CORECLR
Interlocked.ReadMemoryBarrier();
#endif

if (version != pEntry._version)
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -325,5 +325,23 @@ public static long Or(ref long location1, long value)
public static ulong Or(ref ulong location1, ulong value) =>
(ulong)Or(ref Unsafe.As<ulong, long>(ref location1), (long)value);
#endregion

#region MemoryBarrier
/// <summary>
/// Synchronizes memory access as follows:
/// The processor that executes the current thread cannot reorder instructions in such a way that memory accesses before
/// the call to <see cref="MemoryBarrier"/> execute after memory accesses that follow the call to <see cref="MemoryBarrier"/>.
/// </summary>
[Intrinsic]
public static void MemoryBarrier() => MemoryBarrier();

/// <summary>
/// Synchronizes memory access as follows:
/// The processor that executes the current thread cannot reorder instructions in such a way that memory reads before
/// the call to <see cref="ReadMemoryBarrier"/> execute after memory accesses that follow the call to <see cref="ReadMemoryBarrier"/>.
/// </summary>
[Intrinsic]
internal static void ReadMemoryBarrier() => ReadMemoryBarrier();
#endregion
}
}
19 changes: 18 additions & 1 deletion src/libraries/System.Threading/tests/InterlockedTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

namespace System.Threading.Tests
{
public class InterlockedTests
public unsafe class InterlockedTests
{
[Fact]
public void InterlockedAdd_Int32()
Expand Down Expand Up @@ -603,6 +603,23 @@ public void InterlockedAddAndRead_Multithreaded_Int64()
Assert.Equal(ThreadCount * IterationCount * Increment, Interlocked.Read(ref value));
}

[MethodImpl(MethodImplOptions.NoInlining)]
private static Action MemoryBarrierDelegate() => Interlocked.MemoryBarrier;

[MethodImpl(MethodImplOptions.NoInlining)]
private static delegate* <void> MemoryBarrierPointer() => &Interlocked.MemoryBarrier;

[Fact()]
public void MemoryBarrierIntrinsic()
{
// Interlocked.MemoryBarrier is a self-referring intrinsic
// we should be able to call it through a delegate.
MemoryBarrierDelegate()();

// through a method pointer
MemoryBarrierPointer()();
}

[ConditionalFact(typeof(PlatformDetection), nameof(PlatformDetection.IsThreadingSupported))]
public void MemoryBarrierProcessWide()
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
<TestRuntime>true</TestRuntime>
<IncludeRemoteExecutor>true</IncludeRemoteExecutor>
<TargetFramework>$(NetCoreAppCurrent)</TargetFramework>
<AllowUnsafeBlocks>true</AllowUnsafeBlocks>
</PropertyGroup>
<ItemGroup>
<Compile Include="AsyncLocalTests.cs" />
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -145,11 +145,6 @@ public static T Exchange<T>([NotNullIfNotNull(nameof(value))] ref T location1, T
[MethodImplAttribute(MethodImplOptions.InternalCall)]
public static extern long Add(ref long location1, long value);

[Intrinsic]
public static void MemoryBarrier()
{
}

[MethodImplAttribute(MethodImplOptions.InternalCall)]
public static extern void MemoryBarrierProcessWide();
}
Expand Down