Skip to content

[Mono] Fix c11 ARM64 atomics to issue full memory barrier. #115573

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
May 16, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -1393,6 +1393,32 @@ static async IAsyncEnumerable<int> Iterate()
await t;
}

[ConditionalFact(typeof(PlatformDetection), nameof(PlatformDetection.IsThreadingSupported))]
public async Task GitHubIssue_114262_Async()
{
var options = new ParallelOptions
{
MaxDegreeOfParallelism = 5,
CancellationToken = new CancellationTokenSource().Token
};

var range = Enumerable.Range(1, 1000);

for (int i = 0; i < 100; i++)
{
await Parallel.ForEachAsync(range, options, async (data, token) =>
{
for (int i = 0; i < 5; i++)
{
await Task.Yield();
var buffer = new byte[10_000];
await Task.Run(() => {var _ = buffer[0];} );
await Task.Yield();
}
});
}
}

private static async IAsyncEnumerable<int> EnumerableRangeAsync(int start, int count, bool yield = true)
{
for (int i = start; i < start + count; i++)
Expand Down
81 changes: 64 additions & 17 deletions src/mono/mono/utils/atomic.h
Original file line number Diff line number Diff line change
Expand Up @@ -95,11 +95,25 @@ Apple targets have historically being problematic, xcode 4.6 would miscompile th

#include <stdatomic.h>

#if defined(HOST_ARM64)
// C11 atomics on ARM64 offers a weaker version of sequential consistent, not expected by mono atomics operations.
// C11 seq_cst on ARM64 corresponds to acquire/release semantics, but mono expects these functions to emit a full memory
// barrier preventing any kind of reordering around the atomic operation. GCC atomics on ARM64 had similar limitations,
// see comments on GCC atomics below and mono injected full memory barriers around GCC atomic functions to mitigate this.
// Since mono GCC atomics implementation ended up even stronger (full memory barrier before/after), the C11 atomics
// implementation is still a little weaker, but should correspond to the exact same semantics as implemented by JIT
// compiler for sequential consistent atomic load/store/add/exchange/cas op codes on ARM64.
#define C11_MEMORY_ORDER_SEQ_CST() atomic_thread_fence (memory_order_seq_cst)
#else
#define C11_MEMORY_ORDER_SEQ_CST()
#endif

static inline guint8
mono_atomic_cas_u8 (volatile guint8 *dest, guint8 exch, guint8 comp)
{
g_static_assert (sizeof (atomic_char) == sizeof (*dest) && ATOMIC_CHAR_LOCK_FREE == 2);
(void)atomic_compare_exchange_strong ((volatile atomic_char *)dest, (char*)&comp, exch);
C11_MEMORY_ORDER_SEQ_CST ();
return comp;
}

Expand All @@ -108,6 +122,7 @@ mono_atomic_cas_u16 (volatile guint16 *dest, guint16 exch, guint16 comp)
{
g_static_assert (sizeof (atomic_short) == sizeof (*dest) && ATOMIC_SHORT_LOCK_FREE == 2);
(void)atomic_compare_exchange_strong ((volatile atomic_short *)dest, (short*)&comp, exch);
C11_MEMORY_ORDER_SEQ_CST ();
return comp;
}

Expand All @@ -116,6 +131,7 @@ mono_atomic_cas_i32 (volatile gint32 *dest, gint32 exch, gint32 comp)
{
g_static_assert (sizeof (atomic_int) == sizeof (*dest) && ATOMIC_INT_LOCK_FREE == 2);
(void)atomic_compare_exchange_strong ((volatile atomic_int *)dest, &comp, exch);
C11_MEMORY_ORDER_SEQ_CST ();
return comp;
}

Expand All @@ -125,21 +141,22 @@ mono_atomic_cas_i64 (volatile gint64 *dest, gint64 exch, gint64 comp)
#if SIZEOF_LONG == 8
g_static_assert (sizeof (atomic_long) == sizeof (*dest) && ATOMIC_LONG_LOCK_FREE == 2);
(void)atomic_compare_exchange_strong ((volatile atomic_long *)dest, (long*)&comp, exch);
return comp;
#elif SIZEOF_LONG_LONG == 8
g_static_assert (sizeof (atomic_llong) == sizeof (*dest) && ATOMIC_LLONG_LOCK_FREE == 2);
(void)atomic_compare_exchange_strong ((volatile atomic_llong *)dest, (long long*)&comp, exch);
return comp;
#else
#error "gint64 not same size atomic_llong or atomic_long, don't define MONO_USE_STDATOMIC"
#endif
C11_MEMORY_ORDER_SEQ_CST ();
return comp;
}

static inline gpointer
mono_atomic_cas_ptr (volatile gpointer *dest, gpointer exch, gpointer comp)
{
g_static_assert(ATOMIC_POINTER_LOCK_FREE == 2);
(void)atomic_compare_exchange_strong ((volatile _Atomic(gpointer) *)dest, &comp, exch);
C11_MEMORY_ORDER_SEQ_CST ();
return comp;
}

Expand Down Expand Up @@ -191,125 +208,153 @@ static inline guint8
mono_atomic_xchg_u8 (volatile guint8 *dest, guint8 exch)
{
g_static_assert (sizeof (atomic_char) == sizeof (*dest) && ATOMIC_CHAR_LOCK_FREE == 2);
return atomic_exchange ((volatile atomic_char *)dest, exch);
guint8 old = atomic_exchange ((volatile atomic_char *)dest, exch);
C11_MEMORY_ORDER_SEQ_CST ();
return old;
}

static inline guint16
mono_atomic_xchg_u16 (volatile guint16 *dest, guint16 exch)
{
g_static_assert (sizeof (atomic_short) == sizeof (*dest) && ATOMIC_SHORT_LOCK_FREE == 2);
return atomic_exchange ((volatile atomic_short *)dest, exch);
guint16 old = atomic_exchange ((volatile atomic_short *)dest, exch);
C11_MEMORY_ORDER_SEQ_CST ();
return old;
}

static inline gint32
mono_atomic_xchg_i32 (volatile gint32 *dest, gint32 exch)
{
g_static_assert (sizeof (atomic_int) == sizeof (*dest) && ATOMIC_INT_LOCK_FREE == 2);
return atomic_exchange ((volatile atomic_int *)dest, exch);
gint32 old = atomic_exchange ((volatile atomic_int *)dest, exch);
C11_MEMORY_ORDER_SEQ_CST ();
return old;
}

static inline gint64
mono_atomic_xchg_i64 (volatile gint64 *dest, gint64 exch)
{
#if SIZEOF_LONG == 8
g_static_assert (sizeof (atomic_long) == sizeof (*dest) && ATOMIC_LONG_LOCK_FREE == 2);
return atomic_exchange ((volatile atomic_long *)dest, exch);
gint64 old = atomic_exchange ((volatile atomic_long *)dest, exch);
#elif SIZEOF_LONG_LONG == 8
g_static_assert (sizeof (atomic_llong) == sizeof (*dest) && ATOMIC_LLONG_LOCK_FREE == 2);
return atomic_exchange ((volatile atomic_llong *)dest, exch);
gint64 old = atomic_exchange ((volatile atomic_llong *)dest, exch);
#else
#error "gint64 not same size atomic_llong or atomic_long, don't define MONO_USE_STDATOMIC"
#endif
C11_MEMORY_ORDER_SEQ_CST ();
return old;
}

static inline gpointer
mono_atomic_xchg_ptr (volatile gpointer *dest, gpointer exch)
{
g_static_assert (ATOMIC_POINTER_LOCK_FREE == 2);
return atomic_exchange ((volatile _Atomic(gpointer) *)dest, exch);
gpointer old = atomic_exchange ((volatile _Atomic(gpointer) *)dest, exch);
C11_MEMORY_ORDER_SEQ_CST ();
return old;
}

static inline gint32
mono_atomic_fetch_add_i32 (volatile gint32 *dest, gint32 add)
{
g_static_assert (sizeof (atomic_int) == sizeof (*dest) && ATOMIC_INT_LOCK_FREE == 2);
return atomic_fetch_add ((volatile atomic_int *)dest, add);
gint32 old = atomic_fetch_add ((volatile atomic_int *)dest, add);
C11_MEMORY_ORDER_SEQ_CST ();
return old;
}

static inline gint64
mono_atomic_fetch_add_i64 (volatile gint64 *dest, gint64 add)
{
#if SIZEOF_LONG == 8
g_static_assert (sizeof (atomic_long) == sizeof (*dest) && ATOMIC_LONG_LOCK_FREE == 2);
return atomic_fetch_add ((volatile atomic_long *)dest, add);
gint64 old = atomic_fetch_add ((volatile atomic_long *)dest, add);
#elif SIZEOF_LONG_LONG == 8
g_static_assert (sizeof (atomic_llong) == sizeof (*dest) && ATOMIC_LLONG_LOCK_FREE == 2);
return atomic_fetch_add ((volatile atomic_llong *)dest, add);
gint64 old = atomic_fetch_add ((volatile atomic_llong *)dest, add);
#else
#error "gint64 not same size atomic_llong or atomic_long, don't define MONO_USE_STDATOMIC"
#endif
C11_MEMORY_ORDER_SEQ_CST ();
return old;
}

static inline gint8
mono_atomic_load_i8 (volatile gint8 *src)
{
g_static_assert (sizeof (atomic_char) == sizeof (*src) && ATOMIC_CHAR_LOCK_FREE == 2);
return atomic_load ((volatile atomic_char *)src);
C11_MEMORY_ORDER_SEQ_CST ();
gint8 val = atomic_load ((volatile atomic_char *)src);
return val;
}

static inline gint16
mono_atomic_load_i16 (volatile gint16 *src)
{
g_static_assert (sizeof (atomic_short) == sizeof (*src) && ATOMIC_SHORT_LOCK_FREE == 2);
return atomic_load ((volatile atomic_short *)src);
C11_MEMORY_ORDER_SEQ_CST ();
gint16 val = atomic_load ((volatile atomic_short *)src);
return val;
}

static inline gint32 mono_atomic_load_i32 (volatile gint32 *src)
{
g_static_assert (sizeof (atomic_int) == sizeof (*src) && ATOMIC_INT_LOCK_FREE == 2);
return atomic_load ((volatile atomic_int *)src);
C11_MEMORY_ORDER_SEQ_CST ();
gint32 val = atomic_load ((volatile atomic_int *)src);
return val;
}

static inline gint64
mono_atomic_load_i64 (volatile gint64 *src)
{
#if SIZEOF_LONG == 8
g_static_assert (sizeof (atomic_long) == sizeof (*src) && ATOMIC_LONG_LOCK_FREE == 2);
return atomic_load ((volatile atomic_long *)src);
C11_MEMORY_ORDER_SEQ_CST ();
gint64 val = atomic_load ((volatile atomic_long *)src);
#elif SIZEOF_LONG_LONG == 8
g_static_assert (sizeof (atomic_llong) == sizeof (*src) && ATOMIC_LLONG_LOCK_FREE == 2);
return atomic_load ((volatile atomic_llong *)src);
C11_MEMORY_ORDER_SEQ_CST ();
gint64 val = atomic_load ((volatile atomic_llong *)src);
#else
#error "gint64 not same size atomic_llong or atomic_long, don't define MONO_USE_STDATOMIC"
#endif
return val;
}

static inline gpointer
mono_atomic_load_ptr (volatile gpointer *src)
{
g_static_assert (ATOMIC_POINTER_LOCK_FREE == 2);
return atomic_load ((volatile _Atomic(gpointer) *)src);
C11_MEMORY_ORDER_SEQ_CST ();
gpointer val = atomic_load ((volatile _Atomic(gpointer) *)src);
return val;
}

static inline void
mono_atomic_store_i8 (volatile gint8 *dst, gint8 val)
{
g_static_assert (sizeof (atomic_char) == sizeof (*dst) && ATOMIC_CHAR_LOCK_FREE == 2);
atomic_store ((volatile atomic_char *)dst, val);
C11_MEMORY_ORDER_SEQ_CST ();
}

static inline void
mono_atomic_store_i16 (volatile gint16 *dst, gint16 val)
{
g_static_assert (sizeof (atomic_short) == sizeof (*dst) && ATOMIC_SHORT_LOCK_FREE == 2);
atomic_store ((volatile atomic_short *)dst, val);
C11_MEMORY_ORDER_SEQ_CST ();
}

static inline void
mono_atomic_store_i32 (volatile gint32 *dst, gint32 val)
{
g_static_assert (sizeof (atomic_int) == sizeof (*dst) && ATOMIC_INT_LOCK_FREE == 2);
atomic_store ((atomic_int *)dst, val);
C11_MEMORY_ORDER_SEQ_CST ();
}

static inline void
Expand All @@ -324,13 +369,15 @@ mono_atomic_store_i64 (volatile gint64 *dst, gint64 val)
#else
#error "gint64 not same size atomic_llong or atomic_long, don't define MONO_USE_STDATOMIC"
#endif
C11_MEMORY_ORDER_SEQ_CST ();
}

static inline void
mono_atomic_store_ptr (volatile gpointer *dst, gpointer val)
{
g_static_assert (ATOMIC_POINTER_LOCK_FREE == 2);
atomic_store ((volatile _Atomic(gpointer) *)dst, val);
C11_MEMORY_ORDER_SEQ_CST ();
}

#elif defined(MONO_USE_WIN32_ATOMIC)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

using System;
using System.Threading;
using System.Threading.Tasks;
using System.Runtime.InteropServices;
using System.Linq;

public static class Program
{
[DllImport("__Internal")]
public static extern void mono_ios_set_summary (string value);

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: Since this test is not validating any observed result, but just hoping that the test don't crash, it might be good to include a line or 2 on what this test is getting validated.

private static async Task GitHubIssue_114262_Async()
{
var options = new ParallelOptions
{
MaxDegreeOfParallelism = 5,
CancellationToken = new CancellationTokenSource().Token
};

var range = Enumerable.Range(1, 1000);

for (int i = 0; i < 100; i++)
{
await Parallel.ForEachAsync(range, options, async (data, token) =>
{
for (int i = 0; i < 5; i++)
{
await Task.Yield();
var buffer = new byte[10_000];
await Task.Run(() => {var _ = buffer[0];} );
await Task.Yield();
}
});
}
}

public static async Task<int> Main(string[] args)
{
mono_ios_set_summary($"Starting functional test");

await GitHubIssue_114262_Async();

Console.WriteLine("Done!");

return 42;
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<OutputType>Exe</OutputType>
<TestRuntime>true</TestRuntime>
<TargetFramework>$(NetCoreAppCurrent)</TargetFramework>
<TargetOS Condition="'$(TargetOS)' == ''">ios</TargetOS>
<TargetArchitecture Condition="'$(TargetArchitecture)' == ''">arm64</TargetArchitecture>
<IncludesTestRunner>false</IncludesTestRunner>
<ExpectedExitCode>42</ExpectedExitCode>
<UseConsoleUITemplate>true</UseConsoleUITemplate>
</PropertyGroup>

<PropertyGroup Condition="'$(RuntimeFlavor)' == 'Mono'">
<MonoForceInterpreter>true</MonoForceInterpreter>
<RunAOTCompilation>false</RunAOTCompilation>
<MainLibraryFileName>iOS.Device.ParallelForEachAsync.Test.dll</MainLibraryFileName>
</PropertyGroup>

<ItemGroup>
<Compile Include="Program.cs" />
</ItemGroup>
</Project>
Loading