Commit b02cb52

Author: Pär Björklund (authored and committed)
bpo-30747: Attempt to fix atomic load/store
_Py_atomic_* are currently not implemented as atomic operations when building with MSVC. This patch attempts to implement the required parts of that functionality.
1 parent 13e96cc commit b02cb52
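For context, the API touched by this commit is consumed through the standardized shortcuts defined at the bottom of pyatomic.h. Below is a minimal usage sketch (illustrative only; the flag name and helper functions are hypothetical and not part of this commit) showing the pattern that the new MSVC intrinsics are meant to back with real atomic operations:

/* Illustrative usage sketch, not code from this commit.
   Shows how core code drives the _Py_atomic_* shortcuts that this
   patch implements with MSVC intrinsics instead of plain volatile
   accesses. */
#include "pyatomic.h"   /* requires Py_BUILD_CORE in a real build */

static _Py_atomic_int pending_flag = {0};   /* hypothetical flag */

void set_pending(void)
{
    /* relaxed atomic store of 1 into the flag */
    _Py_atomic_store_relaxed(&pending_flag, 1);
}

int has_pending(void)
{
    /* relaxed atomic load of the flag */
    return _Py_atomic_load_relaxed(&pending_flag) != 0;
}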

2 files changed: +297 −10 lines

Include/pyatomic.h

Lines changed: 295 additions & 10 deletions
@@ -10,6 +10,12 @@
 #include <stdatomic.h>
 #endif
 
+
+#if defined(_MSC_VER)
+#include <intrin.h>
+#include <immintrin.h>
+#endif
+
 /* This is modeled after the atomics interface from C1x, according to
  * the draft at
  * http://www.open-std.org/JTC1/SC22/wg14/www/docs/n1425.pdf.
@@ -87,8 +93,9 @@ typedef struct _Py_atomic_int {
     || (ORDER) == __ATOMIC_CONSUME), \
     __atomic_load_n(&(ATOMIC_VAL)->_value, ORDER))
 
-#else
-
+/* Only support GCC (for expression statements) and x86 (for simple
+ * atomic semantics) and MSVC x86/x64/ARM */
+#elif defined(__GNUC__) && (defined(__i386__) || defined(__amd64))
 typedef enum _Py_memory_order {
     _Py_memory_order_relaxed,
     _Py_memory_order_acquire,
@@ -105,9 +112,6 @@ typedef struct _Py_atomic_int {
     int _value;
 } _Py_atomic_int;
 
-/* Only support GCC (for expression statements) and x86 (for simple
- * atomic semantics) for now */
-#if defined(__GNUC__) && (defined(__i386__) || defined(__amd64))
 
 static __inline__ void
 _Py_atomic_signal_fence(_Py_memory_order order)
@@ -127,7 +131,7 @@ _Py_atomic_thread_fence(_Py_memory_order order)
 static __inline__ void
 _Py_ANNOTATE_MEMORY_ORDER(const volatile void *address, _Py_memory_order order)
 {
-    (void)address; /* shut up -Wunused-parameter */
+    (void)address; /* shut up -Wunused-parameter */
     switch(order) {
     case _Py_memory_order_release:
     case _Py_memory_order_acq_rel:
@@ -219,7 +223,291 @@ _Py_ANNOTATE_MEMORY_ORDER(const volatile void *address, _Py_memory_order order)
         result; \
     })
 
-#else /* !gcc x86 */
+#elif defined(_MSC_VER)
+/* _Interlocked* functions provide a full memory barrier and are therefore
+   enough for acq_rel and seq_cst. If the HLE variants aren't available
+   in hardware they will fall back to a full memory barrier as well.
+
+   This might affect performance but likely only in some very specific and
+   hard to measure scenario.
+*/
+#if defined(_M_IX86) || defined(_M_X64)
+typedef enum _Py_memory_order {
+    _Py_memory_order_relaxed,
+    _Py_memory_order_acquire,
+    _Py_memory_order_release,
+    _Py_memory_order_acq_rel,
+    _Py_memory_order_seq_cst
+} _Py_memory_order;
+
+typedef struct _Py_atomic_address {
+    volatile uintptr_t _value;
+} _Py_atomic_address;
+
+typedef struct _Py_atomic_int {
+    volatile int _value;
+} _Py_atomic_int;
+
+
+#if defined(_M_X64)
+#define _Py_atomic_store_64bit(ATOMIC_VAL, NEW_VAL, ORDER) \
+    switch (ORDER) { \
+    case _Py_memory_order_acquire: \
+        _InterlockedExchange64_HLEAcquire((__int64 volatile*)ATOMIC_VAL, (__int64)NEW_VAL); \
+        break; \
+    case _Py_memory_order_release: \
+        _InterlockedExchange64_HLERelease((__int64 volatile*)ATOMIC_VAL, (__int64)NEW_VAL); \
+        break; \
+    default: \
+        _InterlockedExchange64((__int64 volatile*)ATOMIC_VAL, (__int64)NEW_VAL); \
+        break; \
+    }
+#else
+#define _Py_atomic_store_64bit(ATOMIC_VAL, NEW_VAL, ORDER) ((void)0);
+#endif
+
+#define _Py_atomic_store_32bit(ATOMIC_VAL, NEW_VAL, ORDER) \
+    switch (ORDER) { \
+    case _Py_memory_order_acquire: \
+        _InterlockedExchange_HLEAcquire((volatile long*)ATOMIC_VAL, (int)NEW_VAL); \
+        break; \
+    case _Py_memory_order_release: \
+        _InterlockedExchange_HLERelease((volatile long*)ATOMIC_VAL, (int)NEW_VAL); \
+        break; \
+    default: \
+        _InterlockedExchange((volatile long*)ATOMIC_VAL, (int)NEW_VAL); \
+        break; \
+    }
+
+#if defined(_M_X64)
+/* This has to be an intptr_t for now.
+   gil_created() uses -1 as a sentinel value, if this returns
+   a uintptr_t it will do an unsigned compare and crash
+*/
+inline intptr_t _Py_atomic_load_64bit(volatile uintptr_t* value, int order) {
+    uintptr_t old;
+    switch (order) {
+    case _Py_memory_order_acquire:
+    {
+        do {
+            old = *value;
+        } while(_InterlockedCompareExchange64_HLEAcquire(value, old, old) != old);
+        break;
+    }
+    case _Py_memory_order_release:
+    {
+        do {
+            old = *value;
+        } while(_InterlockedCompareExchange64_HLERelease(value, old, old) != old);
+        break;
+    }
+    case _Py_memory_order_relaxed:
+        old = *value;
+        break;
+    default:
+    {
+        do {
+            old = *value;
+        } while(_InterlockedCompareExchange64(value, old, old) != old);
+        break;
+    }
+    }
+    return old;
+}
+
+#else
+#define _Py_atomic_load_64bit(ATOMIC_VAL, ORDER) *ATOMIC_VAL
+#endif
+
+inline int _Py_atomic_load_32bit(volatile int* value, int order) {
+    int old;
+    switch (order) {
+    case _Py_memory_order_acquire:
+    {
+        do {
+            old = *value;
+        } while(_InterlockedCompareExchange_HLEAcquire(value, old, old) != old);
+        break;
+    }
+    case _Py_memory_order_release:
+    {
+        do {
+            old = *value;
+        } while(_InterlockedCompareExchange_HLERelease(value, old, old) != old);
+        break;
+    }
+    case _Py_memory_order_relaxed:
+        old = *value;
+        break;
+    default:
+    {
+        do {
+            old = *value;
+        } while(_InterlockedCompareExchange(value, old, old) != old);
+        break;
+    }
+    }
+    return old;
+}
+
+#define _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, ORDER) \
+    if (sizeof(*ATOMIC_VAL._value) == 8) { \
+        _Py_atomic_store_64bit(ATOMIC_VAL._value, NEW_VAL, ORDER) } else { \
+        _Py_atomic_store_32bit(ATOMIC_VAL._value, NEW_VAL, ORDER) }
+
+#define _Py_atomic_load_explicit(ATOMIC_VAL, ORDER) \
+    ( \
+        sizeof(*(ATOMIC_VAL._value)) == 8 ? \
+        _Py_atomic_load_64bit(ATOMIC_VAL._value, ORDER) : \
+        _Py_atomic_load_32bit(ATOMIC_VAL._value, ORDER) \
+    )
+#elif defined(_M_ARM) || defined(_M_ARM64)
+typedef enum _Py_memory_order {
+    _Py_memory_order_relaxed,
+    _Py_memory_order_acquire,
+    _Py_memory_order_release,
+    _Py_memory_order_acq_rel,
+    _Py_memory_order_seq_cst
+} _Py_memory_order;
+
+typedef struct _Py_atomic_address {
+    volatile uintptr_t _value;
+} _Py_atomic_address;
+
+typedef struct _Py_atomic_int {
+    volatile int _value;
+} _Py_atomic_int;
+
+
+#if defined(_M_ARM64)
+#define _Py_atomic_store_64bit(ATOMIC_VAL, NEW_VAL, ORDER) \
+    switch (ORDER) { \
+    case _Py_memory_order_acquire: \
+        _InterlockedExchange64_acq((__int64 volatile*)ATOMIC_VAL, (__int64)NEW_VAL); \
+        break; \
+    case _Py_memory_order_release: \
+        _InterlockedExchange64_rel((__int64 volatile*)ATOMIC_VAL, (__int64)NEW_VAL); \
+        break; \
+    default: \
+        _InterlockedExchange64((__int64 volatile*)ATOMIC_VAL, (__int64)NEW_VAL); \
+        break; \
+    }
+#else
+#define _Py_atomic_store_64bit(ATOMIC_VAL, NEW_VAL, ORDER) ((void)0);
+#endif
+
+#define _Py_atomic_store_32bit(ATOMIC_VAL, NEW_VAL, ORDER) \
+    switch (ORDER) { \
+    case _Py_memory_order_acquire: \
+        _InterlockedExchange_acq((volatile long*)ATOMIC_VAL, (int)NEW_VAL); \
+        break; \
+    case _Py_memory_order_release: \
+        _InterlockedExchange_rel((volatile long*)ATOMIC_VAL, (int)NEW_VAL); \
+        break; \
+    default: \
+        _InterlockedExchange((volatile long*)ATOMIC_VAL, (int)NEW_VAL); \
+        break; \
+    }
+
+#if defined(_M_ARM64)
+/* This has to be an intptr_t for now.
+   gil_created() uses -1 as a sentinel value, if this returns
+   a uintptr_t it will do an unsigned compare and crash
+*/
+inline intptr_t _Py_atomic_load_64bit(volatile uintptr_t* value, int order) {
+    uintptr_t old;
+    switch (order) {
+    case _Py_memory_order_acquire:
+    {
+        do {
+            old = *value;
+        } while(_InterlockedCompareExchange64_acq(value, old, old) != old);
+        break;
+    }
+    case _Py_memory_order_release:
+    {
+        do {
+            old = *value;
+        } while(_InterlockedCompareExchange64_rel(value, old, old) != old);
+        break;
+    }
+    case _Py_memory_order_relaxed:
+        old = *value;
+        break;
+    default:
+    {
+        do {
+            old = *value;
+        } while(_InterlockedCompareExchange64(value, old, old) != old);
+        break;
+    }
+    }
+    return old;
+}
+
+#else
+#define _Py_atomic_load_64bit(ATOMIC_VAL, ORDER) *ATOMIC_VAL
+#endif
+
+inline int _Py_atomic_load_32bit(volatile int* value, int order) {
+    int old;
+    switch (order) {
+    case _Py_memory_order_acquire:
+    {
+        do {
+            old = *value;
+        } while(_InterlockedCompareExchange_acq(value, old, old) != old);
+        break;
+    }
+    case _Py_memory_order_release:
+    {
+        do {
+            old = *value;
+        } while(_InterlockedCompareExchange_rel(value, old, old) != old);
+        break;
+    }
+    case _Py_memory_order_relaxed:
+        old = *value;
+        break;
+    default:
+    {
+        do {
+            old = *value;
+        } while(_InterlockedCompareExchange(value, old, old) != old);
+        break;
+    }
+    }
+    return old;
+}
+
+#define _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, ORDER) \
+    if (sizeof(*ATOMIC_VAL._value) == 8) { \
+        _Py_atomic_store_64bit(ATOMIC_VAL._value, NEW_VAL, ORDER) } else { \
+        _Py_atomic_store_32bit(ATOMIC_VAL._value, NEW_VAL, ORDER) }
+
+#define _Py_atomic_load_explicit(ATOMIC_VAL, ORDER) \
+    ( \
+        sizeof(*(ATOMIC_VAL._value)) == 8 ? \
+        _Py_atomic_load_64bit(ATOMIC_VAL._value, ORDER) : \
+        _Py_atomic_load_32bit(ATOMIC_VAL._value, ORDER) \
+    )
+#endif
+#else /* !gcc x86 !_msc_ver */
+typedef enum _Py_memory_order {
+    _Py_memory_order_relaxed,
+    _Py_memory_order_acquire,
+    _Py_memory_order_release,
+    _Py_memory_order_acq_rel,
+    _Py_memory_order_seq_cst
+} _Py_memory_order;
+
+typedef struct _Py_atomic_address {
+    uintptr_t _value;
+} _Py_atomic_address;
+
+typedef struct _Py_atomic_int {
+    int _value;
+} _Py_atomic_int;
 /* Fall back to other compilers and processors by assuming that simple
    volatile accesses are atomic. This is false, so people should port
    this. */
@@ -229,8 +517,6 @@ _Py_ANNOTATE_MEMORY_ORDER(const volatile void *address, _Py_memory_order order)
     ((ATOMIC_VAL)->_value = NEW_VAL)
 #define _Py_atomic_load_explicit(ATOMIC_VAL, ORDER) \
     ((ATOMIC_VAL)->_value)
-
-#endif /* !gcc x86 */
 #endif
 
 /* Standardized shortcuts. */
@@ -245,6 +531,5 @@ _Py_ANNOTATE_MEMORY_ORDER(const volatile void *address, _Py_memory_order order)
     _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, _Py_memory_order_relaxed)
 #define _Py_atomic_load_relaxed(ATOMIC_VAL) \
     _Py_atomic_load_explicit(ATOMIC_VAL, _Py_memory_order_relaxed)
-
 #endif /* Py_BUILD_CORE */
 #endif /* Py_ATOMIC_H */
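
The in-diff comment about returning intptr_t rather than uintptr_t from the load helpers is terse. The stand-alone sketch below (illustrative only, not part of the commit) shows the failure mode it guards against: the GIL code uses -1 as a "not created yet" sentinel, and that check only behaves as intended with a signed comparison, which an unsigned load silently defeats.

/* Illustrative only: why _Py_atomic_load_64bit returns intptr_t.
   An unsigned load makes a ">= 0"-style sentinel test always true. */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uintptr_t raw = (uintptr_t)-1;       /* sentinel stored as -1 */

    intptr_t  signed_load   = (intptr_t)raw;
    uintptr_t unsigned_load = raw;

    printf("signed   >= 0: %d\n", signed_load >= 0);   /* 0: sentinel detected */
    printf("unsigned >= 0: %d\n", unsigned_load >= 0); /* 1: always true, check broken
                                                          (compilers may even warn) */
    return 0;
}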
Lines changed: 2 additions & 0 deletions
@@ -0,0 +1,2 @@
+Add a non-dummy implementation of _Py_atomic_store and _Py_atomic_load on
+MSVC.
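
For reference, the sizeof-based dispatch used by the new _Py_atomic_store_explicit / _Py_atomic_load_explicit macros can be shown in isolation. The sketch below is generic C with stand-in helpers (store_32 / store_64 are hypothetical placeholders for the _InterlockedExchange* intrinsics), not code from the patch: the width of the wrapped _value field decides at compile time which helper is used.

/* Stripped-down sketch of the sizeof dispatch pattern (illustrative only). */
#include <stdint.h>
#include <stdio.h>

static void store_32(volatile void *p, int v)     { *(volatile int *)p = v;     puts("32-bit path"); }
static void store_64(volatile void *p, int64_t v) { *(volatile int64_t *)p = v; puts("64-bit path"); }

#define STORE(ATOMIC_PTR, NEW_VAL)                          \
    do {                                                    \
        if (sizeof((ATOMIC_PTR)->_value) == 8)              \
            store_64(&(ATOMIC_PTR)->_value, (NEW_VAL));     \
        else                                                \
            store_32(&(ATOMIC_PTR)->_value, (NEW_VAL));     \
    } while (0)

typedef struct { volatile int _value; }       atomic_int_t;  /* shaped like _Py_atomic_int */
typedef struct { volatile uintptr_t _value; } atomic_addr_t; /* shaped like _Py_atomic_address */

int main(void)
{
    atomic_int_t  flag = { 0 };
    atomic_addr_t addr = { 0 };
    STORE(&flag, 1);   /* always the 32-bit path */
    STORE(&addr, 1);   /* 64-bit path on a 64-bit build, else 32-bit */
    return 0;
}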
