#include <stdatomic.h>
#endif

+
+ #if defined(_MSC_VER)
+ #include <intrin.h>
+ #include <immintrin.h>
+ #endif
+
/* This is modeled after the atomics interface from C1x, according to
 * the draft at
 * http://www.open-std.org/JTC1/SC22/wg14/www/docs/n1425.pdf.
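
The "C1x" draft cited above became C11, so the interface being mirrored here is essentially that of standard <stdatomic.h>. For orientation, a minimal sketch in plain C11 of the two operations this header wraps (not CPython code; the names are illustrative only):

    #include <stdatomic.h>
    #include <stdint.h>

    static atomic_uintptr_t shared_addr;   /* plays the role of a _Py_atomic_address */

    void writer(uintptr_t v) {
        /* analogous to _Py_atomic_store_explicit(..., _Py_memory_order_release) */
        atomic_store_explicit(&shared_addr, v, memory_order_release);
    }

    uintptr_t reader(void) {
        /* analogous to _Py_atomic_load_explicit(..., _Py_memory_order_acquire) */
        return atomic_load_explicit(&shared_addr, memory_order_acquire);
    }
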
@@ -87,8 +93,9 @@ typedef struct _Py_atomic_int {
            || (ORDER) == __ATOMIC_CONSUME), \
     __atomic_load_n(&(ATOMIC_VAL)->_value, ORDER))

- #else
-
+ /* Only support GCC (for expression statements) and x86 (for simple
+  * atomic semantics) and MSVC x86/x64/ARM */
+ #elif defined(__GNUC__) && (defined(__i386__) || defined(__amd64))
typedef enum _Py_memory_order {
    _Py_memory_order_relaxed,
    _Py_memory_order_acquire,
@@ -105,9 +112,6 @@ typedef struct _Py_atomic_int {
    int _value;
} _Py_atomic_int;

- /* Only support GCC (for expression statements) and x86 (for simple
-  * atomic semantics) for now */
- #if defined(__GNUC__) && (defined(__i386__) || defined(__amd64))

static __inline__ void
_Py_atomic_signal_fence(_Py_memory_order order)
@@ -127,7 +131,7 @@ _Py_atomic_thread_fence(_Py_memory_order order)
static __inline__ void
_Py_ANNOTATE_MEMORY_ORDER(const volatile void *address, _Py_memory_order order)
{
-     (void)address;              /* shut up -Wunused-parameter */
+     (void)address;              /* shut up -Wunused-parameter */
    switch (order) {
    case _Py_memory_order_release:
    case _Py_memory_order_acq_rel:
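
The next hunk justifies using the plain _Interlocked* intrinsics for sequentially consistent stores by noting that they imply a full memory barrier. As a standalone illustration of that point (an MSVC-only sketch with illustrative names, not part of this patch):

    #include <intrin.h>

    static volatile long ready;   /* illustrative flag, not CPython state */

    void publish(void)
    {
        /* On x86/x64 this compiles to xchg with a memory operand, which is
           implicitly locked and acts as a full barrier, so it doubles as a
           sequentially consistent store. */
        _InterlockedExchange(&ready, 1);
    }
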
@@ -219,7 +223,291 @@ _Py_ANNOTATE_MEMORY_ORDER(const volatile void *address, _Py_memory_order order)
        result; \
    })

- #else /* !gcc x86 */
+ #elif defined(_MSC_VER)
+ /* _Interlocked* functions provide a full memory barrier and are therefore
+    enough for acq_rel and seq_cst. If the HLE variants aren't available
+    in hardware, they will fall back to a full memory barrier as well.
+
+    This might affect performance, but likely only in some very specific and
+    hard-to-measure scenarios.
+ */
+ #if defined(_M_IX86) || defined(_M_X64)
+ typedef enum _Py_memory_order {
+     _Py_memory_order_relaxed,
+     _Py_memory_order_acquire,
+     _Py_memory_order_release,
+     _Py_memory_order_acq_rel,
+     _Py_memory_order_seq_cst
+ } _Py_memory_order;
+
+ typedef struct _Py_atomic_address {
+     volatile uintptr_t _value;
+ } _Py_atomic_address;
+
+ typedef struct _Py_atomic_int {
+     volatile int _value;
+ } _Py_atomic_int;
+
+
+ #if defined(_M_X64)
+ #define _Py_atomic_store_64bit(ATOMIC_VAL, NEW_VAL, ORDER) \
+     switch (ORDER) { \
+     case _Py_memory_order_acquire: \
+         _InterlockedExchange64_HLEAcquire((__int64 volatile*)ATOMIC_VAL, (__int64)NEW_VAL); \
+         break; \
+     case _Py_memory_order_release: \
+         _InterlockedExchange64_HLERelease((__int64 volatile*)ATOMIC_VAL, (__int64)NEW_VAL); \
+         break; \
+     default: \
+         _InterlockedExchange64((__int64 volatile*)ATOMIC_VAL, (__int64)NEW_VAL); \
+         break; \
+     }
+ #else
+ #define _Py_atomic_store_64bit(ATOMIC_VAL, NEW_VAL, ORDER) ((void)0);
+ #endif
+
+ #define _Py_atomic_store_32bit(ATOMIC_VAL, NEW_VAL, ORDER) \
+     switch (ORDER) { \
+     case _Py_memory_order_acquire: \
+         _InterlockedExchange_HLEAcquire((volatile long*)ATOMIC_VAL, (int)NEW_VAL); \
+         break; \
+     case _Py_memory_order_release: \
+         _InterlockedExchange_HLERelease((volatile long*)ATOMIC_VAL, (int)NEW_VAL); \
+         break; \
+     default: \
+         _InterlockedExchange((volatile long*)ATOMIC_VAL, (int)NEW_VAL); \
+         break; \
+     }
+
+ #if defined(_M_X64)
+ /* This has to be an intptr_t for now.
+    gil_created() uses -1 as a sentinel value; if this returns
+    a uintptr_t it will do an unsigned compare and crash
+ */
+ inline intptr_t _Py_atomic_load_64bit(volatile uintptr_t* value, int order) {
+     uintptr_t old;
+     switch (order) {
+     case _Py_memory_order_acquire:
+     {
+         do {
+             old = *value;
+         } while (_InterlockedCompareExchange64_HLEAcquire(value, old, old) != old);
+         break;
+     }
+     case _Py_memory_order_release:
+     {
+         do {
+             old = *value;
+         } while (_InterlockedCompareExchange64_HLERelease(value, old, old) != old);
+         break;
+     }
+     case _Py_memory_order_relaxed:
+         old = *value;
+         break;
+     default:
+     {
+         do {
+             old = *value;
+         } while (_InterlockedCompareExchange64(value, old, old) != old);
+         break;
+     }
+     }
+     return old;
+ }
+
+ #else
+ #define _Py_atomic_load_64bit(ATOMIC_VAL, ORDER) *ATOMIC_VAL
+ #endif
+
+ inline int _Py_atomic_load_32bit(volatile int* value, int order) {
+     int old;
+     switch (order) {
+     case _Py_memory_order_acquire:
+     {
+         do {
+             old = *value;
+         } while (_InterlockedCompareExchange_HLEAcquire(value, old, old) != old);
+         break;
+     }
+     case _Py_memory_order_release:
+     {
+         do {
+             old = *value;
+         } while (_InterlockedCompareExchange_HLERelease(value, old, old) != old);
+         break;
+     }
+     case _Py_memory_order_relaxed:
+         old = *value;
+         break;
+     default:
+     {
+         do {
+             old = *value;
+         } while (_InterlockedCompareExchange(value, old, old) != old);
+         break;
+     }
+     }
+     return old;
+ }
+
+ #define _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, ORDER) \
+     if (sizeof(*ATOMIC_VAL._value) == 8) { \
+         _Py_atomic_store_64bit(ATOMIC_VAL._value, NEW_VAL, ORDER) } else { \
+         _Py_atomic_store_32bit(ATOMIC_VAL._value, NEW_VAL, ORDER) }
+
+ #define _Py_atomic_load_explicit(ATOMIC_VAL, ORDER) \
+     ( \
+         sizeof(*(ATOMIC_VAL._value)) == 8 ? \
+         _Py_atomic_load_64bit(ATOMIC_VAL._value, ORDER) : \
+         _Py_atomic_load_32bit(ATOMIC_VAL._value, ORDER) \
+     )
+ #elif defined(_M_ARM) || defined(_M_ARM64)
+ typedef enum _Py_memory_order {
+     _Py_memory_order_relaxed,
+     _Py_memory_order_acquire,
+     _Py_memory_order_release,
+     _Py_memory_order_acq_rel,
+     _Py_memory_order_seq_cst
+ } _Py_memory_order;
+
+ typedef struct _Py_atomic_address {
+     volatile uintptr_t _value;
+ } _Py_atomic_address;
+
+ typedef struct _Py_atomic_int {
+     volatile int _value;
+ } _Py_atomic_int;
+
+
+ #if defined(_M_ARM64)
+ #define _Py_atomic_store_64bit(ATOMIC_VAL, NEW_VAL, ORDER) \
+     switch (ORDER) { \
+     case _Py_memory_order_acquire: \
+         _InterlockedExchange64_acq((__int64 volatile*)ATOMIC_VAL, (__int64)NEW_VAL); \
+         break; \
+     case _Py_memory_order_release: \
+         _InterlockedExchange64_rel((__int64 volatile*)ATOMIC_VAL, (__int64)NEW_VAL); \
+         break; \
+     default: \
+         _InterlockedExchange64((__int64 volatile*)ATOMIC_VAL, (__int64)NEW_VAL); \
+         break; \
+     }
+ #else
+ #define _Py_atomic_store_64bit(ATOMIC_VAL, NEW_VAL, ORDER) ((void)0);
+ #endif
+
+ #define _Py_atomic_store_32bit(ATOMIC_VAL, NEW_VAL, ORDER) \
+     switch (ORDER) { \
+     case _Py_memory_order_acquire: \
+         _InterlockedExchange_acq((volatile long*)ATOMIC_VAL, (int)NEW_VAL); \
+         break; \
+     case _Py_memory_order_release: \
+         _InterlockedExchange_rel((volatile long*)ATOMIC_VAL, (int)NEW_VAL); \
+         break; \
+     default: \
+         _InterlockedExchange((volatile long*)ATOMIC_VAL, (int)NEW_VAL); \
+         break; \
+     }
+
+ #if defined(_M_ARM64)
+ /* This has to be an intptr_t for now.
+    gil_created() uses -1 as a sentinel value; if this returns
+    a uintptr_t it will do an unsigned compare and crash
+ */
+ inline intptr_t _Py_atomic_load_64bit(volatile uintptr_t* value, int order) {
+     uintptr_t old;
+     switch (order) {
+     case _Py_memory_order_acquire:
+     {
+         do {
+             old = *value;
+         } while (_InterlockedCompareExchange64_acq(value, old, old) != old);
+         break;
+     }
+     case _Py_memory_order_release:
+     {
+         do {
+             old = *value;
+         } while (_InterlockedCompareExchange64_rel(value, old, old) != old);
+         break;
+     }
+     case _Py_memory_order_relaxed:
+         old = *value;
+         break;
+     default:
+     {
+         do {
+             old = *value;
+         } while (_InterlockedCompareExchange64(value, old, old) != old);
+         break;
+     }
+     }
+     return old;
+ }
+
+ #else
+ #define _Py_atomic_load_64bit(ATOMIC_VAL, ORDER) *ATOMIC_VAL
+ #endif
+
+ inline int _Py_atomic_load_32bit(volatile int* value, int order) {
+     int old;
+     switch (order) {
+     case _Py_memory_order_acquire:
+     {
+         do {
+             old = *value;
+         } while (_InterlockedCompareExchange_acq(value, old, old) != old);
+         break;
+     }
+     case _Py_memory_order_release:
+     {
+         do {
+             old = *value;
+         } while (_InterlockedCompareExchange_rel(value, old, old) != old);
+         break;
+     }
+     case _Py_memory_order_relaxed:
+         old = *value;
+         break;
+     default:
+     {
+         do {
+             old = *value;
+         } while (_InterlockedCompareExchange(value, old, old) != old);
+         break;
+     }
+     }
+     return old;
+ }
+
+ #define _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, ORDER) \
+     if (sizeof(*ATOMIC_VAL._value) == 8) { \
+         _Py_atomic_store_64bit(ATOMIC_VAL._value, NEW_VAL, ORDER) } else { \
+         _Py_atomic_store_32bit(ATOMIC_VAL._value, NEW_VAL, ORDER) }
+
+ #define _Py_atomic_load_explicit(ATOMIC_VAL, ORDER) \
+     ( \
+         sizeof(*(ATOMIC_VAL._value)) == 8 ? \
+         _Py_atomic_load_64bit(ATOMIC_VAL._value, ORDER) : \
+         _Py_atomic_load_32bit(ATOMIC_VAL._value, ORDER) \
+     )
+ #endif
+ #else /* !gcc x86 !_msc_ver */
+ typedef enum _Py_memory_order {
+     _Py_memory_order_relaxed,
+     _Py_memory_order_acquire,
+     _Py_memory_order_release,
+     _Py_memory_order_acq_rel,
+     _Py_memory_order_seq_cst
+ } _Py_memory_order;
+
+ typedef struct _Py_atomic_address {
+     uintptr_t _value;
+ } _Py_atomic_address;
+
+ typedef struct _Py_atomic_int {
+     int _value;
+ } _Py_atomic_int;
/* Fall back to other compilers and processors by assuming that simple
   volatile accesses are atomic. This is false, so people should port
   this. */
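
The intptr_t comment in the hunk above (about gil_created() using -1 as a sentinel) comes down to signedness: once -1 is converted to an unsigned type, a sign test can no longer detect it. A small self-contained illustration of that pitfall, independent of CPython's actual GIL code:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        intptr_t  as_signed   = -1;              /* what the loader is meant to return */
        uintptr_t as_unsigned = (uintptr_t)-1;   /* what a uintptr_t return would yield */

        printf("%d\n", as_signed < 0);           /* prints 1: the -1 sentinel is detected   */
        printf("%d\n", as_unsigned < 0);         /* prints 0: unsigned values are never < 0 */
        return 0;
    }
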
@@ -229,8 +517,6 @@ _Py_ANNOTATE_MEMORY_ORDER(const volatile void *address, _Py_memory_order order)
    ((ATOMIC_VAL)->_value = NEW_VAL)
#define _Py_atomic_load_explicit(ATOMIC_VAL, ORDER) \
    ((ATOMIC_VAL)->_value)
-
- #endif /* !gcc x86 */
#endif

/* Standardized shortcuts. */
@@ -245,6 +531,5 @@ _Py_ANNOTATE_MEMORY_ORDER(const volatile void *address, _Py_memory_order order)
    _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, _Py_memory_order_relaxed)
#define _Py_atomic_load_relaxed(ATOMIC_VAL) \
    _Py_atomic_load_explicit(ATOMIC_VAL, _Py_memory_order_relaxed)
-
#endif /* Py_BUILD_CORE */
#endif /* Py_ATOMIC_H */
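
The "Standardized shortcuts" kept at the end of the header behave the same on every branch above. A hedged usage sketch, assuming the pointer-argument convention used by the GCC and fallback branches (the flag name is illustrative, not a real interpreter field):

    #include "Python.h"     /* pyatomic.h is only active when Py_BUILD_CORE is defined */

    static _Py_atomic_int pending_flag;   /* illustrative only */

    void request_work(void)
    {
        _Py_atomic_store_relaxed(&pending_flag, 1);
    }

    int work_requested(void)
    {
        return _Py_atomic_load_relaxed(&pending_flag);
    }
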