Skip to content

Commit 9014dd0

Browse files
committed
Refactor and consolidate all SIMD handlers
1 parent 4b9ade9 commit 9014dd0

13 files changed

+309
-772
lines changed

src_c/alphablit.c

Lines changed: 110 additions & 111 deletions
Large diffs are not rendered by default.

src_c/simd_blitters.h

Lines changed: 5 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -1,14 +1,9 @@
11
#define NO_PYGAME_C_API
22
#include "_surface.h"
33
#include "_blit_info.h"
4+
#include "simd_shared.h"
45

5-
#if !defined(PG_ENABLE_ARM_NEON) && defined(__aarch64__)
6-
// arm64 has neon optimisations enabled by default, even when fpu=neon is not
7-
// passed
8-
#define PG_ENABLE_ARM_NEON 1
9-
#endif
10-
11-
#if (defined(__SSE2__) || defined(PG_ENABLE_ARM_NEON))
6+
#ifdef PG_HAS_SSE2_OR_NEON
127
void
138
alphablit_alpha_sse2_argb_surf_alpha(SDL_BlitInfo *info);
149
void
@@ -37,26 +32,11 @@ void
3732
blit_blend_rgb_min_sse2(SDL_BlitInfo *info);
3833
void
3934
blit_blend_premultiplied_sse2(SDL_BlitInfo *info);
40-
#endif /* (defined(__SSE2__) || defined(PG_ENABLE_ARM_NEON)) */
41-
42-
/* Deliberately putting these outside of the preprocessor guards as I want to
43-
move to a system of trusting the runtime checks to head to the right
44-
function and having a fallback function there if pygame is not compiled
45-
with the right stuff (this is the strategy used for AVX2 right now.
46-
Potentially I might want to shift both these into a slightly different
47-
file as they are not exactly blits (though v. similar) - or I could rename
48-
the SIMD trilogy of files to replace the word blit with something more
49-
generic like surface_ops*/
50-
51-
void
52-
premul_surf_color_by_alpha_non_simd(SDL_Surface *src,
53-
PG_PixelFormat *src_format,
54-
SDL_Palette *src_palette, SDL_Surface *dst,
55-
PG_PixelFormat *dst_format,
56-
SDL_Palette *dst_palette);
5735
void
5836
premul_surf_color_by_alpha_sse2(SDL_Surface *src, SDL_Surface *dst);
37+
#endif /* PG_HAS_SSE2_OR_NEON */
5938

39+
#ifdef PG_HAS_AVX2
6040
void
6141
alphablit_alpha_avx2_argb_no_surf_alpha_opaque_dst(SDL_BlitInfo *info);
6242
void
@@ -87,3 +67,4 @@ void
8767
blit_blend_premultiplied_avx2(SDL_BlitInfo *info);
8868
void
8969
premul_surf_color_by_alpha_avx2(SDL_Surface *src, SDL_Surface *dst);
70+
#endif

src_c/simd_blitters_avx2.c

Lines changed: 2 additions & 204 deletions
Original file line number | Diff line number | Diff line change
@@ -1,48 +1,5 @@
11
#include "simd_blitters.h"
22

3-
#if defined(HAVE_IMMINTRIN_H) && !defined(SDL_DISABLE_IMMINTRIN_H)
4-
#include <immintrin.h>
5-
#endif /* defined(HAVE_IMMINTRIN_H) && !defined(SDL_DISABLE_IMMINTRIN_H) */
6-
7-
#define BAD_AVX2_FUNCTION_CALL \
8-
printf( \
9-
"Fatal Error: Attempted calling an AVX2 function when both compile " \
10-
"time and runtime support is missing. If you are seeing this " \
11-
"message, you have stumbled across a pygame bug, please report it " \
12-
"to the devs!"); \
13-
PG_EXIT(1)
14-
15-
/* helper function that does a runtime check for AVX2. It has the added
16-
* functionality of also returning 0 if compile time support is missing */
17-
int
18-
pg_has_avx2()
19-
{
20-
#if defined(__AVX2__) && defined(HAVE_IMMINTRIN_H) && \
21-
!defined(SDL_DISABLE_IMMINTRIN_H)
22-
return SDL_HasAVX2();
23-
#else
24-
return 0;
25-
#endif /* defined(__AVX2__) && defined(HAVE_IMMINTRIN_H) && \
26-
!defined(SDL_DISABLE_IMMINTRIN_H) */
27-
}
28-
29-
/* This returns 1 when avx2 is available at runtime but support for it isn't
30-
* compiled in, 0 in all other cases */
31-
int
32-
pg_avx2_at_runtime_but_uncompiled()
33-
{
34-
if (SDL_HasAVX2()) {
35-
#if defined(__AVX2__) && defined(HAVE_IMMINTRIN_H) && \
36-
!defined(SDL_DISABLE_IMMINTRIN_H)
37-
return 0;
38-
#else
39-
return 1;
40-
#endif /* defined(__AVX2__) && defined(HAVE_IMMINTRIN_H) && \
41-
!defined(SDL_DISABLE_IMMINTRIN_H) */
42-
}
43-
return 0;
44-
}
45-
463
/* just prints the first/lower 128 bits, in two chunks */
474
// static void
485
//_debug_print256_num(__m256i var, const char *msg)
@@ -190,8 +147,7 @@ pg_avx2_at_runtime_but_uncompiled()
190147
_mm256_srli_epi16( \
191148
_mm256_mulhi_epu16(MM256I, _mm256_set1_epi16((short)0x8081)), 7);
192149

193-
#if defined(__AVX2__) && defined(HAVE_IMMINTRIN_H) && \
194-
!defined(SDL_DISABLE_IMMINTRIN_H)
150+
#ifdef PG_HAS_AVX2
195151
void
196152
alphablit_alpha_avx2_argb_no_surf_alpha_opaque_dst(SDL_BlitInfo *info)
197153
{
@@ -258,17 +214,7 @@ alphablit_alpha_avx2_argb_no_surf_alpha_opaque_dst(SDL_BlitInfo *info)
258214
* surfaces. */
259215
pixels_dst = _mm256_and_si256(pixels_dst, mask_out_alpha);)
260216
}
261-
#else
262-
void
263-
alphablit_alpha_avx2_argb_no_surf_alpha_opaque_dst(SDL_BlitInfo *info)
264-
{
265-
BAD_AVX2_FUNCTION_CALL;
266-
}
267-
#endif /* defined(__AVX2__) && defined(HAVE_IMMINTRIN_H) && \
268-
!defined(SDL_DISABLE_IMMINTRIN_H) */
269217

270-
#if defined(__AVX2__) && defined(HAVE_IMMINTRIN_H) && \
271-
!defined(SDL_DISABLE_IMMINTRIN_H)
272218
void
273219
alphablit_alpha_avx2_argb_no_surf_alpha(SDL_BlitInfo *info)
274220
{
@@ -324,17 +270,7 @@ alphablit_alpha_avx2_argb_no_surf_alpha(SDL_BlitInfo *info)
324270
shuff_dst =
325271
_mm256_blendv_epi8(shuff_dst, new_dst_alpha, combine_rgba_mask);))
326272
}
327-
#else
328-
void
329-
alphablit_alpha_avx2_argb_no_surf_alpha(SDL_BlitInfo *info)
330-
{
331-
BAD_AVX2_FUNCTION_CALL;
332-
}
333-
#endif /* defined(__AVX2__) && defined(HAVE_IMMINTRIN_H) && \
334-
!defined(SDL_DISABLE_IMMINTRIN_H) */
335273

336-
#if defined(__AVX2__) && defined(HAVE_IMMINTRIN_H) && \
337-
!defined(SDL_DISABLE_IMMINTRIN_H)
338274
void
339275
alphablit_alpha_avx2_argb_surf_alpha(SDL_BlitInfo *info)
340276
{
@@ -406,17 +342,6 @@ alphablit_alpha_avx2_argb_surf_alpha(SDL_BlitInfo *info)
406342
shuff_dst =
407343
_mm256_blendv_epi8(shuff_dst, new_dst_alpha, combine_rgba_mask);))
408344
}
409-
#else
410-
void
411-
alphablit_alpha_avx2_argb_surf_alpha(SDL_BlitInfo *info)
412-
{
413-
BAD_AVX2_FUNCTION_CALL;
414-
}
415-
#endif /* defined(__AVX2__) && defined(HAVE_IMMINTRIN_H) && \
416-
!defined(SDL_DISABLE_IMMINTRIN_H) */
417-
418-
#if defined(__AVX2__) && defined(HAVE_IMMINTRIN_H) && \
419-
!defined(SDL_DISABLE_IMMINTRIN_H)
420345
void
421346
blit_blend_rgba_mul_avx2(SDL_BlitInfo *info)
422347
{
@@ -524,17 +449,6 @@ blit_blend_rgba_mul_avx2(SDL_BlitInfo *info)
524449
dstp = (Uint32 *)dstp256 + dstskip;
525450
}
526451
}
527-
#else
528-
void
529-
blit_blend_rgba_mul_avx2(SDL_BlitInfo *info)
530-
{
531-
BAD_AVX2_FUNCTION_CALL;
532-
}
533-
#endif /* defined(__AVX2__) && defined(HAVE_IMMINTRIN_H) && \
534-
!defined(SDL_DISABLE_IMMINTRIN_H) */
535-
536-
#if defined(__AVX2__) && defined(HAVE_IMMINTRIN_H) && \
537-
!defined(SDL_DISABLE_IMMINTRIN_H)
538452
void
539453
blit_blend_rgb_mul_avx2(SDL_BlitInfo *info)
540454
{
@@ -653,17 +567,6 @@ blit_blend_rgb_mul_avx2(SDL_BlitInfo *info)
653567
dstp = (Uint32 *)dstp256 + dstskip;
654568
}
655569
}
656-
#else
657-
void
658-
blit_blend_rgb_mul_avx2(SDL_BlitInfo *info)
659-
{
660-
BAD_AVX2_FUNCTION_CALL;
661-
}
662-
#endif /* defined(__AVX2__) && defined(HAVE_IMMINTRIN_H) && \
663-
!defined(SDL_DISABLE_IMMINTRIN_H) */
664-
665-
#if defined(__AVX2__) && defined(HAVE_IMMINTRIN_H) && \
666-
!defined(SDL_DISABLE_IMMINTRIN_H)
667570
void
668571
blit_blend_rgba_add_avx2(SDL_BlitInfo *info)
669572
{
@@ -725,17 +628,6 @@ blit_blend_rgba_add_avx2(SDL_BlitInfo *info)
725628
dstp = (Uint32 *)dstp256 + dstskip;
726629
}
727630
}
728-
#else
729-
void
730-
blit_blend_rgba_add_avx2(SDL_BlitInfo *info)
731-
{
732-
BAD_AVX2_FUNCTION_CALL;
733-
}
734-
#endif /* defined(__AVX2__) && defined(HAVE_IMMINTRIN_H) && \
735-
!defined(SDL_DISABLE_IMMINTRIN_H) */
736-
737-
#if defined(__AVX2__) && defined(HAVE_IMMINTRIN_H) && \
738-
!defined(SDL_DISABLE_IMMINTRIN_H)
739631
void
740632
blit_blend_rgb_add_avx2(SDL_BlitInfo *info)
741633
{
@@ -805,17 +697,6 @@ blit_blend_rgb_add_avx2(SDL_BlitInfo *info)
805697
dstp = (Uint32 *)dstp256 + dstskip;
806698
}
807699
}
808-
#else
809-
void
810-
blit_blend_rgb_add_avx2(SDL_BlitInfo *info)
811-
{
812-
BAD_AVX2_FUNCTION_CALL;
813-
}
814-
#endif /* defined(__AVX2__) && defined(HAVE_IMMINTRIN_H) && \
815-
!defined(SDL_DISABLE_IMMINTRIN_H) */
816-
817-
#if defined(__AVX2__) && defined(HAVE_IMMINTRIN_H) && \
818-
!defined(SDL_DISABLE_IMMINTRIN_H)
819700
void
820701
blit_blend_rgba_sub_avx2(SDL_BlitInfo *info)
821702
{
@@ -877,17 +758,6 @@ blit_blend_rgba_sub_avx2(SDL_BlitInfo *info)
877758
dstp = (Uint32 *)dstp256 + dstskip;
878759
}
879760
}
880-
#else
881-
void
882-
blit_blend_rgba_sub_avx2(SDL_BlitInfo *info)
883-
{
884-
BAD_AVX2_FUNCTION_CALL;
885-
}
886-
#endif /* defined(__AVX2__) && defined(HAVE_IMMINTRIN_H) && \
887-
!defined(SDL_DISABLE_IMMINTRIN_H) */
888-
889-
#if defined(__AVX2__) && defined(HAVE_IMMINTRIN_H) && \
890-
!defined(SDL_DISABLE_IMMINTRIN_H)
891761
void
892762
blit_blend_rgb_sub_avx2(SDL_BlitInfo *info)
893763
{
@@ -957,17 +827,6 @@ blit_blend_rgb_sub_avx2(SDL_BlitInfo *info)
957827
dstp = (Uint32 *)dstp256 + dstskip;
958828
}
959829
}
960-
#else
961-
void
962-
blit_blend_rgb_sub_avx2(SDL_BlitInfo *info)
963-
{
964-
BAD_AVX2_FUNCTION_CALL;
965-
}
966-
#endif /* defined(__AVX2__) && defined(HAVE_IMMINTRIN_H) && \
967-
!defined(SDL_DISABLE_IMMINTRIN_H) */
968-
969-
#if defined(__AVX2__) && defined(HAVE_IMMINTRIN_H) && \
970-
!defined(SDL_DISABLE_IMMINTRIN_H)
971830
void
972831
blit_blend_rgba_max_avx2(SDL_BlitInfo *info)
973832
{
@@ -1029,17 +888,6 @@ blit_blend_rgba_max_avx2(SDL_BlitInfo *info)
1029888
dstp = (Uint32 *)dstp256 + dstskip;
1030889
}
1031890
}
1032-
#else
1033-
void
1034-
blit_blend_rgba_max_avx2(SDL_BlitInfo *info)
1035-
{
1036-
BAD_AVX2_FUNCTION_CALL;
1037-
}
1038-
#endif /* defined(__AVX2__) && defined(HAVE_IMMINTRIN_H) && \
1039-
!defined(SDL_DISABLE_IMMINTRIN_H) */
1040-
1041-
#if defined(__AVX2__) && defined(HAVE_IMMINTRIN_H) && \
1042-
!defined(SDL_DISABLE_IMMINTRIN_H)
1043891
void
1044892
blit_blend_rgb_max_avx2(SDL_BlitInfo *info)
1045893
{
@@ -1109,17 +957,6 @@ blit_blend_rgb_max_avx2(SDL_BlitInfo *info)
1109957
dstp = (Uint32 *)dstp256 + dstskip;
1110958
}
1111959
}
1112-
#else
1113-
void
1114-
blit_blend_rgb_max_avx2(SDL_BlitInfo *info)
1115-
{
1116-
BAD_AVX2_FUNCTION_CALL;
1117-
}
1118-
#endif /* defined(__AVX2__) && defined(HAVE_IMMINTRIN_H) && \
1119-
!defined(SDL_DISABLE_IMMINTRIN_H) */
1120-
1121-
#if defined(__AVX2__) && defined(HAVE_IMMINTRIN_H) && \
1122-
!defined(SDL_DISABLE_IMMINTRIN_H)
1123960
void
1124961
blit_blend_rgba_min_avx2(SDL_BlitInfo *info)
1125962
{
@@ -1181,17 +1018,6 @@ blit_blend_rgba_min_avx2(SDL_BlitInfo *info)
11811018
dstp = (Uint32 *)dstp256 + dstskip;
11821019
}
11831020
}
1184-
#else
1185-
void
1186-
blit_blend_rgba_min_avx2(SDL_BlitInfo *info)
1187-
{
1188-
BAD_AVX2_FUNCTION_CALL;
1189-
}
1190-
#endif /* defined(__AVX2__) && defined(HAVE_IMMINTRIN_H) && \
1191-
!defined(SDL_DISABLE_IMMINTRIN_H) */
1192-
1193-
#if defined(__AVX2__) && defined(HAVE_IMMINTRIN_H) && \
1194-
!defined(SDL_DISABLE_IMMINTRIN_H)
11951021
void
11961022
blit_blend_rgb_min_avx2(SDL_BlitInfo *info)
11971023
{
@@ -1261,17 +1087,6 @@ blit_blend_rgb_min_avx2(SDL_BlitInfo *info)
12611087
dstp = (Uint32 *)dstp256 + dstskip;
12621088
}
12631089
}
1264-
#else
1265-
void
1266-
blit_blend_rgb_min_avx2(SDL_BlitInfo *info)
1267-
{
1268-
BAD_AVX2_FUNCTION_CALL;
1269-
}
1270-
#endif /* defined(__AVX2__) && defined(HAVE_IMMINTRIN_H) && \
1271-
!defined(SDL_DISABLE_IMMINTRIN_H) */
1272-
1273-
#if defined(__AVX2__) && defined(HAVE_IMMINTRIN_H) && \
1274-
!defined(SDL_DISABLE_IMMINTRIN_H)
12751090
void
12761091
blit_blend_premultiplied_avx2(SDL_BlitInfo *info)
12771092
{
@@ -1521,14 +1336,6 @@ blit_blend_premultiplied_avx2(SDL_BlitInfo *info)
15211336
dstp = (Uint32 *)dstp256 + dstskip;
15221337
}
15231338
}
1524-
#else
1525-
void
1526-
blit_blend_premultiplied_avx2(SDL_BlitInfo *info)
1527-
{
1528-
BAD_AVX2_FUNCTION_CALL;
1529-
}
1530-
#endif /* defined(__AVX2__) && defined(HAVE_IMMINTRIN_H) && \
1531-
!defined(SDL_DISABLE_IMMINTRIN_H) */
15321339

15331340
#define PREMUL_ALPHA_CODE \
15341341
/* extract the alpha */ \
@@ -1558,8 +1365,6 @@ blit_blend_premultiplied_avx2(SDL_BlitInfo *info)
15581365
/*add the original alpha back in*/ \
15591366
mm_dst = _mm256_or_si256(mm_dst, mm_alpha_in);
15601367

1561-
#if defined(__AVX2__) && defined(HAVE_IMMINTRIN_H) && \
1562-
!defined(SDL_DISABLE_IMMINTRIN_H)
15631368
void
15641369
premul_surf_color_by_alpha_avx2(SDL_Surface *src, SDL_Surface *dst)
15651370
{
@@ -1635,11 +1440,4 @@ premul_surf_color_by_alpha_avx2(SDL_Surface *src, SDL_Surface *dst)
16351440
dstp += dst_skip;
16361441
}
16371442
}
1638-
#else
1639-
void
1640-
premul_surf_color_by_alpha_avx2(SDL_Surface *src, SDL_Surface *dst)
1641-
{
1642-
BAD_AVX2_FUNCTION_CALL;
1643-
}
1644-
#endif /* defined(__AVX2__) && defined(HAVE_IMMINTRIN_H) && \
1645-
!defined(SDL_DISABLE_IMMINTRIN_H) */
1443+
#endif /* PG_HAS_AVX2 */

0 commit comments

Comments
 (0)