Skip to content

Commit 89f9848

Browse files
authored
Merge pull request #2793 from itzpr3d4t0r/simplify-fill-algorithms
Simplified SIMD fill algorithms
2 parents fdcbd7a + 43161f7 commit 89f9848

File tree

2 files changed

+19
-26
lines changed

2 files changed

+19
-26
lines changed

src_c/simd_surface_fill_avx2.c

Lines changed: 3 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -85,24 +85,20 @@ _pg_has_avx2()
8585
}
8686

8787
/* Setup for RUN_16BIT_SHUFFLE_OUT */
88-
#define SETUP_SHUFFLE \
89-
__m256i shuff_dst, _shuff16_temp, mm256_colorA, mm256_colorB, mm256_zero; \
90-
mm256_zero = _mm256_setzero_si256(); \
91-
mm256_colorA = _mm256_unpacklo_epi8(mm256_color, mm256_zero); \
92-
mm256_colorB = _mm256_unpackhi_epi8(mm256_color, mm256_zero);
88+
#define SETUP_SHUFFLE \
89+
__m256i shuff_dst, _shuff16_temp, mm256_zero = _mm256_setzero_si256(); \
90+
mm256_color = _mm256_unpacklo_epi8(mm256_color, mm256_zero);
9391

9492
#define RUN_16BIT_SHUFFLE_OUT(FILL_CODE) \
9593
/* ==== shuffle pixels out into two registers each, src */ \
9694
/* and dst set up for 16 bit math, like 0A0R0G0B ==== */ \
9795
shuff_dst = _mm256_unpacklo_epi8(mm256_dst, mm256_zero); \
98-
mm256_color = mm256_colorA; \
9996
\
10097
{FILL_CODE} \
10198
\
10299
_shuff16_temp = shuff_dst; \
103100
\
104101
shuff_dst = _mm256_unpackhi_epi8(mm256_dst, mm256_zero); \
105-
mm256_color = mm256_colorB; \
106102
\
107103
{FILL_CODE} \
108104
\

src_c/simd_surface_fill_sse2.c

Lines changed: 16 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -77,26 +77,23 @@ _pg_HasSSE_NEON()
7777

7878
/* Setup for RUN_16BIT_SHUFFLE_OUT */
7979
#define SETUP_SHUFFLE \
80-
__m128i shuff_dst, _shuff16_temp, mm128_colorA, mm128_colorB; \
81-
mm128_colorA = _mm_unpacklo_epi8(mm128_color, _mm_setzero_si128()); \
82-
mm128_colorB = _mm_unpackhi_epi8(mm128_color, _mm_setzero_si128());
80+
__m128i shuff_dst, _shuff16_temp, mm128_zero = _mm_setzero_si128(); \
81+
mm128_color = _mm_unpacklo_epi8(mm128_color, mm128_zero);
8382

84-
#define RUN_16BIT_SHUFFLE_OUT(FILL_CODE) \
85-
/* ==== shuffle pixels out into two registers each, src */ \
86-
/* and dst set up for 16 bit math, like 0A0R0G0B ==== */ \
87-
shuff_dst = _mm_unpacklo_epi8(mm128_dst, _mm_setzero_si128()); \
88-
mm128_color = mm128_colorA; \
89-
\
90-
{FILL_CODE} \
91-
\
92-
_shuff16_temp = shuff_dst; \
93-
\
94-
shuff_dst = _mm_unpackhi_epi8(mm128_dst, _mm_setzero_si128()); \
95-
mm128_color = mm128_colorB; \
96-
\
97-
{FILL_CODE} \
98-
\
99-
/* ==== recombine A and B pixels ==== */ \
83+
#define RUN_16BIT_SHUFFLE_OUT(FILL_CODE) \
84+
/* ==== shuffle pixels out into two registers each, src */ \
85+
/* and dst set up for 16 bit math, like 0A0R0G0B ==== */ \
86+
shuff_dst = _mm_unpacklo_epi8(mm128_dst, mm128_zero); \
87+
\
88+
{FILL_CODE} \
89+
\
90+
_shuff16_temp = shuff_dst; \
91+
\
92+
shuff_dst = _mm_unpackhi_epi8(mm128_dst, mm128_zero); \
93+
\
94+
{FILL_CODE} \
95+
\
96+
/* ==== recombine A and B pixels ==== */ \
10097
mm128_dst = _mm_packus_epi16(_shuff16_temp, shuff_dst);
10198

10299
#define FILLERS(NAME, COLOR_PROCESS_CODE, FILL_CODE) \

0 commit comments

Comments
 (0)