Skip to content

Commit 43161f7

Browse files
committed
simplified fill algorithms
1 parent 5cda34f commit 43161f7

File tree

2 files changed

+19
-26
lines changed

2 files changed

+19
-26
lines changed

src_c/simd_surface_fill_avx2.c

Lines changed: 3 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -85,24 +85,20 @@ _pg_has_avx2()
8585
}
8686

8787
/* Setup for RUN_16BIT_SHUFFLE_OUT */
88-
#define SETUP_SHUFFLE \
89-
__m256i shuff_dst, _shuff16_temp, mm256_colorA, mm256_colorB, mm256_zero; \
90-
mm256_zero = _mm256_setzero_si256(); \
91-
mm256_colorA = _mm256_unpacklo_epi8(mm256_color, mm256_zero); \
92-
mm256_colorB = _mm256_unpackhi_epi8(mm256_color, mm256_zero);
88+
#define SETUP_SHUFFLE \
89+
__m256i shuff_dst, _shuff16_temp, mm256_zero = _mm256_setzero_si256(); \
90+
mm256_color = _mm256_unpacklo_epi8(mm256_color, mm256_zero);
9391

9492
#define RUN_16BIT_SHUFFLE_OUT(FILL_CODE) \
9593
/* ==== shuffle pixels out into two registers each, src */ \
9694
/* and dst set up for 16 bit math, like 0A0R0G0B ==== */ \
9795
shuff_dst = _mm256_unpacklo_epi8(mm256_dst, mm256_zero); \
98-
mm256_color = mm256_colorA; \
9996
\
10097
{FILL_CODE} \
10198
\
10299
_shuff16_temp = shuff_dst; \
103100
\
104101
shuff_dst = _mm256_unpackhi_epi8(mm256_dst, mm256_zero); \
105-
mm256_color = mm256_colorB; \
106102
\
107103
{FILL_CODE} \
108104
\

src_c/simd_surface_fill_sse2.c

Lines changed: 16 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -72,26 +72,23 @@ _pg_HasSSE_NEON()
7272

7373
/* Setup for RUN_16BIT_SHUFFLE_OUT */
7474
#define SETUP_SHUFFLE \
75-
__m128i shuff_dst, _shuff16_temp, mm128_colorA, mm128_colorB; \
76-
mm128_colorA = _mm_unpacklo_epi8(mm128_color, _mm_setzero_si128()); \
77-
mm128_colorB = _mm_unpackhi_epi8(mm128_color, _mm_setzero_si128());
75+
__m128i shuff_dst, _shuff16_temp, mm128_zero = _mm_setzero_si128(); \
76+
mm128_color = _mm_unpacklo_epi8(mm128_color, mm128_zero);
7877

79-
#define RUN_16BIT_SHUFFLE_OUT(FILL_CODE) \
80-
/* ==== shuffle pixels out into two registers each, src */ \
81-
/* and dst set up for 16 bit math, like 0A0R0G0B ==== */ \
82-
shuff_dst = _mm_unpacklo_epi8(mm128_dst, _mm_setzero_si128()); \
83-
mm128_color = mm128_colorA; \
84-
\
85-
{FILL_CODE} \
86-
\
87-
_shuff16_temp = shuff_dst; \
88-
\
89-
shuff_dst = _mm_unpackhi_epi8(mm128_dst, _mm_setzero_si128()); \
90-
mm128_color = mm128_colorB; \
91-
\
92-
{FILL_CODE} \
93-
\
94-
/* ==== recombine A and B pixels ==== */ \
78+
#define RUN_16BIT_SHUFFLE_OUT(FILL_CODE) \
79+
/* ==== shuffle pixels out into two registers each, src */ \
80+
/* and dst set up for 16 bit math, like 0A0R0G0B ==== */ \
81+
shuff_dst = _mm_unpacklo_epi8(mm128_dst, mm128_zero); \
82+
\
83+
{FILL_CODE} \
84+
\
85+
_shuff16_temp = shuff_dst; \
86+
\
87+
shuff_dst = _mm_unpackhi_epi8(mm128_dst, mm128_zero); \
88+
\
89+
{FILL_CODE} \
90+
\
91+
/* ==== recombine A and B pixels ==== */ \
9592
mm128_dst = _mm_packus_epi16(_shuff16_temp, shuff_dst);
9693

9794
#define FILLERS(NAME, COLOR_PROCESS_CODE, FILL_CODE) \

0 commit comments

Comments
 (0)