Skip to content

Get transform compiling on SDL3 #3351

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Apr 28, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src_c/_pygame.h
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@
#define PG_SurfaceHasRLE SDL_SurfaceHasRLE

#define PG_SoftStretchNearest(src, srcrect, dst, dstrect) \
SDL_SoftStretch(src, srcrect, dst, dstrect, SDL_SCALEMODE_NEAREST)
SDL_StretchSurface(src, srcrect, dst, dstrect, SDL_SCALEMODE_NEAREST)

/* Emulating SDL2 SDL_LockMutex API. In SDL3, it returns void. */
static inline int
Expand Down
3 changes: 0 additions & 3 deletions src_c/meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -179,8 +179,6 @@ image = py.extension_module(
subdir: pg,
)

# TODO: support SDL3
if sdl_api != 3
simd_transform_avx2 = static_library(
'simd_transform_avx2',
'simd_transform_avx2.c',
Expand Down Expand Up @@ -221,7 +219,6 @@ transform = py.extension_module(
install: true,
subdir: pg,
)
endif

mask = py.extension_module(
'mask',
Expand Down
28 changes: 24 additions & 4 deletions src_c/simd_transform.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,24 @@
#define NO_PYGAME_C_API
#include "_surface.h"

/* TODO: This compat code should probably live somewhere like simd_shared.h.
 * That header file, however, is used inconsistently at the moment and is not
 * included everywhere it should be.
 * This block will be needed by simd_blitters and simd_fill. */

#if PG_SDL3
// SDL3 no longer includes intrinsics by default, so we need to do it explicitly
#include <SDL3/SDL_intrin.h>

/* If SDL_AVX2_INTRINSICS is defined by SDL3, we need to set the macros that
 * our code checks for AVX2 build-time support. */
#ifdef SDL_AVX2_INTRINSICS
#ifndef HAVE_IMMINTRIN_H
#define HAVE_IMMINTRIN_H 1
#endif /* HAVE_IMMINTRIN_H*/
#endif /* SDL_AVX2_INTRINSICS*/
#endif /* PG_SDL3 */

/**
* MACRO borrowed from SSE2NEON - useful for making the shuffling family of
* intrinsics easier to understand by indicating clearly what will go where.
Expand All @@ -26,7 +44,8 @@
#if defined(__SSE2__) || defined(PG_ENABLE_ARM_NEON)

void
grayscale_sse2(SDL_Surface *src, SDL_Surface *newsurf);
grayscale_sse2(SDL_Surface *src, PG_PixelFormat *src_fmt,
SDL_Surface *newsurf);
// smoothscale filters
void
filter_shrink_X_SSE2(Uint8 *srcpix, Uint8 *dstpix, int height, int srcpitch,
Expand All @@ -41,12 +60,13 @@ void
filter_expand_Y_SSE2(Uint8 *srcpix, Uint8 *dstpix, int width, int srcpitch,
int dstpitch, int srcheight, int dstheight);
void
invert_sse2(SDL_Surface *src, SDL_Surface *newsurf);
invert_sse2(SDL_Surface *src, PG_PixelFormat *src_fmt, SDL_Surface *newsurf);

#endif /* (defined(__SSE2__) || defined(PG_ENABLE_ARM_NEON)) */

// AVX2 functions
void
grayscale_avx2(SDL_Surface *src, SDL_Surface *newsurf);
grayscale_avx2(SDL_Surface *src, PG_PixelFormat *src_fmt,
SDL_Surface *newsurf);
void
invert_avx2(SDL_Surface *src, SDL_Surface *newsurf);
invert_avx2(SDL_Surface *src, PG_PixelFormat *src_fmt, SDL_Surface *newsurf);
17 changes: 8 additions & 9 deletions src_c/simd_transform_avx2.c
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ pg_avx2_at_runtime_but_uncompiled()
#if defined(__AVX2__) && defined(HAVE_IMMINTRIN_H) && \
!defined(SDL_DISABLE_IMMINTRIN_H)
void
grayscale_avx2(SDL_Surface *src, SDL_Surface *newsurf)
grayscale_avx2(SDL_Surface *src, PG_PixelFormat *src_fmt, SDL_Surface *newsurf)
{
/* See the SSE2 code for a simpler overview of this algorithm
* Current AVX2 process
Expand Down Expand Up @@ -85,12 +85,11 @@ grayscale_avx2(SDL_Surface *src, SDL_Surface *newsurf)
Uint32 *srcp = (Uint32 *)src->pixels;
Uint32 *dstp = (Uint32 *)newsurf->pixels;

Uint32 amask = src->format->Amask;
Uint32 amask = src_fmt->Amask;
Uint32 rgbmask = ~amask;

int rgb_weights =
((0x4C << src->format->Rshift) | (0x96 << src->format->Gshift) |
(0x1D << src->format->Bshift));
int rgb_weights = ((0x4C << src_fmt->Rshift) | (0x96 << src_fmt->Gshift) |
(0x1D << src_fmt->Bshift));

__m256i *srcp256 = (__m256i *)src->pixels;
__m256i *dstp256 = (__m256i *)newsurf->pixels;
Expand Down Expand Up @@ -216,7 +215,7 @@ grayscale_avx2(SDL_Surface *src, SDL_Surface *newsurf)
}

void
invert_avx2(SDL_Surface *src, SDL_Surface *newsurf)
invert_avx2(SDL_Surface *src, PG_PixelFormat *src_fmt, SDL_Surface *newsurf)
{
int s_row_skip = (src->pitch - src->w * 4) / 4;

Expand All @@ -237,7 +236,7 @@ invert_avx2(SDL_Surface *src, SDL_Surface *newsurf)
Uint32 *srcp = (Uint32 *)src->pixels;
Uint32 *dstp = (Uint32 *)newsurf->pixels;

Uint32 amask = src->format->Amask;
Uint32 amask = src_fmt->Amask;
Uint32 rgbmask = ~amask;

__m256i *srcp256 = (__m256i *)src->pixels;
Expand Down Expand Up @@ -300,12 +299,12 @@ invert_avx2(SDL_Surface *src, SDL_Surface *newsurf)
}
#else
void
grayscale_avx2(SDL_Surface *src, SDL_Surface *newsurf)
grayscale_avx2(SDL_Surface *src, PG_PixelFormat *src_fmt, SDL_Surface *newsurf)
{
BAD_AVX2_FUNCTION_CALL;
}
void
invert_avx2(SDL_Surface *src, SDL_Surface *newsurf)
invert_avx2(SDL_Surface *src, PG_PixelFormat *src_fmt, SDL_Surface *newsurf)
{
BAD_AVX2_FUNCTION_CALL;
}
Expand Down
19 changes: 9 additions & 10 deletions src_c/simd_transform_sse2.c
Original file line number Diff line number Diff line change
Expand Up @@ -424,7 +424,7 @@ filter_expand_Y_SSE2(Uint8 *srcpix, Uint8 *dstpix, int width, int srcpitch,
}

void
grayscale_sse2(SDL_Surface *src, SDL_Surface *newsurf)
grayscale_sse2(SDL_Surface *src, PG_PixelFormat *src_fmt, SDL_Surface *newsurf)
{
/* For the SSE2 SIMD version of grayscale we do one pixel at a time
* Thus we can calculate the number of loops (and pixels) by multiplying
Expand Down Expand Up @@ -460,16 +460,15 @@ grayscale_sse2(SDL_Surface *src, SDL_Surface *newsurf)
Uint32 *srcp = (Uint32 *)src->pixels;
Uint32 *dstp = (Uint32 *)newsurf->pixels;

Uint64 amask64 = ((Uint64)src->format->Amask) | src->format->Amask;
Uint64 amask64 = ((Uint64)src_fmt->Amask) | src_fmt->Amask;
Uint64 rgbmask64 = ~amask64;

Uint64 rgb_weights =
((Uint64)((0x4C << src->format->Rshift) |
(0x96 << src->format->Gshift) |
(0x1D << src->format->Bshift))
((Uint64)((0x4C << src_fmt->Rshift) | (0x96 << src_fmt->Gshift) |
(0x1D << src_fmt->Bshift))
<< 32) |
((0x4C << src->format->Rshift) | (0x96 << src->format->Gshift) |
(0x1D << src->format->Bshift));
((0x4C << src_fmt->Rshift) | (0x96 << src_fmt->Gshift) |
(0x1D << src_fmt->Bshift));

Uint64 *srcp64 = (Uint64 *)src->pixels;
Uint64 *dstp64 = (Uint64 *)newsurf->pixels;
Expand Down Expand Up @@ -613,7 +612,7 @@ grayscale_sse2(SDL_Surface *src, SDL_Surface *newsurf)
}

void
invert_sse2(SDL_Surface *src, SDL_Surface *newsurf)
invert_sse2(SDL_Surface *src, PG_PixelFormat *src_fmt, SDL_Surface *newsurf)
{
int s_row_skip = (src->pitch - src->w * 4) / 4;

Expand All @@ -638,8 +637,8 @@ invert_sse2(SDL_Surface *src, SDL_Surface *newsurf)
__m128i *srcp128 = (__m128i *)src->pixels;
__m128i *dstp128 = (__m128i *)newsurf->pixels;

mm_rgb_invert_mask = _mm_set1_epi32(~src->format->Amask);
mm_alpha_mask = _mm_set1_epi32(src->format->Amask);
mm_rgb_invert_mask = _mm_set1_epi32(~src_fmt->Amask);
mm_alpha_mask = _mm_set1_epi32(src_fmt->Amask);

while (num_batches--) {
perfect_4_pixels_batch_counter = perfect_4_pixels;
Expand Down
Loading
Loading