pygame-community · ankith26 · Apr 28, 2025 · Feb 26, 2025
@@ -84,7 +84,7 @@
 #define PG_SurfaceHasRLE SDL_SurfaceHasRLE
 
 #define PG_SoftStretchNearest(src, srcrect, dst, dstrect) \
-    SDL_SoftStretch(src, srcrect, dst, dstrect, SDL_SCALEMODE_NEAREST)
+    SDL_StretchSurface(src, srcrect, dst, dstrect, SDL_SCALEMODE_NEAREST)
 
 /* Emulating SDL2 SDL_LockMutex API. In SDL3, it returns void. */
 static inline int

@@ -179,8 +179,6 @@ image = py.extension_module(
     subdir: pg,
 )
 
-# TODO: support SDL3
-if sdl_api != 3
 simd_transform_avx2 = static_library(
     'simd_transform_avx2',
     'simd_transform_avx2.c',
@@ -221,7 +219,6 @@ transform = py.extension_module(
     install: true,
     subdir: pg,
 )
-endif
 
 mask = py.extension_module(
     'mask',

@@ -1,6 +1,24 @@
 #define NO_PYGAME_C_API
 #include "_surface.h"
 
+/* TODO: This compat code should probably live somewhere like simd_shared.h.
+ * However, that header is currently used inconsistently and is not included
+ * everywhere it should be.
+ * This block will be needed by simd_blitters and simd_fill. */
+
+#if PG_SDL3
+// SDL3 no longer includes intrinsics by default; we must do so explicitly.
+#include <SDL3/SDL_intrin.h>
+
+/* If SDL3 defines SDL_AVX2_INTRINSICS, define the macros that our code
+ * checks to detect build-time AVX2 support. */
+#ifdef SDL_AVX2_INTRINSICS
+#ifndef HAVE_IMMINTRIN_H
+#define HAVE_IMMINTRIN_H 1
+#endif /* HAVE_IMMINTRIN_H*/
+#endif /* SDL_AVX2_INTRINSICS*/
+#endif /* PG_SDL3 */
+
 /**
  * MACRO borrowed from SSE2NEON - useful for making the shuffling family of
  * intrinsics easier to understand by indicating clearly what will go where.
@@ -26,7 +44,8 @@
 #if defined(__SSE2__) || defined(PG_ENABLE_ARM_NEON)
 
 void
-grayscale_sse2(SDL_Surface *src, SDL_Surface *newsurf);
+grayscale_sse2(SDL_Surface *src, PG_PixelFormat *src_fmt,
+               SDL_Surface *newsurf);
 // smoothscale filters
 void
 filter_shrink_X_SSE2(Uint8 *srcpix, Uint8 *dstpix, int height, int srcpitch,
@@ -41,12 +60,13 @@ void
 filter_expand_Y_SSE2(Uint8 *srcpix, Uint8 *dstpix, int width, int srcpitch,
                      int dstpitch, int srcheight, int dstheight);
 void
-invert_sse2(SDL_Surface *src, SDL_Surface *newsurf);
+invert_sse2(SDL_Surface *src, PG_PixelFormat *src_fmt, SDL_Surface *newsurf);
 
 #endif /* (defined(__SSE2__) || defined(PG_ENABLE_ARM_NEON)) */
 
 // AVX2 functions
 void
-grayscale_avx2(SDL_Surface *src, SDL_Surface *newsurf);
+grayscale_avx2(SDL_Surface *src, PG_PixelFormat *src_fmt,
+               SDL_Surface *newsurf);
 void
-invert_avx2(SDL_Surface *src, SDL_Surface *newsurf);
+invert_avx2(SDL_Surface *src, PG_PixelFormat *src_fmt, SDL_Surface *newsurf);
@@ -46,7 +46,7 @@ pg_avx2_at_runtime_but_uncompiled()
 #if defined(__AVX2__) && defined(HAVE_IMMINTRIN_H) && \
     !defined(SDL_DISABLE_IMMINTRIN_H)
 void
-grayscale_avx2(SDL_Surface *src, SDL_Surface *newsurf)
+grayscale_avx2(SDL_Surface *src, PG_PixelFormat *src_fmt, SDL_Surface *newsurf)
 {
     /* See the SSE2 code for a simpler overview of this algorithm
      * Current AVX2 process
@@ -85,12 +85,11 @@ grayscale_avx2(SDL_Surface *src, SDL_Surface *newsurf)
     Uint32 *srcp = (Uint32 *)src->pixels;
     Uint32 *dstp = (Uint32 *)newsurf->pixels;
 
-    Uint32 amask = src->format->Amask;
+    Uint32 amask = src_fmt->Amask;
     Uint32 rgbmask = ~amask;
 
-    int rgb_weights =
-        ((0x4C << src->format->Rshift) | (0x96 << src->format->Gshift) |
-         (0x1D << src->format->Bshift));
+    int rgb_weights = ((0x4C << src_fmt->Rshift) | (0x96 << src_fmt->Gshift) |
+                       (0x1D << src_fmt->Bshift));
 
     __m256i *srcp256 = (__m256i *)src->pixels;
     __m256i *dstp256 = (__m256i *)newsurf->pixels;
@@ -216,7 +215,7 @@ grayscale_avx2(SDL_Surface *src, SDL_Surface *newsurf)
 }
 
 void
-invert_avx2(SDL_Surface *src, SDL_Surface *newsurf)
+invert_avx2(SDL_Surface *src, PG_PixelFormat *src_fmt, SDL_Surface *newsurf)
 {
     int s_row_skip = (src->pitch - src->w * 4) / 4;
 
@@ -237,7 +236,7 @@ invert_avx2(SDL_Surface *src, SDL_Surface *newsurf)
     Uint32 *srcp = (Uint32 *)src->pixels;
     Uint32 *dstp = (Uint32 *)newsurf->pixels;
 
-    Uint32 amask = src->format->Amask;
+    Uint32 amask = src_fmt->Amask;
     Uint32 rgbmask = ~amask;
 
     __m256i *srcp256 = (__m256i *)src->pixels;
@@ -300,12 +299,12 @@ invert_avx2(SDL_Surface *src, SDL_Surface *newsurf)
 }
 #else
 void
-grayscale_avx2(SDL_Surface *src, SDL_Surface *newsurf)
+grayscale_avx2(SDL_Surface *src, PG_PixelFormat *src_fmt, SDL_Surface *newsurf)
 {
     BAD_AVX2_FUNCTION_CALL;
 }
 void
-invert_avx2(SDL_Surface *src, SDL_Surface *newsurf)
+invert_avx2(SDL_Surface *src, PG_PixelFormat *src_fmt, SDL_Surface *newsurf)
 {
     BAD_AVX2_FUNCTION_CALL;
 }

@@ -424,7 +424,7 @@ filter_expand_Y_SSE2(Uint8 *srcpix, Uint8 *dstpix, int width, int srcpitch,
 }
 
 void
-grayscale_sse2(SDL_Surface *src, SDL_Surface *newsurf)
+grayscale_sse2(SDL_Surface *src, PG_PixelFormat *src_fmt, SDL_Surface *newsurf)
 {
     /* For the SSE2 SIMD version of grayscale we do one pixel at a time
      * Thus we can calculate the number of loops (and pixels) by multiplying
@@ -460,16 +460,15 @@ grayscale_sse2(SDL_Surface *src, SDL_Surface *newsurf)
     Uint32 *srcp = (Uint32 *)src->pixels;
     Uint32 *dstp = (Uint32 *)newsurf->pixels;
 
-    Uint64 amask64 = ((Uint64)src->format->Amask) | src->format->Amask;
+    Uint64 amask64 = ((Uint64)src_fmt->Amask) | src_fmt->Amask;
     Uint64 rgbmask64 = ~amask64;
 
     Uint64 rgb_weights =
-        ((Uint64)((0x4C << src->format->Rshift) |
-                  (0x96 << src->format->Gshift) |
-                  (0x1D << src->format->Bshift))
+        ((Uint64)((0x4C << src_fmt->Rshift) | (0x96 << src_fmt->Gshift) |
+                  (0x1D << src_fmt->Bshift))
          << 32) |
-        ((0x4C << src->format->Rshift) | (0x96 << src->format->Gshift) |
-         (0x1D << src->format->Bshift));
+        ((0x4C << src_fmt->Rshift) | (0x96 << src_fmt->Gshift) |
+         (0x1D << src_fmt->Bshift));
 
     Uint64 *srcp64 = (Uint64 *)src->pixels;
     Uint64 *dstp64 = (Uint64 *)newsurf->pixels;
@@ -613,7 +612,7 @@ grayscale_sse2(SDL_Surface *src, SDL_Surface *newsurf)
 }
 
 void
-invert_sse2(SDL_Surface *src, SDL_Surface *newsurf)
+invert_sse2(SDL_Surface *src, PG_PixelFormat *src_fmt, SDL_Surface *newsurf)
 {
     int s_row_skip = (src->pitch - src->w * 4) / 4;
 
@@ -638,8 +637,8 @@ invert_sse2(SDL_Surface *src, SDL_Surface *newsurf)
     __m128i *srcp128 = (__m128i *)src->pixels;
     __m128i *dstp128 = (__m128i *)newsurf->pixels;
 
-    mm_rgb_invert_mask = _mm_set1_epi32(~src->format->Amask);
-    mm_alpha_mask = _mm_set1_epi32(src->format->Amask);
+    mm_rgb_invert_mask = _mm_set1_epi32(~src_fmt->Amask);
+    mm_alpha_mask = _mm_set1_epi32(src_fmt->Amask);
 
     while (num_batches--) {
         perfect_4_pixels_batch_counter = perfect_4_pixels;