Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 11 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -931,7 +931,17 @@ if(SDL_ASSEMBLY)
cmake_pop_check_state()

# Enable the LoongArch LSX code paths only when both COMPILER_SUPPORTS_LSX and
# HAVE_LSXINTRIN_H are set.
if(COMPILER_SUPPORTS_LSX AND HAVE_LSXINTRIN_H)
  # NOTE(review): this single-file call looks like the pre-change form of the
  # multi-file call right below it (a diff view shows both); if both were kept,
  # "-mlsx" would be appended twice for yuv_rgb_lsx.c - confirm against the tree.
  set_property(SOURCE "${SDL3_SOURCE_DIR}/src/video/yuv2rgb/yuv_rgb_lsx.c" APPEND PROPERTY COMPILE_OPTIONS "-mlsx")
  # Append "-mlsx" to the per-file compile options of every source that
  # contains LSX code.
  set_property(SOURCE
    "${SDL3_SOURCE_DIR}/src/video/yuv2rgb/yuv_rgb_lsx.c"
    "${SDL3_SOURCE_DIR}/src/video/SDL_blit_A.c"
    "${SDL3_SOURCE_DIR}/src/video/SDL_fillrect.c"
    APPEND PROPERTY COMPILE_OPTIONS "-mlsx")

  # Exclude the same sources from precompiled-header use.
  # NOTE(review): presumably because a PCH built without -mlsx cannot be shared
  # with files compiled with it - confirm.
  set_property(SOURCE
    "${SDL3_SOURCE_DIR}/src/video/yuv2rgb/yuv_rgb_lsx.c"
    "${SDL3_SOURCE_DIR}/src/video/SDL_blit_A.c"
    "${SDL3_SOURCE_DIR}/src/video/SDL_fillrect.c"
    PROPERTY SKIP_PRECOMPILE_HEADERS 1)
  # Record that LSX support is enabled for this build.
  set(HAVE_LSX TRUE)
endif()
endif()
Expand Down
6 changes: 4 additions & 2 deletions include/SDL3/SDL_intrin.h
Original file line number Diff line number Diff line change
Expand Up @@ -281,12 +281,14 @@ _m_prefetch(void *__P)
* \sa SDL_TARGETING
*/
#define SDL_HAS_TARGET_ATTRIBS

#elif defined(__loongarch64) && defined(__GNUC__) && (__GNUC__ >= 15)
/* LoongArch requires GCC 15+ for target attribute support */
# define SDL_HAS_TARGET_ATTRIBS
#elif defined(__clang__) && defined(__has_attribute)
# if __has_attribute(target)
# define SDL_HAS_TARGET_ATTRIBS
# endif
#elif defined(__GNUC__) && (__GNUC__ + (__GNUC_MINOR__ >= 9) > 4) /* gcc >= 4.9 */
#elif defined(__GNUC__) && !defined(__loongarch64) && (__GNUC__ + (__GNUC_MINOR__ >= 9) > 4) /* gcc >= 4.9 */
# define SDL_HAS_TARGET_ATTRIBS
#elif defined(__ICC) && __ICC >= 1600
# define SDL_HAS_TARGET_ATTRIBS
Expand Down
102 changes: 102 additions & 0 deletions src/video/SDL_blit_A.c
Original file line number Diff line number Diff line change
Expand Up @@ -242,6 +242,103 @@ static void SDL_TARGETING("sse2") Blit888to888SurfaceAlphaSSE2(SDL_BlitInfo *inf

#endif

#ifdef SDL_LSX_INTRINSICS

// Per-pixel alpha blend of one 32-bit (8 bits per channel) surface onto
// another, reordering the source channels into the destination layout.
//
// Each row is processed four pixels (16 bytes) at a time with LoongArch LSX
// vectors; the remaining 0-3 pixels of the row go through the scalar
// ALPHA_BLEND_SWIZZLE_8888 macro. Everything is read from `info`: the
// destination size (dst_w/dst_h), the src/dst pixel pointers, the per-row
// skips and the two pixel format descriptions.
static void SDL_TARGETING("lsx") Blit8888to8888PixelAlphaSwizzleLSX(SDL_BlitInfo *info)
{
    int width = info->dst_w;
    int height = info->dst_h;
    Uint8 *src = info->src;
    int srcskip = info->src_skip;
    Uint8 *dst = info->dst;
    int dstskip = info->dst_skip;
    const SDL_PixelFormatDetails *srcfmt = info->src_fmt;
    const SDL_PixelFormatDetails *dstfmt = info->dst_fmt;
    // When the destination format has no alpha mask, the byte described by
    // dstAmask is forced to all ones in every written pixel.
    bool fill_alpha = !dstfmt->Amask;
    Uint32 dstAmask, dstAshift;
    const Uint8 offsets[] = {0, 0, 0, 0, 4, 4, 4, 4, 8, 8, 8, 8, 12, 12, 12, 12};

    // NOTE(review): helper is defined elsewhere; presumably it yields the
    // mask/shift of the alpha (or unused) byte of dstfmt - confirm.
    SDL_Get8888AlphaMaskAndShift(dstfmt, &dstAmask, &dstAshift);

    // Broadcast constants used by the fixed-point blend below.
    const __m128i const_0xff00 = __lsx_vreplgr2vr_h(0xff00);
    const __m128i const_128 = __lsx_vreplgr2vr_b((Uint8)128);
    // 32641 == 1 + 128 * 255
    const __m128i const_32641 = __lsx_vreplgr2vr_h(32641);
    const __m128i const_257 = __lsx_vreplgr2vr_h(257);

    // The byte offsets for the start of each pixel
    const __m128i mask_offsets = __lsx_vld(offsets, 0);

    // Byte-shuffle indices that move each source colour channel to its
    // destination position: (shift >> 3) is the channel's byte index inside
    // the source pixel, and shifting it left by the destination shift places
    // that index in the destination channel's byte. Adding mask_offsets makes
    // the indices relative to each of the four pixels in the vector.
    const __m128i convert_mask = __lsx_vadd_w(
        __lsx_vreplgr2vr_w(
            ((srcfmt->Rshift >> 3) << dstfmt->Rshift) |
            ((srcfmt->Gshift >> 3) << dstfmt->Gshift) |
            ((srcfmt->Bshift >> 3) << dstfmt->Bshift)),
        mask_offsets);

    // Shuffle indices that copy each pixel's source alpha byte into all four
    // bytes of that pixel.
    const __m128i alpha_splat_mask = __lsx_vadd_b(__lsx_vreplgr2vr_b(srcfmt->Ashift >> 3), mask_offsets);
    const __m128i alpha_fill_mask = __lsx_vreplgr2vr_w((int)dstAmask);

    while (height--) {
        int i = 0;

        // Vector path: four pixels per iteration.
        for (; i + 4 <= width; i += 4) {
            __m128i src128 = __lsx_vld(src, 0);
            __m128i dst128 = __lsx_vld(dst, 0);

            // srcA: source alpha replicated across every byte of its pixel.
            // src128: source colour channels rearranged into destination order.
            __m128i srcA = __lsx_vshuf_b(src128, src128, alpha_splat_mask);
            src128 = __lsx_vshuf_b(src128, src128, convert_mask);

            // Set the dstAmask bits of the (swizzled) source pixels.
            src128 = __lsx_vor_v(src128, alpha_fill_mask);

            // Duplicate each alpha byte into a 16-bit lane (low/high halves of
            // the vector) ...
            __m128i srca_lo = __lsx_vilvl_b(srcA, srcA);
            __m128i srca_hi = __lsx_vilvh_b(srcA, srcA);

            // ... then flip the upper byte of each lane: A ^ 0xff == 255 - A,
            // so each lane holds { A, 255 - A }.
            srca_lo = __lsx_vxor_v(srca_lo, const_0xff00);
            srca_hi = __lsx_vxor_v(srca_hi, const_0xff00);

            // Bias both operands by -128 so they can be used as signed bytes
            // in the unsigned-by-signed multiplies below.
            src128 = __lsx_vsub_b(src128, const_128);
            dst128 = __lsx_vsub_b(dst128, const_128);

            // Interleave so each 16-bit lane holds { src, dst } for one
            // channel, matching the { A, 255 - A } layout above. The even and
            // odd byte products are widened to 16 bits and added, giving
            //   A * (src - 128) + (255 - A) * (dst - 128)
            //     == A * src + (255 - A) * dst - 128 * 255
            // (intrinsic semantics per the LSX intrinsic reference).
            __m128i tmp = __lsx_vilvl_b(dst128, src128);
            __m128i dst_lo = __lsx_vsadd_h(__lsx_vmulwev_h_bu_b(srca_lo, tmp), __lsx_vmulwod_h_bu_b(srca_lo, tmp));
            tmp = __lsx_vilvh_b(dst128, src128);
            __m128i dst_hi = __lsx_vsadd_h(__lsx_vmulwev_h_bu_b(srca_hi, tmp), __lsx_vmulwod_h_bu_b(srca_hi, tmp));

            // Add back the 128 * 255 removed by the bias, plus 1 for the
            // divide-by-255 approximation that follows.
            dst_lo = __lsx_vadd_h(dst_lo, const_32641);
            dst_hi = __lsx_vadd_h(dst_hi, const_32641);

            // Keep the high 16 bits of x * 257, i.e. (x * 257) >> 16, which
            // approximates x / 255.
            dst_lo = __lsx_vmuh_hu(dst_lo, const_257);
            dst_hi = __lsx_vmuh_hu(dst_hi, const_257);

            // Narrow the eight 16-bit results of each half back to bytes
            // (unsigned saturation, shift amount 0) and store four pixels.
            dst128 = __lsx_vssrarni_bu_h(dst_hi, dst_lo, 0);
            if (fill_alpha) {
                dst128 = __lsx_vor_v(dst128, alpha_fill_mask);
            }
            __lsx_vst(dst128, dst, 0);

            src += 16;
            dst += 16;
        }

        // Scalar tail: the remaining pixels of the row, one at a time.
        // ALPHA_BLEND_SWIZZLE_8888 is defined elsewhere; dst32 is what gets
        // written back after it runs.
        for (; i < width; ++i) {
            Uint32 src32 = *(Uint32 *)src;
            Uint32 dst32 = *(Uint32 *)dst;
            ALPHA_BLEND_SWIZZLE_8888(src32, dst32, srcfmt, dstfmt);
            if (fill_alpha) {
                dst32 |= dstAmask;
            }
            *(Uint32 *)dst = dst32;
            src += 4;
            dst += 4;
        }

        // Advance both pointers to the start of the next row.
        src += srcskip;
        dst += dstskip;
    }
}

#endif

// fast RGB888->(A)RGB888 blending with surface alpha=128 special case
static void BlitRGBtoRGBSurfaceAlpha128(SDL_BlitInfo *info)
{
Expand Down Expand Up @@ -1402,6 +1499,11 @@ SDL_BlitFunc SDL_CalculateBlitA(SDL_Surface *surface)
return Blit8888to8888PixelAlphaSwizzleSSE41;
}
#endif
#ifdef SDL_LSX_INTRINSICS
if (SDL_HasLSX()) {
return Blit8888to8888PixelAlphaSwizzleLSX;
}
#endif
#if defined(SDL_NEON_INTRINSICS) && (__ARM_ARCH >= 8)
// To prevent "unused function" compiler warnings/errors
(void)Blit8888to8888PixelAlpha;
Expand Down
69 changes: 69 additions & 0 deletions src/video/SDL_fillrect.c
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,69 @@ DEFINE_SSE_FILLRECT(4, Uint32)
/* *INDENT-ON* */ // clang-format on
#endif // __SSE__

#ifdef SDL_LSX_INTRINSICS
/* *INDENT-OFF* */ // clang-format off

#define LSX_BEGIN __m128i c128 = __lsx_vreplgr2vr_w(color);

#define LSX_WORK \
for (i = n / 64; i--;) { \
__lsx_vst(c128, p, 0); \
__lsx_vst(c128, p, 16); \
__lsx_vst(c128, p, 32); \
__lsx_vst(c128, p, 48); \
p += 64; \
}

#define DEFINE_LSX_FILLRECT(bpp, type) \
static void SDL_TARGETING("lsx") SDL_FillSurfaceRect##bpp##LSX(Uint8 *pixels, int pitch, Uint32 color, int w, int h) \
{ \
int i, n; \
Uint8 *p = NULL; \
\
/* If the number of bytes per row is equal to the pitch, treat */ \
/* all rows as one long continuous row (for better performance) */ \
if ((w) * (bpp) == pitch) { \
w = w * h; \
h = 1; \
} \
\
LSX_BEGIN; \
\
while (h--) { \
n = (w) * (bpp); \
p = pixels; \
\
if (n > 63) { \
int adjust = 16 - ((uintptr_t)p & 15); \
if (adjust < 16) { \
n -= adjust; \
adjust /= (bpp); \
while (adjust--) { \
*((type *)p) = (type)color; \
p += (bpp); \
} \
} \
LSX_WORK; \
} \
if (n & 63) { \
int remainder = (n & 63); \
remainder /= (bpp); \
while (remainder--) { \
*((type *)p) = (type)color; \
p += (bpp); \
} \
} \
pixels += pitch; \
} \
\
}

DEFINE_LSX_FILLRECT(4, Uint32)

/* *INDENT-ON* */ // clang-format on
#endif /* __LSX__ */

static void SDL_FillSurfaceRect1(Uint8 *pixels, int pitch, Uint32 color, int w, int h)
{
int n;
Expand Down Expand Up @@ -339,6 +402,12 @@ bool SDL_FillSurfaceRects(SDL_Surface *dst, const SDL_Rect *rects, int count, Ui
fill_function = SDL_FillSurfaceRect4SSE;
break;
}
#endif
#ifdef SDL_LSX_INTRINSICS
if (SDL_HasLSX()) {
fill_function = SDL_FillSurfaceRect4LSX;
break;
}
#endif
fill_function = SDL_FillSurfaceRect4;
break;
Expand Down
Loading