Skip to content

Commit

Permalink
Implement fast SSE2 lrint and lrintf, closes libsndfile#660
Browse files Browse the repository at this point in the history
lrint() and lrintf() are ridiculously slow when libsndfile is built
using Visual C++.

On x86 and AMD64 platforms this patch adds SSE2 optimized versions.

Processors without SSE2 are assumed to be no longer in use, so no runtime
CPU feature checks are performed.
  • Loading branch information
evpobr committed Dec 8, 2020
1 parent f7fa0cb commit e7f5137
Show file tree
Hide file tree
Showing 27 changed files with 144 additions and 104 deletions.
3 changes: 3 additions & 0 deletions NEWS
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
Unreleased
* Implement fast SSE2 optimized psf_lrint() and psf_lrintf() functions to
improve performance when libsndfile is built using Visual C++ (especially)
and other compilers on x86 and AMD64 platforms.
* Documentation:
* Move site to new URL: http://libsndfile.github.io/libsndfile/
* Convert documentation pages from HTML to Markdown
Expand Down
1 change: 1 addition & 0 deletions cmake/SndFileChecks.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ check_include_file (stdint.h HAVE_STDINT_H)
check_include_file (sys/time.h HAVE_SYS_TIME_H)
check_include_file (sys/types.h HAVE_SYS_TYPES_H)
check_include_file (unistd.h HAVE_UNISTD_H)
check_include_file (immintrin.h HAVE_IMMINTRIN_H)

# Never checked
# check_include_file (stdlib.h HAVE_STDLIB_H)
Expand Down
1 change: 1 addition & 0 deletions configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,7 @@ AC_CHECK_HEADERS([endian.h])
AC_CHECK_HEADERS([byteswap.h])
AC_CHECK_HEADERS([locale.h])
AC_CHECK_HEADERS([sys/time.h])
AC_CHECK_HEADERS([immintrin.h])

AC_HEADER_SYS_WAIT

Expand Down
8 changes: 4 additions & 4 deletions src/alaw.c
Original file line number Diff line number Diff line change
Expand Up @@ -340,9 +340,9 @@ static inline void
f2alaw_array (const float *ptr, int count, unsigned char *buffer, float normfact)
{ while (--count >= 0)
{ if (ptr [count] >= 0)
buffer [count] = alaw_encode [lrintf (normfact * ptr [count])] ;
buffer [count] = alaw_encode [psf_lrintf (normfact * ptr [count])] ;
else
buffer [count] = 0x7F & alaw_encode [- lrintf (normfact * ptr [count])] ;
buffer [count] = 0x7F & alaw_encode [- psf_lrintf (normfact * ptr [count])] ;
} ;
} /* f2alaw_array */

Expand All @@ -352,9 +352,9 @@ d2alaw_array (const double *ptr, int count, unsigned char *buffer, double normfa
{ if (!isfinite (ptr [count]))
buffer [count] = 0 ;
else if (ptr [count] >= 0)
buffer [count] = alaw_encode [lrint (normfact * ptr [count])] ;
buffer [count] = alaw_encode [psf_lrint (normfact * ptr [count])] ;
else
buffer [count] = 0x7F & alaw_encode [- lrint (normfact * ptr [count])] ;
buffer [count] = 0x7F & alaw_encode [- psf_lrint (normfact * ptr [count])] ;
} ;
} /* d2alaw_array */

Expand Down
2 changes: 1 addition & 1 deletion src/caf.c
Original file line number Diff line number Diff line change
Expand Up @@ -370,7 +370,7 @@ caf_read_header (SF_PRIVATE *psf)
return SFE_MALFORMED_FILE ;
} ;

psf->sf.samplerate = lrint (srate) ;
psf->sf.samplerate = psf_lrint (srate) ;

psf_binheader_readf (psf, "mE44444", &desc.fmt_id, &desc.fmt_flags, &desc.pkt_bytes, &desc.frames_per_packet,
&desc.channels_per_frame, &desc.bits_per_chan) ;
Expand Down
16 changes: 8 additions & 8 deletions src/common.c
Original file line number Diff line number Diff line change
Expand Up @@ -1606,7 +1606,7 @@ psf_f2s_array (const float *src, short *dest, int count, int normalize)

normfact = normalize ? (1.0 * 0x7FFF) : 1.0 ;
while (--count >= 0)
dest [count] = lrintf (src [count] * normfact) ;
dest [count] = psf_lrintf (src [count] * normfact) ;

return ;
} /* psf_f2s_array */
Expand All @@ -1628,7 +1628,7 @@ psf_f2s_clip_array (const float *src, short *dest, int count, int normalize)
continue ;
} ;

dest [count] = lrintf (scaled_value) ;
dest [count] = psf_lrintf (scaled_value) ;
} ;

return ;
Expand All @@ -1640,7 +1640,7 @@ psf_d2s_array (const double *src, short *dest, int count, int normalize)

normfact = normalize ? (1.0 * 0x7FFF) : 1.0 ;
while (--count >= 0)
dest [count] = lrint (src [count] * normfact) ;
dest [count] = psf_lrint (src [count] * normfact) ;

return ;
} /* psf_d2s_array */
Expand All @@ -1662,7 +1662,7 @@ psf_d2s_clip_array (const double *src, short *dest, int count, int normalize)
continue ;
} ;

dest [count] = lrint (scaled_value) ;
dest [count] = psf_lrint (scaled_value) ;
} ;

return ;
Expand All @@ -1675,7 +1675,7 @@ psf_f2i_array (const float *src, int *dest, int count, int normalize)

normfact = normalize ? (1.0 * 0x7FFFFFFF) : 1.0 ;
while (--count >= 0)
dest [count] = lrintf (src [count] * normfact) ;
dest [count] = psf_lrintf (src [count] * normfact) ;

return ;
} /* psf_f2i_array */
Expand All @@ -1697,7 +1697,7 @@ psf_f2i_clip_array (const float *src, int *dest, int count, int normalize)
continue ;
} ;

dest [count] = lrintf (scaled_value) ;
dest [count] = psf_lrintf (scaled_value) ;
} ;

return ;
Expand All @@ -1709,7 +1709,7 @@ psf_d2i_array (const double *src, int *dest, int count, int normalize)

normfact = normalize ? (1.0 * 0x7FFFFFFF) : 1.0 ;
while (--count >= 0)
dest [count] = lrint (src [count] * normfact) ;
dest [count] = psf_lrint (src [count] * normfact) ;

return ;
} /* psf_d2i_array */
Expand All @@ -1731,7 +1731,7 @@ psf_d2i_clip_array (const double *src, int *dest, int count, int normalize)
continue ;
} ;

dest [count] = lrint (scaled_value) ;
dest [count] = psf_lrint (scaled_value) ;
} ;

return ;
Expand Down
28 changes: 28 additions & 0 deletions src/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,10 @@
#include "sndfile.h"
#endif

#ifdef USE_SSE2
#include <immintrin.h>
#endif

#ifdef __cplusplus
#error "This code is not designed to be compiled with a C++ compiler."
#endif
Expand Down Expand Up @@ -546,6 +550,8 @@ typedef struct sf_private_tag
SF_CHUNK_ITERATOR * (*next_chunk_iterator) (struct sf_private_tag*, SF_CHUNK_ITERATOR * iterator) ;
int (*get_chunk_size) (struct sf_private_tag*, const SF_CHUNK_ITERATOR * iterator, SF_CHUNK_INFO * chunk_info) ;
int (*get_chunk_data) (struct sf_private_tag*, const SF_CHUNK_ITERATOR * iterator, SF_CHUNK_INFO * chunk_info) ;

int cpu_flags ;
} SF_PRIVATE ;


Expand Down Expand Up @@ -998,6 +1004,28 @@ psf_strlcpy (char *dest, size_t n, const char *src)
dest [n - 1] = 0 ;
} /* psf_strlcpy */

/*------------------------------------------------------------------------------------
** SIMD optimized math functions.
*/

/* Convert a float to an int.
**
** When USE_SSE2 is defined, x is loaded into an SSE register and converted
** with _mm_cvtss_si32 (). Otherwise the C library lrintf () is called and
** its long result is narrowed to int.
*/
static inline int psf_lrintf (float x)
{
#ifndef USE_SSE2
	return (int) lrintf (x) ;
#else
	const __m128 packed = _mm_load_ss (&x) ;

	return _mm_cvtss_si32 (packed) ;
#endif
} /* psf_lrintf */

/* Convert a double to an int.
**
** When USE_SSE2 is defined, x is loaded into an SSE register and converted
** with _mm_cvtsd_si32 (). Otherwise the C library lrint () is called and
** its long result is narrowed to int.
*/
static inline int psf_lrint (double x)
{
#ifndef USE_SSE2
	return (int) lrint (x) ;
#else
	const __m128d packed = _mm_load_sd (&x) ;

	return _mm_cvtsd_si32 (packed) ;
#endif
} /* psf_lrint */

/*------------------------------------------------------------------------------------
** Other helper functions.
*/
Expand Down
3 changes: 3 additions & 0 deletions src/config.h.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,9 @@
/* Define to 1 if you have the <unistd.h> header file. */
#cmakedefine01 HAVE_UNISTD_H

/* Define to 1 if you have the <immintrin.h> header file. */
#cmakedefine01 HAVE_IMMINTRIN_H

/* Define to 1 if you have the `vsnprintf' function. */
#cmakedefine01 HAVE_VSNPRINTF

Expand Down
16 changes: 8 additions & 8 deletions src/double64.c
Original file line number Diff line number Diff line change
Expand Up @@ -368,7 +368,7 @@ double64_be_write (double in, unsigned char *out)
out [1] |= (exponent << 4) & 0xF0 ;

in *= 0x20000000 ;
mantissa = lrint (floor (in)) ;
mantissa = psf_lrint (floor (in)) ;

out [1] |= (mantissa >> 24) & 0xF ;
out [2] = (mantissa >> 16) & 0xFF ;
Expand All @@ -377,7 +377,7 @@ double64_be_write (double in, unsigned char *out)

in = fmod (in, 1.0) ;
in *= 0x1000000 ;
mantissa = lrint (floor (in)) ;
mantissa = psf_lrint (floor (in)) ;

out [5] = (mantissa >> 16) & 0xFF ;
out [6] = (mantissa >> 8) & 0xFF ;
Expand Down Expand Up @@ -408,7 +408,7 @@ double64_le_write (double in, unsigned char *out)
out [6] |= (exponent << 4) & 0xF0 ;

in *= 0x20000000 ;
mantissa = lrint (floor (in)) ;
mantissa = psf_lrint (floor (in)) ;

out [6] |= (mantissa >> 24) & 0xF ;
out [5] = (mantissa >> 16) & 0xFF ;
Expand All @@ -417,7 +417,7 @@ double64_le_write (double in, unsigned char *out)

in = fmod (in, 1.0) ;
in *= 0x1000000 ;
mantissa = lrint (floor (in)) ;
mantissa = psf_lrint (floor (in)) ;

out [2] = (mantissa >> 16) & 0xFF ;
out [1] = (mantissa >> 8) & 0xFF ;
Expand Down Expand Up @@ -487,7 +487,7 @@ double64_get_capability (SF_PRIVATE *psf)
static void
d2s_array (const double *src, int count, short *dest, double scale)
{ while (--count >= 0)
{ dest [count] = lrint (scale * src [count]) ;
{ dest [count] = psf_lrint (scale * src [count]) ;
} ;
} /* d2s_array */

Expand All @@ -501,14 +501,14 @@ d2s_clip_array (const double *src, int count, short *dest, double scale)
else if (CPU_CLIPS_NEGATIVE == 0 && tmp < -32768.0)
dest [count] = SHRT_MIN ;
else
dest [count] = lrint (tmp) ;
dest [count] = psf_lrint (tmp) ;
} ;
} /* d2s_clip_array */

static void
d2i_array (const double *src, int count, int *dest, double scale)
{ while (--count >= 0)
{ dest [count] = lrint (scale * src [count]) ;
{ dest [count] = psf_lrint (scale * src [count]) ;
} ;
} /* d2i_array */

Expand All @@ -522,7 +522,7 @@ d2i_clip_array (const double *src, int count, int *dest, double scale)
else if (CPU_CLIPS_NEGATIVE == 0 && tmp < (-1.0 * INT_MAX))
dest [count] = INT_MIN ;
else
dest [count] = lrint (tmp) ;
dest [count] = psf_lrint (tmp) ;
} ;
} /* d2i_clip_array */

Expand Down
4 changes: 2 additions & 2 deletions src/dwvw.c
Original file line number Diff line number Diff line change
Expand Up @@ -628,7 +628,7 @@ dwvw_write_f (SF_PRIVATE *psf, const float *ptr, sf_count_t len)
while (len > 0)
{ writecount = (len >= bufferlen) ? bufferlen : len ;
for (k = 0 ; k < writecount ; k++)
iptr [k] = lrintf (normfact * ptr [total + k]) ;
iptr [k] = psf_lrintf (normfact * ptr [total + k]) ;
count = dwvw_encode_data (psf, pdwvw, iptr, writecount) ;

total += count ;
Expand Down Expand Up @@ -660,7 +660,7 @@ dwvw_write_d (SF_PRIVATE *psf, const double *ptr, sf_count_t len)
while (len > 0)
{ writecount = (len >= bufferlen) ? bufferlen : len ;
for (k = 0 ; k < writecount ; k++)
iptr [k] = lrint (normfact * ptr [total + k]) ;
iptr [k] = psf_lrint (normfact * ptr [total + k]) ;
count = dwvw_encode_data (psf, pdwvw, iptr, writecount) ;

total += count ;
Expand Down
26 changes: 13 additions & 13 deletions src/flac.c
Original file line number Diff line number Diff line change
Expand Up @@ -881,7 +881,7 @@ flac_command (SF_PRIVATE * psf, int command, void * data, int datasize)
*/
quality = (*((double *) data)) * 8.0 ;
/* Clip range. */
pflac->compression = lrint (SF_MAX (0.0, SF_MIN (8.0, quality))) ;
pflac->compression = psf_lrint (SF_MAX (0.0, SF_MIN (8.0, quality))) ;

psf_log_printf (psf, "%s : Setting SFC_SET_COMPRESSION_LEVEL to %u.\n", __func__, pflac->compression) ;

Expand Down Expand Up @@ -1186,7 +1186,7 @@ f2flac8_clip_array (const float *src, int32_t *dest, int count, int normalize)
{ dest [count] = -0x80 ;
continue ;
} ;
dest [count] = lrintf (scaled_value) ;
dest [count] = psf_lrintf (scaled_value) ;
} ;

return ;
Expand All @@ -1208,7 +1208,7 @@ f2flac16_clip_array (const float *src, int32_t *dest, int count, int normalize)
{ dest [count] = -0x8000 ;
continue ;
} ;
dest [count] = lrintf (scaled_value) ;
dest [count] = psf_lrintf (scaled_value) ;
} ;
} /* f2flac16_clip_array */

Expand All @@ -1229,7 +1229,7 @@ f2flac24_clip_array (const float *src, int32_t *dest, int count, int normalize)
{ dest [count] = -0x800000 ;
continue ;
}
dest [count] = lrintf (scaled_value) ;
dest [count] = psf_lrintf (scaled_value) ;
} ;

return ;
Expand All @@ -1240,23 +1240,23 @@ f2flac8_array (const float *src, int32_t *dest, int count, int normalize)
{ float normfact = normalize ? (1.0 * 0x7F) : 1.0 ;

while (--count >= 0)
dest [count] = lrintf (src [count] * normfact) ;
dest [count] = psf_lrintf (src [count] * normfact) ;
} /* f2flac8_array */

static void
f2flac16_array (const float *src, int32_t *dest, int count, int normalize)
{ float normfact = normalize ? (1.0 * 0x7FFF) : 1.0 ;

while (--count >= 0)
dest [count] = lrintf (src [count] * normfact) ;
dest [count] = psf_lrintf (src [count] * normfact) ;
} /* f2flac16_array */

static void
f2flac24_array (const float *src, int32_t *dest, int count, int normalize)
{ float normfact = normalize ? (1.0 * 0x7FFFFF) : 1.0 ;

while (--count >= 0)
dest [count] = lrintf (src [count] * normfact) ;
dest [count] = psf_lrintf (src [count] * normfact) ;
} /* f2flac24_array */

static sf_count_t
Expand Down Expand Up @@ -1317,7 +1317,7 @@ d2flac8_clip_array (const double *src, int32_t *dest, int count, int normalize)
{ dest [count] = -0x80 ;
continue ;
} ;
dest [count] = lrint (scaled_value) ;
dest [count] = psf_lrint (scaled_value) ;
} ;

return ;
Expand All @@ -1339,7 +1339,7 @@ d2flac16_clip_array (const double *src, int32_t *dest, int count, int normalize)
{ dest [count] = -0x8000 ;
continue ;
} ;
dest [count] = lrint (scaled_value) ;
dest [count] = psf_lrint (scaled_value) ;
} ;

return ;
Expand All @@ -1361,7 +1361,7 @@ d2flac24_clip_array (const double *src, int32_t *dest, int count, int normalize)
{ dest [count] = -0x800000 ;
continue ;
} ;
dest [count] = lrint (scaled_value) ;
dest [count] = psf_lrint (scaled_value) ;
} ;

return ;
Expand All @@ -1372,23 +1372,23 @@ d2flac8_array (const double *src, int32_t *dest, int count, int normalize)
{ double normfact = normalize ? (1.0 * 0x7F) : 1.0 ;

while (--count >= 0)
dest [count] = lrint (src [count] * normfact) ;
dest [count] = psf_lrint (src [count] * normfact) ;
} /* d2flac8_array */

static void
d2flac16_array (const double *src, int32_t *dest, int count, int normalize)
{ double normfact = normalize ? (1.0 * 0x7FFF) : 1.0 ;

while (--count >= 0)
dest [count] = lrint (src [count] * normfact) ;
dest [count] = psf_lrint (src [count] * normfact) ;
} /* d2flac16_array */

static void
d2flac24_array (const double *src, int32_t *dest, int count, int normalize)
{ double normfact = normalize ? (1.0 * 0x7FFFFF) : 1.0 ;

while (--count >= 0)
dest [count] = lrint (src [count] * normfact) ;
dest [count] = psf_lrint (src [count] * normfact) ;
} /* d2flac24_array */

static sf_count_t
Expand Down
Loading

0 comments on commit e7f5137

Please sign in to comment.