Use builtins for byteswapping

Make use of XCHG/BSWAP on x86, REV16 and REV on ARMv6-M and above, and whatever optimized instructions are available on other platforms. Defines extra CONFIG variables and removes the unused endian-swapping functions. Fixes some oversights in the use of the macros.
lucic71 · Jun 29, 2022 · 903a186 · 903a186
1 parent 7e6d93d
commit 903a186
Show file tree

Hide file tree

Showing 14 changed files with 54 additions and 225 deletions.
diff --git a/binfmt/libnxflat/libnxflat_init.c b/binfmt/libnxflat/libnxflat_init.c
@@ -168,7 +168,7 @@ int nxflat_init(const char *filename, struct nxflat_loadinfo_s *loadinfo)
    */
 
   loadinfo->relocstart  = NTOHL(loadinfo->header.h_relocstart);
-  loadinfo->reloccount  = ntohs(loadinfo->header.h_reloccount);
+  loadinfo->reloccount  = NTOHS(loadinfo->header.h_reloccount);
 
   return 0;
 }
diff --git a/include/endian.h b/include/endian.h
@@ -51,23 +51,40 @@
 
 /* Common byte swapping macros */
 
-#define __SWAP_UINT16_ISMACRO 1
-#undef  __SWAP_UINT32_ISMACRO
-
-#ifdef __SWAP_UINT16_ISMACRO
+#ifdef CONFIG_HAVE_BUILTIN_BSWAP16
+#  define __swap_uint16 __builtin_bswap16
+#else
 #  define __swap_uint16(n) \
     (uint16_t)(((((uint16_t)(n)) & 0x00ff) << 8) | \
                ((((uint16_t)(n)) >> 8) & 0x00ff))
 #endif
 
-#ifdef __SWAP_UINT32_ISMACRO
+#ifdef CONFIG_HAVE_BUILTIN_BSWAP32
+#  define __swap_uint32 __builtin_bswap32
+#else
 #  define __swap_uint32(n) \
     (uint32_t)(((((uint32_t)(n)) & 0x000000ffUL) << 24) | \
                ((((uint32_t)(n)) & 0x0000ff00UL) <<  8) | \
                ((((uint32_t)(n)) & 0x00ff0000UL) >>  8) | \
                ((((uint32_t)(n)) & 0xff000000UL) >> 24))
 #endif
 
+#ifdef CONFIG_HAVE_LONG_LONG
+#  ifdef CONFIG_HAVE_BUILTIN_BSWAP64
+#    define __swap_uint64 __builtin_bswap64
+#  else
+#    define __swap_uint64(n) \
+        (uint64_t)(((((uint64_t)(n)) & 0x00000000000000ffULL) << 56) | \
+                   ((((uint64_t)(n)) & 0x000000000000ff00ULL) << 40) | \
+                   ((((uint64_t)(n)) & 0x0000000000ff0000ULL) << 24) | \
+                   ((((uint64_t)(n)) & 0x00000000ff000000ULL) <<  8) | \
+                   ((((uint64_t)(n)) & 0x000000ff00000000ULL) >>  8) | \
+                   ((((uint64_t)(n)) & 0x0000ff0000000000ULL) >> 24) | \
+                   ((((uint64_t)(n)) & 0x00ff000000000000ULL) >> 40) | \
+                   ((((uint64_t)(n)) & 0xff00000000000000ULL) >> 56))
+#  endif
+#endif
+
 /* Endian-specific definitions */
 
 #ifdef CONFIG_ENDIAN_BIG
@@ -120,21 +137,4 @@
 #    define le64toh(n)        (n)
 #  endif
 #endif
-
-/****************************************************************************
- * Public Function Prototypes
- ****************************************************************************/
-
-#ifndef __SWAP_UINT16_ISMACRO
-uint16_t __swap_uint16(uint16_t n);
-#endif
-
-#ifndef __SWAP_UINT32_ISMACRO
-uint32_t __swap_uint32(uint32_t n);
-#endif
-
-#if CONFIG_HAVE_LONG_LONG
-uint64_t __swap_uint64(uint64_t n);
-#endif
-
 #endif /* __INCLUDE_ENDIAN_H */
diff --git a/include/netinet/in.h b/include/netinet/in.h
@@ -30,6 +30,7 @@
 #include <sys/types.h>
 #include <sys/socket.h>
 #include <stdint.h>
+#include <endian.h>
 
 /****************************************************************************
  * Pre-processor Definitions
@@ -217,26 +218,14 @@
 /* This macro to convert a 16/32-bit constant values quantity from host byte
  * order to network byte order.  The 16-bit version of this macro is required
  * for uIP:
- *
- *   Author Adam Dunkels <adam@dunkels.com>
- *   Copyright (c) 2001-2003, Adam Dunkels.
- *   All rights reserved.
  */
 
 #ifdef CONFIG_ENDIAN_BIG
-# define HTONS(ns) (ns)
-# define HTONL(nl) (nl)
+#  define HTONS(ns) (ns)
+#  define HTONL(nl) (nl)
 #else
-# define HTONS(ns) \
-  (unsigned short) \
-    (((((unsigned short)(ns)) & 0x00ff) << 8) | \
-     ((((unsigned short)(ns)) >> 8) & 0x00ff))
-# define HTONL(nl) \
-  (unsigned long) \
-    (((((unsigned long)(nl)) & 0x000000ffUL) << 24) | \
-     ((((unsigned long)(nl)) & 0x0000ff00UL) <<  8) | \
-     ((((unsigned long)(nl)) & 0x00ff0000UL) >>  8) | \
-     ((((unsigned long)(nl)) & 0xff000000UL) >> 24))
+#  define HTONS __swap_uint16
+#  define HTONL __swap_uint32
 #endif
 
 #define NTOHS(hs) HTONS(hs)

diff --git a/include/nuttx/compiler.h b/include/nuttx/compiler.h
@@ -73,6 +73,20 @@
 
 #ifdef __GNUC__
 
+/* Built-ins */
+#  if __GNUC__ >= 4
+#    define CONFIG_HAVE_BUILTIN_BSWAP16 1
+#    define CONFIG_HAVE_BUILTIN_BSWAP32 1
+#    define CONFIG_HAVE_BUILTIN_BSWAP64 1
+#    define CONFIG_HAVE_BUILTIN_CTZ 1
+#    define CONFIG_HAVE_BUILTIN_CLZ 1
+#    define CONFIG_HAVE_BUILTIN_POPCOUNT 1
+#    define CONFIG_HAVE_BUILTIN_POPCOUNTLL 1
+#    define CONFIG_HAVE_BUILTIN_FFS 1
+#    define CONFIG_HAVE_BUILTIN_FFSL 1
+#    define CONFIG_HAVE_BUILTIN_FFSLL 1
+#  endif
+
 /* Pre-processor */
 
 #  define CONFIG_CPP_HAVE_VARARGS 1 /* Supports variable argument macros */
@@ -103,18 +117,6 @@
 
 #  define offsetof(a, b) __builtin_offsetof(a, b)
 
-/* GCC 4.x have __builtin_ctz(|l|ll) and __builtin_clz(|l|ll). These count
- * trailing/leading zeros of input number and typically will generate few
- * fast bit-counting instructions. Inputting zero to these functions is
- * undefined and needs to be taken care of by the caller.
- */
-
-#  if __GNUC__ >= 4
-#    define CONFIG_HAVE_BUILTIN_CTZ      1
-#    define CONFIG_HAVE_BUILTIN_CLZ      1
-#    define CONFIG_HAVE_BUILTIN_POPCOUNT 1
-#  endif
-
 /* Attributes
  *
  * GCC supports weak symbols which can be used to reduce code size because

diff --git a/libs/libc/Makefile b/libs/libc/Makefile
@@ -27,7 +27,6 @@ include builtin/Make.defs
 include ctype/Make.defs
 include dirent/Make.defs
 include dlfcn/Make.defs
-include endian/Make.defs
 include errno/Make.defs
 include eventfd/Make.defs
 include fixedmath/Make.defs

diff --git a/libs/libc/endian/Make.defs b/libs/libc/endian/Make.defs
diff --git a/libs/libc/endian/lib_swap16.c b/libs/libc/endian/lib_swap16.c
diff --git a/libs/libc/endian/lib_swap32.c b/libs/libc/endian/lib_swap32.c
diff --git a/libs/libc/endian/lib_swap64.c b/libs/libc/endian/lib_swap64.c
diff --git a/libs/libc/net/lib_htonl.c b/libs/libc/net/lib_htonl.c
@@ -33,21 +33,10 @@
 
 uint32_t htonl(uint32_t hl)
 {
-#ifdef CONFIG_ENDIAN_BIG
-  return hl;
-#else
-  return (((hl) >> 24) |
-          (((hl) >>  8) & 0x0000ff00) |
-          (((hl) <<  8) & 0x00ff0000) |
-           ((hl) << 24));
-#endif
+  return HTONL(hl);
 }
 
 uint32_t ntohl(uint32_t nl)
 {
-#ifdef CONFIG_ENDIAN_BIG
-  return nl;
-#else
-  return htonl(nl);
-#endif
+  return NTOHL(nl);
 }
diff --git a/libs/libc/net/lib_htons.c b/libs/libc/net/lib_htons.c
@@ -38,9 +38,5 @@ uint16_t htons(uint16_t hs)
 
 uint16_t ntohs(uint16_t ns)
 {
-#ifdef CONFIG_ENDIAN_BIG
-  return ns;
-#else
-  return HTONS(ns);
-#endif
+  return NTOHS(ns);
 }
diff --git a/libs/libc/string/lib_ffs.c b/libs/libc/string/lib_ffs.c
@@ -55,7 +55,9 @@ int ffs(int j)
 
   if (j != 0)
     {
-#ifdef CONFIG_HAVE_BUILTIN_CTZ
+#ifdef CONFIG_HAVE_BUILTIN_FFS
+      ret = __builtin_ffs(j);
+#elif defined (CONFIG_HAVE_BUILTIN_CTZ)
       /* Count trailing zeros function can be used to implement ffs. */
 
       ret = __builtin_ctz(j) + 1;

diff --git a/libs/libc/string/lib_ffsl.c b/libs/libc/string/lib_ffsl.c
@@ -55,7 +55,9 @@ int ffsl(long j)
 
   if (j != 0)
     {
-#ifdef CONFIG_HAVE_BUILTIN_CTZ
+#ifdef CONFIG_HAVE_BUILTIN_FFSL
+      ret = __builtin_ffsl(j);
+#elif defined (CONFIG_HAVE_BUILTIN_CTZ)
       /* Count trailing zeros function can be used to implement ffs. */
 
       ret = __builtin_ctzl(j) + 1;

diff --git a/libs/libc/string/lib_ffsll.c b/libs/libc/string/lib_ffsll.c
@@ -57,7 +57,9 @@ int ffsll(long long j)
 
   if (j != 0)
     {
-#ifdef CONFIG_HAVE_BUILTIN_CTZ
+#ifdef CONFIG_HAVE_BUILTIN_FFSLL
+      ret = __builtin_ffsll(j);
+#elif defined (CONFIG_HAVE_BUILTIN_CTZ)
       /* Count trailing zeros function can be used to implement ffs. */
 
       ret = __builtin_ctzll(j) + 1;