From 19b37fe7b76b885d714fe869dfd9993740bb3c58 Mon Sep 17 00:00:00 2001
From: Felix Johnny <felixjohnny.thomasmathibalan@arm.com>
Date: Fri, 19 Feb 2021 14:28:55 +0100
Subject: [PATCH] CMSIS-Core : Add support for __SXTAB16_RORn

Targeted optimization for GCC when __SXTAB16
is used with __ROR.

Updated description for __SXTB16_RORn

Change-Id: I7fbb9afb0a2d5a2f2b239d27af7177a1607ac9a1

Fix review comment: Update description
---
 CMSIS/Core/Include/cmsis_armcc.h          |   14 +-
 CMSIS/Core/Include/cmsis_armclang.h       |   14 +-
 CMSIS/Core/Include/cmsis_armclang_ltm.h   |   10 +-
 CMSIS/Core/Include/cmsis_gcc.h            |   12 +
 CMSIS/Core/Include/cmsis_iccarm.h         |    2 +
 CMSIS/CoreValidation/Source/CV_CoreSimd.c |   29 +-
 CMSIS/DoxyGen/Core/src/Ref_cm4_simd.txt   | 1172 +++++++++++----------
 7 files changed, 665 insertions(+), 588 deletions(-)

diff --git a/CMSIS/Core/Include/cmsis_armcc.h b/CMSIS/Core/Include/cmsis_armcc.h
index 237ff6ec3e..ced0a2c65d 100644
--- a/CMSIS/Core/Include/cmsis_armcc.h
+++ b/CMSIS/Core/Include/cmsis_armcc.h
@@ -1,11 +1,11 @@
 /**************************************************************************//**
  * @file     cmsis_armcc.h
  * @brief    CMSIS compiler ARMCC (Arm Compiler 5) header file
- * @version  V5.2.1
- * @date     26. March 2020
+ * @version  V5.3.0
+ * @date     19. February 2021
  ******************************************************************************/
 /*
- * Copyright (c) 2009-2020 Arm Limited. All rights reserved.
+ * Copyright (c) 2009-2021 Arm Limited. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -63,9 +63,9 @@
 #ifndef   __STATIC_INLINE
   #define __STATIC_INLINE                        static __inline
 #endif
-#ifndef   __STATIC_FORCEINLINE                 
+#ifndef   __STATIC_FORCEINLINE
   #define __STATIC_FORCEINLINE                   static __forceinline
-#endif           
+#endif
 #ifndef   __NO_RETURN
   #define __NO_RETURN                            __declspec(noreturn)
 #endif
@@ -461,7 +461,7 @@ __STATIC_INLINE void __set_FPSCR(uint32_t fpscr)
  */
 #define __DMB()                           __dmb(0xF)
 
-                  
+
 /**
   \brief   Reverse byte order (32 bit)
   \details Reverses the byte order in unsigned integer value. For example, 0x12345678 becomes 0x78563412.
@@ -878,6 +878,8 @@ __attribute__((always_inline)) __STATIC_INLINE uint32_t __USAT(int32_t val, uint
 
 #define __SXTB16_RORn(ARG1, ARG2)        __SXTB16(__ROR(ARG1, ARG2))
 
+#define __SXTAB16_RORn(ARG1, ARG2, ARG3) __SXTAB16(ARG1, __ROR(ARG2, ARG3))
+
 #endif /* ((defined (__ARM_ARCH_7EM__) && (__ARM_ARCH_7EM__ == 1))     ) */
 /*@} end of group CMSIS_SIMD_intrinsics */
 
diff --git a/CMSIS/Core/Include/cmsis_armclang.h b/CMSIS/Core/Include/cmsis_armclang.h
index 05d6396c07..b14038c25f 100644
--- a/CMSIS/Core/Include/cmsis_armclang.h
+++ b/CMSIS/Core/Include/cmsis_armclang.h
@@ -1,11 +1,11 @@
 /**************************************************************************//**
  * @file     cmsis_armclang.h
  * @brief    CMSIS compiler armclang (Arm Compiler 6) header file
- * @version  V5.3.2
- * @date     16. December 2020
+ * @version  V5.4.0
+ * @date     19. February 2021
  ******************************************************************************/
 /*
- * Copyright (c) 2009-2020 Arm Limited. All rights reserved.
+ * Copyright (c) 2009-2021 Arm Limited. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -617,7 +617,7 @@ __STATIC_FORCEINLINE void __TZ_set_FAULTMASK_NS(uint32_t faultMask)
   Devices without ARMv8-M Main Extensions (i.e. Cortex-M23) lack the non-secure
   Stack Pointer Limit register hence zero is returned always in non-secure
   mode.
-  
+
   \details Returns the current value of the Process Stack Pointer Limit (PSPLIM).
   \return               PSPLIM Register value
  */
@@ -665,7 +665,7 @@ __STATIC_FORCEINLINE uint32_t __TZ_get_PSPLIM_NS(void)
   Devices without ARMv8-M Main Extensions (i.e. Cortex-M23) lack the non-secure
   Stack Pointer Limit register hence the write is silently ignored in non-secure
   mode.
-  
+
   \details Assigns the given value to the Process Stack Pointer Limit (PSPLIM).
   \param [in]    ProcStackPtrLimit  Process Stack Pointer Limit value to set
  */
@@ -1248,7 +1248,7 @@ __STATIC_FORCEINLINE uint32_t __USAT(int32_t val, uint32_t sat)
 #if ((defined (__ARM_ARCH_8M_MAIN__  ) && (__ARM_ARCH_8M_MAIN__   == 1)) || \
      (defined (__ARM_ARCH_8M_BASE__  ) && (__ARM_ARCH_8M_BASE__   == 1)) || \
      (defined (__ARM_ARCH_8_1M_MAIN__) && (__ARM_ARCH_8_1M_MAIN__ == 1))     )
-           
+
 /**
   \brief   Load-Acquire (8 bit)
   \details Executes a LDAB instruction for 8 bit value.
@@ -1472,6 +1472,8 @@ __STATIC_FORCEINLINE void __STL(uint32_t value, volatile uint32_t *ptr)
 
 #define __SXTB16_RORn(ARG1, ARG2)        __SXTB16(__ROR(ARG1, ARG2))
 
+#define __SXTAB16_RORn(ARG1, ARG2, ARG3) __SXTAB16(ARG1, __ROR(ARG2, ARG3))
+
 __STATIC_FORCEINLINE int32_t __SMMLA (int32_t op1, int32_t op2, int32_t op3)
 {
   int32_t result;
diff --git a/CMSIS/Core/Include/cmsis_armclang_ltm.h b/CMSIS/Core/Include/cmsis_armclang_ltm.h
index ff362cf2a1..3972d01307 100644
--- a/CMSIS/Core/Include/cmsis_armclang_ltm.h
+++ b/CMSIS/Core/Include/cmsis_armclang_ltm.h
@@ -1,8 +1,8 @@
 /**************************************************************************//**
  * @file     cmsis_armclang_ltm.h
  * @brief    CMSIS compiler armclang (Arm Compiler 6) header file
- * @version  V1.4.0
- * @date     29. January 2021
+ * @version  V1.5.0
+ * @date     19. February 2021
  ******************************************************************************/
 /*
  * Copyright (c) 2018-2021 Arm Limited. All rights reserved.
@@ -614,7 +614,7 @@ __STATIC_FORCEINLINE void __TZ_set_FAULTMASK_NS(uint32_t faultMask)
   Devices without ARMv8-M Main Extensions (i.e. Cortex-M23) lack the non-secure
   Stack Pointer Limit register hence zero is returned always in non-secure
   mode.
-  
+
   \details Returns the current value of the Process Stack Pointer Limit (PSPLIM).
   \return               PSPLIM Register value
  */
@@ -660,7 +660,7 @@ __STATIC_FORCEINLINE uint32_t __TZ_get_PSPLIM_NS(void)
   Devices without ARMv8-M Main Extensions (i.e. Cortex-M23) lack the non-secure
   Stack Pointer Limit register hence the write is silently ignored in non-secure
   mode.
-  
+
   \details Assigns the given value to the Process Stack Pointer Limit (PSPLIM).
   \param [in]    ProcStackPtrLimit  Process Stack Pointer Limit value to set
  */
@@ -1897,6 +1897,8 @@ __STATIC_FORCEINLINE  int32_t __QSUB( int32_t op1,  int32_t op2)
 
 #define __SXTB16_RORn(ARG1, ARG2)        __SXTB16(__ROR(ARG1, ARG2))
 
+#define __SXTAB16_RORn(ARG1, ARG2, ARG3) __SXTAB16(ARG1, __ROR(ARG2, ARG3))
+
 __STATIC_FORCEINLINE int32_t __SMMLA (int32_t op1, int32_t op2, int32_t op3)
 {
   int32_t result;
diff --git a/CMSIS/Core/Include/cmsis_gcc.h b/CMSIS/Core/Include/cmsis_gcc.h
index c6a81e8509..13fb64f122 100644
--- a/CMSIS/Core/Include/cmsis_gcc.h
+++ b/CMSIS/Core/Include/cmsis_gcc.h
@@ -2003,6 +2003,18 @@ __STATIC_FORCEINLINE uint32_t __SXTAB16(uint32_t op1, uint32_t op2)
   return(result);
 }
 
+/* __SXTAB16(op1, __ROR(op2, rotate)); a compile-time constant rotate of 8, 16 or 24 is encoded in the instruction itself. */
+__STATIC_FORCEINLINE uint32_t __SXTAB16_RORn(uint32_t op1, uint32_t op2, uint32_t rotate)
+{
+  uint32_t result;
+  if (__builtin_constant_p(rotate) && ((rotate == 8U) || (rotate == 16U) || (rotate == 24U))) {
+    __ASM ("sxtab16 %0, %1, %2, ROR %3" : "=r" (result) : "r" (op1) , "r" (op2) , "i" (rotate)); /* only writes result, so not volatile */
+  } else {
+    result = __SXTAB16(op1, __ROR(op2, rotate)); /* generic path: rotate first, then extend-and-add */
+  }
+  return result;
+}
+
 __STATIC_FORCEINLINE uint32_t __SMUAD  (uint32_t op1, uint32_t op2)
 {
   uint32_t result;
diff --git a/CMSIS/Core/Include/cmsis_iccarm.h b/CMSIS/Core/Include/cmsis_iccarm.h
index 58e8e360ce..45e90af6c2 100644
--- a/CMSIS/Core/Include/cmsis_iccarm.h
+++ b/CMSIS/Core/Include/cmsis_iccarm.h
@@ -966,4 +966,6 @@ __packed struct  __iar_u32 { uint32_t v; };
 
 #define __SXTB16_RORn(ARG1, ARG2) __SXTB16(__ROR(ARG1, ARG2))
 
+#define __SXTAB16_RORn(ARG1, ARG2, ARG3) __SXTAB16(ARG1, __ROR(ARG2, ARG3))
+
 #endif /* __CMSIS_ICCARM_H__ */
diff --git a/CMSIS/CoreValidation/Source/CV_CoreSimd.c b/CMSIS/CoreValidation/Source/CV_CoreSimd.c
index e3725d961f..8068537cd3 100644
--- a/CMSIS/CoreValidation/Source/CV_CoreSimd.c
+++ b/CMSIS/CoreValidation/Source/CV_CoreSimd.c
@@ -85,6 +85,7 @@ void TC_CoreSimd_ParSat16 (void) {
   __SXTB16
   __SXTB16_RORn
   __SXTAB16
+  __SXTAB16_RORn
   __UXTB16
   __UXTAB16
 */
@@ -114,18 +115,42 @@ void TC_CoreSimd_PackUnpack (void) {
   res_s32 = __SXTB16_RORn(op1_s32, 24);
   ASSERT_TRUE(res_s32 == (int32_t)0x1FF80);
 
-  /* --- __SXTAB16 Test ---------------------------------------------- */
+  /* --- __SXTAB16 Test --------------------------------------------- */
   op1_s32 = (int32_t)0x000D0008;
   op2_s32 = (int32_t)0x80830168;
   res_s32 = __SXTAB16(op1_s32, op2_s32);
   ASSERT_TRUE(res_s32 == (int32_t)0xFF900070);
 
+  /* --- __SXTAB16_ROR8 Test ---------------------------------------- */
+  op1_s32 = (int32_t)0x000A000A;
+  op2_s32 = (int32_t)0x80830168;
+  res_s32 = __SXTAB16_RORn(op1_s32, op2_s32, 8);
+  ASSERT_TRUE(res_s32 == (int32_t)0xFF8A000B);
+
+  /* --- __SXTAB16_ROR8 Test ---------------------------------------- */
+  op1_s32 = (int32_t)0xFFF6FFF6;
+  op2_s32 = (int32_t)0x80830168;
+  res_s32 = __SXTAB16_RORn(op1_s32, op2_s32, 8);
+  ASSERT_TRUE(res_s32 == (int32_t)0xFF76FFF7);
+
+  /* --- __SXTAB16_ROR16 Test --------------------------------------- */
+  op1_s32 = (int32_t)0xFFF60015;
+  op2_s32 = (int32_t)0x70880168;
+  res_s32 = __SXTAB16_RORn(op1_s32, op2_s32, 16);
+  ASSERT_TRUE(res_s32 == (int32_t)0x5EFF9D);
+
+  /* --- __SXTAB16_ROR24 Test --------------------------------------- */
+  op1_s32 = (int32_t)0xFFF60015;
+  op2_s32 = (int32_t)0x70880168;
+  res_s32 = __SXTAB16_RORn(op1_s32, op2_s32, 24);
+  ASSERT_TRUE(res_s32 == (int32_t)0xFFF70085);
+
   /* --- __UXTB16 Test ---------------------------------------------- */
   op1_s32 = (int32_t)0x80830168;
   res_s32 = __UXTB16(op1_s32);
   ASSERT_TRUE(res_s32 == 0x00830068);
 
-  /* --- __UXTAB16 Test ---------------------------------------------- */
+  /* --- __UXTAB16 Test --------------------------------------------- */
   op1_s32 =          0x000D0008;
   op2_s32 = (int32_t)0x80830168;
   res_s32 = __UXTAB16(op1_s32, op2_s32);
diff --git a/CMSIS/DoxyGen/Core/src/Ref_cm4_simd.txt b/CMSIS/DoxyGen/Core/src/Ref_cm4_simd.txt
index c6a819b38b..995e06b977 100644
--- a/CMSIS/DoxyGen/Core/src/Ref_cm4_simd.txt
+++ b/CMSIS/DoxyGen/Core/src/Ref_cm4_simd.txt
@@ -1,5 +1,5 @@
 /* ######################  CMSIS Support for Cortex-M4 SIMD Instructions  ####################### */
-/** 
+/**
 
 \defgroup  intrinsic_SIMD_gr  Intrinsic Functions for SIMD Instructions [only Cortex-M4 and Cortex-M7]
 \brief     Access to dedicated SIMD instructions.
@@ -7,9 +7,9 @@
 \details
 
 <b>Single Instruction Multiple Data (SIMD)</b> extensions are provided <b>only for Cortex-M4 and Cortex-M7 cores</b>
-to simplify development of application software. SIMD extensions increase the processing capability 
-without materially increasing the power consumption. The SIMD extensions are completely transparent 
-to the operating system (OS), allowing existing OS ports to be used.  
+to simplify development of application software. SIMD extensions increase the processing capability
+without materially increasing the power consumption. The SIMD extensions are completely transparent
+to the operating system (OS), allowing existing OS ports to be used.
 
 <b>SIMD Features:</b>
 
@@ -71,7 +71,7 @@ uint32_t dual_mul_add_products(uint32_t val1, uint32_t val2)
             \li the addition of the third bytes of each operand, in the third byte of the return value.
             \li the addition of the fourth bytes of each operand, in the fourth byte of the return value.
         \par
-            Each bit in APSR.GE is set or cleared for each byte in the return value, depending on 
+            Each bit in APSR.GE is set or cleared for each byte in the return value, depending on
              the results of the operation.
         \par
             If \em res is the return value, then:
@@ -82,19 +82,19 @@ uint32_t dual_mul_add_products(uint32_t val1, uint32_t val2)
 
     \par Operation:
         \code
-   res[7:0]   = val1[7:0]   + val2[7:0] 
-   res[15:8]  = val1[15:8]  + val2[15:8] 
-   res[23:16] = val1[23:16] + val2[23:16] 
-   res[31:24] = val1[31:24] + val2[31:24]          
+   res[7:0]   = val1[7:0]   + val2[7:0]
+   res[15:8]  = val1[15:8]  + val2[15:8]
+   res[23:16] = val1[23:16] + val2[23:16]
+   res[31:24] = val1[31:24] + val2[31:24]
         \endcode
 */
 uint32_t __SADD8(uint32_t val1, uint32_t val2);
 
 
 /**************************************************************************************************/
-/** \ingroup    Intrinsic_SIMD_gr  
+/** \ingroup    Intrinsic_SIMD_gr
     \brief      Q setting quad 8-bit saturating addition
-    \details     This function enables you to perform four 8-bit integer additions, saturating the results to 
+    \details     This function enables you to perform four 8-bit integer additions, saturating the results to
           the 8-bit signed integer range -2<sup>7</sup> \<= x \<= 2<sup>7</sup> - 1.
     \param      val1    first four 8-bit summands.
     \param      val2    second four 8-bit summands.
@@ -109,17 +109,17 @@ uint32_t __SADD8(uint32_t val1, uint32_t val2);
 
     \par Operation:
         \code
-   res[7:0]   = val1[7:0]   + val2[7:0] 
-   res[15:8]  = val1[15:8]  + val2[15:8] 
-   res[23:16] = val1[23:16] + val2[23:16] 
-   res[31:24] = val1[31:24] + val2[31:24]          
+   res[7:0]   = val1[7:0]   + val2[7:0]
+   res[15:8]  = val1[15:8]  + val2[15:8]
+   res[23:16] = val1[23:16] + val2[23:16]
+   res[31:24] = val1[31:24] + val2[31:24]
         \endcode
 */
 uint32_t __QADD8(uint32_t val1, uint32_t val2);
 
 
 /**************************************************************************************************/
-/** 
+/**
     \brief      Quad 8-bit signed addition with halved results
     \details     This function enables you to perform four signed 8-bit integer additions, halving the results.
     \param      val1    first four 8-bit summands.
@@ -136,19 +136,19 @@ uint32_t __QADD8(uint32_t val1, uint32_t val2);
    res[7:0]   = val1[7:0]   + val2[7:0]  >> 1
    res[15:8]  = val1[15:8]  + val2[15:8] >> 1
    res[23:16] = val1[23:16] + val2[23:16] >> 1
-   res[31:24] = val1[31:24] + val2[31:24] >> 1         
+   res[31:24] = val1[31:24] + val2[31:24] >> 1
         \endcode
 */
 uint32_t __SHADD8(uint32_t val1, uint32_t val2);
 
 
 /**************************************************************************************************/
-/** \ingroup    Intrinsic_SIMD_gr  
+/** \ingroup    Intrinsic_SIMD_gr
     \brief      GE setting quad 8-bit unsigned addition
-    
+
     \details    This function enables you to perform four unsigned 8-bit integer additions.
-                The GE bits of the APSR are set according to the results. 
-                
+                The GE bits of the APSR are set according to the results.
+
     \param      val1    first four 8-bit summands for each addition.
     \param      val2    second four 8-bit summands for each addition.
 
@@ -157,12 +157,12 @@ uint32_t __SHADD8(uint32_t val1, uint32_t val2);
             \li the halved addition of the second bytes from each operand, in the second byte of the return value.
             \li the halved addition of the third bytes from each operand, in the third byte of the return value.
             \li the halved addition of the fourth bytes from each operand, in the fourth byte of the return value.
-            
-    \par    
+
+    \par
             Each bit in APSR.GE is set or cleared for each byte in the return value, depending on the results of the operation.
-    
-    \par    
-            If \em res is the return value, then: 
+
+    \par
+            If \em res is the return value, then:
             \li if res[7:0] \>= 0x100 then APSR.GE[0] = 1 else 0
             \li if res[15:8] \>= 0x100 then APSR.GE[1] = 1 else 0
             \li if res[23:16] \>= 0x100 then APSR.GE[2] = 1 else 0
@@ -170,7 +170,7 @@ uint32_t __SHADD8(uint32_t val1, uint32_t val2);
 
     \par Operation:
         \code
-   res[7:0]   = val1[7:0]   + val2[7:0] 
+   res[7:0]   = val1[7:0]   + val2[7:0]
    res[15:8]  = val1[15:8]  + val2[15:8]
    res[23:16] = val1[23:16] + val2[23:16]
    res[31:24] = val1[31:24] + val2[31:24]
@@ -180,12 +180,12 @@ uint32_t __UADD8(uint32_t val1, uint32_t val2);
 
 
 /**************************************************************************************************/
-/** 
+/**
     \brief      Quad 8-bit unsigned saturating addition
-    
-    \details    This function enables you to perform four unsigned 8-bit integer additions, saturating the 
+
+    \details    This function enables you to perform four unsigned 8-bit integer additions, saturating the
            results to the 8-bit unsigned integer range 0 \< x \< 2<sup>8</sup> - 1.
-                
+
     \param      val1    first four 8-bit summands.
     \param      val2    second four 8-bit summands.
 
@@ -194,13 +194,13 @@ uint32_t __UADD8(uint32_t val1, uint32_t val2);
             \li the halved addition of the second bytes in each operand, in the second byte of the return value.
             \li the halved addition of the third bytes in each operand, in the third byte of the return value.
             \li the halved addition of the fourth bytes in each operand, in the fourth byte of the return value.
-            
-    \par    
+
+    \par
             The results are saturated to the 8-bit unsigned integer range 0 \< x \< 2<sup>8</sup> - 1.
 
     \par Operation:
         \code
-   res[7:0]   = val1[7:0]   + val2[7:0] 
+   res[7:0]   = val1[7:0]   + val2[7:0]
    res[15:8]  = val1[15:8]  + val2[15:8]
    res[23:16] = val1[23:16] + val2[23:16]
    res[31:24] = val1[31:24] + val2[31:24]
@@ -210,11 +210,11 @@ uint32_t __UQADD8(uint32_t val1, uint32_t val2);
 
 
 /**************************************************************************************************/
-/** 
+/**
     \brief      Quad 8-bit unsigned addition with halved results
-    
+
     \details    This function enables you to perform four unsigned 8-bit integer additions, halving the results.
-                
+
     \param      val1    first four 8-bit summands.
     \param      val2    second four 8-bit summands.
 
@@ -223,7 +223,7 @@ uint32_t __UQADD8(uint32_t val1, uint32_t val2);
             \li the halved addition of the second bytes in each operand, in the second byte of the return value.
             \li the halved addition of the third bytes in each operand, in the third byte of the return value.
             \li the halved addition of the fourth bytes in each operand, in the fourth byte of the return value.
-            
+
     \par Operation:
         \code
    res[7:0]   = val1[7:0]   + val2[7:0]   >> 1
@@ -236,28 +236,28 @@ uint32_t __UHADD8(uint32_t val1, uint32_t val2);
 
 
 /**************************************************************************************************/
-/** 
+/**
     \brief      GE setting quad 8-bit signed subtraction
-    
+
     \details    This function enables you to perform four 8-bit signed integer subtractions.<br>
                 The GE bits in the APSR are set according to the results.
-                
+
     \param      val1    first four 8-bit  operands of each subtraction.
     \param      val2    second four 8-bit  operands of each subtraction.
 
     \returns
-            \li the subtraction of the first byte in the second operand from the first byte in the 
+            \li the subtraction of the first byte in the second operand from the first byte in the
                 first operand, in the first bytes of the return value.
-            \li the subtraction of the second byte in the second operand from the second byte in 
+            \li the subtraction of the second byte in the second operand from the second byte in
                 the first operand, in the second byte of the return value.
-            \li the subtraction of the third byte in the second operand from the third byte in the 
+            \li the subtraction of the third byte in the second operand from the third byte in the
                 first operand, in the third byte of the return value.
-            \li the subtraction of the fourth byte in the second operand from the fourth byte in 
+            \li the subtraction of the fourth byte in the second operand from the fourth byte in
                 the first operand, in the fourth byte of the return value.
 
-        \par    Each bit in APSR.GE is set or cleared for each byte in the return value, depending on 
-             the results of the operation. 
-             
+        \par    Each bit in APSR.GE is set or cleared for each byte in the return value, depending on
+             the results of the operation.
+
         \par
             If \em res is the return value, then:
             \li if res[8:0] \>= 0 then APSR.GE[0] = 1 else 0
@@ -268,7 +268,7 @@ uint32_t __UHADD8(uint32_t val1, uint32_t val2);
 
     \par Operation:
         \code
-   res[7:0]   = val1[7:0]   - val2[7:0] 
+   res[7:0]   = val1[7:0]   - val2[7:0]
    res[15:8]  = val1[15:8]  - val2[15:8]
    res[23:16] = val1[23:16] - val2[23:16]
    res[31:24] = val1[31:24] - val2[31:24]
@@ -278,32 +278,32 @@ uint32_t __SSUB8(uint32_t val1, uint32_t val2);
 
 
 /**************************************************************************************************/
-/** 
+/**
     \brief      Q setting quad 8-bit saturating subtract
-    
-    \details    This function enables you to perform four 8-bit integer subtractions, saturating the results 
+
+    \details    This function enables you to perform four 8-bit integer subtractions, saturating the results
           to the 8-bit signed integer range -2<sup>7</sup> \<= x \<= 2<sup>7</sup> - 1.
-                
+
     \param      val1    first four 8-bit  operands.
     \param      val2    second four 8-bit  operands.
 
     \returns
-            \li the subtraction of the first byte in the second operand from the first byte in the 
+            \li the subtraction of the first byte in the second operand from the first byte in the
                 first operand, in the first bytes of the return value.
-            \li the subtraction of the second byte in the second operand from the second byte in 
+            \li the subtraction of the second byte in the second operand from the second byte in
                 the first operand, in the second byte of the return value.
-            \li the subtraction of the third byte in the second operand from the third byte in the 
+            \li the subtraction of the third byte in the second operand from the third byte in the
                 first operand, in the third byte of the return value.
-            \li the subtraction of the fourth byte in the second operand from the fourth byte in 
+            \li the subtraction of the fourth byte in the second operand from the fourth byte in
                 the first operand, in the fourth byte of the return value.
 
-        \par    
-            The returned results are saturated to the 8-bit signed integer range -2<sup>7</sup> \<= x \<= 2<sup>7</sup> - 1. 
-             
-            
+        \par
+            The returned results are saturated to the 8-bit signed integer range -2<sup>7</sup> \<= x \<= 2<sup>7</sup> - 1.
+
+
     \par Operation:
         \code
-   res[7:0]   = val1[7:0]   - val2[7:0] 
+   res[7:0]   = val1[7:0]   - val2[7:0]
    res[15:8]  = val1[15:8]  - val2[15:8]
    res[23:16] = val1[23:16] - val2[23:16]
    res[31:24] = val1[31:24] - val2[31:24]
@@ -313,25 +313,25 @@ uint32_t __QSUB8(uint32_t val1, uint32_t val2);
 
 
 /**************************************************************************************************/
-/** 
+/**
     \brief      Quad 8-bit signed subtraction with halved results
-    
-    \details    This function enables you to perform four signed 8-bit integer subtractions, halving the 
+
+    \details    This function enables you to perform four signed 8-bit integer subtractions, halving the
      results.
-                
+
     \param      val1    first four 8-bit  operands.
     \param      val2    second four 8-bit  operands.
 
     \returns
-            \li the halved subtraction of the first byte in the second operand from the first byte in the 
+            \li the halved subtraction of the first byte in the second operand from the first byte in the
                 first operand, in the first bytes of the return value.
-            \li the halved subtraction of the second byte in the second operand from the second byte in 
+            \li the halved subtraction of the second byte in the second operand from the second byte in
                 the first operand, in the second byte of the return value.
-            \li the halved subtraction of the third byte in the second operand from the third byte in the 
+            \li the halved subtraction of the third byte in the second operand from the third byte in the
                 first operand, in the third byte of the return value.
-            \li the halved subtraction of the fourth byte in the second operand from the fourth byte in 
+            \li the halved subtraction of the fourth byte in the second operand from the fourth byte in
                 the first operand, in the fourth byte of the return value.
-            
+
     \par Operation:
         \code
    res[7:0]   = val1[7:0]   - val2[7:0]   >> 1
@@ -344,36 +344,36 @@ uint32_t __SHSUB8(uint32_t val1, uint32_t val2);
 
 
 /**************************************************************************************************/
-/** 
+/**
     \brief      GE setting quad 8-bit unsigned subtract
-    
+
     \details    This function enables you to perform four 8-bit unsigned integer subtractions.
           The GE bits in the APSR are set according to the results.
-                
+
     \param      val1    first four 8-bit  operands.
     \param      val2    second four 8-bit  operands.
 
     \returns
-            \li the subtraction of the first byte in the second operand from the first byte in the 
+            \li the subtraction of the first byte in the second operand from the first byte in the
                 first operand, in the first bytes of the return value.
-            \li the subtraction of the second byte in the second operand from the second byte in 
+            \li the subtraction of the second byte in the second operand from the second byte in
                 the first operand, in the second byte of the return value.
-            \li the subtraction of the third byte in the second operand from the third byte in the 
+            \li the subtraction of the third byte in the second operand from the third byte in the
                 first operand, in the third byte of the return value.
-            \li the subtraction of the fourth byte in the second operand from the fourth byte in 
+            \li the subtraction of the fourth byte in the second operand from the fourth byte in
                 the first operand, in the fourth byte of the return value.
-            
+
     \par
-        Each bit in APSR.GE is set or cleared for each byte in the return value, depending on 
+        Each bit in APSR.GE is set or cleared for each byte in the return value, depending on
              the results of the operation.
-             
-    \par 
+
+    \par
         If \em res is the return value, then:
             \li if res[8:0] \>= 0 then APSR.GE[0] = 1 else 0
             \li if res[15:8] \>= 0 then APSR.GE[1] = 1 else 0
             \li if res[23:16] \>= 0 then APSR.GE[2] = 1 else 0
             \li if res[31:24] \>= 0 then APSR.GE[3] = 1 else 0
-        
+
 
     \par Operation:
         \code
@@ -387,29 +387,29 @@ uint32_t __USUB8(uint32_t val1, uint32_t val2);
 
 
 /**************************************************************************************************/
-/** 
+/**
     \brief      Quad 8-bit unsigned saturating subtraction
-    
-    \details    This function enables you to perform four unsigned 8-bit integer subtractions, saturating 
+
+    \details    This function enables you to perform four unsigned 8-bit integer subtractions, saturating
           the results to the 8-bit unsigned integer range 0 \< x \< 2<sup>8</sup> - 1.
-                
+
     \param      val1    first four 8-bit  operands.
     \param      val2    second four 8-bit  operands.
 
     \returns
-            \li the subtraction of the first byte in the second operand from the first byte in the 
+            \li the subtraction of the first byte in the second operand from the first byte in the
                 first operand, in the first bytes of the return value.
-            \li the subtraction of the second byte in the second operand from the second byte in 
+            \li the subtraction of the second byte in the second operand from the second byte in
                 the first operand, in the second byte of the return value.
-            \li the subtraction of the third byte in the second operand from the third byte in the 
+            \li the subtraction of the third byte in the second operand from the third byte in the
                 first operand, in the third byte of the return value.
-            \li the subtraction of the fourth byte in the second operand from the fourth byte in 
+            \li the subtraction of the fourth byte in the second operand from the fourth byte in
                 the first operand, in the fourth byte of the return value.
-            
+
     \par
         The results are saturated to the 8-bit unsigned integer range 0 \< x \< 2<sup>8</sup> - 1.
 
-        
+
     \par Operation:
         \code
    res[7:0]   = val1[7:0]   - val2[7:0]
@@ -422,25 +422,25 @@ uint32_t __UQSUB8(uint32_t val1, uint32_t val2);
 
 
 /**************************************************************************************************/
-/** 
+/**
     \brief      Quad 8-bit unsigned subtraction with halved results
-    
-    \details    This function enables you to perform four unsigned 8-bit integer subtractions, halving the 
+
+    \details    This function enables you to perform four unsigned 8-bit integer subtractions, halving the
      results.
-                
+
     \param      val1    first four 8-bit  operands.
     \param      val2    second four 8-bit  operands.
 
     \returns
-            \li the halved subtraction of the first byte in the second operand from the first byte in the 
+            \li the halved subtraction of the first byte in the second operand from the first byte in the
                 first operand, in the first bytes of the return value.
-            \li the halved subtraction of the second byte in the second operand from the second byte in 
+            \li the halved subtraction of the second byte in the second operand from the second byte in
                 the first operand, in the second byte of the return value.
-            \li the halved subtraction of the third byte in the second operand from the third byte in the 
+            \li the halved subtraction of the third byte in the second operand from the third byte in the
                 first operand, in the third byte of the return value.
-            \li the halved subtraction of the fourth byte in the second operand from the fourth byte in 
+            \li the halved subtraction of the fourth byte in the second operand from the fourth byte in
                 the first operand, in the fourth byte of the return value.
-            
+
     \par Operation:
         \code
    res[7:0]   = val1[7:0]   - val2[7:0]    >> 1
@@ -453,20 +453,20 @@ uint32_t __UHSUB8(uint32_t val1, uint32_t val2);
 
 
 /**************************************************************************************************/
-/** 
+/**
     \brief      GE setting dual 16-bit signed addition
-    
+
     \details    This function enables you to perform two 16-bit signed integer additions.<br>
           The GE bits in the APSR are set according to the results of the additions.
-                
+
     \param      val1    first two 16-bit  summands.
     \param      val2    second two 16-bit  summands.
 
     \returns
             \li the addition of the low halfwords in the low halfword of the return value.
             \li the addition of the high halfwords in the high halfword of the return value.
-            
-    \par 
+
+    \par
         Each bit in APSR.GE is set or cleared for each byte in the return value, depending on
              the results of the operation.
     \par
@@ -485,21 +485,21 @@ uint32_t __SADD16(uint32_t val1, uint32_t val2);
 
 
 /**************************************************************************************************/
-/** 
+/**
     \brief      Q setting dual 16-bit saturating addition
-    
-    \details    This function enables you to perform two 16-bit integer arithmetic additions in parallel, 
+
+    \details    This function enables you to perform two 16-bit integer arithmetic additions in parallel,
           saturating the results to the 16-bit signed integer range -2<sup>15</sup> \<= x \<= 2<sup>15</sup> - 1.
-                
+
     \param      val1    first two 16-bit  summands.
     \param      val2    second two 16-bit  summands.
 
     \returns
             \li the saturated addition of the low halfwords, in the low halfword of the return value.
             \li the saturated addition of the high halfwords, in the high halfword of the return value.
-            
-    \par 
-        The returned results are saturated to the 16-bit signed integer 
+
+    \par
+        The returned results are saturated to the 16-bit signed integer
              range -2<sup>15</sup> \<= x \<= 2<sup>15</sup> - 1
 
     \par Operation:
@@ -512,19 +512,19 @@ uint32_t __QADD16(uint32_t val1, uint32_t val2);
 
 
 /**************************************************************************************************/
-/** 
+/**
     \brief      Dual 16-bit signed addition with halved results
-    
-    \details    This function enables you to perform two signed 16-bit integer additions, halving the 
+
+    \details    This function enables you to perform two signed 16-bit integer additions, halving the
      results.
-                
+
     \param      val1    first two 16-bit  summands.
     \param      val2    second two 16-bit  summands.
 
     \returns
             \li the halved addition of the low halfwords, in the low halfword of the return value.
             \li the halved addition of the high halfwords, in the high halfword of the return value.
-            
+
     \par Operation:
         \code
    res[15:0]  = val1[15:0]  + val2[15:0]  >> 1
@@ -535,32 +535,32 @@ uint32_t __SHADD16(uint32_t val1, uint32_t val2);
 
 
 /**************************************************************************************************/
-/** 
+/**
     \brief      GE setting dual 16-bit unsigned addition
-    
+
     \details    This function enables you to perform two 16-bit unsigned integer additions.<br>
           The GE bits in the APSR are set according to the results.
-                
+
     \param      val1    first two 16-bit  summands for each addition.
     \param      val2    second two 16-bit  summands for each addition.
 
     \returns
-            \li the addition of the low halfwords in each operand, in the low halfword of the 
+            \li the addition of the low halfwords in each operand, in the low halfword of the
                 return value.
-            \li the addition of the high halfwords in each operand, in the high halfword of the 
+            \li the addition of the high halfwords in each operand, in the high halfword of the
                 return value.
-            
+
     \par
-        Each bit in APSR.GE is set or cleared for each byte in the return value, depending on 
+        Each bit in APSR.GE is set or cleared for each byte in the return value, depending on
              the results of the operation.
     \par
         If \em res is the return value, then:
             \li if res[15:0] \>= 0x10000 then APSR.GE[0] = 11 else 00
             \li if res[31:16] \>= 0x10000 then APSR.GE[1] = 11 else 00
-        
+
     \par Operation:
         \code
-   res[15:0]  = val1[15:0]  + val2[15:0] 
+   res[15:0]  = val1[15:0]  + val2[15:0]
    res[31:16] = val1[31:16] + val2[31:16]
         \endcode
 */
@@ -568,28 +568,28 @@ uint32_t __UADD16(uint32_t val1, uint32_t val2);
 
 
 /**************************************************************************************************/
-/** 
+/**
     \brief      Dual 16-bit unsigned saturating addition
-    
-    \details    This function enables you to perform two unsigned 16-bit integer additions, saturating the 
+
+    \details    This function enables you to perform two unsigned 16-bit integer additions, saturating the
      results to the 16-bit unsigned integer range 0 \< x \< 2<sup>16</sup> - 1.
-                
+
     \param      val1    first two 16-bit  summands.
     \param      val2    second two 16-bit  summands.
 
     \returns
-            \li the addition of the low halfword in the first operand and the low halfword in the 
+            \li the addition of the low halfword in the first operand and the low halfword in the
                 second operand, in the low halfword of the return value.
-            \li the addition of the high halfword in the first operand and the high halfword in the 
+            \li the addition of the high halfword in the first operand and the high halfword in the
                 second operand, in the high halfword of the return value.
-            
+
     \par
-        The results are saturated to the 16-bit unsigned integer 
+        The results are saturated to the 16-bit unsigned integer
              range 0 \< x \< 2<sup>16</sup> - 1.
-        
+
     \par Operation:
         \code
-   res[15:0]  = val1[15:0]  + val2[15:0] 
+   res[15:0]  = val1[15:0]  + val2[15:0]
    res[31:16] = val1[31:16] + val2[31:16]
         \endcode
 */
@@ -597,21 +597,21 @@ uint32_t __UQADD16(uint32_t val1, uint32_t val2);
 
 
 /**************************************************************************************************/
-/** 
+/**
     \brief      Dual 16-bit unsigned addition with halved results
-    
-    \details    This function enables you to perform two unsigned 16-bit integer additions, halving the 
+
+    \details    This function enables you to perform two unsigned 16-bit integer additions, halving the
      results.
-                
+
     \param      val1    first two 16-bit  summands.
     \param      val2    second two 16-bit  summands.
 
     \returns
-            \li the halved addition of the low halfwords in each operand, in the low halfword of 
+            \li the halved addition of the low halfwords in each operand, in the low halfword of
                 the return value.
-            \li the halved addition of the high halfwords in each operand, in the high halfword 
+            \li the halved addition of the high halfwords in each operand, in the high halfword
                 of the return value.
-        
+
     \par Operation:
         \code
    res[15:0]  = val1[15:0]  + val2[15:0]   >> 1
@@ -622,30 +622,30 @@ uint32_t __UHADD16(uint32_t val1, uint32_t val2);
 
 
 /**************************************************************************************************/
-/** 
+/**
     \brief      GE setting dual 16-bit signed subtraction
-    
+
     \details    This function enables you to perform two 16-bit signed integer subtractions.<br>
           The GE bits in the APSR are set according to the results.
-                
+
     \param      val1    first two 16-bit operands of each subtraction.
     \param      val2    second two 16-bit operands of each subtraction.
 
     \returns
-            \li the subtraction of the low halfword in the second operand from the low halfword 
+            \li the subtraction of the low halfword in the second operand from the low halfword
                 in the first operand, in the low halfword of the return value.
-            \li the subtraction of the high halfword in the second operand from the high halfword 
+            \li the subtraction of the high halfword in the second operand from the high halfword
                 in the first operand, in the high halfword of the return value.
-        
-    \par 
-        Each bit in APSR.GE is set or cleared for each byte in the return value, depending on 
+
+    \par
+        Each bit in APSR.GE is set or cleared for each byte in the return value, depending on
              the results of the operation.
-    \par 
+    \par
         If \li res is the return value, then:
             \li if res[15:0] \>= 0 then APSR.GE[1:0] = 11 else 00
             \li if res[31:16] \>= 0 then APSR.GE[3:2] = 11 else 00
 
-        
+
     \par Operation:
         \code
    res[15:0]  = val1[15:0]  - val2[15:0]
@@ -656,25 +656,25 @@ uint32_t __SSUB16(uint32_t val1, uint32_t val2);
 
 
 /**************************************************************************************************/
-/** 
+/**
     \brief      Q setting dual 16-bit saturating subtract
-    
-    \details    This function enables you to perform two 16-bit integer subtractions, saturating the 
+
+    \details    This function enables you to perform two 16-bit integer subtractions, saturating the
           results to the 16-bit signed integer range -2<sup>15</sup> \<= x \<= 2<sup>15</sup> - 1.
-                
+
     \param      val1    first two 16-bit operands.
     \param      val2    second two 16-bit operands.
 
     \returns
-            \li the saturated subtraction of the low halfword in the second operand from the low 
+            \li the saturated subtraction of the low halfword in the second operand from the low
                 halfword in the first operand, in the low halfword of the returned result.
-            \li the saturated subtraction of the high halfword in the second operand from the high 
+            \li the saturated subtraction of the high halfword in the second operand from the high
                 halfword in the first operand, in the high halfword of the returned result.
-        
-    \par 
-        The returned results are saturated to the 16-bit signed integer 
+
+    \par
+        The returned results are saturated to the 16-bit signed integer
              range -2<sup>15</sup> \<= x \<= 2<sup>15</sup> - 1.
-        
+
     \par Operation:
         \code
    res[15:0]  = val1[15:0]  - val2[15:0]
@@ -685,22 +685,22 @@ uint32_t __QSUB16(uint32_t val1, uint32_t val2);
 
 
 /**************************************************************************************************/
-/** 
+/**
     \brief      Dual 16-bit signed subtraction with halved results
-    
-    \details    This function enables you to perform two signed 16-bit integer subtractions, halving the 
+
+    \details    This function enables you to perform two signed 16-bit integer subtractions, halving the
           results.
-                
+
     \param      val1    first two 16-bit operands.
     \param      val2    second two 16-bit operands.
 
     \returns
-            \li the halved subtraction of the low halfword in the second operand from the low 
+            \li the halved subtraction of the low halfword in the second operand from the low
                 halfword in the first operand, in the low halfword of the returned result.
-            \li the halved subtraction of the high halfword in the second operand from the high 
+            \li the halved subtraction of the high halfword in the second operand from the high
                 halfword in the first operand, in the high halfword of the returned result.
-        
-        
+
+
     \par Operation:
         \code
    res[15:0]  = val1[15:0]  - val2[15:0]   >> 1
@@ -711,25 +711,25 @@ uint32_t __SHSUB16(uint32_t val1, uint32_t val2);
 
 
 /**************************************************************************************************/
-/** 
+/**
     \brief      GE setting dual 16-bit unsigned subtract
-    
+
     \details    This function enables you to perform two 16-bit unsigned integer subtractions.<br>
           The GE bits in the APSR are set according to the results.
-                
+
     \param      val1    first two 16-bit operands.
     \param      val2    second two 16-bit operands.
 
     \returns
-            \li the subtraction of the low halfword in the second operand from the low halfword 
+            \li the subtraction of the low halfword in the second operand from the low halfword
                 in the first operand, in the low halfword of the return value.
-            \li the subtraction of the high halfword in the second operand from the high halfword 
+            \li the subtraction of the high halfword in the second operand from the high halfword
                 in the first operand, in the high halfword of the return value.
-        
-    \par 
+
+    \par
         Each bit in APSR.GE is set or cleared for each byte in the return value, depending on
              the results of the operation.
-        
+
     \par
         If \em res is the return value, then:
             \li if res[15:0] \>= 0 then APSR.GE[1:0] = 11 else 00
@@ -737,58 +737,58 @@ uint32_t __SHSUB16(uint32_t val1, uint32_t val2);
 
     \par Operation:
         \code
-   res[15:0]  = val1[15:0]  - val2[15:0]   
-   res[31:16] = val1[31:16] - val2[31:16]  
+   res[15:0]  = val1[15:0]  - val2[15:0]
+   res[31:16] = val1[31:16] - val2[31:16]
         \endcode
 */
 uint32_t __USUB16(uint32_t val1, uint32_t val2);
 
 
 /**************************************************************************************************/
-/** 
+/**
     \brief      Dual 16-bit unsigned saturating subtraction
-    
-    \details    This function enables you to perform two unsigned 16-bit integer subtractions, saturating 
+
+    \details    This function enables you to perform two unsigned 16-bit integer subtractions, saturating
           the results to the 16-bit unsigned integer range 0 \< x \< 2<sup>16</sup> - 1.
-                
+
     \param      val1    first two 16-bit operands for each subtraction.
     \param      val2    second two 16-bit operands for each subtraction.
 
     \returns
-            \li the subtraction of the low halfword in the second operand from the low halfword 
+            \li the subtraction of the low halfword in the second operand from the low halfword
                 in the first operand, in the low halfword of the return value.
-            \li the subtraction of the high halfword in the second operand from the high halfword 
+            \li the subtraction of the high halfword in the second operand from the high halfword
                 in the first operand, in the high halfword of the return value.
-        
-    \par 
+
+    \par
             The results are saturated to the 16-bit unsigned integer range 0 \< x \< 2<sup>16</sup> - 1.
-        
+
 
     \par Operation:
         \code
-   res[15:0]  = val1[15:0]  - val2[15:0]   
-   res[31:16] = val1[31:16] - val2[31:16]  
+   res[15:0]  = val1[15:0]  - val2[15:0]
+   res[31:16] = val1[31:16] - val2[31:16]
         \endcode
 */
 uint32_t __UQSUB16(uint32_t val1, uint32_t val2);
 
 
 /**************************************************************************************************/
-/** 
+/**
     \brief      Dual 16-bit unsigned subtraction with halved results
-    
-    \details    This function enables you to perform two unsigned 16-bit integer subtractions, halving 
+
+    \details    This function enables you to perform two unsigned 16-bit integer subtractions, halving
           the results.
-                
+
     \param      val1    first two 16-bit operands.
     \param      val2    second two 16-bit operands.
 
     \returns
-            \li the halved subtraction of the low halfword in the second operand from the low halfword 
+            \li the halved subtraction of the low halfword in the second operand from the low halfword
                 in the first operand, in the low halfword of the return value.
-            \li the halved subtraction of the high halfword in the second operand from the high halfword 
+            \li the halved subtraction of the high halfword in the second operand from the high halfword
                 in the first operand, in the high halfword of the return value.
-        
+
 
     \par Operation:
         \code
@@ -800,93 +800,93 @@ uint32_t __UHSUB16(uint32_t val1, uint32_t val2);
 
 
 /**************************************************************************************************/
-/** 
+/**
     \brief      GE setting dual 16-bit addition and subtraction with exchange
-    
-    \details    This function inserts an SASX instruction into the instruction stream generated by the 
-          compiler. It enables you to exchange the halfwords of the second operand, add the high 
+
+    \details    This function inserts an SASX instruction into the instruction stream generated by the
+          compiler. It enables you to exchange the halfwords of the second operand, add the high
           halfwords and subtract the low halfwords.<br>
           The GE bits in the APRS are set according to the results.
-                
+
     \param      val1    first operand for the subtraction in the low halfword, and the
               first operand for the addition in the high halfword.
-    \param      val2    second operand for the subtraction in the high halfword, and the 
+    \param      val2    second operand for the subtraction in the high halfword, and the
               second operand for the addition in the low halfword.
-              
+
     \returns
-            \li the subtraction of the high halfword in the second operand from the low halfword 
+            \li the subtraction of the high halfword in the second operand from the low halfword
                 in the first operand, in the low halfword of the return value.
-            \li the addition of the high halfword in the first operand and the low halfword in the 
+            \li the addition of the high halfword in the first operand and the low halfword in the
                 second operand, in the high halfword of the return value.
-        
-    \par 
+
+    \par
         Each bit in APSR.GE is set or cleared for each byte in the return value, depending on
              the results of the operation.
     \par
         If \em res is the return value, then:
             \li if res[15:0] \>= 0 then APSR.GE[1:0] = 11 else 00
             \li if res[31:16] \>= 0 then APSR.GE[3:2] = 11 else 00
-    
+
     \par Operation:
         \code
-   res[15:0]  = val1[15:0]  - val2[31:16]   
-   res[31:16] = val1[31:16] + val2[15:0]  
+   res[15:0]  = val1[15:0]  - val2[31:16]
+   res[31:16] = val1[31:16] + val2[15:0]
         \endcode
 */
 uint32_t __SASX(uint32_t val1, uint32_t val2);
 
 
 /**************************************************************************************************/
-/** 
+/**
     \brief      Q setting dual 16-bit add and subtract with exchange
-    
+
     \details    This function enables you to exchange the halfwords of the one operand, then add the high
-          halfwords and subtract the low halfwords, saturating the results to the 16-bit signed 
+          halfwords and subtract the low halfwords, saturating the results to the 16-bit signed
           integer range -2<sup>15</sup> \<= x \<= 2<sup>15</sup> - 1.
-                
-    \param      val1    first operand for the subtraction in the low halfword, and the 
+
+    \param      val1    first operand for the subtraction in the low halfword, and the
                        first operand for the addition in the high halfword.
     \param      val2    second operand for the subtraction in the high halfword, and the
                        second operand for the addition in the low halfword.
-              
+
     \returns
-            \li the saturated subtraction of the high halfword in the second operand from the low 
+            \li the saturated subtraction of the high halfword in the second operand from the low
                 halfword in the first operand, in the low halfword of the return value.
-            \li the saturated addition of the high halfword in the first operand and the low 
+            \li the saturated addition of the high halfword in the first operand and the low
                 halfword in the second operand, in the high halfword of the return value.
-        
-    \par 
-        The returned results are saturated to the 16-bit signed integer 
+
+    \par
+        The returned results are saturated to the 16-bit signed integer
              range -2<sup>15</sup> \<= x \<= 2<sup>15</sup> - 1.
-    
+
     \par Operation:
         \code
-   res[15:0]  = val1[15:0]  - val2[31:16]   
-   res[31:16] = val1[31:16] + val2[15:0]  
+   res[15:0]  = val1[15:0]  - val2[31:16]
+   res[31:16] = val1[31:16] + val2[15:0]
         \endcode
 */
 uint32_t __QASX(uint32_t val1, uint32_t val2);
 
 
 /**************************************************************************************************/
-/** 
+/**
     \brief      Dual 16-bit signed addition and subtraction with halved results
-    
-    \details    This function enables you to exchange the two halfwords of one operand, perform one 
+
+    \details    This function enables you to exchange the two halfwords of one operand, perform one
           signed 16-bit integer addition and one signed 16-bit subtraction, and halve the results.
-                
+
     \param      val1    first 16-bit operands.
     \param      val2    second 16-bit operands.
-              
+
     \returns
-            \li the halved subtraction of the high halfword in the second operand from the low 
+            \li the halved subtraction of the high halfword in the second operand from the low
                 halfword in the first operand, in the low halfword of the return value.
-            \li the halved addition of the low halfword in the second operand and the high 
+            \li the halved addition of the low halfword in the second operand and the high
                 halfword in the first operand, in the high halfword of the return value.
-        
+
     \par Operation:
         \code
-   res[15:0]  = (val1[15:0]  - val2[31:16]) >> 1  
+   res[15:0]  = (val1[15:0]  - val2[31:16]) >> 1
    res[31:16] = (val1[31:16] + val2[15:0] ) >> 1
         \endcode
 */
@@ -894,28 +894,28 @@ uint32_t __SHASX(uint32_t val1, uint32_t val2);
 
 
 /**************************************************************************************************/
-/** 
+/**
     \brief      GE setting dual 16-bit unsigned addition and subtraction with exchange
-    
-    \details    This function enables you to exchange the two halfwords of the second operand, add the 
+
+    \details    This function enables you to exchange the two halfwords of the second operand, add the
           high halfwords and subtract the low halfwords.<br>
           The GE bits in the APSR are set according to the results.
-                
+
     \param      val1    first operand for the subtraction in the low halfword, and the
               first operand for the addition in the high halfword.
     \param      val2    second operand for the subtraction in the high halfword and the
               second operand for the addition in the low halfword.
-              
+
     \returns
-            \li the subtraction of the high halfword in the second operand from the low halfword 
+            \li the subtraction of the high halfword in the second operand from the low halfword
                 in the first operand, in the low halfword of the return value.
-            \li the addition of the high halfword in the first operand and the low halfword in the 
+            \li the addition of the high halfword in the first operand and the low halfword in the
                 second operand, in the high halfword of the return value.
-        
-    \par 
-            Each bit in APSR.GE is set or cleared for each byte in the return value, depending on 
+
+    \par
+            Each bit in APSR.GE is set or cleared for each byte in the return value, depending on
              the results of the operation.
-             
+
     \par    If \em res is the return value, then:
             \li if res[15:0] \>= 0 then APSR.GE[1:0] = 11 else 00
             \li if res[31:16] \>= 0x10000 then APSR.GE[3:2] = 11 else 00
@@ -923,61 +923,61 @@ uint32_t __SHASX(uint32_t val1, uint32_t val2);
     \par Operation:
         \code
    res[15:0]  = val1[15:0]  - val2[31:16]
-   res[31:16] = val1[31:16] + val2[15:0] 
+   res[31:16] = val1[31:16] + val2[15:0]
         \endcode
 */
 uint32_t __UASX(uint32_t val1, uint32_t val2);
 
 
 /**************************************************************************************************/
-/** 
+/**
     \brief      Dual 16-bit unsigned saturating addition and subtraction with exchange
-    
-    \details    This function enables you to exchange the halfwords of the second operand and perform 
-          one unsigned 16-bit integer addition and one unsigned 16-bit subtraction, saturating the 
+
+    \details    This function enables you to exchange the halfwords of the second operand and perform
+          one unsigned 16-bit integer addition and one unsigned 16-bit subtraction, saturating the
           results to the 16-bit unsigned integer range 0 \<= x \<= 2<sup>16</sup> - 1.
-                
+
     \param      val1    first two 16-bit operands.
     \param      val2    second two 16-bit operands.
-              
+
     \returns
-            \li the subtraction of the high halfword in the second operand from the low halfword 
+            \li the subtraction of the high halfword in the second operand from the low halfword
                 in the first operand, in the low halfword of the return value.
-            \li the subtraction of the low halfword in the second operand from the high halfword 
+            \li the addition of the low halfword in the second operand to the high halfword
                 in the first operand, in the high halfword of the return value.
-        
-    \par 
-            The results are saturated to the 16-bit unsigned integer  
+
+    \par
+            The results are saturated to the 16-bit unsigned integer
              range 0 \<= x \<= 2<sup>16</sup> - 1.
-             
+
     \par Operation:
         \code
    res[15:0]  = val1[15:0]  - val2[31:16]
-   res[31:16] = val1[31:16] + val2[15:0] 
+   res[31:16] = val1[31:16] + val2[15:0]
         \endcode
 */
 uint32_t __UQASX(uint32_t val1, uint32_t val2);
 
 
 /**************************************************************************************************/
-/** 
+/**
     \brief      Dual 16-bit unsigned addition and subtraction with halved results and exchange
-    
-    \details    This function enables you to exchange the halfwords of the second operand, add the high 
+
+    \details    This function enables you to exchange the halfwords of the second operand, add the high
           halfwords and subtract the low halfwords, halving the results.
-                
+
     \param      val1    first operand for the subtraction in the low halfword, and the
            first operand for the addition in the high halfword.
-    \param      val2    second operand for the subtraction in the high halfword, and the 
+    \param      val2    second operand for the subtraction in the high halfword, and the
            second operand for the addition in the low halfword.
-              
+
     \returns
-            \li the halved subtraction of the high halfword in the second operand from the low 
+            \li the halved subtraction of the high halfword in the second operand from the low
                 halfword in the first operand.
-            \li the halved addition of the high halfword in the first operand and the low halfword 
+            \li the halved addition of the high halfword in the first operand and the low halfword
                 in the second operand.
-        
-             
+
+
     \par Operation:
         \code
    res[15:0]  = (val1[15:0]  - val2[31:16]) >> 1
@@ -988,87 +988,87 @@ uint32_t __UHASX(uint32_t val1, uint32_t val2);
 
 
 /**************************************************************************************************/
-/** 
+/**
     \brief      GE setting dual 16-bit signed subtraction and addition with exchange
-    
-    \details    This function enables you to exchange the two halfwords of one operand and perform one 
+
+    \details    This function enables you to exchange the two halfwords of one operand and perform one
           16-bit integer subtraction and one 16-bit addition.<br>
           The GE bits in the APSR are set according to the results.
-                
-    \param      val1    first operand for the addition in the low halfword, and the first 
+
+    \param      val1    first operand for the addition in the low halfword, and the first
               operand for the subtraction in the high halfword.
     \param      val2    second operand for the addition in the high halfword, and the
               second operand for the subtraction in the low halfword.
-              
+
     \returns
-            \li the addition of the low halfword in the first operand and the high halfword in the 
+            \li the addition of the low halfword in the first operand and the high halfword in the
                 second operand, in the low halfword of the return value.
-            \li the subtraction of the low halfword in the second operand from the high halfword 
+            \li the subtraction of the low halfword in the second operand from the high halfword
                 in the first operand, in the high halfword of the return value.
     \par
-        Each bit in APSR.GE is set or cleared for each byte in the return value, depending on 
+        Each bit in APSR.GE is set or cleared for each byte in the return value, depending on
              the results of the operation.
     \par
         If \em res is the return value, then:
             \li if res[15:0] \>= 0 then APSR.GE[1:0] = 11 else 00
             \li if res[31:16] \>= 0 then APSR.GE[3:2] = 11 else 00
-        
+
     \par Operation:
         \code
    res[15:0]  = val1[15:0]  + val2[31:16]
-   res[31:16] = val1[31:16] - val2[15:0] 
+   res[31:16] = val1[31:16] - val2[15:0]
         \endcode
 */
 uint32_t __SSAX(uint32_t val1, uint32_t val2);
 
 
 /**************************************************************************************************/
-/** 
+/**
     \brief      Q setting dual 16-bit subtract and add with exchange
-    
-    \details    This function enables you to exchange the halfwords of one operand, then subtract the 
-          high halfwords and add the low halfwords, saturating the results to the 16-bit signed 
+
+    \details    This function enables you to exchange the halfwords of one operand, then subtract the
+          high halfwords and add the low halfwords, saturating the results to the 16-bit signed
           integer range -2<sup>15</sup> \<= x \<= 2<sup>15</sup> - 1.
-                
+
     \param      val1    first operand for the addition in the low halfword, and the first
            operand for the subtraction in the high halfword.
-    \param      val2    second operand for the addition in the high halfword, and the 
+    \param      val2    second operand for the addition in the high halfword, and the
            second operand for the subtraction in the low halfword.
-              
+
     \returns
-            \li the saturated addition of the low halfword of the first operand and the high 
+            \li the saturated addition of the low halfword of the first operand and the high
                 halfword of the second operand, in the low halfword of the return value.
-            \li the saturated subtraction of the low halfword of the second operand from the high 
+            \li the saturated subtraction of the low halfword of the second operand from the high
                 halfword of the first operand, in the high halfword of the return value.
     \par
-        The returned results are saturated to the 16-bit signed integer 
+        The returned results are saturated to the 16-bit signed integer
              range -2<sup>15</sup> \<= x \<= 2<sup>15</sup> - 1.
-        
+
     \par Operation:
         \code
    res[15:0]  = val1[15:0]  + val2[31:16]
-   res[31:16] = val1[31:16] - val2[15:0] 
+   res[31:16] = val1[31:16] - val2[15:0]
         \endcode
 */
 uint32_t __QSAX(uint32_t val1, uint32_t val2);
 
 
 /**************************************************************************************************/
-/** 
+/**
     \brief      Dual 16-bit signed subtraction and addition with halved results
-    
-    \details    This function enables you to exchange the two halfwords of one operand, perform one 
+
+    \details    This function enables you to exchange the two halfwords of one operand, perform one
           signed 16-bit integer subtraction and one signed 16-bit addition, and halve the results.
-                
+
     \param      val1    first 16-bit operands.
     \param      val2    second 16-bit operands.
-              
+
     \returns
-            \li the halved addition of the low halfword in the first operand and the high halfword 
+            \li the halved addition of the low halfword in the first operand and the high halfword
                 in the second operand, in the low halfword of the return value.
-            \li the halved subtraction of the low halfword in the second operand from the high 
+            \li the halved subtraction of the low halfword in the second operand from the high
                 halfword in the first operand, in the high halfword of the return value.
-        
+
     \par Operation:
         \code
    res[15:0]  = (val1[15:0]  + val2[31:16]) >> 1
@@ -1079,89 +1079,89 @@ uint32_t __SHSAX(uint32_t val1, uint32_t val2);
 
 
 /**************************************************************************************************/
-/** 
+/**
     \brief      GE setting dual 16-bit unsigned subtract and add with exchange
-    
-    \details    This function enables you to exchange the halfwords of the second operand, subtract the 
+
+    \details    This function enables you to exchange the halfwords of the second operand, subtract the
           high halfwords and add the low halfwords.<br>
           The GE bits in the APSR are set according to the results.
-                
+
     \param      val1    first operand for the addition in the low halfword, and the first
               operand for the subtraction in the high halfword.
     \param      val2    second operand for the addition in the high halfword, and the
               second operand for the subtraction in the low halfword.
-              
+
     \returns
-            \li the addition of the low halfword in the first operand and the high halfword in the 
+            \li the addition of the low halfword in the first operand and the high halfword in the
                 second operand, in the low halfword of the return value.
-            \li the subtraction of the low halfword in the second operand from the high halfword 
+            \li the subtraction of the low halfword in the second operand from the high halfword
                 in the first operand, in the high halfword of the return value.
-    \par 
-        Each bit in APSR.GE is set or cleared for each byte in the return value, depending on 
+    \par
+        Each bit in APSR.GE is set or cleared for each byte in the return value, depending on
              the results of the operation.
-    \par 
+    \par
         If \em res is the return value, then:
             \li if res[15:0] \>= 0x10000 then APSR.GE[1:0] = 11 else 00
             \li if res[31:16] \>= 0 then APSR.GE[3:2] = 11 else 00
-        
+
     \par Operation:
         \code
    res[15:0]  = val1[15:0]  + val2[31:16]
-   res[31:16] = val1[31:16] - val2[15:0] 
+   res[31:16] = val1[31:16] - val2[15:0]
         \endcode
 */
 uint32_t __USAX(uint32_t val1, uint32_t val2);
 
 
 /**************************************************************************************************/
-/** 
+/**
     \brief      Dual 16-bit unsigned saturating subtraction and addition with exchange
-    
-    \details    This function enables you to exchange the halfwords of the second operand and perform 
-          one unsigned 16-bit integer subtraction and one unsigned 16-bit addition, saturating the 
+
+    \details    This function enables you to exchange the halfwords of the second operand and perform
+          one unsigned 16-bit integer subtraction and one unsigned 16-bit addition, saturating the
           results to the 16-bit unsigned integer range 0 \<= x \<= 2<sup>16</sup> - 1.
-                
+
     \param      val1    first 16-bit operand for the addition in the low halfword, and the
               first 16-bit operand for the subtraction in the high halfword.
     \param      val2    second 16-bit halfword for the addition in the high halfword,
               and the second 16-bit halfword for the subtraction in the low halfword.
-              
+
     \returns
-            \li the addition of the low halfword in the first operand and the high halfword in the 
+            \li the addition of the low halfword in the first operand and the high halfword in the
                 second operand, in the low halfword of the return value.
-            \li the subtraction of the low halfword in the second operand from the high halfword 
+            \li the subtraction of the low halfword in the second operand from the high halfword
                 in the first operand, in the high halfword of the return value.
-    \par 
-        The results are saturated to the 16-bit unsigned integer 
+    \par
+        The results are saturated to the 16-bit unsigned integer
              range 0 \<= x \<= 2<sup>16</sup> - 1.
-        
+
     \par Operation:
         \code
    res[15:0]  = val1[15:0]  + val2[31:16]
-   res[31:16] = val1[31:16] - val2[15:0] 
+   res[31:16] = val1[31:16] - val2[15:0]
         \endcode
 */
 uint32_t __UQSAX(uint32_t val1, uint32_t val2);
 
 
 /**************************************************************************************************/
-/** 
+/**
     \brief      Dual 16-bit unsigned subtraction and addition with halved results and exchange
-    
-    \details    This function enables you to exchange the halfwords of the second operand, subtract the 
+
+    \details    This function enables you to exchange the halfwords of the second operand, subtract the
           high halfwords and add the low halfwords, halving the results.
-                
+
     \param      val1    first operand for the addition in the low halfword, and the first
               operand for the subtraction in the high halfword.
     \param      val2    second operand for the addition in the high halfword, and the
               second operand for the subtraction in the low halfword.
-              
+
     \returns
-            \li the halved addition of the high halfword in the second operand and the low 
+            \li the halved addition of the high halfword in the second operand and the low
                 halfword in the first operand, in the low halfword of the return value.
-            \li the halved subtraction of the low halfword in the second operand from the high 
+            \li the halved subtraction of the low halfword in the second operand from the high
                 halfword in the first operand, in the high halfword of the return value.
-        
+
     \par Operation:
         \code
    res[15:0]  = (val1[15:0]  + val2[31:16]) >> 1
@@ -1172,29 +1172,29 @@ uint32_t __UHSAX(uint32_t val1, uint32_t val2);
 
 
 /**************************************************************************************************/
-/** 
+/**
     \brief      Unsigned sum of quad 8-bit unsigned absolute difference
-    
-    \details    This function enables you to perform four unsigned 8-bit subtractions, and add the 
-          absolute values of the differences together, returning the result as a single unsigned 
+
+    \details    This function enables you to perform four unsigned 8-bit subtractions, and add the
+          absolute values of the differences together, returning the result as a single unsigned
           integer.
-                
+
     \param      val1    first four 8-bit operands for the subtractions.
     \param      val2    second four 8-bit operands for the subtractions.
-              
+
     \returns
-            \li the subtraction of the first byte in the second operand from the first byte in the 
+            \li the subtraction of the first byte in the second operand from the first byte in the
                 first operand.
-            \li the subtraction of the second byte in the second operand from the second byte in 
+            \li the subtraction of the second byte in the second operand from the second byte in
                 the first operand.
-            \li the subtraction of the third byte in the second operand from the third byte in the 
+            \li the subtraction of the third byte in the second operand from the third byte in the
                 first operand.
-            \li the subtraction of the fourth byte in the second operand from the fourth byte in 
+            \li the subtraction of the fourth byte in the second operand from the fourth byte in
                 the first operand.
     \par
         The sum is returned as a single unsigned integer.
-        
-    
+
+
     \par Operation:
         \code
    absdiff1  = val1[7:0]   - val2[7:0]
@@ -1208,29 +1208,29 @@ uint32_t __USAD8(uint32_t val1, uint32_t val2);
 
 
 /**************************************************************************************************/
-/** 
+/**
     \brief      Unsigned sum of quad 8-bit unsigned absolute difference with 32-bit accumulate
-    
-    \details    This function enables you to perform four unsigned 8-bit subtractions, and add the 
+
+    \details    This function enables you to perform four unsigned 8-bit subtractions, and add the
           absolute values of the differences to a 32-bit accumulate operand.
-                
+
     \param      val1    first four 8-bit operands for the subtractions.
     \param      val2    second four 8-bit operands for the subtractions.
     \param      val3    accumulation value.
-              
+
     \returns
-        the sum of the absolute differences of the following 
+        the sum of the absolute differences of the following
             bytes, added to the accumulation value:
-            \li the subtraction of the first byte in the second operand from the first byte in the 
+            \li the subtraction of the first byte in the second operand from the first byte in the
                 first operand.
-            \li the subtraction of the second byte in the second operand from the second byte in 
+            \li the subtraction of the second byte in the second operand from the second byte in
                 the first operand.
-            \li the subtraction of the third byte in the second operand from the third byte in the 
+            \li the subtraction of the third byte in the second operand from the third byte in the
                 first operand.
-            \li the subtraction of the fourth byte in the second operand from the fourth byte in 
+            \li the subtraction of the fourth byte in the second operand from the fourth byte in
                 the first operand.
-        
-    
+
+
     \par Operation:
         \code
    absdiff1  = val1[7:0]   - val2[7:0]
@@ -1245,26 +1245,26 @@ uint32_t __USADA8(uint32_t val1, uint32_t val2, uint32_t val3);
 
 
 /**************************************************************************************************/
-/** 
+/**
     \brief      Q setting dual 16-bit saturate
-    
+
     \details    This function enables you to saturate two signed 16-bit values to a selected signed range.<br>
           The Q bit is set if either operation saturates.
-                
+
     \param      val1    two signed 16-bit values to be saturated.
     \param      val2    bit position for saturation, an integral constant expression in the
               range 1 to 16.
 
-              
+
     \returns
-        the sum of the absolute differences of the following 
+        the sum of the absolute differences of the following
             bytes, added to the accumulation value:
-            \li the signed saturation of the low halfword in \em val1, saturated to the bit position 
+            \li the signed saturation of the low halfword in \em val1, saturated to the bit position
                 specified in \em val2 and returned in the low halfword of the return value.
-            \li the signed saturation of the high halfword in <i>val1</i>, saturated to the bit position 
+            \li the signed saturation of the high halfword in <i>val1</i>, saturated to the bit position
                 specified in <i>val2</i> and returned in the high halfword of the return value.
-        
-    
+
+
     \par Operation:
         \code
    Saturate halfwords in val1 to the signed range specified by the bit position in val2
@@ -1274,26 +1274,26 @@ uint32_t __SSAT16(uint32_t val1, const uint32_t val2);
 
 
 /**************************************************************************************************/
-/** 
+/**
     \brief      Q setting dual 16-bit unsigned saturate
-    
-    \details    This function enables you to saturate two signed 16-bit values to a selected unsigned 
+
+    \details    This function enables you to saturate two signed 16-bit values to a selected unsigned
          range.<br>
          The Q bit is set if either operation saturates.
-                
+
     \param      val1    two 16-bit values that are to be saturated.
-    \param      val2    bit position for saturation, and must be an integral constant 
+    \param      val2    bit position for saturation, and must be an integral constant
            expression in the range 0 to 15.
 
-              
+
     \returns
         the saturation of the two signed 16-bit values, as non-negative values.
-            \li the saturation of the low halfword in \em val1, saturated to the bit position 
+            \li the saturation of the low halfword in \em val1, saturated to the bit position
                 specified in \em val2 and returned in the low halfword of the return value.
-            \li the saturation of the high halfword in \em val1, saturated to the bit position 
+            \li the saturation of the high halfword in \em val1, saturated to the bit position
                 specified in \em val2 and returned in the high halfword of the return value.
-        
-    
+
+
     \par Operation:
         \code
    Saturate halfwords in val1 to the unsigned range specified by the bit position in val2
@@ -1303,21 +1303,21 @@ uint32_t __USAT16(uint32_t val1, const uint32_t val2);
 
 
 /**************************************************************************************************/
-/** 
+/**
     \brief      Dual extract 8-bits and zero-extend to 16-bits
-    
-    \details    This function enables you to extract two 8-bit values from an operand and zero-extend 
+
+    \details    This function enables you to extract two 8-bit values from an operand and zero-extend
           them to 16 bits each.
-                
+
     \param      val     two 8-bit values in val[7:0] and val[23:16] to be sign-extended.
 
-              
+
     \returns
         the 8-bit values zero-extended to 16-bit values.
             \li zero-extended value of val[7:0] in the low halfword of the return value.
             \li zero-extended value of val[23:16] in the high halfword of the return value.
-        
-    
+
+
     \par Operation:
         \code
    res[15:0]  = ZeroExtended(val[7:0]  )
@@ -1328,21 +1328,21 @@ uint32_t __UXTB16(uint32_t val);
 
 
 /**************************************************************************************************/
-/** 
+/**
     \brief      Extracted 16-bit to 32-bit unsigned addition
-    
-    \details    This function enables you to extract two 8-bit values from one operand, zero-extend them 
+
+    \details    This function enables you to extract two 8-bit values from one operand, zero-extend them
           to 16 bits each, and add the results to two 16-bit values from another operand.
-                
-    \param      val1    value added to the zero-extended to 16-bit values.    
+
+    \param      val1    value added to the zero-extended to 16-bit values.
     \param      val2    two 8-bit values to be extracted and zero-extended.
 
-              
+
     \returns
-        the 8-bit values in \em val2, zero-extended to 16-bit values 
+        the 8-bit values in \em val2, zero-extended to 16-bit values
             and added to \em val1.
-        
-    
+
+
     \par Operation:
         \code
    res[15:0]  = ZeroExt(val2[7:0]   to 16 bits) + val1[15:0]
@@ -1353,22 +1353,22 @@ uint32_t __UXTAB16(uint32_t val1, uint32_t val2);
 
 
 /**************************************************************************************************/
-/** 
+/**
     \brief      Dual extract 8-bits and sign extend each to 16-bits
-    
-    \details    This function enables you to extract two 8-bit values from an operand and sign-extend 
+
+    \details    This function enables you to extract two 8-bit values from an operand and sign-extend
           them to 16 bits each.
-                
-    \param      val     two 8-bit values in val[7:0] and val[23:16] to be sign-extended.  
+
+    \param      val     two 8-bit values in val[7:0] and val[23:16] to be sign-extended.
+
 
 
-              
     \returns
         the 8-bit values sign-extended to 16-bit values.
             \li     sign-extended value of val[7:0] in the low halfword of the return value.
             \li     sign-extended value of val[23:16] in the high halfword of the return value.
-        
-    
+
+
     \par Operation:
         \code
    res[15:0]  = SignExtended(val[7:0]
@@ -1379,22 +1379,24 @@ uint32_t __SXTB16(uint32_t val);
 
 
 /**************************************************************************************************/
-/** 
+/**
     \brief      Rotate right, dual extract 8-bits and sign extend each to 16-bits
-    
-    \details    This function enables you to rotate an operand by 8/16/24 bit, extract two 8-bit values and sign-extend 
+
+    \details    This function enables you to rotate an operand by 8/16/24 bits, extract two 8-bit values and sign-extend
           them to 16 bits each.
-                
+
     \param      val     two 8-bit values in val[7:0] and val[23:16] to be sign-extended.
-    \param      rotate  number of bits to rotate val. Only 8,16 and 24 are accepted
+    \param      rotate  number of bits to rotate val. Constant rotate values of 8, 16 and 24 can be
+              optimally used with a single __SXTB16 instruction. Any other valid constant rotate
+              value will result in the use of two instructions, __ROR and __SXTB16.
+
 
-              
     \returns
         the 8-bit values sign-extended to 16-bit values.
             \li     sign-extended value of val[7:0] in the low halfword of the return value.
             \li     sign-extended value of val[23:16] in the high halfword of the return value.
-        
-    
+
+
     \par Operation:
         \code
    val        = Rotate(val, rotate)
@@ -1406,23 +1408,23 @@ uint32_t __SXTB16_RORn(uint32_t val, uint32_r rotate);
 
 
 /**************************************************************************************************/
-/** 
+/**
     \brief      Dual extracted 8-bit to 16-bit signed addition
-    
-    \details    This function enables you to extract two 8-bit values from the second operand (at bit 
-          positions [7:0] and [23:16]), sign-extend them to 16-bits each, and add the results to the 
+
+    \details    This function enables you to extract two 8-bit values from the second operand (at bit
+          positions [7:0] and [23:16]), sign-extend them to 16-bits each, and add the results to the
           first operand.
-                
-    \param      val1    values added to the zero-extended to 16-bit values. 
+
+    \param      val1    values added to the sign-extended 16-bit values.
     \param      val2    two 8-bit values to be extracted and zero-extended.
 
 
-              
+
     \returns
-        the addition of \em val1 and \em val2, where the 8-bit values in 
+        the addition of \em val1 and \em val2, where the 8-bit values in
             val2[7:0] and val2[23:16] have been extracted and sign-extended prior to the addition.
-        
-    
+
+
     \par Operation:
         \code
    res[15:0]  = val1[15:0]  + SignExtended(val2[7:0])
@@ -1433,22 +1435,52 @@ uint32_t __SXTAB16(uint32_t val1, uint32_t val2);
 
 
 /**************************************************************************************************/
-/** 
+/**
+    \brief      Rotate right, dual extract 8-bits, sign extend each to 16-bits and add
+
+    \details    This function enables you to rotate the second operand by 8/16/24 bits as specified by the third
+          operand, extract two 8-bit values from the rotated result (at bit positions [7:0] and [23:16]),
+          sign-extend them to 16-bits each, and add the results to the first operand.
+
+    \param      val1    two 16-bit values in val1[15:0] and val1[31:16]
+    \param      val2    two 8-bit values in val2[7:0] and val2[23:16] to be sign-extended post rotation
+    \param      rotate  number of bits to rotate val2. Constant rotate values of 8, 16 and 24 can be
+              optimally used with a single __SXTAB16 instruction. Any other valid constant rotate
+              value will result in the use of two instructions, __ROR and __SXTAB16.
+
+
+    \returns
+        the addition of \em val1 and \em val2, where the rotated 8-bit values in
+            val2[7:0] and val2[23:16] have been extracted and sign-extended prior to the addition.
+
+
+    \par Operation:
+        \code
+   val2       = Rotate(val2, rotate)
+   res[15:0]  = val1[15:0]  + SignExtended(val2[7:0])
+   res[31:16] = val1[31:16] + SignExtended(val2[23:16])
+        \endcode
+*/
+uint32_t __SXTAB16_RORn(uint32_t val1, uint32_t val2, uint32_t rotate);
+
+
+/**************************************************************************************************/
+/**
     \brief      Q setting sum of dual 16-bit signed multiply
-    
-    \details    This function enables you to perform two 16-bit signed multiplications, adding the 
+
+    \details    This function enables you to perform two 16-bit signed multiplications, adding the
           products together.<br>
           The Q bit is set if the addition overflows.
-                
+
     \param      val1    first 16-bit operands for each multiplication.
     \param      val2    second 16-bit operands for each multiplication.
 
 
-              
+
     \returns
         the sum of the products of the two 16-bit signed multiplications.
-        
-    
+
+
     \par Operation:
         \code
    p1 = val1[15:0]  * val2[15:0]
@@ -1460,23 +1492,23 @@ uint32_t __SMUAD(uint32_t val1, uint32_t val2);
 
 
 /**************************************************************************************************/
-/** 
+/**
     \brief      Q setting sum of dual 16-bit signed multiply with exchange
-    
+
     \details    This function enables you to perform two 16-bit signed multiplications with exchanged
           halfwords of the second operand, adding the products together.<br>
           The Q bit is set if the addition overflows.
-                
+
     \param      val1    first 16-bit operands for each multiplication.
     \param      val2    second 16-bit operands for each multiplication.
 
 
-              
+
     \returns
         the sum of the products of the two 16-bit signed multiplications with exchanged
             halfwords of the second operand.
-        
-    
+
+
     \par Operation:
         \code
    p1 = val1[15:0]  * val2[31:16]
@@ -1488,9 +1520,9 @@ uint32_t __SMUADX(uint32_t val1, uint32_t val2);
 
 
 /**************************************************************************************************/
-/** 
+/**
     \brief      32-bit signed multiply with 32-bit truncated accumulator.
-    
+
     \details    This function enables you to perform a signed 32-bit multiplications, adding the most significant 32 bits
 	            of the 64-bit result to a 32-bit accumulate operand.<br>
 
@@ -1498,10 +1530,10 @@ uint32_t __SMUADX(uint32_t val1, uint32_t val2);
     \param      val2    second operand for multiplication.
     \param      val3    accumulate value.
 
-              
-    \returns    the product of multiplication (most significant 32 bits) is added to the accumulate 
+
+    \returns    the product of multiplication (most significant 32 bits) is added to the accumulate
             value, as a 32-bit integer.
-        
+
      \par Operation:
         \code
    p = val1 * val2
@@ -1512,23 +1544,23 @@ uint32_t __SMMLA (int32_t val1, int32_t val2, int32_t val3);
 
 
 /**************************************************************************************************/
-/** 
+/**
     \brief      Q setting dual 16-bit signed multiply with single 32-bit accumulator
-    
-    \details    This function enables you to perform two signed 16-bit multiplications, adding both 
+
+    \details    This function enables you to perform two signed 16-bit multiplications, adding both
           results to a 32-bit accumulate operand.<br>
           The Q bit is set if the addition overflows. Overflow cannot occur during the multiplications.
-                
+
     \param      val1    first 16-bit operands for each multiplication.
     \param      val2    second 16-bit operands for each multiplication.
     \param      val3    accumulate value.
 
-              
+
     \returns
-        the product of each multiplication added to the accumulate 
+        the product of each multiplication added to the accumulate
             value, as a 32-bit integer.
-        
-    
+
+
     \par Operation:
         \code
    p1 = val1[15:0]  * val2[15:0]
@@ -1540,23 +1572,23 @@ uint32_t __SMLAD(uint32_t val1, uint32_t val2, uint32_t val3);
 
 
 /**************************************************************************************************/
-/** 
+/**
     \brief      Q setting pre-exchanged dual 16-bit signed multiply with single 32-bit accumulator
-    
+
     \details    This function enables you to perform two signed 16-bit multiplications with exchanged
           halfwords of the second operand, adding both results to a 32-bit accumulate operand.<br>
           The Q bit is set if the addition overflows. Overflow cannot occur during the multiplications.
-                
+
     \param      val1    first 16-bit operands for each multiplication.
     \param      val2    second 16-bit operands for each multiplication.
     \param      val3    accumulate value.
 
-              
+
     \returns
         the product of each multiplication with exchanged
             halfwords of the second operand added to the accumulate value, as a 32-bit integer.
-        
-    
+
+
     \par Operation:
         \code
    p1 = val1[15:0]  * val2[31:16]
@@ -1568,23 +1600,23 @@ uint32_t __SMLADX(uint32_t val1, uint32_t val2, uint32_t val3);
 
 
 /**************************************************************************************************/
-/** 
+/**
     \brief      Dual 16-bit signed multiply with single 64-bit accumulator
-    
-    \details    This function enables you to perform two signed 16-bit multiplications, adding both 
-          results to a 64-bit accumulate operand. Overflow is only possible as a result of the 64-bit 
-          addition. This overflow is not detected if it occurs. Instead, the result wraps around 
+
+    \details    This function enables you to perform two signed 16-bit multiplications, adding both
+          results to a 64-bit accumulate operand. Overflow is only possible as a result of the 64-bit
+          addition. This overflow is not detected if it occurs. Instead, the result wraps around
           modulo2<sup>64</sup>.
-                
+
     \param      val1    first 16-bit operands for each multiplication.
     \param      val2    second 16-bit operands for each multiplication.
     \param      val3    accumulate value.
 
-              
+
     \returns
         the product of each multiplication added to the accumulate value.
-        
-    
+
+
     \par Operation:
         \code
    p1 = val1[15:0]  * val2[15:0]
@@ -1598,23 +1630,23 @@ uint64_t __SMLALD(uint32_t val1, uint32_t val2, uint64_t val3);
 
 
 /**************************************************************************************************/
-/** 
+/**
     \brief      Dual 16-bit signed multiply with exchange with single 64-bit accumulator
-    
-    \details    This function enables you to exchange the halfwords of the second operand, and perform 
-          two signed 16-bit multiplications, adding both results to a 64-bit accumulate operand. 
-          Overflow is only possible as a result of the 64-bit addition. This overflow is not detected 
+
+    \details    This function enables you to exchange the halfwords of the second operand, and perform
+          two signed 16-bit multiplications, adding both results to a 64-bit accumulate operand.
+          Overflow is only possible as a result of the 64-bit addition. This overflow is not detected
           if it occurs. Instead, the result wraps around modulo2<sup>64</sup>.
-                
+
     \param      val1    first 16-bit operands for each multiplication.
     \param      val2    second 16-bit operands for each multiplication.
     \param      val3    accumulate value.
 
-              
+
     \returns
         the product of each multiplication added to the accumulate value.
-        
-    
+
+
     \par Operation:
         \code
    p1 = val1[15:0]  * val2[31:16]
@@ -1628,21 +1660,21 @@ unsigned long long __SMLALDX(uint32_t val1, uint32_t val2, unsigned long long va
 
 
 /**************************************************************************************************/
-/** 
+/**
     \brief      Dual 16-bit signed multiply returning difference
-    
-    \details    This function enables you to perform two 16-bit signed multiplications, taking the 
-          difference of the products by subtracting the high halfword product from the low 
+
+    \details    This function enables you to perform two 16-bit signed multiplications, taking the
+          difference of the products by subtracting the high halfword product from the low
           halfword product.
-                
+
     \param      val1    first 16-bit operands for each multiplication.
     \param      val2    second 16-bit operands for each multiplication.
 
-              
+
     \returns
         the difference of the products of the two 16-bit signed multiplications.
-        
-    
+
+
     \par Operation:
         \code
    p1 = val1[15:0]  * val2[15:0]
@@ -1654,22 +1686,22 @@ uint32_t __SMUSD(uint32_t val1, uint32_t val2);
 
 
 /**************************************************************************************************/
-/** 
+/**
     \brief      Dual 16-bit signed multiply with exchange returning difference
-    
-    \details    This function enables you to perform two 16-bit signed multiplications, subtracting one 
-          of the products from the other. The halfwords of the second operand are exchanged 
-          before performing the arithmetic. This produces top * bottom and bottom * top 
+
+    \details    This function enables you to perform two 16-bit signed multiplications, subtracting one
+          of the products from the other. The halfwords of the second operand are exchanged
+          before performing the arithmetic. This produces top * bottom and bottom * top
           multiplication.
-                
+
     \param      val1    first 16-bit operands for each multiplication.
     \param      val2    second 16-bit operands for each multiplication.
 
-              
+
     \returns
         the difference of the products of the two 16-bit signed multiplications.
-        
-    
+
+
     \par Operation:
         \code
    p1 = val1[15:0]  * val2[31:16]
@@ -1681,25 +1713,25 @@ uint32_t __SMUSDX(uint32_t val1, uint32_t val2);
 
 
 /**************************************************************************************************/
-/** 
+/**
     \brief      Q setting dual 16-bit signed multiply subtract with 32-bit accumulate
-    
-    \details    This function enables you to perform two 16-bit signed multiplications, take the 
-          difference of the products, subtracting the high halfword product from the low halfword 
+
+    \details    This function enables you to perform two 16-bit signed multiplications, take the
+          difference of the products, subtracting the high halfword product from the low halfword
           product, and add the difference to a 32-bit accumulate operand.<br>
-          The Q bit is set if the accumulation overflows. Overflow cannot occur during the multiplications or the 
+          The Q bit is set if the accumulation overflows. Overflow cannot occur during the multiplications or the
           subtraction.
-                
+
     \param      val1    first 16-bit operands for each multiplication.
     \param      val2    second 16-bit operands for each multiplication.
     \param      val3    accumulate value.
 
-              
+
     \returns
-        the difference of the product of each multiplication, added 
+        the difference of the product of each multiplication, added
             to the accumulate value.
-        
-    
+
+
     \par Operation:
         \code
    p1 = val1[15:0]  * val2[15:0]
@@ -1711,24 +1743,24 @@ uint32_t __SMLSD(uint32_t val1, uint32_t val2, uint32_t val3);
 
 
 /**************************************************************************************************/
-/** 
+/**
     \brief      Q setting dual 16-bit signed multiply with exchange subtract with 32-bit accumulate
-    
-    \details    This function enables you to exchange the halfwords in the second operand, then perform 
-          two 16-bit signed multiplications. The difference of the products is added to a 32-bit 
+
+    \details    This function enables you to exchange the halfwords in the second operand, then perform
+          two 16-bit signed multiplications. The difference of the products is added to a 32-bit
           accumulate operand.<br>
           The Q bit is set if the addition overflows. Overflow cannot occur during the multiplications or the subtraction.
-                
+
     \param      val1    first 16-bit operands for each multiplication.
     \param      val2    second 16-bit operands for each multiplication.
     \param      val3    accumulate value.
 
-              
+
     \returns
-        the difference of the product of each multiplication, added 
+        the difference of the product of each multiplication, added
             to the accumulate value.
-        
-    
+
+
     \par Operation:
         \code
    p1 = val1[15:0]  * val2[31:16]
@@ -1740,26 +1772,26 @@ uint32_t __SMLSDX(uint32_t val1, uint32_t val2, uint32_t val3);
 
 
 /**************************************************************************************************/
-/** 
+/**
     \brief      Q setting dual 16-bit signed multiply subtract with 64-bit accumulate
-    
-    \details    This function It enables you to perform two 16-bit signed multiplications, take the 
-          difference of the products, subtracting the high halfword product from the low halfword 
-          product, and add the difference to a 64-bit accumulate operand. Overflow cannot occur 
-          during the multiplications or the subtraction. Overflow can occur as a result of the 64-bit 
-          addition, and this overflow is not detected. Instead, the result wraps round to  
+
+    \details    This function enables you to perform two 16-bit signed multiplications, take the
+          difference of the products, subtracting the high halfword product from the low halfword
+          product, and add the difference to a 64-bit accumulate operand. Overflow cannot occur
+          during the multiplications or the subtraction. Overflow can occur as a result of the 64-bit
+          addition, and this overflow is not detected. Instead, the result wraps round to
           modulo2<sup>64</sup>.
-                
+
     \param      val1    first 16-bit operands for each multiplication.
     \param      val2    second 16-bit operands for each multiplication.
     \param      val3    accumulate value.
 
-              
+
     \returns
-        the difference of the product of each multiplication, 
+        the difference of the product of each multiplication,
             added to the accumulate value.
-        
-    
+
+
     \par Operation:
         \code
    p1 = val1[15:0]  * val2[15:0]
@@ -1771,25 +1803,25 @@ uint64_t __SMLSLD(uint32_t val1, uint32_t val2, uint64_t val3);
 
 
 /**************************************************************************************************/
-/** 
+/**
     \brief      Q setting dual 16-bit signed multiply with exchange subtract with 64-bit accumulate
-    
-    \details    This function enables you to exchange the halfwords of the second operand, perform two 
-          16-bit multiplications, adding the difference of the products to a 64-bit accumulate 
-          operand. Overflow cannot occur during the multiplications or the subtraction. Overflow 
-          can occur as a result of the 64-bit addition, and this overflow is not detected. Instead, 
+
+    \details    This function enables you to exchange the halfwords of the second operand, perform two
+          16-bit multiplications, adding the difference of the products to a 64-bit accumulate
+          operand. Overflow cannot occur during the multiplications or the subtraction. Overflow
+          can occur as a result of the 64-bit addition, and this overflow is not detected. Instead,
           the result wraps round to modulo2<sup>64</sup>.
-                
+
     \param      val1    first 16-bit operands for each multiplication.
     \param      val2    second 16-bit operands for each multiplication.
     \param      val3    accumulate value.
 
-              
+
     \returns
-        the difference of the product of each multiplication, 
+        the difference of the product of each multiplication,
             added to the accumulate value.
-        
-    
+
+
     \par Operation:
         \code
    p1 = val1[15:0]  * val2[31:16]
@@ -1801,48 +1833,48 @@ unsigned long long __SMLSLDX(uint32_t val1, uint32_t val2, unsigned long long va
 
 
 /**************************************************************************************************/
-/** 
+/**
     \brief      Select bytes based on GE bits
-    
-    \details    This function inserts a SEL instruction into the instruction stream generated by the 
-          compiler. It enables you to select bytes from the input parameters, whereby the bytes 
-          that are selected depend upon the results of previous SIMD instruction function. The 
-          results of previous SIMD instruction function are represented by the Greater than or 
+
+    \details    This function inserts a SEL instruction into the instruction stream generated by the
+          compiler. It enables you to select bytes from the input parameters, whereby the bytes
+          that are selected depend upon the results of previous SIMD instruction function. The
+          results of previous SIMD instruction function are represented by the Greater than or
           Equal flags in the Application Program Status Register (APSR).
-          The __SEL function works equally well on both halfword and byte operand function 
-          results. This is because halfword operand operations set two (duplicate) GE bits per 
+          The __SEL function works equally well on both halfword and byte operand function
+          results. This is because halfword operand operations set two (duplicate) GE bits per
           value.
-                
+
     \param      val1    four selectable 8-bit values.
     \param      val2    four selectable 8-bit values.
 
-              
+
     \returns
-        The function selects bytes from the input parameters and returns them in the 
+        The function selects bytes from the input parameters and returns them in the
             return value, res, according to the following criteria:
             \li if APSR.GE[0] == 1 then res[7:0] = val1[7:0] else res[7:0] = val2[7:0]
             \li if APSR.GE[1] == 1 then res[15:8] = val1[15:8] else res[15:8] = val2[15:8]
             \li if APSR.GE[2] == 1 then res[23:16] = val1[23:16] else res[23:16] = val2[23:16]
             \li if APSR.GE[3] == 1 then res[31;24] = val1[31:24] else res = val2[31:24]
-  
+
 */
 uint32_t __SEL(uint32_t val1, uint32_t val2);
 
 
 /**************************************************************************************************/
-/** 
+/**
     \brief      Q setting saturating add
-    
+
     \details    This function enables you to obtain the saturating add of two integers.<br>
          The Q bit is set if the operation saturates.
-                
+
     \param      val1    first summand of the saturating add operation.
     \param      val2    second summand of the saturating add operation.
 
-              
+
     \returns
         the saturating addition of val1 and val2.
-  
+
     \par Operation:
         \code
    res[31:0] = SAT(val1 + SAT(val2))
@@ -1852,19 +1884,19 @@ uint32_t __QADD(uint32_t val1, uint32_t val2);
 
 
 /**************************************************************************************************/
-/** 
+/**
     \brief      Q setting saturating subtract
-    
+
     \details    This function enables you to obtain the saturating subtraction of two integers.<br>
          The Q bit is set if the operation saturates.
-                
+
     \param      val1    minuend of the saturating subtraction operation.
     \param      val2    subtrahend of the saturating subtraction operation.
 
-              
+
     \returns
         the saturating subtraction of val1 and val2.
-  
+
     \par Operation:
         \code
    res[31:0] = SAT(val1 - SAT(val2))
@@ -1874,52 +1906,52 @@ uint32_t __QSUB(uint32_t val1, uint32_t val2);
 
 
 /**************************************************************************************************/
-/** 
-    \brief      Halfword packing instruction. Combines bits[15:0] of <i>val1</i> 
+/**
+    \brief      Halfword packing instruction. Combines bits[15:0] of <i>val1</i>
                 with bits[31:16] of <i>val2</i> levitated with the <i>val3</i>.
-    
-    \details    Combine a halfword from one register with a halfword from another register. 
-                The second argument can be left-shifted before extraction of the halfword. The registers 
+
+    \details    Combine a halfword from one register with a halfword from another register.
+                The second argument can be left-shifted before extraction of the halfword. The registers
                 PC and SP are not allowed as arguments. This instruction does not change the flags.
-                
+
     \param      val1    first 16-bit operands
     \param      val2    second 16-bit operands
     \param      val3    value for left-shifting <i>val2</i>. Value range [0..31].
 
-              
+
     \returns
         the combination of halfwords.
-  
+
     \par Operation:
         \code
    res[15:0]  = val1[15:0]
-   res[31:16] = val2[31:16]<<val3 
+   res[31:16] = (val2 << val3)[31:16]
         \endcode
 */
 uint32_t __PKHBT(uint32_t val1, uint32_t val2, uint32_t val3);
 
 
 /**************************************************************************************************/
-/** 
-    \brief      Halfword packing instruction. Combines bits[31:16] of <i>val1</i> 
+/**
+    \brief      Halfword packing instruction. Combines bits[31:16] of <i>val1</i>
                 with bits[15:0] of <i>val2</i> right-shifted with the <i>val3</i>.
-    
-    \details    Combines a halfword from one register with a halfword from another register. 
-                The second argument can be right-shifted before extraction of the halfword. The registers 
+
+    \details    Combines a halfword from one register with a halfword from another register.
+                The second argument can be right-shifted before extraction of the halfword. The registers
                 PC and SP are not allowed as arguments. This instruction does not change the flags.
-                
+
     \param      val1    second 16-bit operands
     \param      val2    first 16-bit operands
     \param      val3    value for right-shifting <i>val2</i>. Value range [1..32].
 
-              
+
     \returns
         the combination of halfwords.
-  
+
     \par Operation:
         \code
    res[15:0]  = val2[15:0]>>val3
-   res[31:16] = val1[31:16] 
+   res[31:16] = val1[31:16]
         \endcode
 */
 uint32_t __PKHTB(uint32_t val1, uint32_t val2, uint32_t val3);