Skip to content

Commit

Permalink
CMSIS-DSP: Added Helium version of quaternion functions.
Browse files Browse the repository at this point in the history
  • Loading branch information
christophe0606 committed Feb 17, 2021
1 parent af1c54b commit 152a92d
Show file tree
Hide file tree
Showing 12 changed files with 622 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,74 @@
</pre>
Rotation matrix is saved in row order : R00 R01 R02 R10 R11 R12 R20 R21 R22
*/

#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)

#include "arm_helium_utils.h"

void arm_quaternion2rotation_f32(const float32_t *pInputQuaternions,
float32_t *pOutputRotations,
uint32_t nbQuaternions)
{
f32x4_t vec0,vec1, vec2 ,vec3;
float32_t q2q3, tmp1, tmp2 ;

for(uint32_t nb=0; nb < nbQuaternions; nb++)
{

// q0 q1 q2 q3
vec0 = vld1q(pInputQuaternions);

// q0^2 q1^2 q2^2 q3^2
vec1 = vmulq(vec0,vec0);

// q0^2 q1q0 q2q0 q3q0
vec2 = vmulq_n_f32(vec0, vgetq_lane(vec0,0));

// 2 (q0^2 q1q0 q2q0 q3q0)
vec2 = vmulq_n_f32(vec2, 2.0f);


// 2 q2q3
q2q3 = vgetq_lane(vec0,2) * vgetq_lane(vec0,3);
q2q3 = q2q3 * 2.0f;

// 2 (q0q1 q1^2 q2q1 q3q1)
vec3 = vmulq_n_f32(vec0, vgetq_lane(vec0,1));
vec3 = vmulq_n_f32(vec3, 2.0f);



vec0 = vsetq_lane(vgetq_lane(vec1,0) + vgetq_lane(vec1,1),vec0,0);
vec0 = vsetq_lane(vgetq_lane(vec0,0) - vgetq_lane(vec1,2),vec0,0);
vec0 = vsetq_lane(vgetq_lane(vec0,0) - vgetq_lane(vec1,3),vec0,0);
vec0 = vsetq_lane(vgetq_lane(vec3,2) - vgetq_lane(vec2,3),vec0,1);
vec0 = vsetq_lane(vgetq_lane(vec3,3) + vgetq_lane(vec2,2),vec0,2);
vec0 = vsetq_lane(vgetq_lane(vec3,2) + vgetq_lane(vec2,3),vec0,3);

vst1q(pOutputRotations, vec0);
pOutputRotations += 4;

tmp1 = vgetq_lane(vec1,0) - vgetq_lane(vec1,1);
tmp2 = vgetq_lane(vec1,2) - vgetq_lane(vec1,3);


vec0 = vsetq_lane(tmp1 + tmp2,vec0,0);
vec0 = vsetq_lane(q2q3 - vgetq_lane(vec2,1) ,vec0,1);
vec0 = vsetq_lane(vgetq_lane(vec3,3) - vgetq_lane(vec2,2),vec0,2);
vec0 = vsetq_lane(q2q3 + vgetq_lane(vec2,1) ,vec0,3);

vst1q(pOutputRotations, vec0);
pOutputRotations += 4;

*pOutputRotations = tmp1 - tmp2;
pOutputRotations ++;

pInputQuaternions += 4;
}
}

#else
void arm_quaternion2rotation_f32(const float32_t *pInputQuaternions,
float32_t *pOutputRotations,
uint32_t nbQuaternions)
Expand Down Expand Up @@ -103,6 +171,7 @@ void arm_quaternion2rotation_f32(const float32_t *pInputQuaternions,
pOutputRotations[6 + nb * 9] = zx; pOutputRotations[7 + nb * 9] = zy; pOutputRotations[8 + nb * 9] = zz;
}
}
#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */

/**
@} end of QuatRot group
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,32 @@
@param[in] nbQuaternions number of quaternions in each vector
@return none
*/

#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)

#include "arm_helium_utils.h"
void arm_quaternion_conjugate_f32(const float32_t *pInputQuaternions,
float32_t *pConjugateQuaternions,
uint32_t nbQuaternions)
{
f32x4_t vec1;

for(uint32_t i=0; i < nbQuaternions; i++)
{
vec1 = vld1q(pInputQuaternions);


vec1 = vsetq_lane_f32(-vgetq_lane(vec1, 0),vec1,0);
vec1 = vnegq_f32(vec1);

vst1q(pConjugateQuaternions, vec1);


pInputQuaternions += 4;
pConjugateQuaternions += 4;
}
}
#else
void arm_quaternion_conjugate_f32(const float32_t *pInputQuaternions,
float32_t *pConjugateQuaternions,
uint32_t nbQuaternions)
Expand All @@ -62,6 +88,7 @@ void arm_quaternion_conjugate_f32(const float32_t *pInputQuaternions,
pConjugateQuaternions[4 * i + 3] = -pInputQuaternions[4 * i + 3];
}
}
#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */

/**
@} end of QuatConjugate group
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,39 @@
*/


#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)

#include "arm_helium_utils.h"

void arm_quaternion_inverse_f32(const float32_t *pInputQuaternions,
float32_t *pInverseQuaternions,
uint32_t nbQuaternions)
{
f32x4_t vec1,vec2;
float32_t squaredSum;

for(uint32_t i=0; i < nbQuaternions; i++)
{

vec1 = vld1q(pInputQuaternions);
vec2 = vmulq(vec1,vec1);
squaredSum = vecAddAcrossF32Mve(vec2);


vec1 = vmulq_n_f32(vec1, 1.0f / squaredSum);
vec1 = vsetq_lane_f32(-vgetq_lane(vec1, 0),vec1,0);
vec1 = vnegq_f32(vec1);

vst1q(pInverseQuaternions, vec1);


pInputQuaternions += 4;
pInverseQuaternions += 4;

}
}

#else
void arm_quaternion_inverse_f32(const float32_t *pInputQuaternions,
float32_t *pInverseQuaternions,
uint32_t nbQuaternions)
Expand All @@ -72,6 +104,7 @@ void arm_quaternion_inverse_f32(const float32_t *pInputQuaternions,
pInverseQuaternions[4 * i + 3] = -pInputQuaternions[4 * i + 3] / temp;
}
}
#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */

/**
@} end of QuatInverse group
Expand Down
26 changes: 26 additions & 0 deletions CMSIS/DSP/Source/QuaternionMathFunctions/arm_quaternion_norm_f32.c
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,31 @@
*/


#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)

#include "arm_helium_utils.h"

void arm_quaternion_norm_f32(const float32_t *pInputQuaternions,
float32_t *pNorms,
uint32_t nbQuaternions)
{
f32x4_t vec1;
float32_t squaredSum;

for(uint32_t i=0; i < nbQuaternions; i++)
{
vec1 = vld1q(pInputQuaternions);
vec1 = vmulq(vec1,vec1);
squaredSum = vecAddAcrossF32Mve(vec1);
arm_sqrt_f32(squaredSum,pNorms);

pInputQuaternions+= 4;
pNorms ++;
}

}

#else

void arm_quaternion_norm_f32(const float32_t *pInputQuaternions,
float32_t *pNorms,
Expand All @@ -67,6 +92,7 @@ void arm_quaternion_norm_f32(const float32_t *pInputQuaternions,
pNorms[i] = sqrtf(temp);
}
}
#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */

/**
@} end of QuatNorm group
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,34 @@
@param[in] nbQuaternions number of quaternions in each vector
@return none
*/

#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)

#include "arm_helium_utils.h"

void arm_quaternion_normalize_f32(const float32_t *pInputQuaternions,
float32_t *pNormalizedQuaternions,
uint32_t nbQuaternions)
{
f32x4_t vec1,vec2;
float32_t squaredSum,norm;

for(uint32_t i=0; i < nbQuaternions; i++)
{
vec1 = vld1q(pInputQuaternions);
vec2 = vmulq(vec1,vec1);
squaredSum = vecAddAcrossF32Mve(vec2);
arm_sqrt_f32(squaredSum,&norm);
vec1 = vmulq_n_f32(vec1, 1.0f / norm);
vst1q(pNormalizedQuaternions, vec1);

pInputQuaternions += 4;
pNormalizedQuaternions += 4;

}
}

#else
void arm_quaternion_normalize_f32(const float32_t *pInputQuaternions,
float32_t *pNormalizedQuaternions,
uint32_t nbQuaternions)
Expand All @@ -69,6 +97,7 @@ void arm_quaternion_normalize_f32(const float32_t *pInputQuaternions,
pNormalizedQuaternions[4 * i + 3] = pInputQuaternions[4 * i + 3] / temp;
}
}
#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */

/**
@} end of QuatNormalized group
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,10 +46,75 @@
@brief Floating-point elementwise product two quaternions.
@param[in] qa first array of quaternions
@param[in] qb second array of quaternions
@param[out] r elementwise product of quaternions
@param[out] qr elementwise product of quaternions
@param[in] nbQuaternions number of quaternions in the array
@return none
*/

#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)

#include "arm_helium_utils.h"

void arm_quaternion_product_f32(const float32_t *qa,
const float32_t *qb,
float32_t *qr,
uint32_t nbQuaternions)
{
static uint32_t patternA[4] = { 0, 1, 0, 1 };
static uint32_t patternB[4] = { 3, 2, 3, 2 };
static uint32_t patternC[4] = { 3, 2, 1, 0 };
static float32_t signA[4] = { -1, -1, 1, 1 };

uint32x4_t vecA = vld1q_u32(patternA);
uint32x4_t vecB = vld1q_u32(patternB);
uint32x4_t vecC = vld1q_u32(patternC);
f32x4_t vecSignA = vld1q_f32(signA);

while (nbQuaternions > 0U)
{
f32x4_t vecTmpA, vecTmpB, vecAcc;

vecTmpA = vldrwq_gather_shifted_offset_f32(qa, vecA);
vecTmpB = vld1q(qb);
/*
* vcmul(r, [a1, a2, a1, a2], [b1, b2, b3, b4], 0)
*/
vecAcc = vcmulq(vecTmpA, vecTmpB);
/*
* vcmla(r, [a1, a2, a1, a2], [b1, b2, b3, b4], 90)
*/
vecAcc = vcmlaq_rot90(vecAcc, vecTmpA, vecTmpB);

vecTmpA = vldrwq_gather_shifted_offset_f32(qa, vecB);
vecTmpB = vldrwq_gather_shifted_offset_f32(qb, vecC);
/*
* build [-b4, -b3, b2, b1]
*/
vecTmpB = vecTmpB * vecSignA;
/*
* vcmla(r, [a4, a3, a4, a3], [-b4, -b3, b2, b1], 270)
*/
vecAcc = vcmlaq_rot270(vecAcc, vecTmpA, vecTmpB);
/*
* vcmla(r, [a4, a3, a4, a3], [-b4, -b3, b2, b1], 0)
*/
vecAcc = vcmlaq(vecAcc, vecTmpA, vecTmpB);
/*
* store accumulator
*/
vst1q_f32(qr, vecAcc);

/* move to next quaternion */
qa += 4;
qb += 4;
qr += 4;

nbQuaternions--;
}
}

#else

void arm_quaternion_product_f32(const float32_t *qa,
const float32_t *qb,
float32_t *r,
Expand All @@ -64,6 +129,7 @@ void arm_quaternion_product_f32(const float32_t *qa,
r += 4;
}
}
#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */

/**
@} end of QuatProd group
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,9 +46,48 @@
@brief Floating-point product of two quaternions.
@param[in] qa first quaternion
@param[in] qb second quaternion
@param[out] r product of two quaternions
@param[out] qr product of two quaternions
@return none
*/

#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)

#include "arm_helium_utils.h"
void arm_quaternion_product_single_f32(const float32_t *qa,
const float32_t *qb,
float32_t *qr)
{
static uint32_t patternA[4] = { 0, 1, 0, 1 };
static uint32_t patternB[4] = { 3, 2, 3, 2 };
static uint32_t patternC[4] = { 3, 2, 1, 0 };
static float32_t signA[4] = { -1, -1, 1, 1 };

uint32x4_t vecA = vld1q_u32(patternA);
uint32x4_t vecB = vld1q_u32(patternB);
uint32x4_t vecC = vld1q_u32(patternC);
f32x4_t vecSignA = vld1q_f32(signA);


f32x4_t vecTmpA, vecTmpB, vecAcc;

vecTmpA = vldrwq_gather_shifted_offset_f32(qa, vecA);
vecTmpB = vld1q_f32(qb);

vecAcc = vcmulq_f32(vecTmpA, vecTmpB);
vecAcc = vcmlaq_rot90_f32(vecAcc, vecTmpA, vecTmpB);

vecTmpA = vldrwq_gather_shifted_offset_f32(qa, vecB);
vecTmpB = vldrwq_gather_shifted_offset_f32(qb, vecC);

vecTmpB = vecTmpB * vecSignA;

vecAcc = vcmlaq_rot270_f32(vecAcc, vecTmpA, vecTmpB);
vecAcc = vcmlaq_f32(vecAcc, vecTmpA, vecTmpB);

vst1q_f32(qr, vecAcc);
}

#else
void arm_quaternion_product_single_f32(const float32_t *qa,
const float32_t *qb,
float32_t *r)
Expand All @@ -58,6 +97,7 @@ void arm_quaternion_product_single_f32(const float32_t *qa,
r[2] = qa[0] * qb[2] + qa[2] * qb[0] + qa[3] * qb[1] - qa[1] * qb[3];
r[3] = qa[0] * qb[3] + qa[3] * qb[0] + qa[1] * qb[2] - qa[2] * qb[1];
}
#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */

/**
@} end of QuatProdSingle group
Expand Down
Loading

0 comments on commit 152a92d

Please sign in to comment.