-
Notifications
You must be signed in to change notification settings - Fork 20
/
Copy pathGMS_32f_exp_32f.h
125 lines (97 loc) · 4.79 KB
/
GMS_32f_exp_32f.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
#ifndef __GMS_32F_EXP_32F_H__
#define __GMS_32F_EXP_32F_H__
/*
Based on VOLK project.
Modified by Bernard Gingold on:
Date: 09-10-2021 13:34AM +00200
contact: beniekg@gmail.com
A few modifications were made to the original
implementation (ICC pragmas, alignment directives, rescheduled code layout,
and complete unrolling of 2-iteration for-loops).
*/
/*
* Copyright 2018 Free Software Foundation, Inc.
*
* This file is part of GNU Radio
*
* GNU Radio is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3, or (at your option)
* any later version.
*
* GNU Radio is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNU Radio; see the file COPYING. If not, write to
* the Free Software Foundation, Inc., 51 Franklin Street,
* Boston, MA 02110-1301, USA.
*/
/* SIMD implementation of exp (comment inherited as "SSE4"; the routines in this header use AVX 256-bit vectors)
Inspired by Intel Approximate Math library, and based on the
corresponding algorithms of the cephes math library
*/
/* Copyright (C) 2007 Julien Pommier
This software is provided 'as-is', without any express or implied
warranty. In no event will the authors be held liable for any damages
arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it
freely, subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not
claim that you wrote the original software. If you use this software
in a product, an acknowledgment in the product documentation would be
appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be
misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
(this is the zlib license)
*/
#include <immintrin.h>
#include <cstdint>
#include "GMS_config.h"
#if !defined(DSP_32F_EXP_32F_BLOCK)
/*
 * DSP_32F_EXP_32F_BLOCK
 *
 * Statement-level macro that expands to the block of local declarations
 * (constants and scratch variables) used by the AVX exp routines.  It is a
 * sequence of declarations, not an expression, so it must be expanded at
 * the top of a function body.
 *
 * Requirements at the expansion site:
 *   - an integer named `npoints` must already be in scope; it is read to
 *     compute `len`;
 *   - the AVX intrinsic types/functions (__m256, __m256i, _mm256_*) and
 *     int32_t must be declared (this header includes <immintrin.h> and
 *     <cstdint> for that purpose).
 *
 * Names introduced into the enclosing scope:
 *   one, half            broadcast 1.0f and 0.5f
 *   exp_hi, exp_lo       broadcast +/-88.3762626647949f (presumably the input
 *                        clamp bounds -- confirm against the kernels)
 *   log2EF               broadcast log2(e)
 *   exp_C1, exp_C2       two constants whose difference of magnitudes is
 *                        ln(2): 0.693359375 - 2.12194440e-4
 *   pi32_0x7f            broadcast 32-bit integer 0x7f
 *   exp_p0 .. exp_p5     broadcast coefficients (named as polynomial terms)
 *   aVal, bVal, tmp, fx,
 *   mask, pow2n, z, y    __m256 scratch vectors, all zero-initialised
 *   emm0                 __m256i scratch vector, intentionally left
 *                        uninitialised here; must be assigned before use
 *   idx                  int32_t counter starting at 0
 *   len                  npoints / 8, i.e. the number of whole 8-float
 *                        (256-bit) groups contained in npoints
 *
 * Fix: `idx` was declared with the non-existent type `int32_`, which made
 * every expansion of this macro fail to compile; it is now `int32_t`.
 */
#define DSP_32F_EXP_32F_BLOCK                                          \
    const __m256  one       = _mm256_set1_ps(1.0F);                    \
    const __m256  exp_hi    = _mm256_set1_ps(88.3762626647949F);       \
    const __m256  exp_lo    = _mm256_set1_ps(-88.3762626647949F);      \
    const __m256  log2EF    = _mm256_set1_ps(1.44269504088896341F);    \
    const __m256  half      = _mm256_set1_ps(0.5F);                    \
    const __m256  exp_C1    = _mm256_set1_ps(0.693359375F);            \
    const __m256  exp_C2    = _mm256_set1_ps(-2.12194440e-4F);         \
    const __m256i pi32_0x7f = _mm256_set1_epi32(0x7f);                 \
    const __m256  exp_p0    = _mm256_set1_ps(1.9875691500e-4F);        \
    const __m256  exp_p1    = _mm256_set1_ps(1.3981999507e-3F);        \
    const __m256  exp_p2    = _mm256_set1_ps(8.3334519073e-3F);        \
    const __m256  exp_p3    = _mm256_set1_ps(4.1665795894e-2F);        \
    const __m256  exp_p4    = _mm256_set1_ps(1.6666665459e-1F);        \
    const __m256  exp_p5    = _mm256_set1_ps(5.0000001201e-1F);        \
    __m256        aVal      = _mm256_setzero_ps();                     \
    __m256        bVal      = aVal;                                    \
    __m256        tmp       = aVal;                                    \
    __m256        fx        = aVal;                                    \
    __m256        mask      = aVal;                                    \
    __m256        pow2n     = aVal;                                    \
    __m256        z         = aVal;                                    \
    __m256        y         = aVal;                                    \
    __m256i       emm0;                                                \
    int32_t       idx       = 0;                                       \
    const int32_t len       = npoints / 8;
#endif
/*
 * dsp_32f_exp_32f_u_avx_looped -- declaration only; the definition is not
 * part of this header.
 *
 * Presumably computes exp() element-wise over a float array using AVX
 * (per the "SIMD implementation of exp" note at the top of this file), with
 * the `_u_` infix presumably meaning "unaligned pointers accepted"
 * -- TODO confirm both against the definition.
 *
 * b        non-const float pointer; presumably the output array.
 * a        pointer to const float; the function cannot write through it,
 *          presumably the input array.
 * npoints  32-bit signed count; presumably the number of floats in `a`/`b`.
 *
 * Both pointers are `__restrict`-qualified: the caller promises that `a`
 * and `b` do not alias each other.
 *
 * __ATTR_HOT__ and __ATTR_ALIGN__ are project macros (expected to come from
 * GMS_config.h); their expansions are not visible here.
 */
__ATTR_HOT__
__ATTR_ALIGN__(32)
void dsp_32f_exp_32f_u_avx_looped(float * __restrict b,
const float * __restrict a,
const int32_t npoints);
/*
 * dsp_32f_exp_32f_a_avx_looped -- declaration only; the definition is not
 * part of this header.
 *
 * Same parameter list as dsp_32f_exp_32f_u_avx_looped, except that both
 * pointer parameters additionally carry __ATTR_ALIGN__(32).  Presumably this
 * is the variant that requires `a` and `b` to be 32-byte aligned (`_a_` =
 * aligned) -- TODO confirm against the definition and the expansion of
 * __ATTR_ALIGN__ in GMS_config.h.
 *
 * b        non-const float pointer; presumably the output array.
 * a        pointer to const float; the function cannot write through it,
 *          presumably the input array.
 * npoints  32-bit signed count; presumably the number of floats in `a`/`b`.
 *
 * Both pointers are `__restrict`-qualified: the caller promises that `a`
 * and `b` do not alias each other.
 */
__ATTR_HOT__
__ATTR_ALIGN__(32)
void dsp_32f_exp_32f_a_avx_looped(float * __restrict __ATTR_ALIGN__(32) b,
const float * __restrict __ATTR_ALIGN__(32) a,
const int32_t npoints);
/*
 * dsp_32f_exp_32f_avx -- declaration only; the definition is not part of
 * this header.
 *
 * Takes one 256-bit vector (eight packed floats) by value and returns one
 * 256-bit vector.  Presumably the result holds exp() of each input lane
 * -- TODO confirm against the definition.
 *
 * v        input vector of eight packed single-precision floats.
 * returns  a __m256 result vector.
 *
 * __ATTR_VECTORCALL__ is a project macro (expected to come from
 * GMS_config.h); presumably it selects a vector calling convention so that
 * `v` is passed in a register -- verify its expansion.
 */
__ATTR_HOT__
__ATTR_ALIGN__(32)
__ATTR_VECTORCALL__
__m256 dsp_32f_exp_32f_avx(const __m256 v);
#endif /*__GMS_32F_EXP_32F_H__*/