GNU Radio 3.7.2.1-77 C++ API
volk_32f_invsqrt_32f.h
Go to the documentation of this file.
1 #ifndef INCLUDED_volk_32f_invsqrt_32f_a_H
2 #define INCLUDED_volk_32f_invsqrt_32f_a_H
3 
4 #include <inttypes.h>
5 #include <stdio.h>
6 #include <math.h>
7 #include <string.h>
8 
/*!
  \brief Fast approximation of the reciprocal square root, 1/sqrt(number)
  \param number The value to process; the approximation is only meaningful for positive, finite inputs
  \return The approximate inverse square root after one Newton-Raphson refinement step

  The initial guess is obtained by integer arithmetic on the IEEE-754 bit
  pattern of the input. All integer work is done on an unsigned type so that
  inputs with the sign bit set (including -0.0f) cannot trigger signed
  overflow or an implementation-defined shift; the returned value for such
  inputs is still not a meaningful inverse square root.
*/
static inline float Q_rsqrt( float number )
{
  const float threehalfs = 1.5F;
  const float x2 = number * 0.5F;
  uint32_t bits;
  float y;

  /* Reinterpret the float as its raw 32-bit pattern without aliasing issues. */
  memcpy(&bits, &number, sizeof bits);

  /*
   * Halve the bit pattern while replicating the sign bit (i.e. an explicit
   * arithmetic shift), then subtract from the magic constant. Unsigned
   * arithmetic wraps modulo 2^32, so this is well defined for every input.
   */
  bits = 0x5f3759dfu - ( ( bits >> 1 ) | ( bits & 0x80000000u ) );

  memcpy(&y, &bits, sizeof y);

  /* One Newton-Raphson step for f(y) = 1/y^2 - number. */
  y = y * ( threehalfs - ( x2 * y * y ) );

  return y;
}
26 
27 #ifdef LV_HAVE_SSE
28 #include <xmmintrin.h>
29 /*!
30  \brief Sqrts the two input vectors and store their results in the third vector
31  \param cVector The vector where the results will be stored
32  \param aVector One of the vectors to be invsqrted
33  \param num_points The number of values in aVector and bVector to be invsqrted together and stored into cVector
34 */
35 static inline void volk_32f_invsqrt_32f_a_sse(float* cVector, const float* aVector, unsigned int num_points){
36  unsigned int number = 0;
37  const unsigned int quarterPoints = num_points / 4;
38 
39  float* cPtr = cVector;
40  const float* aPtr = aVector;
41 
42  __m128 aVal, cVal;
43  for(;number < quarterPoints; number++){
44 
45  aVal = _mm_load_ps(aPtr);
46 
47  cVal = _mm_rsqrt_ps(aVal);
48 
49  _mm_store_ps(cPtr,cVal); // Store the results back into the C container
50 
51  aPtr += 4;
52  cPtr += 4;
53  }
54 
55  number = quarterPoints * 4;
56  for(;number < num_points; number++){
57  *cPtr++ = Q_rsqrt(*aPtr++);
58  }
59 }
60 #endif /* LV_HAVE_SSE */
61 
62 #ifdef LV_HAVE_GENERIC
/*!
  \brief Computes an approximate inverse square root of each element of aVector and stores it in cVector
  \param cVector The vector where the results will be stored
  \param aVector The vector of values to be inverse-square-rooted
  \param num_points The number of values in aVector to be processed and stored into cVector

  Portable fallback: every element is passed through Q_rsqrt one at a time.
*/
static inline void volk_32f_invsqrt_32f_generic(float* cVector, const float* aVector, unsigned int num_points){
  unsigned int idx;

  for(idx = 0; idx < num_points; idx++){
    cVector[idx] = Q_rsqrt(aVector[idx]);
  }
}
77 #endif /* LV_HAVE_GENERIC */
78 
79 #endif /* INCLUDED_volk_32f_invsqrt_32f_a_H */