GNU Radio 3.6.3.1 C++ API
volk_32f_s32f_convert_8i_u.h
Go to the documentation of this file.
1 #ifndef INCLUDED_volk_32f_s32f_convert_8i_u_H
2 #define INCLUDED_volk_32f_s32f_convert_8i_u_H
3 
4 #include <inttypes.h>
5 #include <stdio.h>
6 
7 #ifdef LV_HAVE_SSE2
8 #include <emmintrin.h>
9  /*!
10  \brief Multiplies each point in the input buffer by the scalar value, then converts the result into a 8 bit integer value
11  \param inputVector The floating point input data buffer
12  \param outputVector The 8 bit output data buffer
13  \param scalar The value multiplied against each point in the input buffer
14  \param num_points The number of data values to be converted
15  \note Input buffer does NOT need to be properly aligned
16  */
17 static inline void volk_32f_s32f_convert_8i_u_sse2(int8_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){
18  unsigned int number = 0;
19 
20  const unsigned int sixteenthPoints = num_points / 16;
21 
22  const float* inputVectorPtr = (const float*)inputVector;
23  int8_t* outputVectorPtr = outputVector;
24 
25  float min_val = -128;
26  float max_val = 127;
27  float r;
28 
29  __m128 vScalar = _mm_set_ps1(scalar);
30  __m128 inputVal1, inputVal2, inputVal3, inputVal4;
31  __m128i intInputVal1, intInputVal2, intInputVal3, intInputVal4;
32  __m128 vmin_val = _mm_set_ps1(min_val);
33  __m128 vmax_val = _mm_set_ps1(max_val);
34 
35  for(;number < sixteenthPoints; number++){
36  inputVal1 = _mm_loadu_ps(inputVectorPtr); inputVectorPtr += 4;
37  inputVal2 = _mm_loadu_ps(inputVectorPtr); inputVectorPtr += 4;
38  inputVal3 = _mm_loadu_ps(inputVectorPtr); inputVectorPtr += 4;
39  inputVal4 = _mm_loadu_ps(inputVectorPtr); inputVectorPtr += 4;
40 
41  inputVal1 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal1, vScalar), vmax_val), vmin_val);
42  inputVal2 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal2, vScalar), vmax_val), vmin_val);
43  inputVal3 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal3, vScalar), vmax_val), vmin_val);
44  inputVal4 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal4, vScalar), vmax_val), vmin_val);
45 
46  intInputVal1 = _mm_cvtps_epi32(inputVal1);
47  intInputVal2 = _mm_cvtps_epi32(inputVal2);
48  intInputVal3 = _mm_cvtps_epi32(inputVal3);
49  intInputVal4 = _mm_cvtps_epi32(inputVal4);
50 
51  intInputVal1 = _mm_packs_epi32(intInputVal1, intInputVal2);
52  intInputVal3 = _mm_packs_epi32(intInputVal3, intInputVal4);
53 
54  intInputVal1 = _mm_packs_epi16(intInputVal1, intInputVal3);
55 
56  _mm_storeu_si128((__m128i*)outputVectorPtr, intInputVal1);
57  outputVectorPtr += 16;
58  }
59 
60  number = sixteenthPoints * 16;
61  for(; number < num_points; number++){
62  r = inputVector[number] * scalar;
63  if(r > max_val)
64  r = max_val;
65  else if(r < min_val)
66  r = min_val;
67  outputVector[number] = (int16_t)(r);
68  }
69 }
70 #endif /* LV_HAVE_SSE2 */
71 
72 #ifdef LV_HAVE_SSE
73 #include <xmmintrin.h>
74  /*!
75  \brief Multiplies each point in the input buffer by the scalar value, then converts the result into a 8 bit integer value
76  \param inputVector The floating point input data buffer
77  \param outputVector The 8 bit output data buffer
78  \param scalar The value multiplied against each point in the input buffer
79  \param num_points The number of data values to be converted
80  \note Input buffer does NOT need to be properly aligned
81  */
82 static inline void volk_32f_s32f_convert_8i_u_sse(int8_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){
83  unsigned int number = 0;
84 
85  const unsigned int quarterPoints = num_points / 4;
86 
87  const float* inputVectorPtr = (const float*)inputVector;
88  int8_t* outputVectorPtr = outputVector;
89 
90  float min_val = -128;
91  float max_val = 127;
92  float r;
93 
94  __m128 vScalar = _mm_set_ps1(scalar);
95  __m128 ret;
96  __m128 vmin_val = _mm_set_ps1(min_val);
97  __m128 vmax_val = _mm_set_ps1(max_val);
98 
99  __VOLK_ATTR_ALIGNED(16) float outputFloatBuffer[4];
100 
101  for(;number < quarterPoints; number++){
102  ret = _mm_loadu_ps(inputVectorPtr);
103  inputVectorPtr += 4;
104 
105  ret = _mm_max_ps(_mm_min_ps(_mm_mul_ps(ret, vScalar), vmax_val), vmin_val);
106 
107  _mm_store_ps(outputFloatBuffer, ret);
108  *outputVectorPtr++ = (int8_t)(outputFloatBuffer[0]);
109  *outputVectorPtr++ = (int8_t)(outputFloatBuffer[1]);
110  *outputVectorPtr++ = (int8_t)(outputFloatBuffer[2]);
111  *outputVectorPtr++ = (int8_t)(outputFloatBuffer[3]);
112  }
113 
114  number = quarterPoints * 4;
115  for(; number < num_points; number++){
116  r = inputVector[number] * scalar;
117  if(r > max_val)
118  r = max_val;
119  else if(r < min_val)
120  r = min_val;
121  outputVector[number] = (int16_t)(r);
122  }
123 }
124 #endif /* LV_HAVE_SSE */
125 
126 #ifdef LV_HAVE_GENERIC
127  /*!
128  \brief Multiplies each point in the input buffer by the scalar value, then converts the result into a 8 bit integer value
129  \param inputVector The floating point input data buffer
130  \param outputVector The 8 bit output data buffer
131  \param scalar The value multiplied against each point in the input buffer
132  \param num_points The number of data values to be converted
133  \note Input buffer does NOT need to be properly aligned
134  */
135 static inline void volk_32f_s32f_convert_8i_u_generic(int8_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){
136  int8_t* outputVectorPtr = outputVector;
137  const float* inputVectorPtr = inputVector;
138  unsigned int number = 0;
139  float min_val = -128;
140  float max_val = 127;
141  float r;
142 
143  for(number = 0; number < num_points; number++){
144  r = *inputVectorPtr++ * scalar;
145  if(r > max_val)
146  r = max_val;
147  else if(r < min_val)
148  r = min_val;
149  *outputVectorPtr++ = (int16_t)(r);
150  }
151 }
152 #endif /* LV_HAVE_GENERIC */
153 
154 
155 
156 
157 #endif /* INCLUDED_volk_32f_s32f_convert_8i_u_H */