GNU Radio 3.6.3.1 C++ API
volk_32fc_s32fc_x2_rotator_32fc_a.h
#ifndef INCLUDED_volk_32fc_s32fc_rotator_32fc_a_H
#define INCLUDED_volk_32fc_s32fc_rotator_32fc_a_H


#include <volk/volk_complex.h>
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#define ROTATOR_RELOAD 512
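/* ROTATOR_RELOAD is the renormalization interval: every ROTATOR_RELOAD samples the
   kernels below rescale the running phase back to unit magnitude, since rounding
   error from the repeated complex multiplies would otherwise slowly change the
   rotator's amplitude. */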


#ifdef LV_HAVE_GENERIC

/*!
 \brief rotate input vector at fixed rate per sample from initial phase offset
 \param outVector The vector where the results will be stored
 \param inVector Vector to be rotated
 \param phase_inc phase increment per sample (a unit-magnitude complex number)
 \param phase initial phase offset; updated to the final phase on return
 \param num_points The number of values in inVector to be rotated and stored into outVector
*/


static inline void volk_32fc_s32fc_x2_rotator_32fc_a_generic(lv_32fc_t* outVector, const lv_32fc_t* inVector, const lv_32fc_t phase_inc, lv_32fc_t* phase, unsigned int num_points){
    unsigned int i = 0;
    int j = 0;
    for(i = 0; i < (unsigned int)(num_points/ROTATOR_RELOAD); ++i) {
        for(j = 0; j < ROTATOR_RELOAD; ++j) {
            *outVector++ = *inVector++ * (*phase);
            (*phase) *= phase_inc;
        }
        /* renormalize to unit magnitude; integer abs() would truncate the real part */
        (*phase) /= hypotf(lv_creal(*phase), lv_cimag(*phase));
    }
    for(i = 0; i < num_points%ROTATOR_RELOAD; ++i) {
        *outVector++ = *inVector++ * (*phase);
        (*phase) *= phase_inc;
    }

}
#endif /* LV_HAVE_GENERIC */
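/*
 * Illustrative usage sketch (the buffer length and the 0.01 cycles-per-sample
 * shift are arbitrary example values, not part of this header). The phase
 * increment is a unit-magnitude complex exponential built with lv_cmake() from
 * volk_complex.h; application code would normally go through the dispatcher
 * declared in volk.h rather than calling a specific _generic/_sse4_1/_avx
 * variant directly.
 *
 *   lv_32fc_t in[1024];
 *   lv_32fc_t out[1024];
 *   lv_32fc_t phase = lv_cmake(1.f, 0.f);                  // start at 0 rad
 *   const float w = 2.f * 3.14159265358979f * 0.01f;       // radians per sample
 *   const lv_32fc_t phase_inc = lv_cmake(cosf(w), sinf(w));
 *   volk_32fc_s32fc_x2_rotator_32fc_a_generic(out, in, phase_inc, &phase, 1024);
 */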


#ifdef LV_HAVE_SSE4_1
#include <smmintrin.h>

static inline void volk_32fc_s32fc_x2_rotator_32fc_a_sse4_1(lv_32fc_t* outVector, const lv_32fc_t* inVector, const lv_32fc_t phase_inc, lv_32fc_t* phase, unsigned int num_points){
    lv_32fc_t* cPtr = outVector;
    const lv_32fc_t* aPtr = inVector;
    lv_32fc_t incr = 1;
    lv_32fc_t phase_Ptr[2] = {(*phase), (*phase)};

    unsigned int i, j = 0;

    /* phase_Ptr becomes {phase, phase*phase_inc}; incr ends up as phase_inc^2,
       the per-iteration advance for both lanes */
    for(i = 0; i < 2; ++i) {
        phase_Ptr[i] *= incr;
        incr *= (phase_inc);
    }

    __m128 aVal, phase_Val, inc_Val, yl, yh, tmp1, tmp2, z, ylp, yhp, tmp1p, tmp2p;

    phase_Val = _mm_loadu_ps((float*)phase_Ptr);
    inc_Val = _mm_set_ps(lv_cimag(incr), lv_creal(incr), lv_cimag(incr), lv_creal(incr));

    const unsigned int halfPoints = num_points / 2;


    for(i = 0; i < (unsigned int)(halfPoints/ROTATOR_RELOAD); i++) {
        for(j = 0; j < ROTATOR_RELOAD; ++j) {

            aVal = _mm_load_ps((float*)aPtr);

            /* complex multiplies via duplicate/shuffle/addsub:
               z = aVal * phase_Val and phase_Val *= inc_Val */
            yl = _mm_moveldup_ps(phase_Val);
            yh = _mm_movehdup_ps(phase_Val);
            ylp = _mm_moveldup_ps(inc_Val);
            yhp = _mm_movehdup_ps(inc_Val);

            tmp1 = _mm_mul_ps(aVal, yl);
            tmp1p = _mm_mul_ps(phase_Val, ylp);

            aVal = _mm_shuffle_ps(aVal, aVal, 0xB1);
            phase_Val = _mm_shuffle_ps(phase_Val, phase_Val, 0xB1);
            tmp2 = _mm_mul_ps(aVal, yh);
            tmp2p = _mm_mul_ps(phase_Val, yhp);

            z = _mm_addsub_ps(tmp1, tmp2);
            phase_Val = _mm_addsub_ps(tmp1p, tmp2p);

            _mm_store_ps((float*)cPtr, z);

            aPtr += 2;
            cPtr += 2;
        }
        /* renormalize both running phases to unit magnitude */
        tmp1 = _mm_mul_ps(phase_Val, phase_Val);
        tmp2 = _mm_hadd_ps(tmp1, tmp1);
        tmp1 = _mm_shuffle_ps(tmp2, tmp2, 0xD8);
        tmp2 = _mm_sqrt_ps(tmp1);
        phase_Val = _mm_div_ps(phase_Val, tmp2);
    }
    for(i = 0; i < halfPoints%ROTATOR_RELOAD; ++i) {
        aVal = _mm_load_ps((float*)aPtr);

        yl = _mm_moveldup_ps(phase_Val);
        yh = _mm_movehdup_ps(phase_Val);
        ylp = _mm_moveldup_ps(inc_Val);
        yhp = _mm_movehdup_ps(inc_Val);

        tmp1 = _mm_mul_ps(aVal, yl);
        tmp1p = _mm_mul_ps(phase_Val, ylp);

        aVal = _mm_shuffle_ps(aVal, aVal, 0xB1);
        phase_Val = _mm_shuffle_ps(phase_Val, phase_Val, 0xB1);
        tmp2 = _mm_mul_ps(aVal, yh);
        tmp2p = _mm_mul_ps(phase_Val, yhp);

        z = _mm_addsub_ps(tmp1, tmp2);
        phase_Val = _mm_addsub_ps(tmp1p, tmp2p);

        _mm_store_ps((float*)cPtr, z);

        aPtr += 2;
        cPtr += 2;
    }

    /* flush the vector phases and finish any odd sample in scalar code */
    _mm_storeu_ps((float*)phase_Ptr, phase_Val);
    for(i = 0; i < num_points%2; ++i) {
        *cPtr++ = *aPtr++ * phase_Ptr[0];
        phase_Ptr[0] *= (phase_inc);
    }

    (*phase) = phase_Ptr[0];
}

#endif /* LV_HAVE_SSE4_1 */
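/*
 * How the SSE4.1 kernel above and the AVX kernel below work: complex samples are
 * stored as interleaved (re, im) float pairs, and each product
 *   (ar + j*ai) * (pr + j*pi) = (ar*pr - ai*pi) + j*(ai*pr + ar*pi)
 * is formed with the moveldup/movehdup/shuffle/addsub sequence. phase_Val holds
 * consecutive phases {p, p*inc, ...} (two for SSE, four for AVX) and inc_Val holds
 * phase_inc raised to the vector width, so one vector iteration advances every
 * lane by a full vector stride.
 */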


#ifdef LV_HAVE_AVX
#include <immintrin.h>

/*!
 \brief rotate input vector at fixed rate per sample from initial phase offset
 \param outVector The vector where the results will be stored
 \param inVector Vector to be rotated
 \param phase_inc phase increment per sample (a unit-magnitude complex number)
 \param phase initial phase offset; updated to the final phase on return
 \param num_points The number of values in inVector to be rotated and stored into outVector
*/


static inline void volk_32fc_s32fc_x2_rotator_32fc_a_avx(lv_32fc_t* outVector, const lv_32fc_t* inVector, const lv_32fc_t phase_inc, lv_32fc_t* phase, unsigned int num_points){
    lv_32fc_t* cPtr = outVector;
    const lv_32fc_t* aPtr = inVector;
    lv_32fc_t incr = 1;
    lv_32fc_t phase_Ptr[4] = {(*phase), (*phase), (*phase), (*phase)};

    unsigned int i, j = 0;

    /* phase_Ptr becomes {phase, phase*inc, phase*inc^2, phase*inc^3};
       incr ends up as phase_inc^4, the per-iteration advance for all four lanes */
    for(i = 0; i < 4; ++i) {
        phase_Ptr[i] *= incr;
        incr *= (phase_inc);
    }

    __m256 aVal, phase_Val, inc_Val, yl, yh, tmp1, tmp2, z, ylp, yhp, tmp1p, tmp2p;

    phase_Val = _mm256_loadu_ps((float*)phase_Ptr);
    inc_Val = _mm256_set_ps(lv_cimag(incr), lv_creal(incr), lv_cimag(incr), lv_creal(incr),
                            lv_cimag(incr), lv_creal(incr), lv_cimag(incr), lv_creal(incr));

    const unsigned int fourthPoints = num_points / 4;


    for(i = 0; i < (unsigned int)(fourthPoints/ROTATOR_RELOAD); i++) {
        for(j = 0; j < ROTATOR_RELOAD; ++j) {

            aVal = _mm256_load_ps((float*)aPtr);

            /* complex multiplies via duplicate/shuffle/addsub:
               z = aVal * phase_Val and phase_Val *= inc_Val */
            yl = _mm256_moveldup_ps(phase_Val);
            yh = _mm256_movehdup_ps(phase_Val);
            ylp = _mm256_moveldup_ps(inc_Val);
            yhp = _mm256_movehdup_ps(inc_Val);

            tmp1 = _mm256_mul_ps(aVal, yl);
            tmp1p = _mm256_mul_ps(phase_Val, ylp);

            aVal = _mm256_shuffle_ps(aVal, aVal, 0xB1);
            phase_Val = _mm256_shuffle_ps(phase_Val, phase_Val, 0xB1);
            tmp2 = _mm256_mul_ps(aVal, yh);
            tmp2p = _mm256_mul_ps(phase_Val, yhp);

            z = _mm256_addsub_ps(tmp1, tmp2);
            phase_Val = _mm256_addsub_ps(tmp1p, tmp2p);

            _mm256_store_ps((float*)cPtr, z);

            aPtr += 4;
            cPtr += 4;
        }
        /* renormalize the four running phases to unit magnitude */
        tmp1 = _mm256_mul_ps(phase_Val, phase_Val);
        tmp2 = _mm256_hadd_ps(tmp1, tmp1);
        tmp1 = _mm256_shuffle_ps(tmp2, tmp2, 0xD8);
        tmp2 = _mm256_sqrt_ps(tmp1);
        phase_Val = _mm256_div_ps(phase_Val, tmp2);
    }
    for(i = 0; i < fourthPoints%ROTATOR_RELOAD; ++i) {
        aVal = _mm256_load_ps((float*)aPtr);

        yl = _mm256_moveldup_ps(phase_Val);
        yh = _mm256_movehdup_ps(phase_Val);
        ylp = _mm256_moveldup_ps(inc_Val);
        yhp = _mm256_movehdup_ps(inc_Val);

        tmp1 = _mm256_mul_ps(aVal, yl);
        tmp1p = _mm256_mul_ps(phase_Val, ylp);

        aVal = _mm256_shuffle_ps(aVal, aVal, 0xB1);
        phase_Val = _mm256_shuffle_ps(phase_Val, phase_Val, 0xB1);
        tmp2 = _mm256_mul_ps(aVal, yh);
        tmp2p = _mm256_mul_ps(phase_Val, yhp);

        z = _mm256_addsub_ps(tmp1, tmp2);
        phase_Val = _mm256_addsub_ps(tmp1p, tmp2p);

        _mm256_store_ps((float*)cPtr, z);

        aPtr += 4;
        cPtr += 4;
    }

    /* flush the vector phases and finish the remaining 0-3 samples in scalar code */
    _mm256_storeu_ps((float*)phase_Ptr, phase_Val);
    for(i = 0; i < num_points%4; ++i) {
        *cPtr++ = *aPtr++ * phase_Ptr[0];
        phase_Ptr[0] *= (phase_inc);
    }

    (*phase) = phase_Ptr[0];
}

#endif /* LV_HAVE_AVX */
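/*
 * Note on alignment: these are the "_a" (aligned) kernels. The SSE4.1 and AVX
 * paths use _mm_load_ps/_mm_store_ps and _mm256_load_ps/_mm256_store_ps, so
 * inVector and outVector must be 16-byte and 32-byte aligned respectively, e.g.
 * (illustrative, not part of this header):
 *
 *   lv_32fc_t* buf;
 *   posix_memalign((void**)&buf, 32, 1024 * sizeof(lv_32fc_t));
 */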



#endif /* INCLUDED_volk_32fc_s32fc_rotator_32fc_a_H */