GNU Radio 3.6.2git-107-gbf8700a2 C++ API
|
00001 #ifndef INCLUDED_volk_32fc_s32fc_rotator_32fc_a_H 00002 #define INCLUDED_volk_32fc_s32fc_rotator_32fc_a_H 00003 00004 00005 #include <volk/volk_complex.h> 00006 #include <stdio.h> 00007 #include <stdlib.h> 00008 #define ROTATOR_RELOAD 512 00009 00010 00011 #ifdef LV_HAVE_GENERIC 00012 00013 /*! 00014 \brief rotate input vector at fixed rate per sample from initial phase offset 00015 \param outVector The vector where the results will be stored 00016 \param inVector Vector to be rotated 00017 \param phase_inc rotational velocity 00018 \param phase initial phase offset 00019 \param num_points The number of values in inVector to be rotated and stored into cVector 00020 */ 00021 00022 00023 static inline void volk_32fc_s32fc_x2_rotator_32fc_a_generic(lv_32fc_t* outVector, const lv_32fc_t* inVector, const lv_32fc_t phase_inc, lv_32fc_t* phase, unsigned int num_points){ 00024 *phase = lv_cmake(1.0, 0.0); 00025 unsigned int i = 0; 00026 int j = 0; 00027 for(i = 0; i < (unsigned int)(num_points/ROTATOR_RELOAD); ++i) { 00028 for(j = 0; j < ROTATOR_RELOAD; ++j) { 00029 *outVector++ = *inVector++ * (*phase); 00030 (*phase) *= phase_inc; 00031 } 00032 (*phase) /= abs((*phase)); 00033 } 00034 for(i = 0; i < num_points%ROTATOR_RELOAD; ++i) { 00035 *outVector++ = *inVector++ * (*phase); 00036 (*phase) *= phase_inc; 00037 } 00038 00039 } 00040 #endif /* LV_HAVE_GENERIC */ 00041 00042 00043 #ifdef LV_HAVE_SSE4_1 00044 #include <smmintrin.h> 00045 00046 static inline void volk_32fc_s32fc_x2_rotator_32fc_a_sse4_1(lv_32fc_t* outVector, const lv_32fc_t* inVector, const lv_32fc_t phase_inc, lv_32fc_t* phase, unsigned int num_points){ 00047 *phase = lv_cmake(1.0, 0.0); 00048 lv_32fc_t* cPtr = outVector; 00049 const lv_32fc_t* aPtr = inVector; 00050 lv_32fc_t incr = 1; 00051 lv_32fc_t phase_Ptr[2] = {(*phase), (*phase)}; 00052 00053 unsigned int i, j = 0; 00054 00055 for(i = 0; i < 2; ++i) { 00056 phase_Ptr[i] *= incr; 00057 incr *= (phase_inc); 00058 } 00059 00060 /*printf("%f, %f\n", lv_creal(phase_Ptr[0]), lv_cimag(phase_Ptr[0])); 00061 printf("%f, %f\n", lv_creal(phase_Ptr[1]), lv_cimag(phase_Ptr[1])); 00062 printf("%f, %f\n", lv_creal(phase_Ptr[2]), lv_cimag(phase_Ptr[2])); 00063 printf("%f, %f\n", lv_creal(phase_Ptr[3]), lv_cimag(phase_Ptr[3])); 00064 printf("incr: %f, %f\n", lv_creal(incr), lv_cimag(incr));*/ 00065 __m128 aVal, phase_Val, inc_Val, yl, yh, tmp1, tmp2, z, ylp, yhp, tmp1p, tmp2p; 00066 00067 phase_Val = _mm_loadu_ps((float*)phase_Ptr); 00068 inc_Val = _mm_set_ps(lv_cimag(incr), lv_creal(incr),lv_cimag(incr), lv_creal(incr)); 00069 00070 const unsigned int halfPoints = num_points / 2; 00071 00072 00073 for(i = 0; i < (unsigned int)(halfPoints/ROTATOR_RELOAD); i++) { 00074 for(j = 0; j < ROTATOR_RELOAD; ++j) { 00075 00076 aVal = _mm_load_ps((float*)aPtr); 00077 00078 yl = _mm_moveldup_ps(phase_Val); 00079 yh = _mm_movehdup_ps(phase_Val); 00080 ylp = _mm_moveldup_ps(inc_Val); 00081 yhp = _mm_movehdup_ps(inc_Val); 00082 00083 tmp1 = _mm_mul_ps(aVal, yl); 00084 tmp1p = _mm_mul_ps(phase_Val, ylp); 00085 00086 aVal = _mm_shuffle_ps(aVal, aVal, 0xB1); 00087 phase_Val = _mm_shuffle_ps(phase_Val, phase_Val, 0xB1); 00088 tmp2 = _mm_mul_ps(aVal, yh); 00089 tmp2p = _mm_mul_ps(phase_Val, yhp); 00090 00091 z = _mm_addsub_ps(tmp1, tmp2); 00092 phase_Val = _mm_addsub_ps(tmp1p, tmp2p); 00093 00094 _mm_store_ps((float*)cPtr, z); 00095 00096 aPtr += 2; 00097 cPtr += 2; 00098 } 00099 tmp1 = _mm_mul_ps(phase_Val, phase_Val); 00100 tmp2 = _mm_hadd_ps(tmp1, tmp1); 00101 tmp1 = _mm_shuffle_ps(tmp2, tmp2, 0xD8); 00102 phase_Val = _mm_div_ps(phase_Val, tmp1); 00103 } 00104 for(i = 0; i < halfPoints%ROTATOR_RELOAD; ++i) { 00105 aVal = _mm_load_ps((float*)aPtr); 00106 00107 yl = _mm_moveldup_ps(phase_Val); 00108 yh = _mm_movehdup_ps(phase_Val); 00109 ylp = _mm_moveldup_ps(inc_Val); 00110 yhp = _mm_movehdup_ps(inc_Val); 00111 00112 tmp1 = _mm_mul_ps(aVal, yl); 00113 00114 tmp1p = _mm_mul_ps(phase_Val, ylp); 00115 00116 aVal = _mm_shuffle_ps(aVal, aVal, 0xB1); 00117 phase_Val = _mm_shuffle_ps(phase_Val, phase_Val, 0xB1); 00118 tmp2 = _mm_mul_ps(aVal, yh); 00119 tmp2p = _mm_mul_ps(phase_Val, yhp); 00120 00121 z = _mm_addsub_ps(tmp1, tmp2); 00122 phase_Val = _mm_addsub_ps(tmp1p, tmp2p); 00123 00124 _mm_store_ps((float*)cPtr, z); 00125 00126 aPtr += 2; 00127 cPtr += 2; 00128 } 00129 00130 _mm_storeu_ps((float*)phase_Ptr, phase_Val); 00131 for(i = 0; i < num_points%2; ++i) { 00132 *cPtr++ = *aPtr++ * phase_Ptr[0]; 00133 phase_Ptr[0] *= (phase_inc); 00134 } 00135 00136 (*phase) = phase_Ptr[0]; 00137 00138 } 00139 00140 #endif /* LV_HAVE_SSE4_1 */ 00141 00142 00143 #ifdef LV_HAVE_AVX 00144 #include <immintrin.h> 00145 00146 /*! 00147 \brief rotate input vector at fixed rate per sample from initial phase offset 00148 \param outVector The vector where the results will be stored 00149 \param inVector Vector to be rotated 00150 \param phase_inc rotational velocity 00151 \param phase initial phase offset 00152 \param num_points The number of values in inVector to be rotated and stored into cVector 00153 */ 00154 00155 00156 00157 00158 static inline void volk_32fc_s32fc_x2_rotator_32fc_a_avx(lv_32fc_t* outVector, const lv_32fc_t* inVector, const lv_32fc_t phase_inc, lv_32fc_t* phase, unsigned int num_points){ 00159 *phase = lv_cmake(1.0, 0.0); 00160 lv_32fc_t* cPtr = outVector; 00161 const lv_32fc_t* aPtr = inVector; 00162 lv_32fc_t incr = 1; 00163 lv_32fc_t phase_Ptr[4] = {(*phase), (*phase), (*phase), (*phase)}; 00164 00165 unsigned int i, j = 0; 00166 00167 for(i = 0; i < 4; ++i) { 00168 phase_Ptr[i] *= incr; 00169 incr *= (phase_inc); 00170 } 00171 00172 /*printf("%f, %f\n", lv_creal(phase_Ptr[0]), lv_cimag(phase_Ptr[0])); 00173 printf("%f, %f\n", lv_creal(phase_Ptr[1]), lv_cimag(phase_Ptr[1])); 00174 printf("%f, %f\n", lv_creal(phase_Ptr[2]), lv_cimag(phase_Ptr[2])); 00175 printf("%f, %f\n", lv_creal(phase_Ptr[3]), lv_cimag(phase_Ptr[3])); 00176 printf("incr: %f, %f\n", lv_creal(incr), lv_cimag(incr));*/ 00177 __m256 aVal, phase_Val, inc_Val, yl, yh, tmp1, tmp2, z, ylp, yhp, tmp1p, tmp2p; 00178 00179 phase_Val = _mm256_loadu_ps((float*)phase_Ptr); 00180 inc_Val = _mm256_set_ps(lv_cimag(incr), lv_creal(incr),lv_cimag(incr), lv_creal(incr),lv_cimag(incr), lv_creal(incr),lv_cimag(incr), lv_creal(incr)); 00181 const unsigned int fourthPoints = num_points / 4; 00182 00183 00184 for(i = 0; i < (unsigned int)(fourthPoints/ROTATOR_RELOAD); i++) { 00185 for(j = 0; j < ROTATOR_RELOAD; ++j) { 00186 00187 aVal = _mm256_load_ps((float*)aPtr); 00188 00189 yl = _mm256_moveldup_ps(phase_Val); 00190 yh = _mm256_movehdup_ps(phase_Val); 00191 ylp = _mm256_moveldup_ps(inc_Val); 00192 yhp = _mm256_movehdup_ps(inc_Val); 00193 00194 tmp1 = _mm256_mul_ps(aVal, yl); 00195 tmp1p = _mm256_mul_ps(phase_Val, ylp); 00196 00197 aVal = _mm256_shuffle_ps(aVal, aVal, 0xB1); 00198 phase_Val = _mm256_shuffle_ps(phase_Val, phase_Val, 0xB1); 00199 tmp2 = _mm256_mul_ps(aVal, yh); 00200 tmp2p = _mm256_mul_ps(phase_Val, yhp); 00201 00202 z = _mm256_addsub_ps(tmp1, tmp2); 00203 phase_Val = _mm256_addsub_ps(tmp1p, tmp2p); 00204 00205 _mm256_store_ps((float*)cPtr, z); 00206 00207 aPtr += 4; 00208 cPtr += 4; 00209 } 00210 tmp1 = _mm256_mul_ps(phase_Val, phase_Val); 00211 tmp2 = _mm256_hadd_ps(tmp1, tmp1); 00212 tmp1 = _mm256_shuffle_ps(tmp2, tmp2, 0xD8); 00213 phase_Val = _mm256_div_ps(phase_Val, tmp1); 00214 } 00215 for(i = 0; i < fourthPoints%ROTATOR_RELOAD; ++i) { 00216 aVal = _mm256_load_ps((float*)aPtr); 00217 00218 yl = _mm256_moveldup_ps(phase_Val); 00219 yh = _mm256_movehdup_ps(phase_Val); 00220 ylp = _mm256_moveldup_ps(inc_Val); 00221 yhp = _mm256_movehdup_ps(inc_Val); 00222 00223 tmp1 = _mm256_mul_ps(aVal, yl); 00224 00225 tmp1p = _mm256_mul_ps(phase_Val, ylp); 00226 00227 aVal = _mm256_shuffle_ps(aVal, aVal, 0xB1); 00228 phase_Val = _mm256_shuffle_ps(phase_Val, phase_Val, 0xB1); 00229 tmp2 = _mm256_mul_ps(aVal, yh); 00230 tmp2p = _mm256_mul_ps(phase_Val, yhp); 00231 00232 z = _mm256_addsub_ps(tmp1, tmp2); 00233 phase_Val = _mm256_addsub_ps(tmp1p, tmp2p); 00234 00235 _mm256_store_ps((float*)cPtr, z); 00236 00237 aPtr += 4; 00238 cPtr += 4; 00239 } 00240 00241 _mm256_storeu_ps((float*)phase_Ptr, phase_Val); 00242 for(i = 0; i < num_points%4; ++i) { 00243 *cPtr++ = *aPtr++ * phase_Ptr[0]; 00244 phase_Ptr[0] *= (phase_inc); 00245 } 00246 00247 (*phase) = phase_Ptr[0]; 00248 00249 } 00250 00251 #endif /* LV_HAVE_AVX */ 00252 00253 00254 00255 00256 00257 00258 00259 00260 #endif /* INCLUDED_volk_32fc_s32fc_rotator_32fc_a_H */