GNU Radio 3.6.1-14 C++ API
|
00001 #ifndef INCLUDED_volk_32fc_s32fc_rotator_32fc_a_H 00002 #define INCLUDED_volk_32fc_s32fc_rotator_32fc_a_H 00003 00004 00005 #include <volk/volk_complex.h> 00006 #include <stdio.h> 00007 #include <stdlib.h> 00008 #define ROTATOR_RELOAD 512 00009 00010 00011 #ifdef LV_HAVE_GENERIC 00012 00013 /*! 00014 \brief rotate input vector at fixed rate per sample from initial phase offset 00015 \param outVector The vector where the results will be stored 00016 \param inVector Vector to be rotated 00017 \param phase_inc rotational velocity 00018 \param phase initial phase offset 00019 \param num_points The number of values in inVector to be rotated and stored into cVector 00020 */ 00021 00022 00023 static inline void volk_32fc_s32fc_x2_rotator_32fc_a_generic(lv_32fc_t* outVector, const lv_32fc_t* inVector, const lv_32fc_t phase_inc, lv_32fc_t* phase, unsigned int num_points){ 00024 *phase = lv_cmake(1.0, 0.0); 00025 unsigned int i = 0; 00026 int j = 0; 00027 for(i = 0; i < (unsigned int)(num_points/ROTATOR_RELOAD); ++i) { 00028 for(j = 0; j < ROTATOR_RELOAD; ++j) { 00029 *outVector++ = *inVector++ * (*phase); 00030 (*phase) *= phase_inc; 00031 } 00032 (*phase) /= abs((*phase)); 00033 } 00034 for(i = 0; i < num_points%ROTATOR_RELOAD; ++i) { 00035 *outVector++ = *inVector++ * (*phase); 00036 (*phase) *= phase_inc; 00037 } 00038 00039 } 00040 #endif /* LV_HAVE_GENERIC */ 00041 00042 00043 #ifdef LV_HAVE_SSE4_1 00044 #include <smmintrin.h> 00045 00046 static inline void volk_32fc_s32fc_x2_rotator_32fc_a_sse4_1(lv_32fc_t* outVector, const lv_32fc_t* inVector, const lv_32fc_t phase_inc, lv_32fc_t* phase, unsigned int num_points){ 00047 *phase = lv_cmake(1.0, 0.0); 00048 lv_32fc_t* cPtr = outVector; 00049 const lv_32fc_t* aPtr = inVector; 00050 lv_32fc_t incr = 1; 00051 lv_32fc_t phase_Ptr[2] = {(*phase), (*phase)}; 00052 00053 unsigned int i, j = 0; 00054 00055 for(i = 0; i < 2; ++i) { 00056 phase_Ptr[i] *= incr; 00057 incr *= (phase_inc); 00058 } 00059 00060 /*printf("%f, %f\n", lv_creal(phase_Ptr[0]), lv_cimag(phase_Ptr[0])); 00061 printf("%f, %f\n", lv_creal(phase_Ptr[1]), lv_cimag(phase_Ptr[1])); 00062 printf("%f, %f\n", lv_creal(phase_Ptr[2]), lv_cimag(phase_Ptr[2])); 00063 printf("%f, %f\n", lv_creal(phase_Ptr[3]), lv_cimag(phase_Ptr[3])); 00064 printf("incr: %f, %f\n", lv_creal(incr), lv_cimag(incr));*/ 00065 __m128 aVal, phase_Val, inc_Val, yl, yh, tmp1, tmp2, z, ylp, yhp, tmp1p, tmp2p; 00066 00067 phase_Val = _mm_loadu_ps((float*)phase_Ptr); 00068 inc_Val = _mm_set_ps(lv_cimag(incr), lv_creal(incr),lv_cimag(incr), lv_creal(incr)); 00069 00070 const unsigned int halfPoints = num_points / 2; 00071 00072 00073 for(i = 0; i < (unsigned int)(halfPoints/ROTATOR_RELOAD); i++) { 00074 for(j = 0; j < ROTATOR_RELOAD; ++j) { 00075 00076 aVal = _mm_load_ps((float*)aPtr); 00077 00078 yl = _mm_moveldup_ps(phase_Val); 00079 yh = _mm_movehdup_ps(phase_Val); 00080 ylp = _mm_moveldup_ps(inc_Val); 00081 yhp = _mm_movehdup_ps(inc_Val); 00082 00083 tmp1 = _mm_mul_ps(aVal, yl); 00084 tmp1p = _mm_mul_ps(phase_Val, ylp); 00085 00086 aVal = _mm_shuffle_ps(aVal, aVal, 0xB1); 00087 phase_Val = _mm_shuffle_ps(phase_Val, phase_Val, 0xB1); 00088 tmp2 = _mm_mul_ps(aVal, yh); 00089 tmp2p = _mm_mul_ps(phase_Val, yhp); 00090 00091 z = _mm_addsub_ps(tmp1, tmp2); 00092 phase_Val = _mm_addsub_ps(tmp1p, tmp2p); 00093 00094 _mm_store_ps((float*)cPtr, z); 00095 00096 aPtr += 2; 00097 cPtr += 2; 00098 } 00099 tmp1 = _mm_mul_ps(phase_Val, phase_Val); 00100 tmp2 = _mm_hadd_ps(tmp1, tmp1); 00101 tmp1 = _mm_shuffle_ps(tmp2, tmp2, 0xD8); 00102 phase_Val = _mm_div_ps(phase_Val, tmp1); 00103 } 00104 for(i = 0; i < halfPoints%ROTATOR_RELOAD; ++i) { 00105 aVal = _mm_load_ps((float*)aPtr); 00106 00107 yl = _mm_moveldup_ps(phase_Val); 00108 yh = _mm_movehdup_ps(phase_Val); 00109 ylp = _mm_moveldup_ps(inc_Val); 00110 yhp = _mm_movehdup_ps(inc_Val); 00111 00112 tmp1 = _mm_mul_ps(aVal, yl); 00113 00114 tmp1p = _mm_mul_ps(phase_Val, ylp); 00115 00116 aVal = _mm_shuffle_ps(aVal, aVal, 0xB1); 00117 phase_Val = _mm_shuffle_ps(phase_Val, phase_Val, 0xB1); 00118 tmp2 = _mm_mul_ps(aVal, yh); 00119 tmp2p = _mm_mul_ps(phase_Val, yhp); 00120 00121 z = _mm_addsub_ps(tmp1, tmp2); 00122 phase_Val = _mm_addsub_ps(tmp1p, tmp2p); 00123 00124 _mm_store_ps((float*)cPtr, z); 00125 00126 aPtr += 2; 00127 cPtr += 2; 00128 } 00129 00130 _mm_storeu_ps((float*)phase_Ptr, phase_Val); 00131 for(i = 0; i < num_points%2; ++i) { 00132 *cPtr++ = *aPtr++ * phase_Ptr[0]; 00133 phase_Ptr[0] *= (phase_inc); 00134 } 00135 00136 (*phase) = phase_Ptr[0]; 00137 00138 } 00139 00140 #endif /* LV_HAVE_SSE4_1 */ 00141 00142 00143 #ifdef LV_HAVE_AVX 00144 #include <immintrin.h> 00145 00146 /*! 00147 \brief rotate input vector at fixed rate per sample from initial phase offset 00148 \param outVector The vector where the results will be stored 00149 \param inVector Vector to be rotated 00150 \param phase_inc rotational velocity 00151 \param phase initial phase offset 00152 \param num_points The number of values in inVector to be rotated and stored into cVector 00153 */ 00154 00155 00156 00157 00158 static inline void volk_32fc_s32fc_x2_rotator_32fc_a_avx(lv_32fc_t* outVector, const lv_32fc_t* inVector, const lv_32fc_t phase_inc, lv_32fc_t* phase, unsigned int num_points){ 00159 *phase = lv_cmake(1.0, 0.0); 00160 lv_32fc_t* cPtr = outVector; 00161 const lv_32fc_t* aPtr = inVector; 00162 lv_32fc_t incr = 1; 00163 lv_32fc_t phase_Ptr[4] = {(*phase), (*phase), (*phase), (*phase)}; 00164 00165 unsigned int i, j = 0; 00166 00167 for(i = 0; i < 4; ++i) { 00168 phase_Ptr[i] *= incr; 00169 incr *= (phase_inc); 00170 } 00171 00172 /*printf("%f, %f\n", lv_creal(phase_Ptr[0]), lv_cimag(phase_Ptr[0])); 00173 printf("%f, %f\n", lv_creal(phase_Ptr[1]), lv_cimag(phase_Ptr[1])); 00174 printf("%f, %f\n", lv_creal(phase_Ptr[2]), lv_cimag(phase_Ptr[2])); 00175 printf("%f, %f\n", lv_creal(phase_Ptr[3]), lv_cimag(phase_Ptr[3])); 00176 printf("incr: %f, %f\n", lv_creal(incr), lv_cimag(incr));*/ 00177 __m256 aVal, phase_Val, inc_Val, yl, yh, tmp1, tmp2, z, ylp, yhp, tmp1p, tmp2p, negated, zeros; 00178 00179 phase_Val = _mm256_loadu_ps((float*)phase_Ptr); 00180 inc_Val = _mm256_set_ps(lv_cimag(incr), lv_creal(incr),lv_cimag(incr), lv_creal(incr),lv_cimag(incr), lv_creal(incr),lv_cimag(incr), lv_creal(incr)); 00181 zeros = _mm256_set1_ps(0.0); 00182 negated = _mm256_set1_ps(-1.0); 00183 const unsigned int fourthPoints = num_points / 4; 00184 00185 00186 for(i = 0; i < (unsigned int)(fourthPoints/ROTATOR_RELOAD); i++) { 00187 for(j = 0; j < ROTATOR_RELOAD; ++j) { 00188 00189 aVal = _mm256_load_ps((float*)aPtr); 00190 00191 yl = _mm256_moveldup_ps(phase_Val); 00192 yh = _mm256_movehdup_ps(phase_Val); 00193 ylp = _mm256_moveldup_ps(inc_Val); 00194 yhp = _mm256_movehdup_ps(inc_Val); 00195 00196 tmp1 = _mm256_mul_ps(aVal, yl); 00197 tmp1p = _mm256_mul_ps(phase_Val, ylp); 00198 00199 aVal = _mm256_shuffle_ps(aVal, aVal, 0xB1); 00200 phase_Val = _mm256_shuffle_ps(phase_Val, phase_Val, 0xB1); 00201 tmp2 = _mm256_mul_ps(aVal, yh); 00202 tmp2p = _mm256_mul_ps(phase_Val, yhp); 00203 00204 z = _mm256_addsub_ps(tmp1, tmp2); 00205 phase_Val = _mm256_addsub_ps(tmp1p, tmp2p); 00206 00207 _mm256_store_ps((float*)cPtr, z); 00208 00209 aPtr += 4; 00210 cPtr += 4; 00211 } 00212 tmp1 = _mm256_mul_ps(phase_Val, phase_Val); 00213 tmp2 = _mm256_hadd_ps(tmp1, tmp1); 00214 tmp1 = _mm256_shuffle_ps(tmp2, tmp2, 0xD8); 00215 phase_Val = _mm256_div_ps(phase_Val, tmp1); 00216 } 00217 for(i = 0; i < fourthPoints%ROTATOR_RELOAD; ++i) { 00218 aVal = _mm256_load_ps((float*)aPtr); 00219 00220 yl = _mm256_moveldup_ps(phase_Val); 00221 yh = _mm256_movehdup_ps(phase_Val); 00222 ylp = _mm256_moveldup_ps(inc_Val); 00223 yhp = _mm256_movehdup_ps(inc_Val); 00224 00225 tmp1 = _mm256_mul_ps(aVal, yl); 00226 00227 tmp1p = _mm256_mul_ps(phase_Val, ylp); 00228 00229 aVal = _mm256_shuffle_ps(aVal, aVal, 0xB1); 00230 phase_Val = _mm256_shuffle_ps(phase_Val, phase_Val, 0xB1); 00231 tmp2 = _mm256_mul_ps(aVal, yh); 00232 tmp2p = _mm256_mul_ps(phase_Val, yhp); 00233 00234 z = _mm256_addsub_ps(tmp1, tmp2); 00235 phase_Val = _mm256_addsub_ps(tmp1p, tmp2p); 00236 00237 _mm256_store_ps((float*)cPtr, z); 00238 00239 aPtr += 4; 00240 cPtr += 4; 00241 } 00242 00243 _mm256_storeu_ps((float*)phase_Ptr, phase_Val); 00244 for(i = 0; i < num_points%4; ++i) { 00245 *cPtr++ = *aPtr++ * phase_Ptr[0]; 00246 phase_Ptr[0] *= (phase_inc); 00247 } 00248 00249 (*phase) = phase_Ptr[0]; 00250 00251 } 00252 00253 #endif /* LV_HAVE_AVX */ 00254 00255 00256 00257 00258 00259 00260 00261 00262 #endif /* INCLUDED_volk_32fc_s32fc_rotator_32fc_a_H */