GNU Radio 3.6.1-23 C++ API
volk_32fc_s32fc_x2_rotator_32fc_a.h
Go to the documentation of this file.
00001 #ifndef INCLUDED_volk_32fc_s32fc_rotator_32fc_a_H
00002 #define INCLUDED_volk_32fc_s32fc_rotator_32fc_a_H
00003 
00004 
#include <volk/volk_complex.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
00008 #define ROTATOR_RELOAD 512
00009 
00010 
00011 #ifdef LV_HAVE_GENERIC
00012 
00013 /*!
00014   \brief rotate input vector at fixed rate per sample from initial phase offset
00015   \param outVector The vector where the results will be stored
00016   \param inVector Vector to be rotated
00017   \param phase_inc rotational velocity
00018   \param phase initial phase offset
00019   \param num_points The number of values in inVector to be rotated and stored into cVector
00020 */
00021 
00022 
00023 static inline void volk_32fc_s32fc_x2_rotator_32fc_a_generic(lv_32fc_t* outVector, const lv_32fc_t* inVector, const lv_32fc_t phase_inc, lv_32fc_t* phase, unsigned int num_points){    
00024     unsigned int i = 0; 
00025     int j = 0;    
00026     for(i = 0; i < (unsigned int)(num_points/ROTATOR_RELOAD); ++i) {
00027         for(j = 0; j < ROTATOR_RELOAD; ++j) {
00028             *outVector++ = *inVector++ * (*phase);
00029             (*phase) *= phase_inc;
00030         }
00031         (*phase) /= abs((*phase));
00032     }
00033     for(i = 0; i < num_points%ROTATOR_RELOAD; ++i) {
00034         *outVector++ = *inVector++ * (*phase);
00035         (*phase) *= phase_inc;
00036     }
00037     
00038 }
00039 #endif /* LV_HAVE_GENERIC */
00040 
00041 
00042 #ifdef LV_HAVE_SSE4_1
00043 #include <smmintrin.h>
00044 
00045 static inline void volk_32fc_s32fc_x2_rotator_32fc_a_sse4_1(lv_32fc_t* outVector, const lv_32fc_t* inVector, const lv_32fc_t phase_inc, lv_32fc_t* phase, unsigned int num_points){
00046     lv_32fc_t* cPtr = outVector;
00047     const lv_32fc_t* aPtr = inVector;
00048     lv_32fc_t incr = 1;
00049     lv_32fc_t phase_Ptr[2] = {(*phase), (*phase)};
00050     
00051     unsigned int i, j = 0;
00052 
00053     for(i = 0; i < 2; ++i) {
00054         phase_Ptr[i] *= incr;
00055         incr *= (phase_inc);
00056     }
00057 
00058     /*printf("%f, %f\n", lv_creal(phase_Ptr[0]), lv_cimag(phase_Ptr[0]));
00059     printf("%f, %f\n", lv_creal(phase_Ptr[1]), lv_cimag(phase_Ptr[1]));
00060     printf("%f, %f\n", lv_creal(phase_Ptr[2]), lv_cimag(phase_Ptr[2]));
00061     printf("%f, %f\n", lv_creal(phase_Ptr[3]), lv_cimag(phase_Ptr[3]));
00062     printf("incr: %f, %f\n", lv_creal(incr), lv_cimag(incr));*/
00063     __m128 aVal, phase_Val, inc_Val, yl, yh, tmp1, tmp2, z, ylp, yhp, tmp1p, tmp2p;
00064     
00065     phase_Val = _mm_loadu_ps((float*)phase_Ptr);
00066     inc_Val = _mm_set_ps(lv_cimag(incr), lv_creal(incr),lv_cimag(incr), lv_creal(incr));
00067     
00068     const unsigned int halfPoints = num_points / 2;
00069 
00070     
00071     for(i = 0; i < (unsigned int)(halfPoints/ROTATOR_RELOAD); i++) {
00072         for(j = 0; j < ROTATOR_RELOAD; ++j) {
00073             
00074             aVal = _mm_load_ps((float*)aPtr);
00075             
00076             yl = _mm_moveldup_ps(phase_Val);
00077             yh = _mm_movehdup_ps(phase_Val);
00078             ylp = _mm_moveldup_ps(inc_Val);
00079             yhp = _mm_movehdup_ps(inc_Val);
00080             
00081             tmp1 = _mm_mul_ps(aVal, yl);
00082             tmp1p = _mm_mul_ps(phase_Val, ylp);
00083             
00084             aVal = _mm_shuffle_ps(aVal, aVal, 0xB1);
00085             phase_Val = _mm_shuffle_ps(phase_Val, phase_Val, 0xB1);
00086             tmp2 = _mm_mul_ps(aVal, yh);
00087             tmp2p = _mm_mul_ps(phase_Val, yhp);
00088             
00089             z = _mm_addsub_ps(tmp1, tmp2);
00090             phase_Val = _mm_addsub_ps(tmp1p, tmp2p);
00091             
00092             _mm_store_ps((float*)cPtr, z);
00093             
00094             aPtr += 2;
00095             cPtr += 2;
00096         }
00097         tmp1 = _mm_mul_ps(phase_Val, phase_Val);
00098         tmp2 = _mm_hadd_ps(tmp1, tmp1);
00099         tmp1 = _mm_shuffle_ps(tmp2, tmp2, 0xD8);
00100         phase_Val = _mm_div_ps(phase_Val, tmp1);
00101     }
00102     for(i = 0; i < halfPoints%ROTATOR_RELOAD; ++i) {
00103         aVal = _mm_load_ps((float*)aPtr);
00104         
00105         yl = _mm_moveldup_ps(phase_Val);
00106         yh = _mm_movehdup_ps(phase_Val);
00107         ylp = _mm_moveldup_ps(inc_Val);
00108         yhp = _mm_movehdup_ps(inc_Val);
00109         
00110         tmp1 = _mm_mul_ps(aVal, yl);
00111 
00112         tmp1p = _mm_mul_ps(phase_Val, ylp);
00113         
00114         aVal = _mm_shuffle_ps(aVal, aVal, 0xB1);
00115         phase_Val = _mm_shuffle_ps(phase_Val, phase_Val, 0xB1);
00116         tmp2 = _mm_mul_ps(aVal, yh);
00117         tmp2p = _mm_mul_ps(phase_Val, yhp);
00118         
00119         z = _mm_addsub_ps(tmp1, tmp2);
00120         phase_Val = _mm_addsub_ps(tmp1p, tmp2p);
00121         
00122         _mm_store_ps((float*)cPtr, z);
00123         
00124         aPtr += 2;
00125         cPtr += 2;
00126     }
00127 
00128     _mm_storeu_ps((float*)phase_Ptr, phase_Val);
00129     for(i = 0; i < num_points%2; ++i) {
00130         *cPtr++ = *aPtr++ * phase_Ptr[0];
00131         phase_Ptr[0] *= (phase_inc);
00132     }
00133      
00134     (*phase) = phase_Ptr[0];
00135 
00136 }
00137     
00138 #endif /* LV_HAVE_SSE4_1 */
00139 
00140 
00141 #ifdef LV_HAVE_AVX
00142 #include <immintrin.h>
00143 
00144 /*!
00145   \brief rotate input vector at fixed rate per sample from initial phase offset
00146   \param outVector The vector where the results will be stored
00147   \param inVector Vector to be rotated
00148   \param phase_inc rotational velocity
00149   \param phase initial phase offset
00150   \param num_points The number of values in inVector to be rotated and stored into cVector
00151 */
00152 
00153 
00154 
00155 
00156 static inline void volk_32fc_s32fc_x2_rotator_32fc_a_avx(lv_32fc_t* outVector, const lv_32fc_t* inVector, const lv_32fc_t phase_inc, lv_32fc_t* phase, unsigned int num_points){
00157     lv_32fc_t* cPtr = outVector;
00158     const lv_32fc_t* aPtr = inVector;
00159     lv_32fc_t incr = 1;
00160     lv_32fc_t phase_Ptr[4] = {(*phase), (*phase), (*phase), (*phase)};
00161     
00162     unsigned int i, j = 0;
00163 
00164     for(i = 0; i < 4; ++i) {
00165         phase_Ptr[i] *= incr;
00166         incr *= (phase_inc);
00167     }
00168 
00169     /*printf("%f, %f\n", lv_creal(phase_Ptr[0]), lv_cimag(phase_Ptr[0]));
00170     printf("%f, %f\n", lv_creal(phase_Ptr[1]), lv_cimag(phase_Ptr[1]));
00171     printf("%f, %f\n", lv_creal(phase_Ptr[2]), lv_cimag(phase_Ptr[2]));
00172     printf("%f, %f\n", lv_creal(phase_Ptr[3]), lv_cimag(phase_Ptr[3]));
00173     printf("incr: %f, %f\n", lv_creal(incr), lv_cimag(incr));*/
00174     __m256 aVal, phase_Val, inc_Val, yl, yh, tmp1, tmp2, z, ylp, yhp, tmp1p, tmp2p, negated, zeros;
00175     
00176     phase_Val = _mm256_loadu_ps((float*)phase_Ptr);
00177     inc_Val = _mm256_set_ps(lv_cimag(incr), lv_creal(incr),lv_cimag(incr), lv_creal(incr),lv_cimag(incr), lv_creal(incr),lv_cimag(incr), lv_creal(incr));
00178     zeros = _mm256_set1_ps(0.0);
00179     negated = _mm256_set1_ps(-1.0);
00180     const unsigned int fourthPoints = num_points / 4;
00181 
00182     
00183     for(i = 0; i < (unsigned int)(fourthPoints/ROTATOR_RELOAD); i++) {
00184         for(j = 0; j < ROTATOR_RELOAD; ++j) {
00185             
00186             aVal = _mm256_load_ps((float*)aPtr);
00187             
00188             yl = _mm256_moveldup_ps(phase_Val);
00189             yh = _mm256_movehdup_ps(phase_Val);
00190             ylp = _mm256_moveldup_ps(inc_Val);
00191             yhp = _mm256_movehdup_ps(inc_Val);
00192             
00193             tmp1 = _mm256_mul_ps(aVal, yl);
00194             tmp1p = _mm256_mul_ps(phase_Val, ylp);
00195             
00196             aVal = _mm256_shuffle_ps(aVal, aVal, 0xB1);
00197             phase_Val = _mm256_shuffle_ps(phase_Val, phase_Val, 0xB1);
00198             tmp2 = _mm256_mul_ps(aVal, yh);
00199             tmp2p = _mm256_mul_ps(phase_Val, yhp);
00200             
00201             z = _mm256_addsub_ps(tmp1, tmp2);
00202             phase_Val = _mm256_addsub_ps(tmp1p, tmp2p);
00203             
00204             _mm256_store_ps((float*)cPtr, z);
00205             
00206             aPtr += 4;
00207             cPtr += 4;
00208         }
00209         tmp1 = _mm256_mul_ps(phase_Val, phase_Val);
00210         tmp2 = _mm256_hadd_ps(tmp1, tmp1);
00211         tmp1 = _mm256_shuffle_ps(tmp2, tmp2, 0xD8);
00212         phase_Val = _mm256_div_ps(phase_Val, tmp1);
00213     }
00214     for(i = 0; i < fourthPoints%ROTATOR_RELOAD; ++i) {
00215         aVal = _mm256_load_ps((float*)aPtr);
00216         
00217         yl = _mm256_moveldup_ps(phase_Val);
00218         yh = _mm256_movehdup_ps(phase_Val);
00219         ylp = _mm256_moveldup_ps(inc_Val);
00220         yhp = _mm256_movehdup_ps(inc_Val);
00221         
00222         tmp1 = _mm256_mul_ps(aVal, yl);
00223 
00224         tmp1p = _mm256_mul_ps(phase_Val, ylp);
00225         
00226         aVal = _mm256_shuffle_ps(aVal, aVal, 0xB1);
00227         phase_Val = _mm256_shuffle_ps(phase_Val, phase_Val, 0xB1);
00228         tmp2 = _mm256_mul_ps(aVal, yh);
00229         tmp2p = _mm256_mul_ps(phase_Val, yhp);
00230         
00231         z = _mm256_addsub_ps(tmp1, tmp2);
00232         phase_Val = _mm256_addsub_ps(tmp1p, tmp2p);
00233         
00234         _mm256_store_ps((float*)cPtr, z);
00235         
00236         aPtr += 4;
00237         cPtr += 4;
00238     }
00239 
00240     _mm256_storeu_ps((float*)phase_Ptr, phase_Val);
00241     for(i = 0; i < num_points%4; ++i) {
00242         *cPtr++ = *aPtr++ * phase_Ptr[0];
00243         phase_Ptr[0] *= (phase_inc);
00244     }
00245      
00246     (*phase) = phase_Ptr[0];
00247 
00248 }
00249     
00250 #endif /* LV_HAVE_AVX */
00251 
00252 
00253 
00254 
00255 
00256 
00257 
00258 
00259 #endif /* INCLUDED_volk_32fc_s32fc_rotator_32fc_a_H */