GNU Radio 3.6.2git-107-gbf8700a2 C++ API
volk_32fc_s32fc_x2_rotator_32fc_a.h
Go to the documentation of this file.
00001 #ifndef INCLUDED_volk_32fc_s32fc_rotator_32fc_a_H
00002 #define INCLUDED_volk_32fc_s32fc_rotator_32fc_a_H
00003 
00004 
00005 #include <volk/volk_complex.h>
00006 #include <stdio.h>
00007 #include <stdlib.h>
00008 #define ROTATOR_RELOAD 512
00009 
00010 
00011 #ifdef LV_HAVE_GENERIC
00012 
00013 /*!
00014   \brief rotate input vector at fixed rate per sample from initial phase offset
00015   \param outVector The vector where the results will be stored
00016   \param inVector Vector to be rotated
00017   \param phase_inc rotational velocity
00018   \param phase initial phase offset
00019   \param num_points The number of values in inVector to be rotated and stored into cVector
00020 */
00021 
00022 
00023 static inline void volk_32fc_s32fc_x2_rotator_32fc_a_generic(lv_32fc_t* outVector, const lv_32fc_t* inVector, const lv_32fc_t phase_inc, lv_32fc_t* phase, unsigned int num_points){    
00024     *phase = lv_cmake(1.0, 0.0);
00025     unsigned int i = 0; 
00026     int j = 0;    
00027     for(i = 0; i < (unsigned int)(num_points/ROTATOR_RELOAD); ++i) {
00028         for(j = 0; j < ROTATOR_RELOAD; ++j) {
00029             *outVector++ = *inVector++ * (*phase);
00030             (*phase) *= phase_inc;
00031         }
00032         (*phase) /= abs((*phase));
00033     }
00034     for(i = 0; i < num_points%ROTATOR_RELOAD; ++i) {
00035         *outVector++ = *inVector++ * (*phase);
00036         (*phase) *= phase_inc;
00037     }
00038     
00039 }
00040 #endif /* LV_HAVE_GENERIC */
00041 
00042 
00043 #ifdef LV_HAVE_SSE4_1
00044 #include <smmintrin.h>
00045 
00046 static inline void volk_32fc_s32fc_x2_rotator_32fc_a_sse4_1(lv_32fc_t* outVector, const lv_32fc_t* inVector, const lv_32fc_t phase_inc, lv_32fc_t* phase, unsigned int num_points){
00047     *phase = lv_cmake(1.0, 0.0);
00048     lv_32fc_t* cPtr = outVector;
00049     const lv_32fc_t* aPtr = inVector;
00050     lv_32fc_t incr = 1;
00051     lv_32fc_t phase_Ptr[2] = {(*phase), (*phase)};
00052     
00053     unsigned int i, j = 0;
00054 
00055     for(i = 0; i < 2; ++i) {
00056         phase_Ptr[i] *= incr;
00057         incr *= (phase_inc);
00058     }
00059 
00060     /*printf("%f, %f\n", lv_creal(phase_Ptr[0]), lv_cimag(phase_Ptr[0]));
00061     printf("%f, %f\n", lv_creal(phase_Ptr[1]), lv_cimag(phase_Ptr[1]));
00062     printf("%f, %f\n", lv_creal(phase_Ptr[2]), lv_cimag(phase_Ptr[2]));
00063     printf("%f, %f\n", lv_creal(phase_Ptr[3]), lv_cimag(phase_Ptr[3]));
00064     printf("incr: %f, %f\n", lv_creal(incr), lv_cimag(incr));*/
00065     __m128 aVal, phase_Val, inc_Val, yl, yh, tmp1, tmp2, z, ylp, yhp, tmp1p, tmp2p;
00066     
00067     phase_Val = _mm_loadu_ps((float*)phase_Ptr);
00068     inc_Val = _mm_set_ps(lv_cimag(incr), lv_creal(incr),lv_cimag(incr), lv_creal(incr));
00069     
00070     const unsigned int halfPoints = num_points / 2;
00071 
00072     
00073     for(i = 0; i < (unsigned int)(halfPoints/ROTATOR_RELOAD); i++) {
00074         for(j = 0; j < ROTATOR_RELOAD; ++j) {
00075             
00076             aVal = _mm_load_ps((float*)aPtr);
00077             
00078             yl = _mm_moveldup_ps(phase_Val);
00079             yh = _mm_movehdup_ps(phase_Val);
00080             ylp = _mm_moveldup_ps(inc_Val);
00081             yhp = _mm_movehdup_ps(inc_Val);
00082             
00083             tmp1 = _mm_mul_ps(aVal, yl);
00084             tmp1p = _mm_mul_ps(phase_Val, ylp);
00085             
00086             aVal = _mm_shuffle_ps(aVal, aVal, 0xB1);
00087             phase_Val = _mm_shuffle_ps(phase_Val, phase_Val, 0xB1);
00088             tmp2 = _mm_mul_ps(aVal, yh);
00089             tmp2p = _mm_mul_ps(phase_Val, yhp);
00090             
00091             z = _mm_addsub_ps(tmp1, tmp2);
00092             phase_Val = _mm_addsub_ps(tmp1p, tmp2p);
00093             
00094             _mm_store_ps((float*)cPtr, z);
00095             
00096             aPtr += 2;
00097             cPtr += 2;
00098         }
00099         tmp1 = _mm_mul_ps(phase_Val, phase_Val);
00100         tmp2 = _mm_hadd_ps(tmp1, tmp1);
00101         tmp1 = _mm_shuffle_ps(tmp2, tmp2, 0xD8);
00102         phase_Val = _mm_div_ps(phase_Val, tmp1);
00103     }
00104     for(i = 0; i < halfPoints%ROTATOR_RELOAD; ++i) {
00105         aVal = _mm_load_ps((float*)aPtr);
00106         
00107         yl = _mm_moveldup_ps(phase_Val);
00108         yh = _mm_movehdup_ps(phase_Val);
00109         ylp = _mm_moveldup_ps(inc_Val);
00110         yhp = _mm_movehdup_ps(inc_Val);
00111         
00112         tmp1 = _mm_mul_ps(aVal, yl);
00113 
00114         tmp1p = _mm_mul_ps(phase_Val, ylp);
00115         
00116         aVal = _mm_shuffle_ps(aVal, aVal, 0xB1);
00117         phase_Val = _mm_shuffle_ps(phase_Val, phase_Val, 0xB1);
00118         tmp2 = _mm_mul_ps(aVal, yh);
00119         tmp2p = _mm_mul_ps(phase_Val, yhp);
00120         
00121         z = _mm_addsub_ps(tmp1, tmp2);
00122         phase_Val = _mm_addsub_ps(tmp1p, tmp2p);
00123         
00124         _mm_store_ps((float*)cPtr, z);
00125         
00126         aPtr += 2;
00127         cPtr += 2;
00128     }
00129 
00130     _mm_storeu_ps((float*)phase_Ptr, phase_Val);
00131     for(i = 0; i < num_points%2; ++i) {
00132         *cPtr++ = *aPtr++ * phase_Ptr[0];
00133         phase_Ptr[0] *= (phase_inc);
00134     }
00135      
00136     (*phase) = phase_Ptr[0];
00137 
00138 }
00139     
00140 #endif /* LV_HAVE_SSE4_1 */
00141 
00142 
00143 #ifdef LV_HAVE_AVX
00144 #include <immintrin.h>
00145 
00146 /*!
00147   \brief rotate input vector at fixed rate per sample from initial phase offset
00148   \param outVector The vector where the results will be stored
00149   \param inVector Vector to be rotated
00150   \param phase_inc rotational velocity
00151   \param phase initial phase offset
00152   \param num_points The number of values in inVector to be rotated and stored into cVector
00153 */
00154 
00155 
00156 
00157 
00158 static inline void volk_32fc_s32fc_x2_rotator_32fc_a_avx(lv_32fc_t* outVector, const lv_32fc_t* inVector, const lv_32fc_t phase_inc, lv_32fc_t* phase, unsigned int num_points){
00159     *phase = lv_cmake(1.0, 0.0);
00160     lv_32fc_t* cPtr = outVector;
00161     const lv_32fc_t* aPtr = inVector;
00162     lv_32fc_t incr = 1;
00163     lv_32fc_t phase_Ptr[4] = {(*phase), (*phase), (*phase), (*phase)};
00164     
00165     unsigned int i, j = 0;
00166 
00167     for(i = 0; i < 4; ++i) {
00168         phase_Ptr[i] *= incr;
00169         incr *= (phase_inc);
00170     }
00171 
00172     /*printf("%f, %f\n", lv_creal(phase_Ptr[0]), lv_cimag(phase_Ptr[0]));
00173     printf("%f, %f\n", lv_creal(phase_Ptr[1]), lv_cimag(phase_Ptr[1]));
00174     printf("%f, %f\n", lv_creal(phase_Ptr[2]), lv_cimag(phase_Ptr[2]));
00175     printf("%f, %f\n", lv_creal(phase_Ptr[3]), lv_cimag(phase_Ptr[3]));
00176     printf("incr: %f, %f\n", lv_creal(incr), lv_cimag(incr));*/
00177     __m256 aVal, phase_Val, inc_Val, yl, yh, tmp1, tmp2, z, ylp, yhp, tmp1p, tmp2p;
00178     
00179     phase_Val = _mm256_loadu_ps((float*)phase_Ptr);
00180     inc_Val = _mm256_set_ps(lv_cimag(incr), lv_creal(incr),lv_cimag(incr), lv_creal(incr),lv_cimag(incr), lv_creal(incr),lv_cimag(incr), lv_creal(incr));
00181     const unsigned int fourthPoints = num_points / 4;
00182 
00183     
00184     for(i = 0; i < (unsigned int)(fourthPoints/ROTATOR_RELOAD); i++) {
00185         for(j = 0; j < ROTATOR_RELOAD; ++j) {
00186             
00187             aVal = _mm256_load_ps((float*)aPtr);
00188             
00189             yl = _mm256_moveldup_ps(phase_Val);
00190             yh = _mm256_movehdup_ps(phase_Val);
00191             ylp = _mm256_moveldup_ps(inc_Val);
00192             yhp = _mm256_movehdup_ps(inc_Val);
00193             
00194             tmp1 = _mm256_mul_ps(aVal, yl);
00195             tmp1p = _mm256_mul_ps(phase_Val, ylp);
00196             
00197             aVal = _mm256_shuffle_ps(aVal, aVal, 0xB1);
00198             phase_Val = _mm256_shuffle_ps(phase_Val, phase_Val, 0xB1);
00199             tmp2 = _mm256_mul_ps(aVal, yh);
00200             tmp2p = _mm256_mul_ps(phase_Val, yhp);
00201             
00202             z = _mm256_addsub_ps(tmp1, tmp2);
00203             phase_Val = _mm256_addsub_ps(tmp1p, tmp2p);
00204             
00205             _mm256_store_ps((float*)cPtr, z);
00206             
00207             aPtr += 4;
00208             cPtr += 4;
00209         }
00210         tmp1 = _mm256_mul_ps(phase_Val, phase_Val);
00211         tmp2 = _mm256_hadd_ps(tmp1, tmp1);
00212         tmp1 = _mm256_shuffle_ps(tmp2, tmp2, 0xD8);
00213         phase_Val = _mm256_div_ps(phase_Val, tmp1);
00214     }
00215     for(i = 0; i < fourthPoints%ROTATOR_RELOAD; ++i) {
00216         aVal = _mm256_load_ps((float*)aPtr);
00217         
00218         yl = _mm256_moveldup_ps(phase_Val);
00219         yh = _mm256_movehdup_ps(phase_Val);
00220         ylp = _mm256_moveldup_ps(inc_Val);
00221         yhp = _mm256_movehdup_ps(inc_Val);
00222         
00223         tmp1 = _mm256_mul_ps(aVal, yl);
00224 
00225         tmp1p = _mm256_mul_ps(phase_Val, ylp);
00226         
00227         aVal = _mm256_shuffle_ps(aVal, aVal, 0xB1);
00228         phase_Val = _mm256_shuffle_ps(phase_Val, phase_Val, 0xB1);
00229         tmp2 = _mm256_mul_ps(aVal, yh);
00230         tmp2p = _mm256_mul_ps(phase_Val, yhp);
00231         
00232         z = _mm256_addsub_ps(tmp1, tmp2);
00233         phase_Val = _mm256_addsub_ps(tmp1p, tmp2p);
00234         
00235         _mm256_store_ps((float*)cPtr, z);
00236         
00237         aPtr += 4;
00238         cPtr += 4;
00239     }
00240 
00241     _mm256_storeu_ps((float*)phase_Ptr, phase_Val);
00242     for(i = 0; i < num_points%4; ++i) {
00243         *cPtr++ = *aPtr++ * phase_Ptr[0];
00244         phase_Ptr[0] *= (phase_inc);
00245     }
00246      
00247     (*phase) = phase_Ptr[0];
00248 
00249 }
00250     
00251 #endif /* LV_HAVE_AVX */
00252 
00253 
00254 
00255 
00256 
00257 
00258 
00259 
00260 #endif /* INCLUDED_volk_32fc_s32fc_rotator_32fc_a_H */