GNU Radio 3.6.1-14 C++ API
volk_32fc_s32fc_x2_rotator_32fc_a.h
#ifndef INCLUDED_volk_32fc_s32fc_rotator_32fc_a_H
#define INCLUDED_volk_32fc_s32fc_rotator_32fc_a_H


#include <volk/volk_complex.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>

/* Renormalize the accumulated phase to unit magnitude every
   ROTATOR_RELOAD samples so single-precision round-off does not
   drift into the output amplitude. */
#define ROTATOR_RELOAD 512

#ifdef LV_HAVE_GENERIC

/*!
  \brief Rotate the input vector at a fixed rate per sample, starting from the initial phase offset
  \param outVector The vector where the results will be stored
  \param inVector Vector to be rotated
  \param phase_inc Phasor applied per sample (rotational velocity)
  \param phase Initial phase offset; holds the updated phase on return
  \param num_points The number of values in inVector to be rotated and stored into outVector
*/

static inline void volk_32fc_s32fc_x2_rotator_32fc_a_generic(lv_32fc_t* outVector, const lv_32fc_t* inVector, const lv_32fc_t phase_inc, lv_32fc_t* phase, unsigned int num_points){
    unsigned int i = 0;
    unsigned int j = 0;
    for(i = 0; i < (unsigned int)(num_points/ROTATOR_RELOAD); ++i) {
        for(j = 0; j < ROTATOR_RELOAD; ++j) {
            *outVector++ = *inVector++ * (*phase);
            (*phase) *= phase_inc;
        }
        /* Pull the phase back onto the unit circle. */
        (*phase) /= hypotf(lv_creal(*phase), lv_cimag(*phase));
    }
    for(i = 0; i < num_points%ROTATOR_RELOAD; ++i) {
        *outVector++ = *inVector++ * (*phase);
        (*phase) *= phase_inc;
    }
}
#endif /* LV_HAVE_GENERIC */
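
/* A minimal usage sketch, not part of the original file and compiled out
 * with #if 0: it drives the generic kernel above to rotate a constant
 * input by one degree per sample, starting at phase 0 (i.e. 1 + 0j).
 * The <volk/...> include path and the hand-defined LV_HAVE_GENERIC are
 * assumptions for illustration only; in practice the kernel is normally
 * reached through the VOLK dispatcher rather than called directly. */
#if 0
#define LV_HAVE_GENERIC    /* normally set by the VOLK build system */
#include <volk/volk_32fc_s32fc_x2_rotator_32fc_a.h>

#include <math.h>
#include <stdio.h>

int main(void)
{
    enum { N = 8 };
    lv_32fc_t in[N], out[N];
    unsigned int n;
    for(n = 0; n < N; ++n)
        in[n] = lv_cmake(1.0f, 0.0f);                  /* constant-amplitude input */

    const float step = 3.14159265358979f / 180.0f;     /* 1 degree per sample */
    lv_32fc_t phase_inc = lv_cmake(cosf(step), sinf(step));
    lv_32fc_t phase = lv_cmake(1.0f, 0.0f);            /* start at 0 rad */

    volk_32fc_s32fc_x2_rotator_32fc_a_generic(out, in, phase_inc, &phase, N);

    for(n = 0; n < N; ++n)
        printf("%u: %f %+fi\n", n, lv_creal(out[n]), lv_cimag(out[n]));
    return 0;
}
#endif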


#ifdef LV_HAVE_SSE4_1
#include <smmintrin.h>

/*!
  \brief Rotate the input vector at a fixed rate per sample, starting from the initial phase offset
  \param outVector The vector where the results will be stored
  \param inVector Vector to be rotated
  \param phase_inc Phasor applied per sample (rotational velocity)
  \param phase Initial phase offset; holds the updated phase on return
  \param num_points The number of values in inVector to be rotated and stored into outVector
*/

static inline void volk_32fc_s32fc_x2_rotator_32fc_a_sse4_1(lv_32fc_t* outVector, const lv_32fc_t* inVector, const lv_32fc_t phase_inc, lv_32fc_t* phase, unsigned int num_points){
    lv_32fc_t* cPtr = outVector;
    const lv_32fc_t* aPtr = inVector;
    lv_32fc_t incr = 1;
    lv_32fc_t phase_Ptr[2] = {(*phase), (*phase)};

    unsigned int i, j = 0;

    /* Lane k starts at (*phase) * phase_inc^k; incr ends as phase_inc^2,
       the per-pass increment broadcast into inc_Val below. */
    for(i = 0; i < 2; ++i) {
        phase_Ptr[i] *= incr;
        incr *= (phase_inc);
    }

    __m128 aVal, phase_Val, inc_Val, yl, yh, tmp1, tmp2, z, ylp, yhp, tmp1p, tmp2p;

    phase_Val = _mm_loadu_ps((float*)phase_Ptr);
    inc_Val = _mm_set_ps(lv_cimag(incr), lv_creal(incr), lv_cimag(incr), lv_creal(incr));

    const unsigned int halfPoints = num_points / 2;

    for(i = 0; i < (unsigned int)(halfPoints/ROTATOR_RELOAD); i++) {
        for(j = 0; j < ROTATOR_RELOAD; ++j) {

            aVal = _mm_load_ps((float*)aPtr);

            yl = _mm_moveldup_ps(phase_Val);
            yh = _mm_movehdup_ps(phase_Val);
            ylp = _mm_moveldup_ps(inc_Val);
            yhp = _mm_movehdup_ps(inc_Val);

            tmp1 = _mm_mul_ps(aVal, yl);
            tmp1p = _mm_mul_ps(phase_Val, ylp);

            aVal = _mm_shuffle_ps(aVal, aVal, 0xB1);
            phase_Val = _mm_shuffle_ps(phase_Val, phase_Val, 0xB1);
            tmp2 = _mm_mul_ps(aVal, yh);
            tmp2p = _mm_mul_ps(phase_Val, yhp);

            z = _mm_addsub_ps(tmp1, tmp2);
            phase_Val = _mm_addsub_ps(tmp1p, tmp2p);

            _mm_store_ps((float*)cPtr, z);

            aPtr += 2;
            cPtr += 2;
        }
        /* Renormalize both phase lanes to unit magnitude. */
        tmp1 = _mm_mul_ps(phase_Val, phase_Val);
        tmp2 = _mm_hadd_ps(tmp1, tmp1);
        tmp1 = _mm_shuffle_ps(tmp2, tmp2, 0xD8);
        tmp2 = _mm_sqrt_ps(tmp1);
        phase_Val = _mm_div_ps(phase_Val, tmp2);
    }
    for(i = 0; i < halfPoints%ROTATOR_RELOAD; ++i) {
        aVal = _mm_load_ps((float*)aPtr);

        yl = _mm_moveldup_ps(phase_Val);
        yh = _mm_movehdup_ps(phase_Val);
        ylp = _mm_moveldup_ps(inc_Val);
        yhp = _mm_movehdup_ps(inc_Val);

        tmp1 = _mm_mul_ps(aVal, yl);
        tmp1p = _mm_mul_ps(phase_Val, ylp);

        aVal = _mm_shuffle_ps(aVal, aVal, 0xB1);
        phase_Val = _mm_shuffle_ps(phase_Val, phase_Val, 0xB1);
        tmp2 = _mm_mul_ps(aVal, yh);
        tmp2p = _mm_mul_ps(phase_Val, yhp);

        z = _mm_addsub_ps(tmp1, tmp2);
        phase_Val = _mm_addsub_ps(tmp1p, tmp2p);

        _mm_store_ps((float*)cPtr, z);

        aPtr += 2;
        cPtr += 2;
    }

    _mm_storeu_ps((float*)phase_Ptr, phase_Val);
    for(i = 0; i < num_points%2; ++i) {
        *cPtr++ = *aPtr++ * phase_Ptr[0];
        phase_Ptr[0] *= (phase_inc);
    }

    (*phase) = phase_Ptr[0];

}

#endif /* LV_HAVE_SSE4_1 */
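
/* Not part of the original file: a minimal sketch of the interleaved
 * complex-multiply pattern the SSE4.1 and AVX kernels use above.  For
 * x = {a, b, a1, b1} and y = {c, d, c1, d1} (re/im interleaved) it computes
 * x*y per complex pair, i.e. (ac - bd) + (bc + ad)i.  The helper name is
 * illustrative; kept out of the build with #if 0, SSE3 intrinsics only. */
#if 0
#include <pmmintrin.h>

static inline __m128 rotator_complex_mul_sketch(__m128 x, __m128 y)
{
    __m128 yl  = _mm_moveldup_ps(y);           /* {c, c, c1, c1}            */
    __m128 yh  = _mm_movehdup_ps(y);           /* {d, d, d1, d1}            */
    __m128 t1  = _mm_mul_ps(x, yl);            /* {ac, bc, ...}             */
    __m128 xsw = _mm_shuffle_ps(x, x, 0xB1);   /* swap re/im: {b, a, ...}   */
    __m128 t2  = _mm_mul_ps(xsw, yh);          /* {bd, ad, ...}             */
    return _mm_addsub_ps(t1, t2);              /* {ac - bd, bc + ad, ...}   */
}
#endif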


#ifdef LV_HAVE_AVX
#include <immintrin.h>

/*!
  \brief Rotate the input vector at a fixed rate per sample, starting from the initial phase offset
  \param outVector The vector where the results will be stored
  \param inVector Vector to be rotated
  \param phase_inc Phasor applied per sample (rotational velocity)
  \param phase Initial phase offset; holds the updated phase on return
  \param num_points The number of values in inVector to be rotated and stored into outVector
*/

static inline void volk_32fc_s32fc_x2_rotator_32fc_a_avx(lv_32fc_t* outVector, const lv_32fc_t* inVector, const lv_32fc_t phase_inc, lv_32fc_t* phase, unsigned int num_points){
    lv_32fc_t* cPtr = outVector;
    const lv_32fc_t* aPtr = inVector;
    lv_32fc_t incr = 1;
    lv_32fc_t phase_Ptr[4] = {(*phase), (*phase), (*phase), (*phase)};

    unsigned int i, j = 0;

    /* Lane k starts at (*phase) * phase_inc^k; incr ends as phase_inc^4,
       the per-pass increment broadcast into inc_Val below. */
    for(i = 0; i < 4; ++i) {
        phase_Ptr[i] *= incr;
        incr *= (phase_inc);
    }

    __m256 aVal, phase_Val, inc_Val, yl, yh, tmp1, tmp2, z, ylp, yhp, tmp1p, tmp2p;

    phase_Val = _mm256_loadu_ps((float*)phase_Ptr);
    inc_Val = _mm256_set_ps(lv_cimag(incr), lv_creal(incr), lv_cimag(incr), lv_creal(incr), lv_cimag(incr), lv_creal(incr), lv_cimag(incr), lv_creal(incr));

    const unsigned int fourthPoints = num_points / 4;

    for(i = 0; i < (unsigned int)(fourthPoints/ROTATOR_RELOAD); i++) {
        for(j = 0; j < ROTATOR_RELOAD; ++j) {

            aVal = _mm256_load_ps((float*)aPtr);

            yl = _mm256_moveldup_ps(phase_Val);
            yh = _mm256_movehdup_ps(phase_Val);
            ylp = _mm256_moveldup_ps(inc_Val);
            yhp = _mm256_movehdup_ps(inc_Val);

            tmp1 = _mm256_mul_ps(aVal, yl);
            tmp1p = _mm256_mul_ps(phase_Val, ylp);

            aVal = _mm256_shuffle_ps(aVal, aVal, 0xB1);
            phase_Val = _mm256_shuffle_ps(phase_Val, phase_Val, 0xB1);
            tmp2 = _mm256_mul_ps(aVal, yh);
            tmp2p = _mm256_mul_ps(phase_Val, yhp);

            z = _mm256_addsub_ps(tmp1, tmp2);
            phase_Val = _mm256_addsub_ps(tmp1p, tmp2p);

            _mm256_store_ps((float*)cPtr, z);

            aPtr += 4;
            cPtr += 4;
        }
        /* Renormalize all four phase lanes to unit magnitude. */
        tmp1 = _mm256_mul_ps(phase_Val, phase_Val);
        tmp2 = _mm256_hadd_ps(tmp1, tmp1);
        tmp1 = _mm256_shuffle_ps(tmp2, tmp2, 0xD8);
        tmp2 = _mm256_sqrt_ps(tmp1);
        phase_Val = _mm256_div_ps(phase_Val, tmp2);
    }
    for(i = 0; i < fourthPoints%ROTATOR_RELOAD; ++i) {
        aVal = _mm256_load_ps((float*)aPtr);

        yl = _mm256_moveldup_ps(phase_Val);
        yh = _mm256_movehdup_ps(phase_Val);
        ylp = _mm256_moveldup_ps(inc_Val);
        yhp = _mm256_movehdup_ps(inc_Val);

        tmp1 = _mm256_mul_ps(aVal, yl);
        tmp1p = _mm256_mul_ps(phase_Val, ylp);

        aVal = _mm256_shuffle_ps(aVal, aVal, 0xB1);
        phase_Val = _mm256_shuffle_ps(phase_Val, phase_Val, 0xB1);
        tmp2 = _mm256_mul_ps(aVal, yh);
        tmp2p = _mm256_mul_ps(phase_Val, yhp);

        z = _mm256_addsub_ps(tmp1, tmp2);
        phase_Val = _mm256_addsub_ps(tmp1p, tmp2p);

        _mm256_store_ps((float*)cPtr, z);

        aPtr += 4;
        cPtr += 4;
    }

    _mm256_storeu_ps((float*)phase_Ptr, phase_Val);
    for(i = 0; i < num_points%4; ++i) {
        *cPtr++ = *aPtr++ * phase_Ptr[0];
        phase_Ptr[0] *= (phase_inc);
    }

    (*phase) = phase_Ptr[0];

}

#endif /* LV_HAVE_AVX */
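
/* Not part of the original file: a scalar sketch of why the kernels divide
 * the phase by its magnitude every ROTATOR_RELOAD samples.  Repeated
 * single-precision complex multiplies let |phase| drift away from 1, which
 * would otherwise scale the output; renormalizing restores unit magnitude.
 * Kept out of the build with #if 0; plain C99 complex arithmetic only. */
#if 0
#include <complex.h>
#include <math.h>
#include <stdio.h>

int main(void)
{
    const float step = 0.1f;
    float complex phase_inc = cosf(step) + sinf(step) * I;
    float complex phase = 1.0f;
    unsigned int n;
    for(n = 1; n <= 4096; ++n) {
        phase *= phase_inc;                    /* accumulate rotation */
        if(n % 512 == 0) {                     /* same cadence as ROTATOR_RELOAD */
            printf("n=%4u  |phase| = %.7f\n", n, cabsf(phase));
            phase /= cabsf(phase);             /* pull back to the unit circle */
        }
    }
    return 0;
}
#endif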


#endif /* INCLUDED_volk_32fc_s32fc_rotator_32fc_a_H */