1 #ifndef INCLUDED_volk_32fc_x2_multiply_32fc_u_H
2 #define INCLUDED_volk_32fc_x2_multiply_32fc_u_H
10 #include <pmmintrin.h>
/*
 * Element-wise complex multiply of two vectors using SSE3 intrinsics with
 * unaligned loads and stores.
 *
 * cVector    - output vector
 * aVector    - first input vector
 * bVector    - second input vector
 * num_points - number of complex points to process
 *
 * The main loop runs num_points / 2 times and handles one 128-bit register
 * (four floats) per input per iteration.
 *
 * NOTE(review): the comments below assume lv_32fc_t is stored as interleaved
 * (real, imag) floats, so one register holds two complex values - confirm
 * against the lv_32fc_t definition.
 * NOTE(review): the cursors a, b and c are declared on lines not visible in
 * this excerpt; presumably they walk aVector, bVector and cVector - verify.
 */
18 static inline void volk_32fc_x2_multiply_32fc_u_sse3(
lv_32fc_t* cVector,
const lv_32fc_t* aVector,
const lv_32fc_t* bVector,
unsigned int num_points){
19 unsigned int number = 0;
/* Number of full 128-bit iterations (integer division, remainder dropped). */
20 const unsigned int halfPoints = num_points / 2;
22 __m128 x, y, yl, yh, z, tmp1, tmp2;
27 for(;number < halfPoints; number++){
/* Unaligned load of four floats from a: presumably ar,ai,br,bi. */
29 x = _mm_loadu_ps((
float*)a);
/* Unaligned load of four floats from b: presumably cr,ci,dr,di. */
30 y = _mm_loadu_ps((
float*)b);
/* Duplicate the even-indexed lanes of y: cr,cr,dr,dr. */
32 yl = _mm_moveldup_ps(y);
/* Duplicate the odd-indexed lanes of y: ci,ci,di,di. */
33 yh = _mm_movehdup_ps(y);
/* tmp1 = ar*cr, ai*cr, br*dr, bi*dr */
35 tmp1 = _mm_mul_ps(x,yl);
/* 0xB1 swaps the two floats inside each adjacent pair: ai,ar,bi,br. */
37 x = _mm_shuffle_ps(x,x,0xB1);
/* tmp2 = ai*ci, ar*ci, bi*di, br*di */
39 tmp2 = _mm_mul_ps(x,yh);
/*
 * addsub subtracts in even lanes and adds in odd lanes, giving
 * ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di,
 * i.e. two complex products under the interleaved-layout assumption.
 */
41 z = _mm_addsub_ps(tmp1,tmp2);
/* Unaligned store of the four result floats to c. */
43 _mm_storeu_ps((
float*)c,z);
/*
 * Taken when num_points is odd. The body is not visible in this excerpt;
 * presumably it multiplies the one point the paired loop left over - verify.
 */
50 if((num_points % 2) != 0) {
56 #ifdef LV_HAVE_GENERIC
64 static inline void volk_32fc_x2_multiply_32fc_u_generic(
lv_32fc_t* cVector,
const lv_32fc_t* aVector,
const lv_32fc_t* bVector,
unsigned int num_points){
68 unsigned int number = 0;
70 for(number = 0; number < num_points; number++){
71 *cPtr++ = (*aPtr++) * (*bPtr++);