[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Commit-gnuradio] [gnuradio] 08/12: volk: add 32fc_32f_multiply_32fc_a_avx protokernel
From: |
git |
Subject: |
[Commit-gnuradio] [gnuradio] 08/12: volk: add 32fc_32f_multiply_32fc_a_avx protokernel |
Date: |
Thu, 16 Jan 2014 20:33:25 +0000 (UTC) |
This is an automated email from the git hooks/post-receive script.
jcorgan pushed a commit to branch master
in repository gnuradio.
commit eb0a608e4df6ee46d8815efb4ff2779fc93c7030
Author: Nathan West <address@hidden>
Date: Mon Jan 13 16:23:27 2014 -0600
volk: add 32fc_32f_multiply_32fc_a_avx protokernel
---
volk/kernels/volk/volk_32fc_32f_multiply_32fc.h | 61 ++++++++++++++++++++++++-
1 file changed, 60 insertions(+), 1 deletion(-)
diff --git a/volk/kernels/volk/volk_32fc_32f_multiply_32fc.h b/volk/kernels/volk/volk_32fc_32f_multiply_32fc.h
index 104e325..a7e81bd 100644
--- a/volk/kernels/volk/volk_32fc_32f_multiply_32fc.h
+++ b/volk/kernels/volk/volk_32fc_32f_multiply_32fc.h
@@ -4,6 +4,64 @@
#include <inttypes.h>
#include <stdio.h>
+#ifdef LV_HAVE_AVX
+#include <immintrin.h>
+ /*!
+   \brief Multiplies each complex value of aVector by the matching float of bVector and stores the products in cVector (aligned AVX protokernel: all three buffers are accessed with aligned 256-bit loads/stores)
+   \param cVector The vector where the results will be stored
+   \param aVector The complex vector to be multiplied
+   \param bVector The vector containing the float values to be multiplied against each complex value in aVector
+   \param num_points The number of values in aVector and bVector to be multiplied together and stored into cVector
+ */
+static inline void volk_32fc_32f_multiply_32fc_a_avx(lv_32fc_t* cVector, const lv_32fc_t* aVector, const float* bVector, unsigned int num_points)
+{
+  unsigned int number = 0;
+  const unsigned int eighthPoints = num_points / 8;
+
+  lv_32fc_t* cPtr = cVector;
+  const lv_32fc_t* aPtr = aVector;
+  const float* bPtr = bVector;
+
+  __m256 aVal1, aVal2, bVal, bVal1, bVal2, cVal1, cVal2;
+  // In-lane selector (lowest element first: 0,0,1,1 | 2,2,3,3) that repeats every float twice so one scale factor covers both floats of a complex value
+  __m256i permute_mask = _mm256_set_epi32(3, 3, 2, 2, 1, 1, 0, 0);
+
+  for(;number < eighthPoints; number++){
+    // Each pass consumes 8 complex values (two 256-bit loads of 4 each) and 8 floats from bVector
+    aVal1 = _mm256_load_ps((const float *)aPtr);
+    aPtr += 4;
+
+    aVal2 = _mm256_load_ps((const float *)aPtr);
+    aPtr += 4;
+
+    bVal = _mm256_load_ps(bPtr); // b0|b1|b2|b3|b4|b5|b6|b7
+    bPtr += 8;
+
+    bVal1 = _mm256_permute2f128_ps(bVal, bVal, 0x00); // b0|b1|b2|b3|b0|b1|b2|b3
+    bVal2 = _mm256_permute2f128_ps(bVal, bVal, 0x11); // b4|b5|b6|b7|b4|b5|b6|b7
+
+    bVal1 = _mm256_permutevar_ps(bVal1, permute_mask); // b0|b0|b1|b1|b2|b2|b3|b3
+    bVal2 = _mm256_permutevar_ps(bVal2, permute_mask); // b4|b4|b5|b5|b6|b6|b7|b7
+
+    cVal1 = _mm256_mul_ps(aVal1, bVal1);
+    cVal2 = _mm256_mul_ps(aVal2, bVal2);
+
+    _mm256_store_ps((float*)cPtr,cVal1); // Store the results back into the C container
+    cPtr += 4;
+
+    _mm256_store_ps((float*)cPtr,cVal2); // Store the results back into the C container
+    cPtr += 4;
+  }
+  // Scalar tail: the remaining num_points % 8 values are multiplied one at a time
+  number = eighthPoints * 8;
+  for(;number < num_points; ++number){
+    *cPtr++ = (*aPtr++) * (*bPtr++);
+  }
+}
+#endif /* LV_HAVE_AVX */
+
+
+
#ifdef LV_HAVE_SSE
#include <xmmintrin.h>
/*!
@@ -13,7 +71,8 @@
\param bVector The vectors containing the float values to be multiplied against each complex value in aVector
\param num_points The number of values in aVector and bVector to be multiplied together and stored into cVector
*/
-static inline void volk_32fc_32f_multiply_32fc_a_sse(lv_32fc_t* cVector, const lv_32fc_t* aVector, const float* bVector, unsigned int num_points){
+static inline void volk_32fc_32f_multiply_32fc_a_sse(lv_32fc_t* cVector, const lv_32fc_t* aVector, const float* bVector, unsigned int num_points)
+{
unsigned int number = 0;
const unsigned int quarterPoints = num_points / 4;
- [Commit-gnuradio] [gnuradio] branch master updated (f7f28bb -> 67aa043), git, 2014/01/16
- [Commit-gnuradio] [gnuradio] 09/12: Merge remote-tracking branch 'softerhardware/master', git, 2014/01/16
- [Commit-gnuradio] [gnuradio] 06/12: volk: add 32f_invsqrt_32f avx proto kernel, git, 2014/01/16
- [Commit-gnuradio] [gnuradio] 04/12: digital: Allow for different lengths of pilot carrier- and -symbol allocations, git, 2014/01/16
- [Commit-gnuradio] [gnuradio] 12/12: Merge remote-tracking branch 'martin/ofdm-fixes', git, 2014/01/16
- [Commit-gnuradio] [gnuradio] 01/12: Support for >1 jack port, various sizes of jack buffers, git, 2014/01/16
- [Commit-gnuradio] [gnuradio] 03/12: digital: allow to change packet length tag name in GRC, git, 2014/01/16
- [Commit-gnuradio] [gnuradio] 08/12: volk: add 32fc_32f_multiply_32fc_a_avx protokernel,
git <=
- [Commit-gnuradio] [gnuradio] 07/12: volk: add 32fc_32f_dot_prod_32fc u/a_avx protokernel, git, 2014/01/16
- [Commit-gnuradio] [gnuradio] 05/12: modtool: linkage info for OSX, git, 2014/01/16
- [Commit-gnuradio] [gnuradio] 11/12: Merge remote-tracking branch 'martin/modtool_osx', git, 2014/01/16
- [Commit-gnuradio] [gnuradio] 02/12: digital: propagate tags to packet_header, git, 2014/01/16
- [Commit-gnuradio] [gnuradio] 10/12: Merge remote-tracking branch 'nwest/volk_avx', git, 2014/01/16