[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Commit-gnuradio] [gnuradio] 01/03: volk: adding a binary slicer kernel
From: |
git |
Subject: |
[Commit-gnuradio] [gnuradio] 01/03: volk: adding a binary slicer kernel |
Date: |
Fri, 13 Jun 2014 23:52:13 +0000 (UTC) |
This is an automated email from the git hooks/post-receive script.
jcorgan pushed a commit to branch master
in repository gnuradio.
commit b2370cf57999815c0fcd6fd7a10217b4607e3be6
Author: Nathan West <address@hidden>
Date: Mon Jun 9 22:53:50 2014 -0400
volk: adding a binary slicer kernel
---
volk/apps/volk_profile.cc | 1 +
volk/kernels/volk/volk_32f_binary_slicer_32i.h | 221 +++++++++++++++++++++++++
volk/lib/testqa.cc | 1 +
3 files changed, 223 insertions(+)
diff --git a/volk/apps/volk_profile.cc b/volk/apps/volk_profile.cc
index 97b0a5b..10426e2 100644
--- a/volk/apps/volk_profile.cc
+++ b/volk/apps/volk_profile.cc
@@ -153,6 +153,7 @@ int main(int argc, char *argv[]) {
//VOLK_PROFILE(volk_32fc_s32fc_multiply_32fc, 1e-4, lv_32fc_t(1.0, 0.5),
204602, 1000, &results, benchmark_mode, kernel_regex);
VOLK_PROFILE(volk_32fc_s32fc_multiply_32fc, 1e-4, 0, 204602, 1000,
&results, benchmark_mode, kernel_regex);
VOLK_PROFILE(volk_32f_s32f_multiply_32f, 1e-4, 1.0, 204602, 10000,
&results, benchmark_mode, kernel_regex);
+ VOLK_PROFILE(volk_32f_binary_slicer_32i, 0, 1.0, 204602, 10000, &results,
benchmark_mode, kernel_regex);
// Until we can update the config on a kernel by kernel basis
// do not overwrite volk_config when using a regex.
diff --git a/volk/kernels/volk/volk_32f_binary_slicer_32i.h
b/volk/kernels/volk/volk_32f_binary_slicer_32i.h
new file mode 100644
index 0000000..6444897
--- /dev/null
+++ b/volk/kernels/volk/volk_32f_binary_slicer_32i.h
@@ -0,0 +1,221 @@
+#ifndef INCLUDED_volk_32f_binary_slicer_32f_H
+#define INCLUDED_volk_32f_binary_slicer_32f_H
+
+
+#ifdef LV_HAVE_GENERIC
+/*!
+  \brief Slices floats into bits: writes integer 1 for every input that is greater than or equal to 0, and 0 otherwise
+  \param cVector The int output (each element is either 0 or 1)
+  \param aVector The float input
+  \param num_points The number of values read from aVector and stored into cVector
+*/
+static inline void volk_32f_binary_slicer_32i_generic(int* cVector, const float* aVector, unsigned int num_points){
+  unsigned int idx;
+
+  /* Inputs for which the >= comparison is false (negative values, NaN) map to 0. */
+  for(idx = 0; idx < num_points; idx++){
+    cVector[idx] = (aVector[idx] >= 0) ? 1 : 0;
+  }
+}
+#endif /* LV_HAVE_GENERIC */
+
+#ifdef LV_HAVE_SSE2
+#include <emmintrin.h>
+/*!
+  \brief Returns integer 1 if float input is greater than or equal to 0, 0 otherwise (SSE2, aligned access)
+  \param cVector The int output (either 0 or 1); written with aligned 128-bit stores, so it must be 16-byte aligned
+  \param aVector The float input; read with aligned 128-bit loads, so it must be 16-byte aligned
+  \param num_points The number of values in aVector and stored into cVector
+*/
+static inline void volk_32f_binary_slicer_32i_a_sse2(int* cVector, const float* aVector, unsigned int num_points){
+  int* cPtr = cVector;
+  const float* aPtr = aVector;
+  unsigned int number = 0;
+
+  unsigned int quarter_points = num_points / 4;
+  __m128 a_val, res_f;
+  __m128i res_i, binary_i;
+  __m128 zero_val;
+  zero_val = _mm_set1_ps (0.0f);
+
+  /* Vector body: four samples per iteration. */
+  for(number = 0; number < quarter_points; number++){
+    a_val = _mm_load_ps(aPtr);
+
+    /* Each lane becomes all ones where the input is >= 0 and all zeros elsewhere. */
+    res_f = _mm_cmpge_ps (a_val, zero_val);
+    /* Reinterpret the mask bits as integers (no numeric conversion), then move
+       the top bit down so an all-ones lane becomes 1 and a zero lane stays 0. */
+    res_i = _mm_castps_si128 (res_f);
+    binary_i = _mm_srli_epi32 (res_i, 31);
+
+    _mm_store_si128((__m128i*)cPtr, binary_i);
+
+    cPtr += 4;
+    aPtr += 4;
+  }
+
+  /* Scalar tail: the remaining num_points % 4 samples. */
+  for(number = quarter_points * 4; number < num_points; number++){
+    if( *aPtr++ >= 0) {
+      *cPtr++ = 1;
+    }
+    else {
+      *cPtr++ = 0;
+    }
+  }
+}
+#endif /* LV_HAVE_SSE2 */
+
+
+#ifdef LV_HAVE_AVX
+#include <immintrin.h>
+/*!
+  \brief Returns integer 1 if float input is greater than or equal to 0, 0 otherwise (AVX, aligned access)
+  \param cVector The int output (either 0 or 1); written with aligned 256-bit stores, so it must be 32-byte aligned
+  \param aVector The float input; read with aligned 256-bit loads, so it must be 32-byte aligned
+  \param num_points The number of values in aVector and stored into cVector
+*/
+static inline void volk_32f_binary_slicer_32i_a_avx(int* cVector, const float* aVector, unsigned int num_points){
+  int* cPtr = cVector;
+  const float* aPtr = aVector;
+  unsigned int number = 0;
+
+  unsigned int eighth_points = num_points / 8;
+  __m256 a_val, res_f, binary_f;
+  __m256i binary_i;
+  __m256 zero_val, one_val;
+  zero_val = _mm256_set1_ps (0.0f);
+  one_val = _mm256_set1_ps (1.0f);
+
+  /* Vector body: eight samples per iteration. */
+  for(number = 0; number < eighth_points; number++){
+    a_val = _mm256_load_ps(aPtr);
+
+    /* _CMP_GE_OS is comparison predicate 13 (ordered greater-than-or-equal):
+       lanes that pass become all ones, all other lanes become all zeros. */
+    res_f = _mm256_cmp_ps (a_val, zero_val, _CMP_GE_OS);
+    /* ANDing the mask with 1.0f leaves 1.0f in passing lanes and 0.0f elsewhere;
+       the float-to-int conversion then yields exactly 1 or 0. */
+    binary_f = _mm256_and_ps (res_f, one_val);
+    binary_i = _mm256_cvtps_epi32(binary_f);
+
+    _mm256_store_si256((__m256i *)cPtr, binary_i);
+
+    cPtr += 8;
+    aPtr += 8;
+  }
+
+  /* Scalar tail: the remaining num_points % 8 samples. */
+  for(number = eighth_points * 8; number < num_points; number++){
+    if( *aPtr++ >= 0) {
+      *cPtr++ = 1;
+    }
+    else {
+      *cPtr++ = 0;
+    }
+  }
+}
+#endif /* LV_HAVE_AVX */
+
+
+#ifdef LV_HAVE_SSE2
+#include <emmintrin.h>
+/*!
+  \brief Returns integer 1 if float input is greater than or equal to 0, 0 otherwise (SSE2, unaligned access)
+  \param cVector The int output (either 0 or 1); written with unaligned 128-bit stores
+  \param aVector The float input; read with unaligned 128-bit loads
+  \param num_points The number of values in aVector and stored into cVector
+*/
+static inline void volk_32f_binary_slicer_32i_u_sse2(int* cVector, const float* aVector, unsigned int num_points){
+  int* cPtr = cVector;
+  const float* aPtr = aVector;
+  unsigned int number = 0;
+
+  unsigned int quarter_points = num_points / 4;
+  __m128 a_val, res_f;
+  __m128i res_i, binary_i;
+  __m128 zero_val;
+  zero_val = _mm_set1_ps (0.0f);
+
+  /* Vector body: four samples per iteration. */
+  for(number = 0; number < quarter_points; number++){
+    a_val = _mm_loadu_ps(aPtr);
+
+    /* Each lane becomes all ones where the input is >= 0 and all zeros elsewhere. */
+    res_f = _mm_cmpge_ps (a_val, zero_val);
+    /* Reinterpret the mask bits as integers (no numeric conversion), then move
+       the top bit down so an all-ones lane becomes 1 and a zero lane stays 0. */
+    res_i = _mm_castps_si128 (res_f);
+    binary_i = _mm_srli_epi32 (res_i, 31);
+
+    _mm_storeu_si128((__m128i*)cPtr, binary_i);
+
+    cPtr += 4;
+    aPtr += 4;
+  }
+
+  /* Scalar tail: the remaining num_points % 4 samples. */
+  for(number = quarter_points * 4; number < num_points; number++){
+    if( *aPtr++ >= 0) {
+      *cPtr++ = 1;
+    }
+    else {
+      *cPtr++ = 0;
+    }
+  }
+}
+#endif /* LV_HAVE_SSE2 */
+
+
+#ifdef LV_HAVE_AVX
+#include <immintrin.h>
+/*!
+  \brief Returns integer 1 if float input is greater than or equal to 0, 0 otherwise (AVX, unaligned access)
+  \param cVector The int output (either 0 or 1); written with unaligned 256-bit stores
+  \param aVector The float input; read with unaligned 256-bit loads
+  \param num_points The number of values in aVector and stored into cVector
+*/
+static inline void volk_32f_binary_slicer_32i_u_avx(int* cVector, const float* aVector, unsigned int num_points){
+  int* cPtr = cVector;
+  const float* aPtr = aVector;
+  unsigned int number = 0;
+
+  unsigned int eighth_points = num_points / 8;
+  __m256 a_val, res_f, binary_f;
+  __m256i binary_i;
+  __m256 zero_val, one_val;
+  zero_val = _mm256_set1_ps (0.0f);
+  one_val = _mm256_set1_ps (1.0f);
+
+  /* Vector body: eight samples per iteration. */
+  for(number = 0; number < eighth_points; number++){
+    a_val = _mm256_loadu_ps(aPtr);
+
+    /* _CMP_GE_OS is comparison predicate 13 (ordered greater-than-or-equal):
+       lanes that pass become all ones, all other lanes become all zeros. */
+    res_f = _mm256_cmp_ps (a_val, zero_val, _CMP_GE_OS);
+    /* ANDing the mask with 1.0f leaves 1.0f in passing lanes and 0.0f elsewhere;
+       the float-to-int conversion then yields exactly 1 or 0. */
+    binary_f = _mm256_and_ps (res_f, one_val);
+    binary_i = _mm256_cvtps_epi32(binary_f);
+
+    _mm256_storeu_si256((__m256i*)cPtr, binary_i);
+
+    cPtr += 8;
+    aPtr += 8;
+  }
+
+  /* Scalar tail: the remaining num_points % 8 samples. */
+  for(number = eighth_points * 8; number < num_points; number++){
+    if( *aPtr++ >= 0) {
+      *cPtr++ = 1;
+    }
+    else {
+      *cPtr++ = 0;
+    }
+  }
+}
+#endif /* LV_HAVE_AVX */
+
+
+
+#endif /* INCLUDED_volk_32f_binary_slicer_32f_H */
diff --git a/volk/lib/testqa.cc b/volk/lib/testqa.cc
index f97a646..41093d2 100644
--- a/volk/lib/testqa.cc
+++ b/volk/lib/testqa.cc
@@ -89,3 +89,4 @@ VOLK_RUN_TESTS(volk_32fc_s32fc_multiply_32fc, 1e-4, 0, 20462,
1);
VOLK_RUN_TESTS(volk_32f_s32f_multiply_32f, 1e-4, 0, 20462, 1);
VOLK_RUN_TESTS(volk_32fc_s32fc_rotatorpuppet_32fc, 1e-3,
(lv_32fc_t)lv_cmake(0.953939201, 0.3), 20462, 1);
VOLK_RUN_TESTS(volk_32f_invsqrt_32f, 1e-2, 0, 20462, 1);
+VOLK_RUN_TESTS(volk_32f_binary_slicer_32i, 0, 0, 20462, 1);