summaryrefslogtreecommitdiff
path: root/third_party/webrtc/src/webrtc/common_audio/signal_processing/cross_correlation_neon.c
diff options
context:
space:
mode:
Diffstat (limited to 'third_party/webrtc/src/webrtc/common_audio/signal_processing/cross_correlation_neon.c')
-rw-r--r-- third_party/webrtc/src/webrtc/common_audio/signal_processing/cross_correlation_neon.c | 87
1 files changed, 87 insertions, 0 deletions
diff --git a/third_party/webrtc/src/webrtc/common_audio/signal_processing/cross_correlation_neon.c b/third_party/webrtc/src/webrtc/common_audio/signal_processing/cross_correlation_neon.c
new file mode 100644
index 00000000..918b6715
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/cross_correlation_neon.c
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+
+#include <arm_neon.h>
+
+/*
+ * Dot product of two int16 vectors with a final right shift.
+ *
+ * Sums vector1[k] * vector2[k] for k in [0, length) in 64-bit accumulators,
+ * shifts the total right by |scaling| bits and stores 32 bits of the result
+ * in |*cross_correlation|.
+ *
+ * Both input pointers are read for exactly |length| samples.
+ */
+static inline void DotProductWithScaleNeon(int32_t* cross_correlation,
+                                           const int16_t* vector1,
+                                           const int16_t* vector2,
+                                           size_t length,
+                                           int scaling) {
+  const size_t num_blocks = length >> 3;  // Complete groups of 8 samples.
+  const size_t num_tail = length & 7;     // Samples left after the groups.
+  int64x2_t acc_low = vdupq_n_s64(0);
+  int64x2_t acc_high = vdupq_n_s64(0);
+  int64_t tail_sum = 0;
+
+  // Vector part: 8 samples per pass. The 16x16 products are widened to
+  // 32 bits and then pairwise-accumulated into the 64-bit lanes.
+  for (size_t block = 0; block < num_blocks; ++block) {
+    const int16x8_t a = vld1q_s16(vector1);
+    const int16x8_t b = vld1q_s16(vector2);
+    const int32x4_t prod_low = vmull_s16(vget_low_s16(a), vget_low_s16(b));
+#if defined(WEBRTC_ARCH_ARM64)
+    const int32x4_t prod_high = vmull_high_s16(a, b);
+#else
+    const int32x4_t prod_high = vmull_s16(vget_high_s16(a), vget_high_s16(b));
+#endif
+
+    acc_low = vpadalq_s32(acc_low, prod_low);
+    acc_high = vpadalq_s32(acc_high, prod_high);
+    vector1 += 8;
+    vector2 += 8;
+  }
+
+  // Scalar part: the remaining 0..7 samples.
+  for (size_t k = 0; k < num_tail; ++k) {
+    tail_sum += WEBRTC_SPL_MUL_16_16(vector1[k], vector2[k]);
+  }
+
+  // Fold both accumulators together, add the scalar tail, then scale.
+  acc_low = vaddq_s64(acc_low, acc_high);
+#if defined(WEBRTC_ARCH_ARM64)
+  const int64_t total = vaddvq_s64(acc_low) + tail_sum;
+  *cross_correlation = (int32_t)(total >> scaling);
+#else
+  // Without an across-vector add, sum the two 64-bit halves by hand.
+  int64x1_t total = vadd_s64(vget_low_s64(acc_low), vget_high_s64(acc_low));
+  total = vadd_s64(total, vdup_n_s64(tail_sum));
+  // VSHL with a negative count shifts right, hence the negated |scaling|.
+  total = vshl_s64(total, vdup_n_s64(-scaling));
+  // Store lane 0 of the 64-bit result viewed as two 32-bit lanes.
+  vst1_lane_s32(cross_correlation, vreinterpret_s32_s64(total), 0);
+#endif
+}
+
+/*
+ * NEON version of WebRtcSpl_CrossCorrelation() for ARM32/64 platforms.
+ *
+ * For every lag i in [0, dim_cross_correlation) this writes to
+ * cross_correlation[i] the scaled dot product of the |dim_seq| samples at
+ * |seq1| with the |dim_seq| samples starting at seq2 + i * step_seq2.
+ * Scaling is a right shift by |right_shifts|, applied by
+ * DotProductWithScaleNeon().
+ *
+ * |step_seq2| is signed: a negative value walks |seq2| backwards from its
+ * starting position.
+ */
+void WebRtcSpl_CrossCorrelationNeon(int32_t* cross_correlation,
+                                    const int16_t* seq1,
+                                    const int16_t* seq2,
+                                    size_t dim_seq,
+                                    size_t dim_cross_correlation,
+                                    int right_shifts,
+                                    int step_seq2) {
+  const int16_t* seq2_ptr = seq2;
+  size_t i = 0;
+
+  for (i = 0; i < dim_cross_correlation; i++) {
+    DotProductWithScaleNeon(&cross_correlation[i],
+                            seq1,
+                            seq2_ptr,
+                            dim_seq,
+                            right_shifts);
+
+    // Step with signed pointer arithmetic. Computing step_seq2 * i instead
+    // would convert a negative step to size_t and yield a huge unsigned
+    // offset that only reaches the right address through wraparound.
+    // The step is skipped after the last lag so that no pointer outside the
+    // range actually read is ever formed.
+    if (i + 1 < dim_cross_correlation) {
+      seq2_ptr += step_seq2;
+    }
+  }
+}