summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rwxr-xr-xaconfigure236
-rw-r--r--aconfigure.ac155
-rw-r--r--build.mak.in14
-rwxr-xr-xconfigure-android1
-rw-r--r--pjmedia/build/os-auto.mak.in24
-rw-r--r--third_party/build/os-auto.mak.in40
-rw-r--r--third_party/build/webrtc/Makefile100
-rw-r--r--third_party/build/webrtc/notes.txt2
-rw-r--r--third_party/webrtc/src/webrtc/common_audio/fft4g.c1332
-rw-r--r--third_party/webrtc/src/webrtc/common_audio/fft4g.h25
-rw-r--r--third_party/webrtc/src/webrtc/common_audio/ring_buffer.c247
-rw-r--r--third_party/webrtc/src/webrtc/common_audio/ring_buffer.h66
-rw-r--r--third_party/webrtc/src/webrtc/common_audio/signal_processing/auto_corr_to_refl_coef.c103
-rw-r--r--third_party/webrtc/src/webrtc/common_audio/signal_processing/auto_correlation.c65
-rw-r--r--third_party/webrtc/src/webrtc/common_audio/signal_processing/complex_bit_reverse.c108
-rw-r--r--third_party/webrtc/src/webrtc/common_audio/signal_processing/complex_bit_reverse_arm.S119
-rw-r--r--third_party/webrtc/src/webrtc/common_audio/signal_processing/complex_bit_reverse_mips.c176
-rw-r--r--third_party/webrtc/src/webrtc/common_audio/signal_processing/complex_fft.c298
-rw-r--r--third_party/webrtc/src/webrtc/common_audio/signal_processing/complex_fft_mips.c328
-rw-r--r--third_party/webrtc/src/webrtc/common_audio/signal_processing/complex_fft_tables.h148
-rw-r--r--third_party/webrtc/src/webrtc/common_audio/signal_processing/copy_set_operations.c82
-rw-r--r--third_party/webrtc/src/webrtc/common_audio/signal_processing/cross_correlation.c30
-rw-r--r--third_party/webrtc/src/webrtc/common_audio/signal_processing/cross_correlation_mips.c104
-rw-r--r--third_party/webrtc/src/webrtc/common_audio/signal_processing/cross_correlation_neon.c87
-rw-r--r--third_party/webrtc/src/webrtc/common_audio/signal_processing/division_operations.c138
-rw-r--r--third_party/webrtc/src/webrtc/common_audio/signal_processing/dot_product_with_scale.c32
-rw-r--r--third_party/webrtc/src/webrtc/common_audio/signal_processing/downsample_fast.c48
-rw-r--r--third_party/webrtc/src/webrtc/common_audio/signal_processing/downsample_fast_mips.c169
-rw-r--r--third_party/webrtc/src/webrtc/common_audio/signal_processing/downsample_fast_neon.c217
-rw-r--r--third_party/webrtc/src/webrtc/common_audio/signal_processing/energy.c39
-rw-r--r--third_party/webrtc/src/webrtc/common_audio/signal_processing/filter_ar.c89
-rw-r--r--third_party/webrtc/src/webrtc/common_audio/signal_processing/filter_ar_fast_q12.c42
-rw-r--r--third_party/webrtc/src/webrtc/common_audio/signal_processing/filter_ar_fast_q12_armv7.S218
-rw-r--r--third_party/webrtc/src/webrtc/common_audio/signal_processing/filter_ar_fast_q12_mips.c140
-rw-r--r--third_party/webrtc/src/webrtc/common_audio/signal_processing/filter_ma_fast_q12.c45
-rw-r--r--third_party/webrtc/src/webrtc/common_audio/signal_processing/get_hanning_window.c77
-rw-r--r--third_party/webrtc/src/webrtc/common_audio/signal_processing/get_scaling_square.c46
-rw-r--r--third_party/webrtc/src/webrtc/common_audio/signal_processing/ilbc_specific_functions.c90
-rw-r--r--third_party/webrtc/src/webrtc/common_audio/signal_processing/include/real_fft.h97
-rw-r--r--third_party/webrtc/src/webrtc/common_audio/signal_processing/include/signal_processing_library.h1645
-rw-r--r--third_party/webrtc/src/webrtc/common_audio/signal_processing/include/spl_inl.h173
-rw-r--r--third_party/webrtc/src/webrtc/common_audio/signal_processing/include/spl_inl_armv7.h136
-rw-r--r--third_party/webrtc/src/webrtc/common_audio/signal_processing/include/spl_inl_mips.h225
-rw-r--r--third_party/webrtc/src/webrtc/common_audio/signal_processing/levinson_durbin.c246
-rw-r--r--third_party/webrtc/src/webrtc/common_audio/signal_processing/lpc_to_refl_coef.c56
-rw-r--r--third_party/webrtc/src/webrtc/common_audio/signal_processing/min_max_operations.c224
-rw-r--r--third_party/webrtc/src/webrtc/common_audio/signal_processing/min_max_operations_mips.c376
-rw-r--r--third_party/webrtc/src/webrtc/common_audio/signal_processing/min_max_operations_neon.c283
-rw-r--r--third_party/webrtc/src/webrtc/common_audio/signal_processing/randomization_functions.c115
-rw-r--r--third_party/webrtc/src/webrtc/common_audio/signal_processing/real_fft.c102
-rw-r--r--third_party/webrtc/src/webrtc/common_audio/signal_processing/real_fft_unittest.cc108
-rw-r--r--third_party/webrtc/src/webrtc/common_audio/signal_processing/refl_coef_to_lpc.c59
-rw-r--r--third_party/webrtc/src/webrtc/common_audio/signal_processing/resample.c505
-rw-r--r--third_party/webrtc/src/webrtc/common_audio/signal_processing/resample_48khz.c186
-rw-r--r--third_party/webrtc/src/webrtc/common_audio/signal_processing/resample_by_2.c183
-rw-r--r--third_party/webrtc/src/webrtc/common_audio/signal_processing/resample_by_2_internal.c679
-rw-r--r--third_party/webrtc/src/webrtc/common_audio/signal_processing/resample_by_2_internal.h47
-rw-r--r--third_party/webrtc/src/webrtc/common_audio/signal_processing/resample_by_2_mips.c290
-rw-r--r--third_party/webrtc/src/webrtc/common_audio/signal_processing/resample_fractional.c239
-rw-r--r--third_party/webrtc/src/webrtc/common_audio/signal_processing/signal_processing_unittest.cc579
-rw-r--r--third_party/webrtc/src/webrtc/common_audio/signal_processing/spl_init.c140
-rw-r--r--third_party/webrtc/src/webrtc/common_audio/signal_processing/spl_sqrt.c184
-rw-r--r--third_party/webrtc/src/webrtc/common_audio/signal_processing/spl_sqrt_floor.c77
-rw-r--r--third_party/webrtc/src/webrtc/common_audio/signal_processing/spl_sqrt_floor_arm.S110
-rw-r--r--third_party/webrtc/src/webrtc/common_audio/signal_processing/spl_sqrt_floor_mips.c207
-rw-r--r--third_party/webrtc/src/webrtc/common_audio/signal_processing/splitting_filter.c208
-rw-r--r--third_party/webrtc/src/webrtc/common_audio/signal_processing/sqrt_of_one_minus_x_squared.c35
-rw-r--r--third_party/webrtc/src/webrtc/common_audio/signal_processing/vector_scaling_operations.c165
-rw-r--r--third_party/webrtc/src/webrtc/common_audio/signal_processing/vector_scaling_operations_mips.c57
-rw-r--r--third_party/webrtc/src/webrtc/common_audio/wav_file.h115
-rw-r--r--third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_common.h32
-rw-r--r--third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_core.c1929
-rw-r--r--third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_core.h129
-rw-r--r--third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_core_internal.h202
-rw-r--r--third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_core_mips.c774
-rw-r--r--third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_core_neon.c736
-rw-r--r--third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_core_sse2.c731
-rw-r--r--third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_rdft.c589
-rw-r--r--third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_rdft.h61
-rw-r--r--third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_rdft_mips.c1187
-rw-r--r--third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_rdft_neon.c355
-rw-r--r--third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_rdft_sse2.c427
-rw-r--r--third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_resampler.c209
-rw-r--r--third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_resampler.h39
-rw-r--r--third_party/webrtc/src/webrtc/modules/audio_processing/aec/echo_cancellation.c923
-rw-r--r--third_party/webrtc/src/webrtc/modules/audio_processing/aec/echo_cancellation_internal.h67
-rw-r--r--third_party/webrtc/src/webrtc/modules/audio_processing/aec/echo_cancellation_unittest.cc48
-rw-r--r--third_party/webrtc/src/webrtc/modules/audio_processing/aec/include/echo_cancellation.h245
-rw-r--r--third_party/webrtc/src/webrtc/modules/audio_processing/aec/system_delay_unittest.cc602
-rw-r--r--third_party/webrtc/src/webrtc/modules/audio_processing/aecm/aecm_core.c1233
-rw-r--r--third_party/webrtc/src/webrtc/modules/audio_processing/aecm/aecm_core.h434
-rw-r--r--third_party/webrtc/src/webrtc/modules/audio_processing/aecm/aecm_core_c.c771
-rw-r--r--third_party/webrtc/src/webrtc/modules/audio_processing/aecm/aecm_core_mips.c1566
-rw-r--r--third_party/webrtc/src/webrtc/modules/audio_processing/aecm/aecm_core_neon.c212
-rw-r--r--third_party/webrtc/src/webrtc/modules/audio_processing/aecm/aecm_defines.h87
-rw-r--r--third_party/webrtc/src/webrtc/modules/audio_processing/aecm/echo_control_mobile.c702
-rw-r--r--third_party/webrtc/src/webrtc/modules/audio_processing/aecm/include/echo_control_mobile.h218
-rw-r--r--third_party/webrtc/src/webrtc/modules/audio_processing/logging/aec_logging.h86
-rw-r--r--third_party/webrtc/src/webrtc/modules/audio_processing/logging/aec_logging_file_handling.cc57
-rw-r--r--third_party/webrtc/src/webrtc/modules/audio_processing/logging/aec_logging_file_handling.h41
-rw-r--r--third_party/webrtc/src/webrtc/modules/audio_processing/ns/defines.h49
-rw-r--r--third_party/webrtc/src/webrtc/modules/audio_processing/ns/include/noise_suppression.h116
-rw-r--r--third_party/webrtc/src/webrtc/modules/audio_processing/ns/include/noise_suppression_x.h88
-rw-r--r--third_party/webrtc/src/webrtc/modules/audio_processing/ns/noise_suppression.c59
-rw-r--r--third_party/webrtc/src/webrtc/modules/audio_processing/ns/noise_suppression_x.c46
-rw-r--r--third_party/webrtc/src/webrtc/modules/audio_processing/ns/ns_core.c1416
-rw-r--r--third_party/webrtc/src/webrtc/modules/audio_processing/ns/ns_core.h190
-rw-r--r--third_party/webrtc/src/webrtc/modules/audio_processing/ns/nsx_core.c2112
-rw-r--r--third_party/webrtc/src/webrtc/modules/audio_processing/ns/nsx_core.h263
-rw-r--r--third_party/webrtc/src/webrtc/modules/audio_processing/ns/nsx_core_c.c261
-rw-r--r--third_party/webrtc/src/webrtc/modules/audio_processing/ns/nsx_core_mips.c1002
-rw-r--r--third_party/webrtc/src/webrtc/modules/audio_processing/ns/nsx_core_neon.c598
-rw-r--r--third_party/webrtc/src/webrtc/modules/audio_processing/ns/nsx_defines.h64
-rw-r--r--third_party/webrtc/src/webrtc/modules/audio_processing/ns/windows_private.h574
-rw-r--r--third_party/webrtc/src/webrtc/modules/audio_processing/utility/delay_estimator.c684
-rw-r--r--third_party/webrtc/src/webrtc/modules/audio_processing/utility/delay_estimator.h251
-rw-r--r--third_party/webrtc/src/webrtc/modules/audio_processing/utility/delay_estimator_internal.h48
-rw-r--r--third_party/webrtc/src/webrtc/modules/audio_processing/utility/delay_estimator_unittest.cc621
-rw-r--r--third_party/webrtc/src/webrtc/modules/audio_processing/utility/delay_estimator_wrapper.c485
-rw-r--r--third_party/webrtc/src/webrtc/modules/audio_processing/utility/delay_estimator_wrapper.h244
-rw-r--r--third_party/webrtc/src/webrtc/system_wrappers/interface/compile_assert_c.h24
-rw-r--r--third_party/webrtc/src/webrtc/system_wrappers/interface/cpu_features_wrapper.h51
-rw-r--r--third_party/webrtc/src/webrtc/system_wrappers/source/cpu_features.cc72
-rw-r--r--third_party/webrtc/src/webrtc/system_wrappers/source/cpu_features_android.c15
-rw-r--r--third_party/webrtc/src/webrtc/typedefs.h122
125 files changed, 37073 insertions, 205 deletions
diff --git a/aconfigure b/aconfigure
index bf630a18..d9fe9292 100755
--- a/aconfigure
+++ b/aconfigure
@@ -622,6 +622,10 @@ ac_subst_vars='LTLIBOBJS
LIBOBJS
ac_main_obj
ac_host
+ac_webrtc_ldflags
+ac_webrtc_cflags
+ac_webrtc_instset
+ac_no_webrtc
ac_no_yuv
opus_present
opus_h_present
@@ -642,8 +646,6 @@ libssl_present
openssl_h_present
ac_ssl_has_aes_gcm
ac_no_ssl
-ac_webrtc_ldflags
-ac_webrtc_cflags
ac_openh264_ldflags
ac_openh264_cflags
ac_v4l2_ldflags
@@ -682,6 +684,7 @@ ac_pa_cflags
ac_external_pa
ac_pjmedia_snd
ac_pjmedia_resample
+ac_external_webrtc
ac_external_yuv
ac_srtp_shutdown_present
ac_srtp_deinit_present
@@ -781,6 +784,7 @@ with_external_speex
with_external_gsm
with_external_srtp
with_external_yuv
+with_external_webrtc
enable_resample
enable_sound
with_external_pa
@@ -806,8 +810,6 @@ enable_ffmpeg
enable_v4l2
with_openh264
enable_openh264
-with_webrtc
-enable_webrtc
enable_ipp
with_ipp
with_ipp_samples
@@ -823,6 +825,7 @@ enable_silk
with_opus
enable_opus
enable_libyuv
+enable_libwebrtc
'
ac_precious_vars='build_alias
host_alias
@@ -1475,7 +1478,6 @@ Optional Features:
--disable-ffmpeg Disable ffmpeg (default: not disabled)
--disable-v4l2 Disable Video4Linux2 (default: not disabled)
--disable-openh264 Disable OpenH264 (default: not disabled)
- --disable-webrtc Exclude webrtc in the build
--enable-ipp Enable Intel IPP support. Specify the Intel IPP
package and samples location using IPPROOT and
IPPSAMPLES env var or with --with-ipp and
@@ -1492,6 +1494,7 @@ Optional Features:
autodetect)
--disable-libyuv Exclude libyuv in the build
+ --disable-libwebrtc Exclude libwebrtc in the build
Optional Packages:
--with-PACKAGE[=ARG] use PACKAGE [ARG=yes]
@@ -1516,6 +1519,11 @@ Optional Packages:
make sure that libyuv is accessible to use (hint:
use CFLAGS and LDFLAGS env var to set the
include/lib paths)
+ --with-external-webrtc Use external webrtc development files, not the one
+ in "third_party" directory. When this option is set,
+ make sure that webrtc is accessible to use (hint:
+ use CFLAGS and LDFLAGS env var to set the
+ include/lib paths)
--with-external-pa Use external PortAudio development files, not the
one in "third_party" directory. When this option is
set, make sure that PortAudio is accessible to use
@@ -1524,7 +1532,6 @@ Optional Packages:
--with-sdl=DIR Specify alternate libSDL prefix
--with-ffmpeg=DIR Specify alternate FFMPEG prefix
--with-openh264=DIR Specify alternate OpenH264 prefix
- --with-webrtc=DIR Specify alternate WebRtc prefix
--with-ipp=DIR Specify the Intel IPP location
--with-ipp-samples=DIR Specify the Intel IPP samples location
--with-ipp-arch=ARCH Specify the Intel IPP ARCH suffix, e.g. "64" or
@@ -6117,6 +6124,45 @@ fi
+ac_external_webrtc=0
+
+
+# Check whether --with-external-webrtc was given.
+if test "${with_external_webrtc+set}" = set; then :
+ withval=$with_external_webrtc;
+ if test "x$with_external_webrtc" != "xno"; then
+ # Test webrtc installation
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking if external webrtc devkit is installed" >&5
+$as_echo_n "checking if external webrtc devkit is installed... " >&6; }
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+#include <webrtc/modules/audio_processing/aec/aec_core.h>
+ #include <webrtc/modules/audio_processing/aec/include/echo_cancellation.h>
+
+int
+main ()
+{
+WebRtcAec_Create();
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes!!" >&5
+$as_echo "yes!!" >&6; }
+ ac_external_webrtc="1"
+
+else
+ as_fn_error $? "Unable to use external webrtc. If webrtc development files are not available in the default locations, use CFLAGS and LDFLAGS env var to set the include/lib paths" "$LINENO" 5
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+ fi
+
+
+fi
+
+
+
ac_pjmedia_resample=libresample
# Check whether --enable-resample was given.
@@ -7401,123 +7447,6 @@ fi
-
-# Check whether --with-webrtc was given.
-if test "${with_webrtc+set}" = set; then :
- withval=$with_webrtc;
-else
- with_webrtc=no
-
-fi
-
-
-if test "x$ac_cross_compile" != "x" -a "x$with_webrtc" = "xno"; then
- enable_webrtc=no
-fi
-
-
-
-# Check whether --enable-webrtc was given.
-if test "${enable_webrtc+set}" = set; then :
- enableval=$enable_webrtc; if test "$enable_webrtc" = "no"; then
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: Checking if webrtc is disabled...yes" >&5
-$as_echo "Checking if webrtc is disabled...yes" >&6; }
- fi
-else
-
- if test "x$with_webrtc" != "xno" -a "x$with_webrtc" != "x"; then
- WEBRTC_PREFIX=$with_webrtc
- WEBRTC_CFLAGS="-I$WEBRTC_PREFIX/src"
-
- case $target in
- *-apple-darwin_ios*)
- case $ARCH in
- *arm*)
- WEBRTC_CFLAGS="-DPJMEDIA_WEBRTC_AEC_USE_MOBILE=1 $WEBRTC_CFLAGS"
- WEBRTC_LDFLAGS="-L$WEBRTC_PREFIX/src/out_ios/Release-iphoneos"
- WEBRTC_LIBS="-laudio_processing_neon -lcommon_audio_neon"
- ;;
- *)
- ;;
- esac
- ;;
- *mingw* | *cygw* | *win32* | *w32* | *darwin* | *linux* | *android*)
- WEBRTC_LDFLAGS="-L$WEBRTC_PREFIX/src/out/Release"
- WEBRTC_LIBS="-laudio_processing_sse2"
- ;;
- *)
- ;;
- esac
-
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: Using webrtc prefix... $with_webrtc" >&5
-$as_echo "Using webrtc prefix... $with_webrtc" >&6; }
- else
- WEBRTC_CFLAGS=""
- WEBRTC_LDFLAGS=""
- fi
-
- WEBRTC_LIBS="$WEBRTC_LIBS -laudio_processing -lcommon_audio -lsystem_wrappers"
-
- SAVED_LIBS="$LIBS"
- SAVED_LDFLAGS="$LDFLAGS"
- SAVED_CFLAGS="$CFLAGS"
-
- LIBS="$WEBRTC_LIBS $LIBS"
- LDFLAGS="$WEBRTC_LDFLAGS $LDFLAGS"
- CFLAGS="$WEBRTC_CFLAGS $CFLAGS"
-
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking for WebRtcAec_Process in -laudio_processing" >&5
-$as_echo_n "checking for WebRtcAec_Process in -laudio_processing... " >&6; }
-if ${ac_cv_lib_audio_processing_WebRtcAec_Process+:} false; then :
- $as_echo_n "(cached) " >&6
-else
- ac_check_lib_save_LIBS=$LIBS
-LIBS="-laudio_processing
- $LIBS"
-cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h. */
-
-/* Override any GCC internal prototype to avoid an error.
- Use char because int might match the return type of a GCC
- builtin and then its argument prototype would still apply. */
-#ifdef __cplusplus
-extern "C"
-#endif
-char WebRtcAec_Process ();
-int
-main ()
-{
-return WebRtcAec_Process ();
- ;
- return 0;
-}
-_ACEOF
-if ac_fn_c_try_link "$LINENO"; then :
- ac_cv_lib_audio_processing_WebRtcAec_Process=yes
-else
- ac_cv_lib_audio_processing_WebRtcAec_Process=no
-fi
-rm -f core conftest.err conftest.$ac_objext \
- conftest$ac_exeext conftest.$ac_ext
-LIBS=$ac_check_lib_save_LIBS
-fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_audio_processing_WebRtcAec_Process" >&5
-$as_echo "$ac_cv_lib_audio_processing_WebRtcAec_Process" >&6; }
-if test "x$ac_cv_lib_audio_processing_WebRtcAec_Process" = xyes; then :
- ac_webrtc_cflags="-DPJMEDIA_HAS_WEBRTC_AEC=1 $WEBRTC_CFLAGS"
- ac_webrtc_ldflags="$WEBRTC_LDFLAGS $WEBRTC_LIBS"
-
-else
- LIBS="$SAVED_LIBS"
- LDFLAGS="$SAVED_LDFLAGS"
- CFLAGS="$SAVED_CFLAGS"
-
-fi
-
-
-fi
-
-
# Check whether --enable-ipp was given.
if test "${enable_ipp+set}" = set; then :
enableval=$enable_ipp;
@@ -8473,6 +8402,67 @@ fi
+
+
+# Check whether --enable-libwebrtc was given.
+if test "${enable_libwebrtc+set}" = set; then :
+ enableval=$enable_libwebrtc; if test "$enable_libwebrtc" = "no"; then
+ ac_no_webrtc=1
+ $as_echo "#define PJMEDIA_HAS_LIBWEBRTC 0" >>confdefs.h
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: Checking if libwebrtc is disabled...yes" >&5
+$as_echo "Checking if libwebrtc is disabled...yes" >&6; }
+ fi
+else
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: Checking if libwebrtc is disabled...no" >&5
+$as_echo "Checking if libwebrtc is disabled...no" >&6; }
+ case $target in
+ *-apple-darwin_ios*)
+ case $target in
+ *arm*)
+ ac_webrtc_instset=neon
+ ;;
+ *)
+ ac_webrtc_instset=sse2
+ ;;
+ esac
+ ;;
+ *android*)
+ case $TARGET_ABI in
+ armeabi-v7a)
+ ac_webrtc_instset=neon
+ ac_webrtc_cflags="-mfloat-abi=softfp -mfpu=neon"
+ ;;
+ armeabi)
+ ac_webrtc_instset=neon
+ ac_webrtc_cflags="-mfloat-abi=softfp -mfpu=neon -march=armv7"
+ ;;
+ arm64*)
+ ac_webrtc_instset=neon
+ ac_webrtc_cflags="-DWEBRTC_ARCH_ARM64"
+ ;;
+ mips*)
+ ac_webrtc_instset=mips
+ ;;
+ *)
+ ac_webrtc_instset=sse2
+ ;;
+ esac
+ ;;
+ *mingw* | *cygw* | *win32* | *w32* | *darwin* | *linux*)
+ ac_webrtc_instset=sse2
+ ;;
+ *)
+ ;;
+ esac
+
+fi
+
+
+
+
+
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking if select() needs correct nfds" >&5
$as_echo_n "checking if select() needs correct nfds... " >&6; }
case $target in
diff --git a/aconfigure.ac b/aconfigure.ac
index ff868589..06eebd3d 100644
--- a/aconfigure.ac
+++ b/aconfigure.ac
@@ -598,6 +598,28 @@ AC_ARG_WITH(external-yuv,
)
+dnl # Use external webrtc installation.
+dnl # ac_external_webrtc defaults to 0. When --with-external-webrtc is given with
+dnl # any value other than "no", a test program that includes the AEC headers and
+dnl # calls WebRtcAec_Create is compiled: success sets ac_external_webrtc to 1,
+dnl # failure aborts configure. The check compiles only; it does not link.
+AC_SUBST(ac_external_webrtc,0)
+AC_ARG_WITH(external-webrtc,
+ AS_HELP_STRING([--with-external-webrtc],
+ [Use external webrtc development files, not the one in "third_party" directory. When this option is set, make sure that webrtc is accessible to use (hint: use CFLAGS and LDFLAGS env var to set the include/lib paths)]),
+ [
+ if test "x$with_external_webrtc" != "xno"; then
+ # Test webrtc installation
+ AC_MSG_CHECKING([if external webrtc devkit is installed])
+ AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[#include <webrtc/modules/audio_processing/aec/aec_core.h>
+ #include <webrtc/modules/audio_processing/aec/include/echo_cancellation.h>
+ ]],
+ [WebRtcAec_Create();])],
+ [AC_MSG_RESULT(yes!!)
+ ac_external_webrtc="1"
+ ],
+ [AC_MSG_ERROR([Unable to use external webrtc. If webrtc development files are not available in the default locations, use CFLAGS and LDFLAGS env var to set the include/lib paths])])
+ fi
+ ]
+ )
+
+
dnl # Resample implementation
AC_SUBST(ac_pjmedia_resample,libresample)
AC_ARG_ENABLE(resample,
@@ -1243,82 +1265,6 @@ AC_ARG_ENABLE(openh264,
])
-dnl # WebRtc alt prefix
-AC_ARG_WITH(webrtc,
- AS_HELP_STRING([--with-webrtc=DIR],
- [Specify alternate WebRtc prefix]),
- [],
- [with_webrtc=no]
- )
-
-dnl # Do not use default webrtc installation if we are cross-compiling
-if test "x$ac_cross_compile" != "x" -a "x$with_webrtc" = "xno"; then
- enable_webrtc=no
-fi
-
-dnl # WebRtc
-AC_SUBST(ac_webrtc_cflags)
-AC_SUBST(ac_webrtc_ldflags)
-AC_ARG_ENABLE(webrtc,
- AS_HELP_STRING([--disable-webrtc],
- [Exclude webrtc in the build]),
- [if test "$enable_webrtc" = "no"; then
- AC_MSG_RESULT([Checking if webrtc is disabled...yes])
- fi],
- [
- if test "x$with_webrtc" != "xno" -a "x$with_webrtc" != "x"; then
- WEBRTC_PREFIX=$with_webrtc
- WEBRTC_CFLAGS="-I$WEBRTC_PREFIX/src"
-
- case $target in
- *-apple-darwin_ios*)
- case $ARCH in
- *arm*)
- WEBRTC_CFLAGS="-DPJMEDIA_WEBRTC_AEC_USE_MOBILE=1 $WEBRTC_CFLAGS"
- WEBRTC_LDFLAGS="-L$WEBRTC_PREFIX/src/out_ios/Release-iphoneos"
- WEBRTC_LIBS="-laudio_processing_neon -lcommon_audio_neon"
- ;;
- *)
- ;;
- esac
- ;;
- *mingw* | *cygw* | *win32* | *w32* | *darwin* | *linux* | *android*)
- WEBRTC_LDFLAGS="-L$WEBRTC_PREFIX/src/out/Release"
- WEBRTC_LIBS="-laudio_processing_sse2"
- ;;
- *)
- ;;
- esac
-
- AC_MSG_RESULT([Using webrtc prefix... $with_webrtc])
- else
- WEBRTC_CFLAGS=""
- WEBRTC_LDFLAGS=""
- fi
-
- WEBRTC_LIBS="$WEBRTC_LIBS -laudio_processing -lcommon_audio -lsystem_wrappers"
-
- SAVED_LIBS="$LIBS"
- SAVED_LDFLAGS="$LDFLAGS"
- SAVED_CFLAGS="$CFLAGS"
-
- LIBS="$WEBRTC_LIBS $LIBS"
- LDFLAGS="$WEBRTC_LDFLAGS $LDFLAGS"
- CFLAGS="$WEBRTC_CFLAGS $CFLAGS"
-
- AC_CHECK_LIB(audio_processing,
- WebRtcAec_Process,
- [ ac_webrtc_cflags="-DPJMEDIA_HAS_WEBRTC_AEC=1 $WEBRTC_CFLAGS"
- ac_webrtc_ldflags="$WEBRTC_LDFLAGS $WEBRTC_LIBS"
- ],
- [ LIBS="$SAVED_LIBS"
- LDFLAGS="$SAVED_LDFLAGS"
- CFLAGS="$SAVED_CFLAGS"
- ],
- []
- )
- ])
-
dnl ########################################################
dnl # Intel IPP support
dnl #
@@ -1819,6 +1765,63 @@ AC_ARG_ENABLE(libyuv,
AC_MSG_RESULT([Checking if libyuv is disabled...no]))
+dnl # Include webrtc.
+dnl # ac_no_webrtc is set to 1 only by --disable-libwebrtc. When the option is not
+dnl # given at all, ac_webrtc_instset (neon, sse2 or mips) and any extra
+dnl # ac_webrtc_cflags are chosen from $target, or from $TARGET_ABI on Android.
+dnl # ac_webrtc_ldflags is substituted but never assigned in this block.
+dnl # NOTE(review): an explicit --enable-libwebrtc takes the first branch and sets
+dnl # nothing, and a target matching no case pattern also leaves
+dnl # ac_webrtc_instset empty -- confirm the build copes with an empty value.
+AC_SUBST(ac_no_webrtc)
+AC_SUBST(ac_webrtc_instset)
+AC_SUBST(ac_webrtc_cflags)
+AC_SUBST(ac_webrtc_ldflags)
+AC_ARG_ENABLE(libwebrtc,
+ AS_HELP_STRING([--disable-libwebrtc],
+ [Exclude libwebrtc in the build]),
+ [if test "$enable_libwebrtc" = "no"; then
+ [ac_no_webrtc=1]
+ AC_DEFINE(PJMEDIA_HAS_LIBWEBRTC,0)
+ AC_MSG_RESULT([Checking if libwebrtc is disabled...yes])
+ fi],
+ [
+ AC_MSG_RESULT([Checking if libwebrtc is disabled...no])
+ case $target in
+ *-apple-darwin_ios*)
+ case $target in
+ *arm*)
+ ac_webrtc_instset=neon
+ ;;
+ *)
+ ac_webrtc_instset=sse2
+ ;;
+ esac
+ ;;
+ *android*)
+ case $TARGET_ABI in
+ armeabi-v7a)
+ ac_webrtc_instset=neon
+ ac_webrtc_cflags="-mfloat-abi=softfp -mfpu=neon"
+ ;;
+ armeabi)
+ ac_webrtc_instset=neon
+ ac_webrtc_cflags="-mfloat-abi=softfp -mfpu=neon -march=armv7"
+ ;;
+ arm64*)
+ ac_webrtc_instset=neon
+ ac_webrtc_cflags="-DWEBRTC_ARCH_ARM64"
+ ;;
+ mips*)
+ ac_webrtc_instset=mips
+ ;;
+ *)
+ ac_webrtc_instset=sse2
+ ;;
+ esac
+ ;;
+ *mingw* | *cygw* | *win32* | *w32* | *darwin* | *linux*)
+ ac_webrtc_instset=sse2
+ ;;
+ *)
+ ;;
+ esac
+ ])
+
+
dnl ##########################################
dnl #
dnl # MANUAL CONFIG
diff --git a/build.mak.in b/build.mak.in
index 802211ce..eb286633 100644
--- a/build.mak.in
+++ b/build.mak.in
@@ -135,6 +135,20 @@ endif
endif
endif
+# webrtc: everything below is skipped when configure set @ac_no_webrtc@ to 1.
+ifneq (@ac_no_webrtc@,1)
+ifeq (@ac_external_webrtc@,1)
+# External installation: only a plain -lwebrtc is added.
+APP_THIRD_PARTY_EXT += -lwebrtc
+else
+# Bundled copy: the library file under third_party/lib is listed as well.
+APP_THIRD_PARTY_LIB_FILES += $(PJ_DIR)/third_party/lib/libwebrtc-$(LIB_SUFFIX)
+ifeq ($(PJ_SHARED_LIBRARIES),)
+# PJ_SHARED_LIBRARIES empty: link the target-suffixed library name.
+APP_THIRD_PARTY_LIBS += -lwebrtc-$(TARGET_NAME)
+else
+# PJ_SHARED_LIBRARIES set: link -lwebrtc and list both the versioned and the
+# unversioned shared library files.
+APP_THIRD_PARTY_LIBS += -lwebrtc
+APP_THIRD_PARTY_LIB_FILES += $(PJ_DIR)/third_party/lib/libwebrtc.$(SHLIB_SUFFIX).$(PJ_VERSION_MAJOR) $(PJ_DIR)/third_party/lib/libwebrtc.$(SHLIB_SUFFIX)
+endif
+endif
+endif
+
# Additional flags
@ac_build_mak_vars@
diff --git a/configure-android b/configure-android
index 477b4c3e..b7397bf2 100755
--- a/configure-android
+++ b/configure-android
@@ -149,6 +149,7 @@ else
exit 1
fi
+ export TARGET_ABI="${TARGET_ABI}"
export CC="${ANDROID_TC}/bin/${TARGET_HOST}-gcc"
export CXX="${ANDROID_TC}/bin/${TARGET_HOST}-g++"
export AR="${ANDROID_TC}/bin/${TARGET_HOST}-ar"
diff --git a/pjmedia/build/os-auto.mak.in b/pjmedia/build/os-auto.mak.in
index 63e95a3c..4754030b 100644
--- a/pjmedia/build/os-auto.mak.in
+++ b/pjmedia/build/os-auto.mak.in
@@ -32,18 +32,14 @@ ANDROID_CFLAGS = @ac_android_cflags@
OPENH264_CFLAGS = @ac_openh264_cflags@
OPENH264_LDFLAGS = @ac_openh264_ldflags@
-# WebRtc
-WEBRTC_CFLAGS = @ac_webrtc_cflags@
-WEBRTC_LDFLAGS = @ac_webrtc_ldflags@
-
# PJMEDIA features exclusion
export CFLAGS += @ac_no_small_filter@ @ac_no_large_filter@ @ac_no_speex_aec@ \
$(SDL_CFLAGS) $(FFMPEG_CFLAGS) $(V4L2_CFLAGS) $(QT_CFLAGS) \
$(DARWIN_CFLAGS) $(ANDROID_CFLAGS) \
- $(OPENH264_CFLAGS) $(WEBRTC_CFLAGS)
+ $(OPENH264_CFLAGS)
export LDFLAGS += $(SDL_LDFLAGS) $(FFMPEG_LDFLAGS) $(V4L2_LDFLAGS) \
- $(OPENH264_LDFLAGS) $(WEBRTC_LDFLAGS)
+ $(OPENH264_LDFLAGS)
# Define the desired sound device backend
# Valid values are:
@@ -203,6 +199,22 @@ export CFLAGS += -I$(THIRD_PARTY)/yuv/include
endif
endif
+#
+# libwebrtc
+#
+# @ac_no_webrtc@ is 1 only when configure was run with --disable-libwebrtc.
+#
+ifeq (@ac_no_webrtc@,1)
+export CFLAGS += -DPJMEDIA_HAS_WEBRTC_AEC=0
+else
+export CFLAGS += -DPJMEDIA_HAS_WEBRTC_AEC=1
+# @ac_webrtc_instset@ is replaced by configure with a literal word (neon, sse2
+# or mips), so it is matched directly: wrapping it in $(...) would look up a
+# make variable of that name, which is empty, and the test could never succeed.
+# neon is the value configure selects for ARM targets.
+ifneq ($(findstring neon,@ac_webrtc_instset@),)
+export CFLAGS += -DPJMEDIA_WEBRTC_AEC_USE_MOBILE=1
+endif
+
+# Bundled sources only: add the third_party webrtc include path.
+ifeq (@ac_external_webrtc@,0)
+export CFLAGS += -I$(THIRD_PARTY)/webrtc/src
+endif
+endif
+
#
# MacOSX specific
diff --git a/third_party/build/os-auto.mak.in b/third_party/build/os-auto.mak.in
index 66653fa1..5eac2fd2 100644
--- a/third_party/build/os-auto.mak.in
+++ b/third_party/build/os-auto.mak.in
@@ -64,3 +64,43 @@ else
DIRS += yuv
endif
endif
+
+# webrtc: built from third_party unless it is disabled (@ac_no_webrtc@ is 1) or
+# an external installation is used (@ac_external_webrtc@ is 1).
+# WEBRTC_SRC lists the extra objects for the instruction set configure chose in
+# @ac_webrtc_instset@; it stays undefined when that value matches none of
+# sse2, neon or mips.
+ifneq (@ac_no_webrtc@,1)
+ifeq (@ac_external_webrtc@,1)
+# External webrtc
+else
+DIRS += webrtc
+WEBRTC_OTHER_CFLAGS = -fexceptions -DWEBRTC_POSIX=1 @ac_webrtc_cflags@
+ifneq ($(findstring sse2,@ac_webrtc_instset@),)
+# sse2: SSE2 AEC objects, the _c AECM/NSX cores and cpu_features.
+ WEBRTC_SRC = \
+ modules/audio_processing/aec/aec_core_sse2.o \
+ modules/audio_processing/aec/aec_rdft_sse2.o \
+ modules/audio_processing/aecm/aecm_core_c.o \
+ modules/audio_processing/ns/nsx_core_c.o \
+ system_wrappers/source/cpu_features.o
+else ifneq ($(findstring neon,@ac_webrtc_instset@),)
+# neon: NEON objects alongside the _c AECM/NSX cores; also defines
+# WEBRTC_HAS_NEON.
+ WEBRTC_SRC = \
+ modules/audio_processing/aec/aec_core_neon.o \
+ modules/audio_processing/aec/aec_rdft_neon.o \
+ modules/audio_processing/aecm/aecm_core_c.o \
+ modules/audio_processing/aecm/aecm_core_neon.o \
+ modules/audio_processing/ns/nsx_core_c.o \
+ modules/audio_processing/ns/nsx_core_neon.o \
+ common_audio/signal_processing/cross_correlation_neon.o \
+ common_audio/signal_processing/downsample_fast_neon.o \
+ common_audio/signal_processing/min_max_operations_neon.o
+ WEBRTC_OTHER_CFLAGS += -DWEBRTC_HAS_NEON
+else ifneq ($(findstring mips,@ac_webrtc_instset@),)
+# mips: MIPS objects only (no _c AECM/NSX cores); also defines MIPS_FPU_LE.
+ WEBRTC_SRC = \
+ modules/audio_processing/aec/aec_core_mips.o \
+ modules/audio_processing/aec/aec_rdft_mips.o \
+ modules/audio_processing/aecm/aecm_core_mips.o \
+ modules/audio_processing/ns/nsx_core_mips.o \
+ common_audio/signal_processing/cross_correlation_mips.o \
+ common_audio/signal_processing/downsample_fast_mips.o \
+ common_audio/signal_processing/min_max_operations_mips.o
+
+ WEBRTC_OTHER_CFLAGS += -DMIPS_FPU_LE
+endif
+endif
+endif
diff --git a/third_party/build/webrtc/Makefile b/third_party/build/webrtc/Makefile
new file mode 100644
index 00000000..3e654379
--- /dev/null
+++ b/third_party/build/webrtc/Makefile
@@ -0,0 +1,100 @@
+# Builds the bundled webrtc audio processing sources into libwebrtc by
+# delegating every real target to $(RULES_MAK) with APP=WEBRTC.
+include ../../../build.mak
+include ../../../build/common.mak
+include ../os-$(OS_NAME).mak
+
+export LIBDIR := ../../lib
+
+RULES_MAK := $(PJDIR)/build/rules.mak
+
+export WEBRTC_LIB := libwebrtc-$(TARGET_NAME)$(LIBEXT)
+
+# The shared-library names are defined only when PJ_SHARED_LIBRARIES is set.
+ifeq ($(PJ_SHARED_LIBRARIES),)
+else
+export WEBRTC_SONAME := libwebrtc.$(SHLIB_SUFFIX)
+export WEBRTC_SHLIB := $(WEBRTC_SONAME).$(PJ_VERSION_MAJOR)
+endif
+
+###############################################################################
+# Gather all flags.
+#
+export _CFLAGS := $(CC_CFLAGS) $(OS_CFLAGS) $(HOST_CFLAGS) $(M_CFLAGS) \
+ $(CFLAGS) $(CC_INC). $(CC_INC)../../webrtc/src \
+ $(CC_INC)../../../pjlib/include
+export _CXXFLAGS:= $(_CFLAGS) $(CC_CXXFLAGS) $(OS_CXXFLAGS) $(M_CXXFLAGS) \
+ $(HOST_CXXFLAGS) $(CXXFLAGS)
+export _LDFLAGS := $(CC_LDFLAGS) $(OS_LDFLAGS) $(M_LDFLAGS) $(HOST_LDFLAGS) \
+ $(LDFLAGS)
+
+# Objects common to every build; $(WEBRTC_SRC) appends the instruction-set
+# specific ones (presumably set by the included ../os-$(OS_NAME).mak -- verify).
+export WEBRTC_SRCDIR = ../../webrtc/src/webrtc/
+export WEBRTC_OBJS = \
+ modules/audio_processing/aec/aec_core.o \
+ modules/audio_processing/aec/aec_rdft.o \
+ modules/audio_processing/aec/aec_resampler.o \
+ modules/audio_processing/aec/echo_cancellation.o \
+ modules/audio_processing/aecm/aecm_core.o \
+ modules/audio_processing/aecm/echo_control_mobile.o \
+ modules/audio_processing/ns/noise_suppression.o \
+ modules/audio_processing/ns/noise_suppression_x.o \
+ modules/audio_processing/ns/ns_core.o \
+ modules/audio_processing/ns/nsx_core.o \
+ modules/audio_processing/utility/delay_estimator_wrapper.o \
+ modules/audio_processing/utility/delay_estimator.o \
+ common_audio/fft4g.o \
+ common_audio/ring_buffer.o \
+ common_audio/signal_processing/complex_bit_reverse.o \
+ common_audio/signal_processing/complex_fft.o \
+ common_audio/signal_processing/copy_set_operations.o \
+ common_audio/signal_processing/cross_correlation.o \
+ common_audio/signal_processing/division_operations.o \
+ common_audio/signal_processing/downsample_fast.o \
+ common_audio/signal_processing/energy.o \
+ common_audio/signal_processing/get_scaling_square.o \
+ common_audio/signal_processing/min_max_operations.o \
+ common_audio/signal_processing/randomization_functions.o \
+ common_audio/signal_processing/real_fft.o \
+ common_audio/signal_processing/spl_init.o \
+ common_audio/signal_processing/spl_sqrt.o \
+ common_audio/signal_processing/spl_sqrt_floor.o \
+ common_audio/signal_processing/vector_scaling_operations.o \
+ $(WEBRTC_SRC)
+
+
+export WEBRTC_CFLAGS = $(_CFLAGS) $(WEBRTC_OTHER_CFLAGS)
+export WEBRTC_CXXFLAGS = $(WEBRTC_CFLAGS)
+
+
+export CC_OUT CC AR RANLIB HOST_MV HOST_RM HOST_RMDIR HOST_MKDIR OBJEXT LD LDOUT
+###############################################################################
+# Main entry
+#
+# $(TARGETS) is defined just below: the static library plus, when shared
+# libraries are enabled, the shared library name.
+#
+TARGETS := $(WEBRTC_LIB) $(WEBRTC_SONAME)
+
+all: $(TARGETS)
+
+# NOTE(review): docs/doxygen.cfg is not among the files added together with
+# this Makefile -- confirm this target is usable.
+doc:
+ cd .. && doxygen docs/doxygen.cfg
+
+dep: depend
+distclean: realclean
+
+.PHONY: all dep depend clean realclean distclean
+.PHONY: $(TARGETS)
+.PHONY: $(WEBRTC_LIB) $(WEBRTC_SONAME)
+
+# The library targets are phony, so each invocation re-enters $(RULES_MAK).
+libwebrtc: $(WEBRTC_LIB)
+$(WEBRTC_SONAME): $(WEBRTC_LIB)
+$(WEBRTC_LIB) $(WEBRTC_SONAME):
+ $(MAKE) -f $(RULES_MAK) APP=WEBRTC app=libwebrtc $(subst /,$(HOST_PSEP),$(LIBDIR)/$@)
+
+clean print_lib:
+ $(MAKE) -f $(RULES_MAK) APP=WEBRTC app=libwebrtc $@
+
+# Removes the .webrtc-$(TARGET_NAME).depend file before delegating.
+realclean:
+ $(subst @@,$(subst /,$(HOST_PSEP),.webrtc-$(TARGET_NAME).depend),$(HOST_RMR))
+
+ $(MAKE) -f $(RULES_MAK) APP=WEBRTC app=libwebrtc $@
+
+depend:
+ $(MAKE) -f $(RULES_MAK) APP=WEBRTC app=libwebrtc $@
diff --git a/third_party/build/webrtc/notes.txt b/third_party/build/webrtc/notes.txt
new file mode 100644
index 00000000..dc47aeba
--- /dev/null
+++ b/third_party/build/webrtc/notes.txt
@@ -0,0 +1,2 @@
+Notes:
+* Source code for webrtc from https://chromium.googlesource.com/external/webrtc circa Oct 2015.
diff --git a/third_party/webrtc/src/webrtc/common_audio/fft4g.c b/third_party/webrtc/src/webrtc/common_audio/fft4g.c
new file mode 100644
index 00000000..9cf7b9f6
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/common_audio/fft4g.c
@@ -0,0 +1,1332 @@
+/*
+ * http://www.kurims.kyoto-u.ac.jp/~ooura/fft.html
+ * Copyright Takuya OOURA, 1996-2001
+ *
+ * You may use, copy, modify and distribute this code for any purpose (include
+ * commercial use) and without fee. Please refer to this package when you modify
+ * this code.
+ *
+ * Changes:
+ * Trivial type modifications by the WebRTC authors.
+ */
+
+/*
+Fast Fourier/Cosine/Sine Transform
+ dimension :one
+ data length :power of 2
+ decimation :frequency
+ radix :4, 2
+ data :inplace
+ table :use
+functions
+ cdft: Complex Discrete Fourier Transform
+ rdft: Real Discrete Fourier Transform
+ ddct: Discrete Cosine Transform
+ ddst: Discrete Sine Transform
+ dfct: Cosine Transform of RDFT (Real Symmetric DFT)
+ dfst: Sine Transform of RDFT (Real Anti-symmetric DFT)
+function prototypes
+ void cdft(int, int, float *, int *, float *);
+ void rdft(size_t, int, float *, size_t *, float *);
+ void ddct(int, int, float *, int *, float *);
+ void ddst(int, int, float *, int *, float *);
+ void dfct(int, float *, float *, int *, float *);
+ void dfst(int, float *, float *, int *, float *);
+
+
+-------- Complex DFT (Discrete Fourier Transform) --------
+ [definition]
+ <case1>
+ X[k] = sum_j=0^n-1 x[j]*exp(2*pi*i*j*k/n), 0<=k<n
+ <case2>
+ X[k] = sum_j=0^n-1 x[j]*exp(-2*pi*i*j*k/n), 0<=k<n
+ (notes: sum_j=0^n-1 is a summation from j=0 to n-1)
+ [usage]
+ <case1>
+ ip[0] = 0; // first time only
+ cdft(2*n, 1, a, ip, w);
+ <case2>
+ ip[0] = 0; // first time only
+ cdft(2*n, -1, a, ip, w);
+ [parameters]
+ 2*n :data length (int)
+ n >= 1, n = power of 2
+ a[0...2*n-1] :input/output data (float *)
+ input data
+ a[2*j] = Re(x[j]),
+ a[2*j+1] = Im(x[j]), 0<=j<n
+ output data
+ a[2*k] = Re(X[k]),
+ a[2*k+1] = Im(X[k]), 0<=k<n
+ ip[0...*] :work area for bit reversal (int *)
+ length of ip >= 2+sqrt(n)
+ strictly,
+ length of ip >=
+ 2+(1<<(int)(log(n+0.5)/log(2))/2).
+ ip[0],ip[1] are pointers of the cos/sin table.
+ w[0...n/2-1] :cos/sin table (float *)
+ w[],ip[] are initialized if ip[0] == 0.
+ [remark]
+ Inverse of
+ cdft(2*n, -1, a, ip, w);
+ is
+ cdft(2*n, 1, a, ip, w);
+ for (j = 0; j <= 2 * n - 1; j++) {
+ a[j] *= 1.0 / n;
+ }
+ .
+
+
+-------- Real DFT / Inverse of Real DFT --------
+ [definition]
+ <case1> RDFT
+ R[k] = sum_j=0^n-1 a[j]*cos(2*pi*j*k/n), 0<=k<=n/2
+ I[k] = sum_j=0^n-1 a[j]*sin(2*pi*j*k/n), 0<k<n/2
+ <case2> IRDFT (excluding scale)
+ a[k] = (R[0] + R[n/2]*cos(pi*k))/2 +
+ sum_j=1^n/2-1 R[j]*cos(2*pi*j*k/n) +
+ sum_j=1^n/2-1 I[j]*sin(2*pi*j*k/n), 0<=k<n
+ [usage]
+ <case1>
+ ip[0] = 0; // first time only
+ rdft(n, 1, a, ip, w);
+ <case2>
+ ip[0] = 0; // first time only
+ rdft(n, -1, a, ip, w);
+ [parameters]
+ n :data length (size_t)
+ n >= 2, n = power of 2
+ a[0...n-1] :input/output data (float *)
+ <case1>
+ output data
+ a[2*k] = R[k], 0<=k<n/2
+ a[2*k+1] = I[k], 0<k<n/2
+ a[1] = R[n/2]
+ <case2>
+ input data
+ a[2*j] = R[j], 0<=j<n/2
+ a[2*j+1] = I[j], 0<j<n/2
+ a[1] = R[n/2]
+ ip[0...*] :work area for bit reversal (size_t *)
+ length of ip >= 2+sqrt(n/2)
+ strictly,
+ length of ip >=
+ 2+(1<<(int)(log(n/2+0.5)/log(2))/2).
+ ip[0],ip[1] are pointers of the cos/sin table.
+ w[0...n/2-1] :cos/sin table (float *)
+ w[],ip[] are initialized if ip[0] == 0.
+ [remark]
+ Inverse of
+ rdft(n, 1, a, ip, w);
+ is
+ rdft(n, -1, a, ip, w);
+ for (j = 0; j <= n - 1; j++) {
+ a[j] *= 2.0 / n;
+ }
+ .
+
+
+-------- DCT (Discrete Cosine Transform) / Inverse of DCT --------
+ [definition]
+ <case1> IDCT (excluding scale)
+ C[k] = sum_j=0^n-1 a[j]*cos(pi*j*(k+1/2)/n), 0<=k<n
+ <case2> DCT
+ C[k] = sum_j=0^n-1 a[j]*cos(pi*(j+1/2)*k/n), 0<=k<n
+ [usage]
+ <case1>
+ ip[0] = 0; // first time only
+ ddct(n, 1, a, ip, w);
+ <case2>
+ ip[0] = 0; // first time only
+ ddct(n, -1, a, ip, w);
+ [parameters]
+ n :data length (int)
+ n >= 2, n = power of 2
+ a[0...n-1] :input/output data (float *)
+ output data
+ a[k] = C[k], 0<=k<n
+ ip[0...*] :work area for bit reversal (int *)
+ length of ip >= 2+sqrt(n/2)
+ strictly,
+ length of ip >=
+ 2+(1<<(int)(log(n/2+0.5)/log(2))/2).
+ ip[0],ip[1] are pointers of the cos/sin table.
+ w[0...n*5/4-1] :cos/sin table (float *)
+ w[],ip[] are initialized if ip[0] == 0.
+ [remark]
+ Inverse of
+ ddct(n, -1, a, ip, w);
+ is
+ a[0] *= 0.5;
+ ddct(n, 1, a, ip, w);
+ for (j = 0; j <= n - 1; j++) {
+ a[j] *= 2.0 / n;
+ }
+ .
+
+
+-------- DST (Discrete Sine Transform) / Inverse of DST --------
+ [definition]
+ <case1> IDST (excluding scale)
+ S[k] = sum_j=1^n A[j]*sin(pi*j*(k+1/2)/n), 0<=k<n
+ <case2> DST
+ S[k] = sum_j=0^n-1 a[j]*sin(pi*(j+1/2)*k/n), 0<k<=n
+ [usage]
+ <case1>
+ ip[0] = 0; // first time only
+ ddst(n, 1, a, ip, w);
+ <case2>
+ ip[0] = 0; // first time only
+ ddst(n, -1, a, ip, w);
+ [parameters]
+ n :data length (int)
+ n >= 2, n = power of 2
+ a[0...n-1] :input/output data (float *)
+ <case1>
+ input data
+ a[j] = A[j], 0<j<n
+ a[0] = A[n]
+ output data
+ a[k] = S[k], 0<=k<n
+ <case2>
+ output data
+ a[k] = S[k], 0<k<n
+ a[0] = S[n]
+ ip[0...*] :work area for bit reversal (int *)
+ length of ip >= 2+sqrt(n/2)
+ strictly,
+ length of ip >=
+ 2+(1<<(int)(log(n/2+0.5)/log(2))/2).
+ ip[0],ip[1] are pointers of the cos/sin table.
+ w[0...n*5/4-1] :cos/sin table (float *)
+ w[],ip[] are initialized if ip[0] == 0.
+ [remark]
+ Inverse of
+ ddst(n, -1, a, ip, w);
+ is
+ a[0] *= 0.5;
+ ddst(n, 1, a, ip, w);
+ for (j = 0; j <= n - 1; j++) {
+ a[j] *= 2.0 / n;
+ }
+ .
+
+
+-------- Cosine Transform of RDFT (Real Symmetric DFT) --------
+ [definition]
+ C[k] = sum_j=0^n a[j]*cos(pi*j*k/n), 0<=k<=n
+ [usage]
+ ip[0] = 0; // first time only
+ dfct(n, a, t, ip, w);
+ [parameters]
+ n :data length - 1 (int)
+ n >= 2, n = power of 2
+ a[0...n] :input/output data (float *)
+ output data
+ a[k] = C[k], 0<=k<=n
+ t[0...n/2] :work area (float *)
+ ip[0...*] :work area for bit reversal (int *)
+ length of ip >= 2+sqrt(n/4)
+ strictly,
+ length of ip >=
+ 2+(1<<(int)(log(n/4+0.5)/log(2))/2).
+ ip[0],ip[1] are pointers of the cos/sin table.
+ w[0...n*5/8-1] :cos/sin table (float *)
+ w[],ip[] are initialized if ip[0] == 0.
+ [remark]
+ Inverse of
+ a[0] *= 0.5;
+ a[n] *= 0.5;
+ dfct(n, a, t, ip, w);
+ is
+ a[0] *= 0.5;
+ a[n] *= 0.5;
+ dfct(n, a, t, ip, w);
+ for (j = 0; j <= n; j++) {
+ a[j] *= 2.0 / n;
+ }
+ .
+
+
+-------- Sine Transform of RDFT (Real Anti-symmetric DFT) --------
+ [definition]
+ S[k] = sum_j=1^n-1 a[j]*sin(pi*j*k/n), 0<k<n
+ [usage]
+ ip[0] = 0; // first time only
+ dfst(n, a, t, ip, w);
+ [parameters]
+ n :data length + 1 (int)
+ n >= 2, n = power of 2
+ a[0...n-1] :input/output data (float *)
+ output data
+ a[k] = S[k], 0<k<n
+ (a[0] is used for work area)
+ t[0...n/2-1] :work area (float *)
+ ip[0...*] :work area for bit reversal (int *)
+ length of ip >= 2+sqrt(n/4)
+ strictly,
+ length of ip >=
+ 2+(1<<(int)(log(n/4+0.5)/log(2))/2).
+ ip[0],ip[1] are pointers of the cos/sin table.
+ w[0...n*5/8-1] :cos/sin table (float *)
+ w[],ip[] are initialized if ip[0] == 0.
+ [remark]
+ Inverse of
+ dfst(n, a, t, ip, w);
+ is
+ dfst(n, a, t, ip, w);
+ for (j = 1; j <= n - 1; j++) {
+ a[j] *= 2.0 / n;
+ }
+ .
+
+
+Appendix :
+ The cos/sin table is recalculated when a larger table is required.
+ w[] and ip[] are compatible with all routines.
+*/
+
+#include <stddef.h>
+
+/*
+ * Forward declarations of the file-local helpers defined further below.
+ * Declarations inside "#if 0" belong to routines that are compiled out.
+ */
+static void makewt(size_t nw, size_t *ip, float *w);
+static void makect(size_t nc, size_t *ip, float *c);
+static void bitrv2(size_t n, size_t *ip, float *a);
+#if 0 // Not used.
+static void bitrv2conj(int n, int *ip, float *a);
+#endif
+static void cftfsub(size_t n, float *a, float *w);
+static void cftbsub(size_t n, float *a, float *w);
+static void cft1st(size_t n, float *a, float *w);
+static void cftmdl(size_t n, size_t l, float *a, float *w);
+static void rftfsub(size_t n, float *a, size_t nc, float *c);
+static void rftbsub(size_t n, float *a, size_t nc, float *c);
+#if 0 // Not used.
+/* Terminated with ';' so the block parses if it is ever re-enabled. */
+static void dctsub(int n, float *a, int nc, float *c);
+static void dstsub(int n, float *a, int nc, float *c);
+#endif
+
+/*
+ * Disabled: complex DFT entry point (see "Complex DFT" in the notes at the
+ * top of this file). The whole definition is compiled out.
+ */
+#if 0 // Not used.
+void WebRtc_cdft(int n, int isgn, float *a, int *ip, float *w)
+{
+ if (n > (ip[0] << 2)) { /* cos/sin table too small for this n: rebuild it */
+ makewt(n >> 2, ip, w);
+ }
+ if (n > 4) {
+ if (isgn >= 0) {
+ bitrv2(n, ip + 2, a);
+ cftfsub(n, a, w);
+ } else {
+ bitrv2conj(n, ip + 2, a); /* reorders and negates the imaginary parts */
+ cftbsub(n, a, w);
+ }
+ } else if (n == 4) {
+ cftfsub(n, a, w); /* n == 4 skips the reordering step */
+ }
+}
+#endif
+
+
+/*
+ * In-place real DFT (isgn >= 0) or its unscaled inverse (isgn < 0) of
+ * a[0...n-1]. The data layout is described under "Real DFT / Inverse of
+ * Real DFT" in the notes at the top of this file.
+ *
+ *   n    : data length (n >= 2, power of 2)
+ *   isgn : direction selector, see above
+ *   a    : input/output data
+ *   ip   : work area; ip[0] and ip[1] hold the sizes of the two tables in w
+ *   w    : cos/sin tables, rebuilt here whenever they are too small for n
+ */
+void WebRtc_rdft(size_t n, int isgn, float *a, size_t *ip, float *w)
+{
+    size_t wt_len, ct_len;
+    float *ct;
+
+    /* The first table must be handled first: makewt() overwrites ip[1],
+     * which is read immediately afterwards. */
+    wt_len = ip[0];
+    if ((wt_len << 2) < n) {
+        wt_len = n >> 2;
+        makewt(wt_len, ip, w);
+    }
+    ct_len = ip[1];
+    if ((ct_len << 2) < n) {
+        ct_len = n >> 2;
+        makect(ct_len, ip, w + wt_len);
+    }
+    /* The second table is stored directly behind the first one. */
+    ct = w + wt_len;
+
+    if (isgn < 0) {
+        /* Inverse direction. */
+        a[1] = 0.5f * (a[0] - a[1]);
+        a[0] -= a[1];
+        if (n > 4) {
+            rftbsub(n, a, ct_len, ct);
+            bitrv2(n, ip + 2, a);
+            cftbsub(n, a, w);
+        } else if (n == 4) {
+            cftfsub(n, a, w);
+        }
+        return;
+    }
+
+    /* Forward direction. */
+    if (n > 4) {
+        bitrv2(n, ip + 2, a);
+        cftfsub(n, a, w);
+        rftfsub(n, a, ct_len, ct);
+    } else if (n == 4) {
+        cftfsub(n, a, w);
+    }
+    {
+        /* a[0] becomes the sum and a[1] the difference of the first pair. */
+        const float diff = a[0] - a[1];
+        a[0] += a[1];
+        a[1] = diff;
+    }
+}
+/*
+ * Disabled: DCT / inverse DCT entry point ("ddct" in the notes at the top of
+ * this file: isgn < 0 selects the DCT, isgn >= 0 the unscaled IDCT).
+ * Compiled out; it still uses int-typed arguments although the live helpers
+ * it calls (makewt, makect, bitrv2, ...) are declared with size_t.
+ */
+#if 0 // Not used.
+static void ddct(int n, int isgn, float *a, int *ip, float *w)
+{
+ int j, nw, nc;
+ float xr;
+
+ nw = ip[0];
+ if (n > (nw << 2)) { /* first table too small: rebuild */
+ nw = n >> 2;
+ makewt(nw, ip, w);
+ }
+ nc = ip[1];
+ if (n > nc) { /* second table too small: rebuild, stored right after the first */
+ nc = n;
+ makect(nc, ip, w + nw);
+ }
+ if (isgn < 0) {
+ xr = a[n - 1];
+ for (j = n - 2; j >= 2; j -= 2) { /* walks downwards so a[j - 1] is read before it is overwritten */
+ a[j + 1] = a[j] - a[j - 1];
+ a[j] += a[j - 1];
+ }
+ a[1] = a[0] - xr;
+ a[0] += xr;
+ if (n > 4) {
+ rftbsub(n, a, nc, w + nw);
+ bitrv2(n, ip + 2, a);
+ cftbsub(n, a, w);
+ } else if (n == 4) {
+ cftfsub(n, a, w);
+ }
+ }
+ dctsub(n, a, nc, w + nw); /* runs for both directions */
+ if (isgn >= 0) {
+ if (n > 4) {
+ bitrv2(n, ip + 2, a);
+ cftfsub(n, a, w);
+ rftfsub(n, a, nc, w + nw);
+ } else if (n == 4) {
+ cftfsub(n, a, w);
+ }
+ xr = a[0] - a[1];
+ a[0] += a[1];
+ for (j = 2; j < n; j += 2) {
+ a[j - 1] = a[j] - a[j + 1];
+ a[j] += a[j + 1];
+ }
+ a[n - 1] = xr;
+ }
+}
+
+/*
+ * Disabled: DST / inverse DST entry point ("ddst" in the notes at the top of
+ * this file: isgn < 0 selects the DST, isgn >= 0 the unscaled IDST).
+ * Same structure as ddct, with different signs and dstsub in place of dctsub.
+ */
+static void ddst(int n, int isgn, float *a, int *ip, float *w)
+{
+ int j, nw, nc;
+ float xr;
+
+ nw = ip[0];
+ if (n > (nw << 2)) { /* first table too small: rebuild */
+ nw = n >> 2;
+ makewt(nw, ip, w);
+ }
+ nc = ip[1];
+ if (n > nc) { /* second table too small: rebuild */
+ nc = n;
+ makect(nc, ip, w + nw);
+ }
+ if (isgn < 0) {
+ xr = a[n - 1];
+ for (j = n - 2; j >= 2; j -= 2) {
+ a[j + 1] = -a[j] - a[j - 1];
+ a[j] -= a[j - 1];
+ }
+ a[1] = a[0] + xr;
+ a[0] -= xr;
+ if (n > 4) {
+ rftbsub(n, a, nc, w + nw);
+ bitrv2(n, ip + 2, a);
+ cftbsub(n, a, w);
+ } else if (n == 4) {
+ cftfsub(n, a, w);
+ }
+ }
+ dstsub(n, a, nc, w + nw); /* runs for both directions */
+ if (isgn >= 0) {
+ if (n > 4) {
+ bitrv2(n, ip + 2, a);
+ cftfsub(n, a, w);
+ rftfsub(n, a, nc, w + nw);
+ } else if (n == 4) {
+ cftfsub(n, a, w);
+ }
+ xr = a[0] - a[1];
+ a[0] += a[1];
+ for (j = 2; j < n; j += 2) {
+ a[j - 1] = -a[j] - a[j + 1];
+ a[j] -= a[j + 1];
+ }
+ a[n - 1] = -xr;
+ }
+}
+
+/*
+ * Disabled: cosine transform of RDFT ("dfct" in the notes at the top of this
+ * file). Works in place on a[0...n] and uses t[0...n/2] as scratch space.
+ */
+static void dfct(int n, float *a, float *t, int *ip, float *w)
+{
+ int j, k, l, m, mh, nw, nc;
+ float xr, xi, yr, yi;
+
+ nw = ip[0];
+ if (n > (nw << 3)) { /* first table too small: rebuild */
+ nw = n >> 3;
+ makewt(nw, ip, w);
+ }
+ nc = ip[1];
+ if (n > (nc << 1)) { /* second table too small: rebuild */
+ nc = n >> 1;
+ makect(nc, ip, w + nw);
+ }
+ m = n >> 1;
+ yi = a[m];
+ xi = a[0] + a[n];
+ a[0] -= a[n];
+ t[0] = xi - yi;
+ t[m] = xi + yi;
+ if (n > 2) {
+ mh = m >> 1;
+ for (j = 1; j < mh; j++) { /* differences stay in a[], sums are combined into t[] */
+ k = m - j;
+ xr = a[j] - a[n - j];
+ xi = a[j] + a[n - j];
+ yr = a[k] - a[n - k];
+ yi = a[k] + a[n - k];
+ a[j] = xr;
+ a[k] = yr;
+ t[j] = xi - yi;
+ t[k] = xi + yi;
+ }
+ t[mh] = a[mh] + a[n - mh];
+ a[mh] -= a[n - mh];
+ dctsub(m, a, nc, w + nw);
+ if (m > 4) {
+ bitrv2(m, ip + 2, a);
+ cftfsub(m, a, w);
+ rftfsub(m, a, nc, w + nw);
+ } else if (m == 4) {
+ cftfsub(m, a, w);
+ }
+ a[n - 1] = a[0] - a[1];
+ a[1] = a[0] + a[1];
+ for (j = m - 2; j >= 2; j -= 2) {
+ a[2 * j + 1] = a[j] + a[j + 1];
+ a[2 * j - 1] = a[j] - a[j + 1];
+ }
+ l = 2;
+ m = mh;
+ while (m >= 2) { /* repeat on t[] with the length halved each pass */
+ dctsub(m, t, nc, w + nw);
+ if (m > 4) {
+ bitrv2(m, ip + 2, t);
+ cftfsub(m, t, w);
+ rftfsub(m, t, nc, w + nw);
+ } else if (m == 4) {
+ cftfsub(m, t, w);
+ }
+ a[n - l] = t[0] - t[1];
+ a[l] = t[0] + t[1];
+ k = 0;
+ for (j = 2; j < m; j += 2) {
+ k += l << 2;
+ a[k - l] = t[j] - t[j + 1];
+ a[k + l] = t[j] + t[j + 1];
+ }
+ l <<= 1;
+ mh = m >> 1;
+ for (j = 0; j < mh; j++) {
+ k = m - j;
+ t[j] = t[m + k] - t[m + j];
+ t[k] = t[m + k] + t[m + j];
+ }
+ t[mh] = t[m + mh];
+ m = mh;
+ }
+ a[l] = t[0];
+ a[n] = t[2] - t[1];
+ a[0] = t[2] + t[1];
+ } else { /* n <= 2: only three output values */
+ a[1] = a[0];
+ a[2] = t[0];
+ a[0] = t[1];
+ }
+}
+/* Disabled: sine transform of RDFT ("dfst" in the notes at the top of this file); in place on a[], with t[] as scratch. */
+static void dfst(int n, float *a, float *t, int *ip, float *w)
+{
+ int j, k, l, m, mh, nw, nc;
+ float xr, xi, yr, yi;
+
+ nw = ip[0];
+ if (n > (nw << 3)) { /* first table too small: rebuild */
+ nw = n >> 3;
+ makewt(nw, ip, w);
+ }
+ nc = ip[1];
+ if (n > (nc << 1)) { /* second table too small: rebuild */
+ nc = n >> 1;
+ makect(nc, ip, w + nw);
+ }
+ if (n > 2) {
+ m = n >> 1;
+ mh = m >> 1;
+ for (j = 1; j < mh; j++) { /* sums stay in a[], differences are combined into t[] */
+ k = m - j;
+ xr = a[j] + a[n - j];
+ xi = a[j] - a[n - j];
+ yr = a[k] + a[n - k];
+ yi = a[k] - a[n - k];
+ a[j] = xr;
+ a[k] = yr;
+ t[j] = xi + yi;
+ t[k] = xi - yi;
+ }
+ t[0] = a[mh] - a[n - mh];
+ a[mh] += a[n - mh];
+ a[0] = a[m];
+ dstsub(m, a, nc, w + nw);
+ if (m > 4) {
+ bitrv2(m, ip + 2, a);
+ cftfsub(m, a, w);
+ rftfsub(m, a, nc, w + nw);
+ } else if (m == 4) {
+ cftfsub(m, a, w);
+ }
+ a[n - 1] = a[1] - a[0];
+ a[1] = a[0] + a[1];
+ for (j = m - 2; j >= 2; j -= 2) {
+ a[2 * j + 1] = a[j] - a[j + 1];
+ a[2 * j - 1] = -a[j] - a[j + 1];
+ }
+ l = 2;
+ m = mh;
+ while (m >= 2) { /* repeat on t[] with the length halved each pass */
+ dstsub(m, t, nc, w + nw);
+ if (m > 4) {
+ bitrv2(m, ip + 2, t);
+ cftfsub(m, t, w);
+ rftfsub(m, t, nc, w + nw);
+ } else if (m == 4) {
+ cftfsub(m, t, w);
+ }
+ a[n - l] = t[1] - t[0];
+ a[l] = t[0] + t[1];
+ k = 0;
+ for (j = 2; j < m; j += 2) {
+ k += l << 2;
+ a[k - l] = -t[j] - t[j + 1];
+ a[k + l] = t[j] - t[j + 1];
+ }
+ l <<= 1;
+ mh = m >> 1;
+ for (j = 1; j < mh; j++) {
+ k = m - j;
+ t[j] = t[m + k] + t[m + j];
+ t[k] = t[m + k] - t[m + j];
+ }
+ t[0] = t[m + mh];
+ m = mh;
+ }
+ a[l] = t[0];
+ }
+ a[0] = 0; /* a[0] is only a work area for this transform (see the notes above) */
+}
+#endif // Not used.
+
+
+/* -------- initializing routines -------- */
+
+
+#include <math.h>
+
+/*
+ * Fills the first cos/sin table w[0...nw-1] and records its size in ip[0].
+ * ip[1], the size of the second table (see makect), is reset to 1.
+ * For nw <= 2 only the two size entries are written.
+ */
+static void makewt(size_t nw, size_t *ip, float *w)
+{
+    size_t k, half;
+    float step;
+
+    ip[0] = nw;
+    ip[1] = 1;
+    if (nw <= 2) {
+        return;
+    }
+
+    half = nw >> 1;
+    step = atanf(1.0f) / half;  /* atan(1) is pi/4 */
+    w[0] = 1;
+    w[1] = 0;
+    w[half] = (float)cos(step * half);
+    w[half + 1] = w[half];
+    if (half <= 2) {
+        return;
+    }
+
+    /* Each cos/sin pair is stored twice: in order at k, swapped at nw - k. */
+    for (k = 2; k < half; k += 2) {
+        const float c = (float)cos(step * k);
+        const float s = (float)sin(step * k);
+        w[k] = c;
+        w[k + 1] = s;
+        w[nw - k] = s;
+        w[nw - k + 1] = c;
+    }
+    /* The table is stored in the reordered layout produced by bitrv2. */
+    bitrv2(nw, ip + 2, w);
+}
+
+
+/*
+ * Fills the second table c[0...nc-1] and records its size in ip[1].
+ * c[0] holds cos(pi/4); every other entry is a cosine or sine value
+ * scaled by 0.5. For nc <= 1 only ip[1] is written.
+ */
+static void makect(size_t nc, size_t *ip, float *c)
+{
+    size_t k, half;
+    float step;
+
+    ip[1] = nc;
+    if (nc <= 1) {
+        return;
+    }
+
+    half = nc >> 1;
+    step = atanf(1.0f) / half;  /* atan(1) is pi/4 */
+    c[0] = (float)cos(step * half);
+    c[half] = 0.5f * c[0];
+    /* Cosines fill the lower half upwards, sines the upper half downwards. */
+    k = 1;
+    while (k < half) {
+        c[k] = 0.5f * (float)cos(step * k);
+        c[nc - k] = 0.5f * (float)sin(step * k);
+        k++;
+    }
+}
+
+
+/* -------- child routines -------- */
+
+/*
+ * Reorders a[0...n-1], treated as (re, im) float pairs, in place by swapping
+ * pairs of elements. ip[] is used as the work area in which the index table
+ * for the swaps is built (the notes at the top of this file call it the
+ * "work area for bit reversal").
+ */
+static void bitrv2(size_t n, size_t *ip, float *a)
+{
+ size_t j, j1, k, k1, l, m, m2;
+ float xr, xi, yr, yi;
+
+ ip[0] = 0;
+ l = n;
+ m = 1;
+ while ((m << 3) < l) { /* build the index table; its length m doubles on every pass */
+ l >>= 1;
+ for (j = 0; j < m; j++) {
+ ip[m + j] = ip[j] + l;
+ }
+ m <<= 1;
+ }
+ m2 = 2 * m;
+ if ((m << 3) == l) { /* this case swaps four element pairs per (j, k), plus one per k */
+ for (k = 0; k < m; k++) {
+ for (j = 0; j < k; j++) {
+ j1 = 2 * j + ip[k];
+ k1 = 2 * k + ip[j];
+ xr = a[j1]; /* swap the (re, im) pairs at j1 and k1 */
+ xi = a[j1 + 1];
+ yr = a[k1];
+ yi = a[k1 + 1];
+ a[j1] = yr;
+ a[j1 + 1] = yi;
+ a[k1] = xr;
+ a[k1 + 1] = xi;
+ j1 += m2;
+ k1 += 2 * m2;
+ xr = a[j1];
+ xi = a[j1 + 1];
+ yr = a[k1];
+ yi = a[k1 + 1];
+ a[j1] = yr;
+ a[j1 + 1] = yi;
+ a[k1] = xr;
+ a[k1 + 1] = xi;
+ j1 += m2;
+ k1 -= m2;
+ xr = a[j1];
+ xi = a[j1 + 1];
+ yr = a[k1];
+ yi = a[k1 + 1];
+ a[j1] = yr;
+ a[j1 + 1] = yi;
+ a[k1] = xr;
+ a[k1 + 1] = xi;
+ j1 += m2;
+ k1 += 2 * m2;
+ xr = a[j1];
+ xi = a[j1 + 1];
+ yr = a[k1];
+ yi = a[k1 + 1];
+ a[j1] = yr;
+ a[j1 + 1] = yi;
+ a[k1] = xr;
+ a[k1 + 1] = xi;
+ }
+ j1 = 2 * k + m2 + ip[k]; /* extra swap for the j == k case */
+ k1 = j1 + m2;
+ xr = a[j1];
+ xi = a[j1 + 1];
+ yr = a[k1];
+ yi = a[k1 + 1];
+ a[j1] = yr;
+ a[j1 + 1] = yi;
+ a[k1] = xr;
+ a[k1 + 1] = xi;
+ }
+ } else { /* this case swaps two element pairs per (j, k) */
+ for (k = 1; k < m; k++) {
+ for (j = 0; j < k; j++) {
+ j1 = 2 * j + ip[k];
+ k1 = 2 * k + ip[j];
+ xr = a[j1];
+ xi = a[j1 + 1];
+ yr = a[k1];
+ yi = a[k1 + 1];
+ a[j1] = yr;
+ a[j1 + 1] = yi;
+ a[k1] = xr;
+ a[k1 + 1] = xi;
+ j1 += m2;
+ k1 += m2;
+ xr = a[j1];
+ xi = a[j1 + 1];
+ yr = a[k1];
+ yi = a[k1 + 1];
+ a[j1] = yr;
+ a[j1 + 1] = yi;
+ a[k1] = xr;
+ a[k1 + 1] = xi;
+ }
+ }
+ }
+}
+/*
+ * Disabled: same reordering as bitrv2, but the second float of every
+ * (re, im) pair is also negated, including elements that are not swapped.
+ */
+#if 0 // Not used.
+static void bitrv2conj(int n, int *ip, float *a)
+{
+ int j, j1, k, k1, l, m, m2;
+ float xr, xi, yr, yi;
+
+ ip[0] = 0;
+ l = n;
+ m = 1;
+ while ((m << 3) < l) { /* build the index table; its length m doubles on every pass */
+ l >>= 1;
+ for (j = 0; j < m; j++) {
+ ip[m + j] = ip[j] + l;
+ }
+ m <<= 1;
+ }
+ m2 = 2 * m;
+ if ((m << 3) == l) {
+ for (k = 0; k < m; k++) {
+ for (j = 0; j < k; j++) {
+ j1 = 2 * j + ip[k];
+ k1 = 2 * k + ip[j];
+ xr = a[j1]; /* swap the pairs at j1 and k1, negating the imaginary parts */
+ xi = -a[j1 + 1];
+ yr = a[k1];
+ yi = -a[k1 + 1];
+ a[j1] = yr;
+ a[j1 + 1] = yi;
+ a[k1] = xr;
+ a[k1 + 1] = xi;
+ j1 += m2;
+ k1 += 2 * m2;
+ xr = a[j1];
+ xi = -a[j1 + 1];
+ yr = a[k1];
+ yi = -a[k1 + 1];
+ a[j1] = yr;
+ a[j1 + 1] = yi;
+ a[k1] = xr;
+ a[k1 + 1] = xi;
+ j1 += m2;
+ k1 -= m2;
+ xr = a[j1];
+ xi = -a[j1 + 1];
+ yr = a[k1];
+ yi = -a[k1 + 1];
+ a[j1] = yr;
+ a[j1 + 1] = yi;
+ a[k1] = xr;
+ a[k1 + 1] = xi;
+ j1 += m2;
+ k1 += 2 * m2;
+ xr = a[j1];
+ xi = -a[j1 + 1];
+ yr = a[k1];
+ yi = -a[k1 + 1];
+ a[j1] = yr;
+ a[j1 + 1] = yi;
+ a[k1] = xr;
+ a[k1 + 1] = xi;
+ }
+ k1 = 2 * k + ip[k];
+ a[k1 + 1] = -a[k1 + 1]; /* element stays in place: only negate its imaginary part */
+ j1 = k1 + m2;
+ k1 = j1 + m2;
+ xr = a[j1];
+ xi = -a[j1 + 1];
+ yr = a[k1];
+ yi = -a[k1 + 1];
+ a[j1] = yr;
+ a[j1 + 1] = yi;
+ a[k1] = xr;
+ a[k1 + 1] = xi;
+ k1 += m2;
+ a[k1 + 1] = -a[k1 + 1];
+ }
+ } else {
+ a[1] = -a[1];
+ a[m2 + 1] = -a[m2 + 1];
+ for (k = 1; k < m; k++) {
+ for (j = 0; j < k; j++) {
+ j1 = 2 * j + ip[k];
+ k1 = 2 * k + ip[j];
+ xr = a[j1];
+ xi = -a[j1 + 1];
+ yr = a[k1];
+ yi = -a[k1 + 1];
+ a[j1] = yr;
+ a[j1 + 1] = yi;
+ a[k1] = xr;
+ a[k1 + 1] = xi;
+ j1 += m2;
+ k1 += m2;
+ xr = a[j1];
+ xi = -a[j1 + 1];
+ yr = a[k1];
+ yi = -a[k1 + 1];
+ a[j1] = yr;
+ a[j1 + 1] = yi;
+ a[k1] = xr;
+ a[k1 + 1] = xi;
+ }
+ k1 = 2 * k + ip[k];
+ a[k1 + 1] = -a[k1 + 1];
+ a[k1 + m2 + 1] = -a[k1 + m2 + 1];
+ }
+ }
+}
+#endif
+/*
+ * Complex FFT stages used for the forward direction, in place on a[0...n-1]
+ * ((re, im) pairs) with the cos/sin table w. For n > 8 it runs cft1st and
+ * then cftmdl with l growing by a factor of 4; the last stage is done here
+ * and combines either four or two elements per step.
+ */
+static void cftfsub(size_t n, float *a, float *w)
+{
+ size_t j, j1, j2, j3, l;
+ float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
+
+ l = 2;
+ if (n > 8) {
+ cft1st(n, a, w);
+ l = 8;
+ while ((l << 2) < n) {
+ cftmdl(n, l, a, w);
+ l <<= 2;
+ }
+ }
+ if ((l << 2) == n) { /* final stage combines four elements spaced l apart */
+ for (j = 0; j < l; j += 2) {
+ j1 = j + l;
+ j2 = j1 + l;
+ j3 = j2 + l;
+ x0r = a[j] + a[j1];
+ x0i = a[j + 1] + a[j1 + 1];
+ x1r = a[j] - a[j1];
+ x1i = a[j + 1] - a[j1 + 1];
+ x2r = a[j2] + a[j3];
+ x2i = a[j2 + 1] + a[j3 + 1];
+ x3r = a[j2] - a[j3];
+ x3i = a[j2 + 1] - a[j3 + 1];
+ a[j] = x0r + x2r;
+ a[j + 1] = x0i + x2i;
+ a[j2] = x0r - x2r;
+ a[j2 + 1] = x0i - x2i;
+ a[j1] = x1r - x3i;
+ a[j1 + 1] = x1i + x3r;
+ a[j3] = x1r + x3i;
+ a[j3 + 1] = x1i - x3r;
+ }
+ } else { /* final stage combines two elements spaced l apart */
+ for (j = 0; j < l; j += 2) {
+ j1 = j + l;
+ x0r = a[j] - a[j1];
+ x0i = a[j + 1] - a[j1 + 1];
+ a[j] += a[j1];
+ a[j + 1] += a[j1 + 1];
+ a[j1] = x0r;
+ a[j1 + 1] = x0i;
+ }
+ }
+}
+
+/*
+ * Counterpart of cftfsub used for the inverse direction. The cft1st/cftmdl
+ * stages are identical; only the final stage differs, negating imaginary
+ * parts of the inputs while combining them.
+ */
+static void cftbsub(size_t n, float *a, float *w)
+{
+ size_t j, j1, j2, j3, l;
+ float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
+
+ l = 2;
+ if (n > 8) {
+ cft1st(n, a, w);
+ l = 8;
+ while ((l << 2) < n) {
+ cftmdl(n, l, a, w);
+ l <<= 2;
+ }
+ }
+ if ((l << 2) == n) { /* final stage combines four elements spaced l apart */
+ for (j = 0; j < l; j += 2) {
+ j1 = j + l;
+ j2 = j1 + l;
+ j3 = j2 + l;
+ x0r = a[j] + a[j1];
+ x0i = -a[j + 1] - a[j1 + 1];
+ x1r = a[j] - a[j1];
+ x1i = -a[j + 1] + a[j1 + 1];
+ x2r = a[j2] + a[j3];
+ x2i = a[j2 + 1] + a[j3 + 1];
+ x3r = a[j2] - a[j3];
+ x3i = a[j2 + 1] - a[j3 + 1];
+ a[j] = x0r + x2r;
+ a[j + 1] = x0i - x2i;
+ a[j2] = x0r - x2r;
+ a[j2 + 1] = x0i + x2i;
+ a[j1] = x1r - x3i;
+ a[j1 + 1] = x1i - x3r;
+ a[j3] = x1r + x3i;
+ a[j3 + 1] = x1i + x3r;
+ }
+ } else { /* final stage combines two elements spaced l apart */
+ for (j = 0; j < l; j += 2) {
+ j1 = j + l;
+ x0r = a[j] - a[j1];
+ x0i = -a[j + 1] + a[j1 + 1];
+ a[j] += a[j1];
+ a[j + 1] = -a[j + 1] - a[j1 + 1];
+ a[j1] = x0r;
+ a[j1 + 1] = x0i;
+ }
+ }
+}
+
+/*
+ * First FFT stage: processes a[0...n-1] in consecutive blocks of 16 floats
+ * (8 complex values), combining four neighbouring complex values at a time.
+ * The first block is handled separately because it needs only w[2]; later
+ * blocks read their twiddle factors from w[k1], w[k2] and w[k2 + 2].
+ * Callers in this file only invoke it with n > 8.
+ */
+static void cft1st(size_t n, float *a, float *w)
+{
+ size_t j, k1, k2;
+ float wk1r, wk1i, wk2r, wk2i, wk3r, wk3i;
+ float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
+
+ x0r = a[0] + a[2]; /* a[0..7]: no twiddle multiplication */
+ x0i = a[1] + a[3];
+ x1r = a[0] - a[2];
+ x1i = a[1] - a[3];
+ x2r = a[4] + a[6];
+ x2i = a[5] + a[7];
+ x3r = a[4] - a[6];
+ x3i = a[5] - a[7];
+ a[0] = x0r + x2r;
+ a[1] = x0i + x2i;
+ a[4] = x0r - x2r;
+ a[5] = x0i - x2i;
+ a[2] = x1r - x3i;
+ a[3] = x1i + x3r;
+ a[6] = x1r + x3i;
+ a[7] = x1i - x3r;
+ wk1r = w[2]; /* a[8..15]: only w[2] is needed */
+ x0r = a[8] + a[10];
+ x0i = a[9] + a[11];
+ x1r = a[8] - a[10];
+ x1i = a[9] - a[11];
+ x2r = a[12] + a[14];
+ x2i = a[13] + a[15];
+ x3r = a[12] - a[14];
+ x3i = a[13] - a[15];
+ a[8] = x0r + x2r;
+ a[9] = x0i + x2i;
+ a[12] = x2i - x0i;
+ a[13] = x0r - x2r;
+ x0r = x1r - x3i;
+ x0i = x1i + x3r;
+ a[10] = wk1r * (x0r - x0i);
+ a[11] = wk1r * (x0r + x0i);
+ x0r = x3i + x1r;
+ x0i = x3r - x1i;
+ a[14] = wk1r * (x0i - x0r);
+ a[15] = wk1r * (x0i + x0r);
+ k1 = 0;
+ for (j = 16; j < n; j += 16) { /* remaining blocks: k1 advances by 2 per block */
+ k1 += 2;
+ k2 = 2 * k1;
+ wk2r = w[k1];
+ wk2i = w[k1 + 1];
+ wk1r = w[k2];
+ wk1i = w[k2 + 1];
+ wk3r = wk1r - 2 * wk2i * wk1i; /* third twiddle is derived from the other two, not read from w */
+ wk3i = 2 * wk2i * wk1r - wk1i;
+ x0r = a[j] + a[j + 2];
+ x0i = a[j + 1] + a[j + 3];
+ x1r = a[j] - a[j + 2];
+ x1i = a[j + 1] - a[j + 3];
+ x2r = a[j + 4] + a[j + 6];
+ x2i = a[j + 5] + a[j + 7];
+ x3r = a[j + 4] - a[j + 6];
+ x3i = a[j + 5] - a[j + 7];
+ a[j] = x0r + x2r;
+ a[j + 1] = x0i + x2i;
+ x0r -= x2r;
+ x0i -= x2i;
+ a[j + 4] = wk2r * x0r - wk2i * x0i;
+ a[j + 5] = wk2r * x0i + wk2i * x0r;
+ x0r = x1r - x3i;
+ x0i = x1i + x3r;
+ a[j + 2] = wk1r * x0r - wk1i * x0i;
+ a[j + 3] = wk1r * x0i + wk1i * x0r;
+ x0r = x1r + x3i;
+ x0i = x1i - x3r;
+ a[j + 6] = wk3r * x0r - wk3i * x0i;
+ a[j + 7] = wk3r * x0i + wk3i * x0r;
+ wk1r = w[k2 + 2]; /* second half of the block, a[j+8..j+15] */
+ wk1i = w[k2 + 3];
+ wk3r = wk1r - 2 * wk2r * wk1i;
+ wk3i = 2 * wk2r * wk1r - wk1i;
+ x0r = a[j + 8] + a[j + 10];
+ x0i = a[j + 9] + a[j + 11];
+ x1r = a[j + 8] - a[j + 10];
+ x1i = a[j + 9] - a[j + 11];
+ x2r = a[j + 12] + a[j + 14];
+ x2i = a[j + 13] + a[j + 15];
+ x3r = a[j + 12] - a[j + 14];
+ x3i = a[j + 13] - a[j + 15];
+ a[j + 8] = x0r + x2r;
+ a[j + 9] = x0i + x2i;
+ x0r -= x2r;
+ x0i -= x2i;
+ a[j + 12] = -wk2i * x0r - wk2r * x0i;
+ a[j + 13] = -wk2i * x0i + wk2r * x0r;
+ x0r = x1r - x3i;
+ x0i = x1i + x3r;
+ a[j + 10] = wk1r * x0r - wk1i * x0i;
+ a[j + 11] = wk1r * x0i + wk1i * x0r;
+ x0r = x1r + x3i;
+ x0i = x1i - x3r;
+ a[j + 14] = wk3r * x0r - wk3i * x0i;
+ a[j + 15] = wk3r * x0i + wk3i * x0r;
+ }
+}
+
+/*
+ * Middle FFT stage: combines four complex values spaced l floats apart, over
+ * groups of m = 4*l floats. The first two groups are handled separately (no
+ * twiddle, then w[2] only); the remaining groups are processed in pairs with
+ * twiddle factors read from w[k1], w[k2] and w[k2 + 2].
+ */
+static void cftmdl(size_t n, size_t l, float *a, float *w)
+{
+ size_t j, j1, j2, j3, k, k1, k2, m, m2;
+ float wk1r, wk1i, wk2r, wk2i, wk3r, wk3i;
+ float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
+
+ m = l << 2;
+ for (j = 0; j < l; j += 2) { /* first group: no twiddle multiplication */
+ j1 = j + l;
+ j2 = j1 + l;
+ j3 = j2 + l;
+ x0r = a[j] + a[j1];
+ x0i = a[j + 1] + a[j1 + 1];
+ x1r = a[j] - a[j1];
+ x1i = a[j + 1] - a[j1 + 1];
+ x2r = a[j2] + a[j3];
+ x2i = a[j2 + 1] + a[j3 + 1];
+ x3r = a[j2] - a[j3];
+ x3i = a[j2 + 1] - a[j3 + 1];
+ a[j] = x0r + x2r;
+ a[j + 1] = x0i + x2i;
+ a[j2] = x0r - x2r;
+ a[j2 + 1] = x0i - x2i;
+ a[j1] = x1r - x3i;
+ a[j1 + 1] = x1i + x3r;
+ a[j3] = x1r + x3i;
+ a[j3 + 1] = x1i - x3r;
+ }
+ wk1r = w[2];
+ for (j = m; j < l + m; j += 2) { /* second group, starting at m: only w[2] is needed */
+ j1 = j + l;
+ j2 = j1 + l;
+ j3 = j2 + l;
+ x0r = a[j] + a[j1];
+ x0i = a[j + 1] + a[j1 + 1];
+ x1r = a[j] - a[j1];
+ x1i = a[j + 1] - a[j1 + 1];
+ x2r = a[j2] + a[j3];
+ x2i = a[j2 + 1] + a[j3 + 1];
+ x3r = a[j2] - a[j3];
+ x3i = a[j2 + 1] - a[j3 + 1];
+ a[j] = x0r + x2r;
+ a[j + 1] = x0i + x2i;
+ a[j2] = x2i - x0i;
+ a[j2 + 1] = x0r - x2r;
+ x0r = x1r - x3i;
+ x0i = x1i + x3r;
+ a[j1] = wk1r * (x0r - x0i);
+ a[j1 + 1] = wk1r * (x0r + x0i);
+ x0r = x3i + x1r;
+ x0i = x3r - x1i;
+ a[j3] = wk1r * (x0i - x0r);
+ a[j3 + 1] = wk1r * (x0i + x0r);
+ }
+ k1 = 0;
+ m2 = 2 * m;
+ for (k = m2; k < n; k += m2) { /* remaining groups, two per iteration (at k and at k + m) */
+ k1 += 2;
+ k2 = 2 * k1;
+ wk2r = w[k1];
+ wk2i = w[k1 + 1];
+ wk1r = w[k2];
+ wk1i = w[k2 + 1];
+ wk3r = wk1r - 2 * wk2i * wk1i; /* third twiddle is derived from the other two, not read from w */
+ wk3i = 2 * wk2i * wk1r - wk1i;
+ for (j = k; j < l + k; j += 2) {
+ j1 = j + l;
+ j2 = j1 + l;
+ j3 = j2 + l;
+ x0r = a[j] + a[j1];
+ x0i = a[j + 1] + a[j1 + 1];
+ x1r = a[j] - a[j1];
+ x1i = a[j + 1] - a[j1 + 1];
+ x2r = a[j2] + a[j3];
+ x2i = a[j2 + 1] + a[j3 + 1];
+ x3r = a[j2] - a[j3];
+ x3i = a[j2 + 1] - a[j3 + 1];
+ a[j] = x0r + x2r;
+ a[j + 1] = x0i + x2i;
+ x0r -= x2r;
+ x0i -= x2i;
+ a[j2] = wk2r * x0r - wk2i * x0i;
+ a[j2 + 1] = wk2r * x0i + wk2i * x0r;
+ x0r = x1r - x3i;
+ x0i = x1i + x3r;
+ a[j1] = wk1r * x0r - wk1i * x0i;
+ a[j1 + 1] = wk1r * x0i + wk1i * x0r;
+ x0r = x1r + x3i;
+ x0i = x1i - x3r;
+ a[j3] = wk3r * x0r - wk3i * x0i;
+ a[j3 + 1] = wk3r * x0i + wk3i * x0r;
+ }
+ wk1r = w[k2 + 2]; /* twiddles for the group at k + m */
+ wk1i = w[k2 + 3];
+ wk3r = wk1r - 2 * wk2r * wk1i;
+ wk3i = 2 * wk2r * wk1r - wk1i;
+ for (j = k + m; j < l + (k + m); j += 2) {
+ j1 = j + l;
+ j2 = j1 + l;
+ j3 = j2 + l;
+ x0r = a[j] + a[j1];
+ x0i = a[j + 1] + a[j1 + 1];
+ x1r = a[j] - a[j1];
+ x1i = a[j + 1] - a[j1 + 1];
+ x2r = a[j2] + a[j3];
+ x2i = a[j2 + 1] + a[j3 + 1];
+ x3r = a[j2] - a[j3];
+ x3i = a[j2 + 1] - a[j3 + 1];
+ a[j] = x0r + x2r;
+ a[j + 1] = x0i + x2i;
+ x0r -= x2r;
+ x0i -= x2i;
+ a[j2] = -wk2i * x0r - wk2r * x0i;
+ a[j2 + 1] = -wk2i * x0i + wk2r * x0r;
+ x0r = x1r - x3i;
+ x0i = x1i + x3r;
+ a[j1] = wk1r * x0r - wk1i * x0i;
+ a[j1 + 1] = wk1r * x0i + wk1i * x0r;
+ x0r = x1r + x3i;
+ x0i = x1i - x3r;
+ a[j3] = wk3r * x0r - wk3i * x0i;
+ a[j3 + 1] = wk3r * x0i + wk3i * x0r;
+ }
+ }
+}
+
+
+/*
+ * Forward real-FFT correction step: walks a[] from both ends towards the
+ * middle and adjusts each mirrored pair (a[lo], a[lo+1]) / (a[hi], a[hi+1])
+ * in place, using weights taken from the table c[0...nc-1].
+ * a[0], a[1] and the middle pair are left untouched.
+ */
+static void rftfsub(size_t n, float *a, size_t nc, float *c)
+{
+    const size_t half = n >> 1;
+    const size_t stride = 2 * nc / half;  /* table step per processed pair */
+    size_t lo, hi, tab;
+
+    tab = 0;
+    for (lo = 2, hi = n - 2; lo < half; lo += 2, hi -= 2) {
+        float wr, wi, dr, si, tr, ti;
+
+        tab += stride;
+        wr = 0.5f - c[nc - tab];
+        wi = c[tab];
+        dr = a[lo] - a[hi];
+        si = a[lo + 1] + a[hi + 1];
+        tr = wr * dr - wi * si;
+        ti = wr * si + wi * dr;
+        a[lo] -= tr;
+        a[lo + 1] -= ti;
+        a[hi] += tr;
+        a[hi + 1] -= ti;
+    }
+}
+
+
+/*
+ * Inverse real-FFT correction step, the counterpart of rftfsub: negates
+ * a[1], adjusts each mirrored pair (a[lo], a[lo+1]) / (a[hi], a[hi+1]) in
+ * place using weights from c[0...nc-1], and finally negates the imaginary
+ * part of the middle element, a[n/2 + 1].
+ */
+static void rftbsub(size_t n, float *a, size_t nc, float *c)
+{
+    size_t lo, hi, half, stride, tab;
+
+    a[1] = -a[1];
+    half = n >> 1;
+    stride = 2 * nc / half;  /* table step per processed pair */
+    tab = 0;
+    for (lo = 2, hi = n - 2; lo < half; lo += 2, hi -= 2) {
+        float wr, wi, dr, si, tr, ti;
+
+        tab += stride;
+        wr = 0.5f - c[nc - tab];
+        wi = c[tab];
+        dr = a[lo] - a[hi];
+        si = a[lo + 1] + a[hi + 1];
+        tr = wr * dr + wi * si;
+        ti = wr * si - wi * dr;
+        a[lo] -= tr;
+        a[lo + 1] = ti - a[lo + 1];
+        a[hi] += tr;
+        a[hi + 1] = ti - a[hi + 1];
+    }
+    a[half + 1] = -a[half + 1];
+}
+/* Disabled: in-place step used by ddct/dfct; combines a[j] with a[n - j] using weights from c[]. */
+#if 0 // Not used.
+static void dctsub(int n, float *a, int nc, float *c)
+{
+ int j, k, kk, ks, m;
+ float wkr, wki, xr;
+
+ m = n >> 1;
+ ks = nc / n; /* table step per processed pair */
+ kk = 0;
+ for (j = 1; j < m; j++) {
+ k = n - j;
+ kk += ks;
+ wkr = c[kk] - c[nc - kk];
+ wki = c[kk] + c[nc - kk];
+ xr = wki * a[j] - wkr * a[k]; /* held in xr until a[j] has been updated from the old a[k] */
+ a[j] = wkr * a[j] + wki * a[k];
+ a[k] = xr;
+ }
+ a[m] *= c[0]; /* the middle element is only scaled */
+}
+
+/* Disabled: in-place step used by ddst/dfst; same as dctsub with the roles of a[j] and a[k] exchanged. */
+static void dstsub(int n, float *a, int nc, float *c)
+{
+ int j, k, kk, ks, m;
+ float wkr, wki, xr;
+
+ m = n >> 1;
+ ks = nc / n; /* table step per processed pair */
+ kk = 0;
+ for (j = 1; j < m; j++) {
+ k = n - j;
+ kk += ks;
+ wkr = c[kk] - c[nc - kk];
+ wki = c[kk] + c[nc - kk];
+ xr = wki * a[k] - wkr * a[j]; /* held in xr until a[k] has been updated from the old a[j] */
+ a[k] = wkr * a[k] + wki * a[j];
+ a[j] = xr;
+ }
+ a[m] *= c[0]; /* the middle element is only scaled */
+}
+#endif // Not used.
diff --git a/third_party/webrtc/src/webrtc/common_audio/fft4g.h b/third_party/webrtc/src/webrtc/common_audio/fft4g.h
new file mode 100644
index 00000000..6dd792f6
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/common_audio/fft4g.h
@@ -0,0 +1,25 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_COMMON_AUDIO_FFT4G_H_
+#define WEBRTC_COMMON_AUDIO_FFT4G_H_
+
+#include <stddef.h>  // size_t
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+// In-place real DFT (isgn >= 0) or its unscaled inverse (isgn < 0) of
+// a[0...n-1]; ip and w are caller-provided work areas.
+// Refer to fft4g.c for documentation.
+void WebRtc_rdft(size_t n, int isgn, float *a, size_t *ip, float *w);
+
+#if defined(__cplusplus)
+}
+#endif
+
+#endif // WEBRTC_COMMON_AUDIO_FFT4G_H_
diff --git a/third_party/webrtc/src/webrtc/common_audio/ring_buffer.c b/third_party/webrtc/src/webrtc/common_audio/ring_buffer.c
new file mode 100644
index 00000000..60fb5dff
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/common_audio/ring_buffer.c
@@ -0,0 +1,247 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+// A ring buffer to hold arbitrary data. Provides no thread safety. Unless
+// otherwise specified, functions return 0 on success and -1 on error.
+
+#include "webrtc/common_audio/ring_buffer.h"
+
+#include <stddef.h> // size_t
+#include <stdlib.h>
+#include <string.h>
+
+// Relative lap state of the read and write positions.
+enum Wrap {
+ // Readable elements are write_pos - read_pos.
+ SAME_WRAP,
+ // Readable elements are element_count - read_pos + write_pos.
+ DIFF_WRAP
+};
+
+// Control block of the ring buffer. Positions and counts are expressed in
+// elements; they are multiplied by element_size to obtain byte offsets.
+struct RingBuffer {
+ // Index of the next element to read.
+ size_t read_pos;
+ // Index at which the next written element is stored.
+ size_t write_pos;
+ // Capacity of the buffer, in elements.
+ size_t element_count;
+ // Size of one element, in bytes.
+ size_t element_size;
+ // See enum Wrap.
+ enum Wrap rw_wrap;
+ // Storage of element_count * element_size bytes.
+ char* data;
+};
+
+// Locates the stored data for the next read without copying it and without
+// moving the read position.
+//
+// At most |element_count| elements are described; fewer if the buffer holds
+// less. The data starts at |*data_ptr_1| and spans |*data_ptr_bytes_1| bytes.
+// If it continues past the end of the storage, the remainder starts at
+// |*data_ptr_2| (the beginning of the storage) and spans |*data_ptr_bytes_2|
+// bytes; otherwise |*data_ptr_2| is NULL and |*data_ptr_bytes_2| is zero.
+//
+// Returns the number of elements covered by the two regions.
+static size_t GetBufferReadRegions(RingBuffer* buf,
+                                   size_t element_count,
+                                   void** data_ptr_1,
+                                   size_t* data_ptr_bytes_1,
+                                   void** data_ptr_2,
+                                   size_t* data_ptr_bytes_2) {
+  const size_t available = WebRtc_available_read(buf);
+  const size_t to_read =
+      (element_count > available) ? available : element_count;
+  // Elements between the read position and the end of the storage.
+  const size_t until_end = buf->element_count - buf->read_pos;
+  const size_t first_count = (to_read > until_end) ? until_end : to_read;
+  const size_t second_count = to_read - first_count;
+
+  *data_ptr_1 = buf->data + buf->read_pos * buf->element_size;
+  *data_ptr_bytes_1 = first_count * buf->element_size;
+
+  // A second region exists only when the read continues at the start of the
+  // storage.
+  if (second_count > 0) {
+    *data_ptr_2 = buf->data;
+  } else {
+    *data_ptr_2 = NULL;
+  }
+  *data_ptr_bytes_2 = second_count * buf->element_size;
+
+  return to_read;
+}
+
+// Allocates a ring buffer with room for |element_count| elements of
+// |element_size| bytes each and puts it in the empty, zero-filled state.
+//
+// Returns NULL when either argument is zero, when the total size in bytes
+// cannot be represented in a size_t, or when an allocation fails. The
+// returned buffer is released with WebRtc_FreeBuffer().
+RingBuffer* WebRtc_CreateBuffer(size_t element_count, size_t element_size) {
+  RingBuffer* self = NULL;
+
+  if (element_count == 0 || element_size == 0) {
+    return NULL;
+  }
+  // Guard the multiplication below: a wrapped product would make malloc()
+  // hand back less storage than the position arithmetic assumes.
+  if (element_count > ((size_t) -1) / element_size) {
+    return NULL;
+  }
+
+  self = malloc(sizeof(*self));
+  if (!self) {
+    return NULL;
+  }
+
+  self->data = malloc(element_count * element_size);
+  if (!self->data) {
+    free(self);
+    return NULL;
+  }
+
+  self->element_count = element_count;
+  self->element_size = element_size;
+  WebRtc_InitBuffer(self);
+
+  return self;
+}
+
+// Puts |self| back into the empty state: the storage is zero-filled and both
+// positions return to the start. |self| is dereferenced without a NULL check.
+void WebRtc_InitBuffer(RingBuffer* self) {
+  const size_t total_bytes = self->element_count * self->element_size;
+
+  memset(self->data, 0, total_bytes);
+
+  self->rw_wrap = SAME_WRAP;
+  self->write_pos = 0;
+  self->read_pos = 0;
+}
+
+// Releases the element storage and the control block behind |handle|.
+// Passing NULL is allowed and does nothing.
+void WebRtc_FreeBuffer(void* handle) {
+  RingBuffer* const buffer = (RingBuffer*)handle;
+
+  if (buffer != NULL) {
+    free(buffer->data);
+    free(buffer);
+  }
+}
+
+// Reads up to |element_count| elements and advances the read position by the
+// number of elements actually read, which is also the return value.
+//
+// If |data_ptr| is NULL the elements are always copied into |data|. Otherwise
+// |*data_ptr| receives the location of the elements: a pointer straight into
+// the buffer storage when they are contiguous there (no copy is made), or
+// |data| when they wrap around the end and had to be copied.
+//
+// Returns 0 without touching anything when |self| or |data| is NULL.
+size_t WebRtc_ReadBuffer(RingBuffer* self,
+                         void** data_ptr,
+                         void* data,
+                         size_t element_count) {
+  void* first = NULL;
+  void* second = NULL;
+  size_t first_bytes = 0;
+  size_t second_bytes = 0;
+  size_t elements_read = 0;
+  int wrapped = 0;
+
+  if (self == NULL || data == NULL) {
+    return 0;
+  }
+
+  elements_read = GetBufferReadRegions(self,
+                                       element_count,
+                                       &first,
+                                       &first_bytes,
+                                       &second,
+                                       &second_bytes);
+  wrapped = (second_bytes > 0);
+
+  if (wrapped || data_ptr == NULL) {
+    // Either the data is split in two pieces or the caller asked for a copy.
+    memcpy(data, first, first_bytes);
+    if (wrapped) {
+      memcpy(((char*) data) + first_bytes, second, second_bytes);
+    }
+    first = data;
+  }
+
+  if (data_ptr != NULL) {
+    // |first| is |data| after a copy, otherwise it points into the storage.
+    *data_ptr = first;
+  }
+
+  WebRtc_MoveReadPtr(self, (int) elements_read);
+
+  return elements_read;
+}
+
+// Appends up to |element_count| elements from |data|, limited by the free
+// space, and returns how many were stored. Elements that do not fit are
+// dropped. Returns 0 when |self| or |data| is NULL.
+size_t WebRtc_WriteBuffer(RingBuffer* self,
+                          const void* data,
+                          size_t element_count) {
+  const char* src = (const char*) data;
+  size_t room = 0;
+  size_t accepted = 0;
+  size_t remaining = 0;
+  size_t tail_room = 0;
+
+  if (self == NULL || data == NULL) {
+    return 0;
+  }
+
+  room = WebRtc_available_write(self);
+  accepted = (element_count > room) ? room : element_count;
+  remaining = accepted;
+  // Elements that fit between the write position and the end of the storage.
+  tail_room = self->element_count - self->write_pos;
+
+  if (accepted > tail_room) {
+    // Fill the tail of the storage first, then continue from the start.
+    const size_t tail_bytes = tail_room * self->element_size;
+
+    memcpy(self->data + self->write_pos * self->element_size, src, tail_bytes);
+    src += tail_bytes;
+    remaining -= tail_room;
+    self->write_pos = 0;
+    self->rw_wrap = DIFF_WRAP;
+  }
+
+  memcpy(self->data + self->write_pos * self->element_size,
+         src,
+         remaining * self->element_size);
+  self->write_pos += remaining;
+
+  return accepted;
+}
+
+// Moves the read position by |element_count| elements. A positive value
+// discards readable elements; a negative value steps back and makes
+// previously stored elements readable again. The request is clamped to the
+// number of readable elements (forward) and to the free space (backward).
+//
+// Returns the number of elements actually moved, or 0 when |self| is NULL.
+int WebRtc_MoveReadPtr(RingBuffer* self, int element_count) {
+  // Signed arithmetic is used throughout because the move may be negative.
+  int capacity = 0;
+  int max_backward = 0;
+  int max_forward = 0;
+  int new_pos = 0;
+
+  if (self == NULL) {
+    return 0;
+  }
+
+  capacity = (int) self->element_count;
+  max_backward = (int) WebRtc_available_write(self);
+  max_forward = (int) WebRtc_available_read(self);
+
+  if (element_count > max_forward) {
+    element_count = max_forward;
+  }
+  if (element_count < -max_backward) {
+    element_count = -max_backward;
+  }
+
+  new_pos = (int) self->read_pos + element_count;
+  if (new_pos > capacity) {
+    // Ran past the end of the storage: continue from the start. A position
+    // exactly equal to the capacity is deliberately left as is.
+    new_pos -= capacity;
+    self->rw_wrap = SAME_WRAP;
+  } else if (new_pos < 0) {
+    // Stepped back before the start of the storage: continue from the end.
+    new_pos += capacity;
+    self->rw_wrap = DIFF_WRAP;
+  }
+
+  self->read_pos = (size_t) new_pos;
+
+  return element_count;
+}
+
+// Number of elements currently stored and not yet read; 0 for a NULL buffer.
+size_t WebRtc_available_read(const RingBuffer* self) {
+  size_t readable = 0;
+
+  if (self != NULL) {
+    readable = self->write_pos - self->read_pos;
+    if (self->rw_wrap != SAME_WRAP) {
+      // The difference above wrapped around (unsigned arithmetic); adding the
+      // capacity yields element_count - read_pos + write_pos.
+      readable += self->element_count;
+    }
+  }
+
+  return readable;
+}
+
+// Number of elements that can still be written, i.e. the capacity minus the
+// readable elements; 0 for a NULL buffer.
+size_t WebRtc_available_write(const RingBuffer* self) {
+  return (self != NULL)
+             ? self->element_count - WebRtc_available_read(self)
+             : 0;
+}
diff --git a/third_party/webrtc/src/webrtc/common_audio/ring_buffer.h b/third_party/webrtc/src/webrtc/common_audio/ring_buffer.h
new file mode 100644
index 00000000..4125c48d
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/common_audio/ring_buffer.h
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+// A ring buffer to hold arbitrary data. Provides no thread safety. Unless
+// otherwise specified, functions return 0 on success and -1 on error.
+
+#ifndef WEBRTC_COMMON_AUDIO_RING_BUFFER_H_
+#define WEBRTC_COMMON_AUDIO_RING_BUFFER_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stddef.h> // size_t
+
+// Opaque handle; the structure itself is defined in ring_buffer.c.
+typedef struct RingBuffer RingBuffer;
+
+// Creates and initializes the buffer. Returns NULL on failure.
+RingBuffer* WebRtc_CreateBuffer(size_t element_count, size_t element_size);
+// Resets the read and write positions and zero-fills the storage.
+void WebRtc_InitBuffer(RingBuffer* handle);
+// Frees the buffer and its storage. A NULL |handle| is ignored.
+void WebRtc_FreeBuffer(void* handle);
+
+// Reads data from the buffer. The |data_ptr| will point to the address where
+// it is located. If all |element_count| data are feasible to read without
+// buffer wrap around |data_ptr| will point to the location in the buffer.
+// Otherwise, the data will be copied to |data| (memory allocation done by the
+// user) and |data_ptr| points to the address of |data|. |data_ptr| is only
+// guaranteed to be valid until the next call to WebRtc_WriteBuffer().
+//
+// To force a copying to |data|, pass a NULL |data_ptr|.
+//
+// Returns number of elements read. Returns 0 if |handle| or |data| is NULL.
+size_t WebRtc_ReadBuffer(RingBuffer* handle,
+ void** data_ptr,
+ void* data,
+ size_t element_count);
+
+// Writes |data| to buffer and returns the number of elements written. At most
+// WebRtc_available_write() elements are written. Returns 0 if |handle| or
+// |data| is NULL.
+size_t WebRtc_WriteBuffer(RingBuffer* handle, const void* data,
+ size_t element_count);
+
+// Moves the buffer read position and returns the number of elements moved.
+// Positive |element_count| moves the read position towards the write position,
+// that is, flushing the buffer. Negative |element_count| moves the read
+// position away from the write position, that is, stuffing the buffer.
+// Returns number of elements moved.
+int WebRtc_MoveReadPtr(RingBuffer* handle, int element_count);
+
+// Returns number of available elements to read. Returns 0 for a NULL |handle|.
+size_t WebRtc_available_read(const RingBuffer* handle);
+
+// Returns number of available elements for write. Returns 0 for a NULL
+// |handle|.
+size_t WebRtc_available_write(const RingBuffer* handle);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // WEBRTC_COMMON_AUDIO_RING_BUFFER_H_
diff --git a/third_party/webrtc/src/webrtc/common_audio/signal_processing/auto_corr_to_refl_coef.c b/third_party/webrtc/src/webrtc/common_audio/signal_processing/auto_corr_to_refl_coef.c
new file mode 100644
index 00000000..f99dd62b
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/auto_corr_to_refl_coef.c
@@ -0,0 +1,103 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+/*
+ * This file contains the function WebRtcSpl_AutoCorrToReflCoef().
+ * The description header can be found in signal_processing_library.h
+ *
+ */
+
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+
+// Converts the autocorrelation values R[0..use_order] into |use_order|
+// reflection coefficients K[0..use_order - 1] using a Schur recursion in
+// 16-bit fixed-point arithmetic.
+//
+// The input is first normalized with the shift that WebRtcSpl_NormW32()
+// reports for R[0] and the upper 16 bits of each value are kept. Every
+// coefficient is produced by a 15-step restoring division, so it carries 15
+// fractional bits. If a coefficient would exceed unit magnitude
+// (|P[1]| > P[0]) the remaining coefficients are set to zero and the function
+// returns.
+//
+// NOTE(review): ACF and P receive use_order + 1 entries and W is written at
+// indexes 1..use_order, while all three hold WEBRTC_SPL_MAX_LPC_ORDER
+// elements; this looks like it requires use_order < WEBRTC_SPL_MAX_LPC_ORDER
+// - confirm against the callers.
+void WebRtcSpl_AutoCorrToReflCoef(const int32_t *R, int use_order, int16_t *K)
+{
+ int i, n;
+ int16_t tmp;
+ const int32_t *rptr;
+ int32_t L_num, L_den;
+ int16_t *acfptr, *pptr, *wptr, *p1ptr, *w1ptr, ACF[WEBRTC_SPL_MAX_LPC_ORDER],
+ P[WEBRTC_SPL_MAX_LPC_ORDER], W[WEBRTC_SPL_MAX_LPC_ORDER];
+
+ // Initialize loop and pointers.
+ acfptr = ACF;
+ rptr = R;
+ pptr = P;
+ p1ptr = &P[1];
+ w1ptr = &W[1];
+ wptr = w1ptr;
+
+ // First loop; n=0. Determine shifting.
+ tmp = WebRtcSpl_NormW32(*R);
+ *acfptr = (int16_t)((*rptr++ << tmp) >> 16);
+ *pptr++ = *acfptr++;
+
+ // Initialize ACF, P and W.
+ for (i = 1; i <= use_order; i++)
+ {
+ *acfptr = (int16_t)((*rptr++ << tmp) >> 16);
+ *wptr++ = *acfptr;
+ *pptr++ = *acfptr++;
+ }
+
+ // Compute reflection coefficients.
+ for (n = 1; n <= use_order; n++, K++)
+ {
+ tmp = WEBRTC_SPL_ABS_W16(*p1ptr);
+ if (*P < tmp)
+ {
+ // Zero this coefficient and every remaining one, then stop.
+ for (i = n; i <= use_order; i++)
+ *K++ = 0;
+
+ return;
+ }
+
+ // Division: WebRtcSpl_div(tmp, *P)
+ *K = 0;
+ if (tmp != 0)
+ {
+ L_num = tmp;
+ L_den = *P;
+ // Bit-by-bit division producing 15 quotient bits.
+ i = 15;
+ while (i--)
+ {
+ (*K) <<= 1;
+ L_num <<= 1;
+ if (L_num >= L_den)
+ {
+ L_num -= L_den;
+ (*K)++;
+ }
+ }
+ // The coefficient takes the sign opposite to P[1].
+ if (*p1ptr > 0)
+ *K = -*K;
+ }
+
+ // Last iteration; don't do Schur recursion.
+ if (n == use_order)
+ return;
+
+ // Schur recursion.
+ pptr = P;
+ wptr = w1ptr;
+ // Products are rounded (+ 16384) before the shift by 15 bits.
+ tmp = (int16_t)(((int32_t)*p1ptr * (int32_t)*K + 16384) >> 15);
+ *pptr = WebRtcSpl_AddSatW16(*pptr, tmp);
+ pptr++;
+ for (i = 1; i <= use_order - n; i++)
+ {
+ tmp = (int16_t)(((int32_t)*wptr * (int32_t)*K + 16384) >> 15);
+ // P is shifted down by one position while it is updated.
+ *pptr = WebRtcSpl_AddSatW16(*(pptr + 1), tmp);
+ pptr++;
+ tmp = (int16_t)(((int32_t)*pptr * (int32_t)*K + 16384) >> 15);
+ *wptr = WebRtcSpl_AddSatW16(*wptr, tmp);
+ wptr++;
+ }
+ }
+}
diff --git a/third_party/webrtc/src/webrtc/common_audio/signal_processing/auto_correlation.c b/third_party/webrtc/src/webrtc/common_audio/signal_processing/auto_correlation.c
new file mode 100644
index 00000000..fda4fffe
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/auto_correlation.c
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+
+#include <assert.h>
+
+// Computes the autocorrelation of |in_vector| for lags 0..|order| and stores
+// the |order| + 1 sums in |result|. Every product is shifted right by a common
+// amount, chosen from the largest absolute sample value and the vector
+// length, before it is accumulated; that amount is reported through |scale|.
+//
+// |order| must not exceed |in_vector_length| (checked with assert()).
+// Returns the number of values written, |order| + 1.
+size_t WebRtcSpl_AutoCorrelation(const int16_t* in_vector,
+                                 size_t in_vector_length,
+                                 size_t order,
+                                 int32_t* result,
+                                 int* scale) {
+  int shift = 0;
+  int16_t peak = 0;
+  size_t lag = 0;
+
+  assert(order <= in_vector_length);
+
+  // The shift keeps (in_vector_length * peak * peak) within 32 bits.
+  peak = WebRtcSpl_MaxAbsValueW16(in_vector, in_vector_length);
+  if (peak != 0) {
+    // Bits needed to count the terms of one sum.
+    const int length_bits = WebRtcSpl_GetSizeInBits((uint32_t)in_vector_length);
+    // Spare leading bits of the largest possible product.
+    const int headroom = WebRtcSpl_NormW32(WEBRTC_SPL_MUL(peak, peak));
+
+    if (headroom <= length_bits) {
+      shift = length_bits - headroom;
+    }
+  }
+
+  for (lag = 0; lag < order + 1; lag++) {
+    int32_t acc = 0;
+    size_t k = 0;
+
+    /* Main part, four products per pass. */
+    while (lag + k + 3 < in_vector_length) {
+      acc += (in_vector[k + 0] * in_vector[lag + k + 0]) >> shift;
+      acc += (in_vector[k + 1] * in_vector[lag + k + 1]) >> shift;
+      acc += (in_vector[k + 2] * in_vector[lag + k + 2]) >> shift;
+      acc += (in_vector[k + 3] * in_vector[lag + k + 3]) >> shift;
+      k += 4;
+    }
+    /* Leftover products that did not fill a group of four. */
+    while (k < in_vector_length - lag) {
+      acc += (in_vector[k] * in_vector[lag + k]) >> shift;
+      k++;
+    }
+    result[lag] = acc;
+  }
+
+  *scale = shift;
+  return order + 1;
+}
diff --git a/third_party/webrtc/src/webrtc/common_audio/signal_processing/complex_bit_reverse.c b/third_party/webrtc/src/webrtc/common_audio/signal_processing/complex_bit_reverse.c
new file mode 100644
index 00000000..c8bd2dc4
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/complex_bit_reverse.c
@@ -0,0 +1,108 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+
+/* Tables for data buffer indexes that are bit reversed and thus need to be
+ * swapped. Note that, index_7[{0, 2, 4, ...}] are for the left side of the swap
+ * operations, while index_7[{1, 3, 5, ...}] are for the right side of the
+ * operation. Same for index_8.
+ *
+ * Each index counts complex elements (one real/imaginary int16_t pair), not
+ * individual int16_t values. Indexes whose bit reversal equals the index
+ * itself need no swap and are therefore not listed.
+ */
+
+/* Indexes for the case of stages == 7 (112 entries, i.e. 56 swaps). */
+static const int16_t index_7[112] = {
+ 1, 64, 2, 32, 3, 96, 4, 16, 5, 80, 6, 48, 7, 112, 9, 72, 10, 40, 11, 104,
+ 12, 24, 13, 88, 14, 56, 15, 120, 17, 68, 18, 36, 19, 100, 21, 84, 22, 52,
+ 23, 116, 25, 76, 26, 44, 27, 108, 29, 92, 30, 60, 31, 124, 33, 66, 35, 98,
+ 37, 82, 38, 50, 39, 114, 41, 74, 43, 106, 45, 90, 46, 58, 47, 122, 49, 70,
+ 51, 102, 53, 86, 55, 118, 57, 78, 59, 110, 61, 94, 63, 126, 67, 97, 69,
+ 81, 71, 113, 75, 105, 77, 89, 79, 121, 83, 101, 87, 117, 91, 109, 95, 125,
+ 103, 115, 111, 123
+};
+
+/* Indexes for the case of stages == 8 (240 entries, i.e. 120 swaps). */
+static const int16_t index_8[240] = {
+ 1, 128, 2, 64, 3, 192, 4, 32, 5, 160, 6, 96, 7, 224, 8, 16, 9, 144, 10, 80,
+ 11, 208, 12, 48, 13, 176, 14, 112, 15, 240, 17, 136, 18, 72, 19, 200, 20,
+ 40, 21, 168, 22, 104, 23, 232, 25, 152, 26, 88, 27, 216, 28, 56, 29, 184,
+ 30, 120, 31, 248, 33, 132, 34, 68, 35, 196, 37, 164, 38, 100, 39, 228, 41,
+ 148, 42, 84, 43, 212, 44, 52, 45, 180, 46, 116, 47, 244, 49, 140, 50, 76,
+ 51, 204, 53, 172, 54, 108, 55, 236, 57, 156, 58, 92, 59, 220, 61, 188, 62,
+ 124, 63, 252, 65, 130, 67, 194, 69, 162, 70, 98, 71, 226, 73, 146, 74, 82,
+ 75, 210, 77, 178, 78, 114, 79, 242, 81, 138, 83, 202, 85, 170, 86, 106, 87,
+ 234, 89, 154, 91, 218, 93, 186, 94, 122, 95, 250, 97, 134, 99, 198, 101,
+ 166, 103, 230, 105, 150, 107, 214, 109, 182, 110, 118, 111, 246, 113, 142,
+ 115, 206, 117, 174, 119, 238, 121, 158, 123, 222, 125, 190, 127, 254, 131,
+ 193, 133, 161, 135, 225, 137, 145, 139, 209, 141, 177, 143, 241, 147, 201,
+ 149, 169, 151, 233, 155, 217, 157, 185, 159, 249, 163, 197, 167, 229, 171,
+ 213, 173, 181, 175, 245, 179, 205, 183, 237, 187, 221, 191, 253, 199, 227,
+ 203, 211, 207, 243, 215, 235, 223, 251, 239, 247
+};
+
+/* Exchanges the complex elements at positions |first| and |second| of
+ * |complex_data|. A complex element is two consecutive int16_t values (real
+ * part, then imaginary part).
+ *
+ * The values are moved as int16_t on purpose: reading the buffer through an
+ * int32_t pointer would violate C's aliasing rules and would assume that the
+ * caller's int16_t buffer is aligned for 32-bit accesses.
+ */
+static void SwapComplexElements(int16_t* complex_data, int first, int second) {
+  int16_t* const a = &complex_data[2 * first];
+  int16_t* const b = &complex_data[2 * second];
+  const int16_t real = a[0];
+  const int16_t imag = a[1];
+
+  a[0] = b[0];
+  a[1] = b[1];
+  b[0] = real;
+  b[1] = imag;
+}
+
+/* Reorders, in place, the 1 << |stages| complex elements of |complex_data|
+ * into bit-reversed index order. |complex_data| holds interleaved int16_t
+ * real/imaginary pairs.
+ *
+ * |stages| must be non-negative and small enough for 1 << |stages| to be
+ * representable in an int; this is not checked.
+ */
+void WebRtcSpl_ComplexBitReverse(int16_t* __restrict complex_data, int stages) {
+  /* For any specific value of stages, we know exactly the indexes that are
+   * bit reversed. Currently (Feb. 2012) in WebRTC the only possible values of
+   * stages are 7 and 8, so we use tables to save unnecessary iterations and
+   * calculations for these two cases.
+   */
+  if (stages == 7 || stages == 8) {
+    const int16_t* index = (stages == 8) ? index_8 : index_7;
+    const int length = (stages == 8) ? 240 : 112;
+    int m = 0;
+
+    /* Decimation in time. Swap the elements with bit-reversed indexes; the
+     * table lists them as consecutive (left, right) pairs.
+     */
+    for (m = 0; m < length; m += 2) {
+      SwapComplexElements(complex_data, index[m], index[m + 1]);
+    }
+  } else {
+    int m = 0, mr = 0, l = 0;
+    int n = 1 << stages;
+    int nn = n - 1;
+
+    /* Decimation in time - re-order data */
+    for (m = 1; m <= nn; ++m) {
+      /* Advance mr to the bit reversal of m. */
+      l = n;
+      do {
+        l >>= 1;
+      } while (l > nn - mr);
+      mr = (mr & (l - 1)) + l;
+
+      /* Each pair is swapped once, when the smaller index is visited. */
+      if (mr <= m) {
+        continue;
+      }
+
+      SwapComplexElements(complex_data, m, mr);
+    }
+  }
+}
diff --git a/third_party/webrtc/src/webrtc/common_audio/signal_processing/complex_bit_reverse_arm.S b/third_party/webrtc/src/webrtc/common_audio/signal_processing/complex_bit_reverse_arm.S
new file mode 100644
index 00000000..e7f8a819
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/complex_bit_reverse_arm.S
@@ -0,0 +1,119 @@
+@
+@ Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+@
+@ Use of this source code is governed by a BSD-style license
+@ that can be found in the LICENSE file in the root of the source
+@ tree. An additional intellectual property rights grant can be found
+@ in the file PATENTS. All contributing project authors may
+@ be found in the AUTHORS file in the root of the source tree.
+@
+
+@ This file contains the function WebRtcSpl_ComplexBitReverse(), optimized
+@ for ARMv5 platforms.
+@ Reference C code is in file complex_bit_reverse.c. Bit-exact.
+
+#include "webrtc/system_wrappers/interface/asm_defines.h"
+
+@ Register use on entry, as the code below reads them:
+@   r0 = base address of complex_data, r1 = stages.
+@ stages == 7 and stages == 8 use the byte-offset tables at the end of this
+@ file; every other value takes the generic bit-reversal loop.
+GLOBAL_FUNCTION WebRtcSpl_ComplexBitReverse
+.align 2
+DEFINE_FUNCTION WebRtcSpl_ComplexBitReverse
+ push {r4-r7}
+
+ cmp r1, #7
+ adr r3, index_7 @ Table pointer.
+ mov r4, #112 @ Number of table entries.
+ beq PRE_LOOP_STAGES_7_OR_8
+
+ cmp r1, #8
+ adr r3, index_8 @ Table pointer.
+ mov r4, #240 @ Number of table entries.
+ beq PRE_LOOP_STAGES_7_OR_8
+
+ mov r3, #1 @ Initialize m.
+ mov r1, r3, asl r1 @ n = 1 << stages;
+ subs r6, r1, #1 @ nn = n - 1;
+ ble END
+
+ mov r5, r0 @ &complex_data
+ mov r4, #0 @ mr = 0
+
+LOOP_GENERIC:
+ rsb r12, r4, r6 @ r12 = nn - mr
+ mov r2, r1 @ l = n
+
+LOOP_SHIFT:
+ asr r2, #1 @ l >>= 1;
+ cmp r2, r12
+ bgt LOOP_SHIFT @ Repeat while l > nn - mr.
+
+ sub r12, r2, #1
+ and r4, r12, r4
+ add r4, r2 @ mr = (mr & (l - 1)) + l;
+ cmp r4, r3 @ mr <= m ?
+ ble UPDATE_REGISTERS
+
+ mov r12, r4, asl #2 @ Byte offset of element mr.
+ ldr r7, [r5, #4] @ complex_data[2 * m, 2 * m + 1].
+ @ Offset 4 due to m incrementing from 1.
+ ldr r2, [r0, r12] @ complex_data[2 * mr, 2 * mr + 1].
+ str r7, [r0, r12]
+ str r2, [r5, #4]
+
+UPDATE_REGISTERS:
+ add r3, r3, #1 @ ++m
+ add r5, #4 @ Advance by one complex element.
+ cmp r3, r1 @ m == n ?
+ bne LOOP_GENERIC
+
+ b END
+
+PRE_LOOP_STAGES_7_OR_8:
+ add r4, r3, r4, asl #1 @ r4 = address just past the last table entry.
+
+LOOP_STAGES_7_OR_8:
+ ldrsh r2, [r3], #2 @ index[m]
+ ldrsh r5, [r3], #2 @ index[m + 1]
+ ldr r1, [r0, r2] @ complex_data[index[m], index[m] + 1]
+ ldr r12, [r0, r5] @ complex_data[index[m + 1], index[m + 1] + 1]
+ cmp r3, r4 @ End of table reached?
+ str r1, [r0, r5]
+ str r12, [r0, r2]
+ bne LOOP_STAGES_7_OR_8
+
+END:
+ pop {r4-r7}
+ bx lr
+
+@ The index tables. Note the values are doubles of the actual indexes for 16-bit
+@ elements, different from the generic C code. It actually provides byte offsets
+@ for the indexes.
+@ Entries come in consecutive pairs: the two byte offsets, relative to the
+@ start of complex_data, of the elements that are exchanged. The table loop
+@ above consumes 112 entries of index_7 and 240 entries of index_8.
+
+.align 2
+index_7: @ Indexes for stages == 7.
+ .short 4, 256, 8, 128, 12, 384, 16, 64, 20, 320, 24, 192, 28, 448, 36, 288
+ .short 40, 160, 44, 416, 48, 96, 52, 352, 56, 224, 60, 480, 68, 272, 72, 144
+ .short 76, 400, 84, 336, 88, 208, 92, 464, 100, 304, 104, 176, 108, 432, 116
+ .short 368, 120, 240, 124, 496, 132, 264, 140, 392, 148, 328, 152, 200, 156
+ .short 456, 164, 296, 172, 424, 180, 360, 184, 232, 188, 488, 196, 280, 204
+ .short 408, 212, 344, 220, 472, 228, 312, 236, 440, 244, 376, 252, 504, 268
+ .short 388, 276, 324, 284, 452, 300, 420, 308, 356, 316, 484, 332, 404, 348
+ .short 468, 364, 436, 380, 500, 412, 460, 444, 492
+
+index_8: @ Indexes for stages == 8.
+ .short 4, 512, 8, 256, 12, 768, 16, 128, 20, 640, 24, 384, 28, 896, 32, 64
+ .short 36, 576, 40, 320, 44, 832, 48, 192, 52, 704, 56, 448, 60, 960, 68, 544
+ .short 72, 288, 76, 800, 80, 160, 84, 672, 88, 416, 92, 928, 100, 608, 104
+ .short 352, 108, 864, 112, 224, 116, 736, 120, 480, 124, 992, 132, 528, 136
+ .short 272, 140, 784, 148, 656, 152, 400, 156, 912, 164, 592, 168, 336, 172
+ .short 848, 176, 208, 180, 720, 184, 464, 188, 976, 196, 560, 200, 304, 204
+ .short 816, 212, 688, 216, 432, 220, 944, 228, 624, 232, 368, 236, 880, 244
+ .short 752, 248, 496, 252, 1008, 260, 520, 268, 776, 276, 648, 280, 392, 284
+ .short 904, 292, 584, 296, 328, 300, 840, 308, 712, 312, 456, 316, 968, 324
+ .short 552, 332, 808, 340, 680, 344, 424, 348, 936, 356, 616, 364, 872, 372
+ .short 744, 376, 488, 380, 1000, 388, 536, 396, 792, 404, 664, 412, 920, 420
+ .short 600, 428, 856, 436, 728, 440, 472, 444, 984, 452, 568, 460, 824, 468
+ .short 696, 476, 952, 484, 632, 492, 888, 500, 760, 508, 1016, 524, 772, 532
+ .short 644, 540, 900, 548, 580, 556, 836, 564, 708, 572, 964, 588, 804, 596
+ .short 676, 604, 932, 620, 868, 628, 740, 636, 996, 652, 788, 668, 916, 684
+ .short 852, 692, 724, 700, 980, 716, 820, 732, 948, 748, 884, 764, 1012, 796
+ .short 908, 812, 844, 828, 972, 860, 940, 892, 1004, 956, 988
diff --git a/third_party/webrtc/src/webrtc/common_audio/signal_processing/complex_bit_reverse_mips.c b/third_party/webrtc/src/webrtc/common_audio/signal_processing/complex_bit_reverse_mips.c
new file mode 100644
index 00000000..583fe4f6
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/complex_bit_reverse_mips.c
@@ -0,0 +1,176 @@
+/*
+ * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+
+/* Swap table for stages == 7 (112 entries). Entries come in consecutive
+ * pairs; each value is a byte offset that WebRtcSpl_ComplexBitReverse() adds
+ * to the start of frfi to locate one of the two elements to exchange.
+ */
+static int16_t coefTable_7[] = {
+ 4, 256, 8, 128, 12, 384, 16, 64,
+ 20, 320, 24, 192, 28, 448, 36, 288,
+ 40, 160, 44, 416, 48, 96, 52, 352,
+ 56, 224, 60, 480, 68, 272, 72, 144,
+ 76, 400, 84, 336, 88, 208, 92, 464,
+ 100, 304, 104, 176, 108, 432, 116, 368,
+ 120, 240, 124, 496, 132, 264, 140, 392,
+ 148, 328, 152, 200, 156, 456, 164, 296,
+ 172, 424, 180, 360, 184, 232, 188, 488,
+ 196, 280, 204, 408, 212, 344, 220, 472,
+ 228, 312, 236, 440, 244, 376, 252, 504,
+ 268, 388, 276, 324, 284, 452, 300, 420,
+ 308, 356, 316, 484, 332, 404, 348, 468,
+ 364, 436, 380, 500, 412, 460, 444, 492
+};
+
+/* Swap table for stages == 8 (240 entries), same layout as coefTable_7. */
+static int16_t coefTable_8[] = {
+ 4, 512, 8, 256, 12, 768, 16, 128,
+ 20, 640, 24, 384, 28, 896, 32, 64,
+ 36, 576, 40, 320, 44, 832, 48, 192,
+ 52, 704, 56, 448, 60, 960, 68, 544,
+ 72, 288, 76, 800, 80, 160, 84, 672,
+ 88, 416, 92, 928, 100, 608, 104, 352,
+ 108, 864, 112, 224, 116, 736, 120, 480,
+ 124, 992, 132, 528, 136, 272, 140, 784,
+ 148, 656, 152, 400, 156, 912, 164, 592,
+ 168, 336, 172, 848, 176, 208, 180, 720,
+ 184, 464, 188, 976, 196, 560, 200, 304,
+ 204, 816, 212, 688, 216, 432, 220, 944,
+ 228, 624, 232, 368, 236, 880, 244, 752,
+ 248, 496, 252, 1008, 260, 520, 268, 776,
+ 276, 648, 280, 392, 284, 904, 292, 584,
+ 296, 328, 300, 840, 308, 712, 312, 456,
+ 316, 968, 324, 552, 332, 808, 340, 680,
+ 344, 424, 348, 936, 356, 616, 364, 872,
+ 372, 744, 376, 488, 380, 1000, 388, 536,
+ 396, 792, 404, 664, 412, 920, 420, 600,
+ 428, 856, 436, 728, 440, 472, 444, 984,
+ 452, 568, 460, 824, 468, 696, 476, 952,
+ 484, 632, 492, 888, 500, 760, 508, 1016,
+ 524, 772, 532, 644, 540, 900, 548, 580,
+ 556, 836, 564, 708, 572, 964, 588, 804,
+ 596, 676, 604, 932, 620, 868, 628, 740,
+ 636, 996, 652, 788, 668, 916, 684, 852,
+ 692, 724, 700, 980, 716, 820, 732, 948,
+ 748, 884, 764, 1012, 796, 908, 812, 844,
+ 828, 972, 860, 940, 892, 1004, 956, 988
+};
+
+/* MIPS inline-assembly version of WebRtcSpl_ComplexBitReverse().
+ *
+ * Only stages == 8 and stages == 7 are handled, by walking coefTable_8 or
+ * coefTable_7; for any other value of stages the function returns without
+ * touching frfi.
+ *
+ * Each pass of the asm loop reads eight table entries (16 bytes) and performs
+ * four swaps. A swap exchanges one 32-bit word, i.e. a real/imaginary int16_t
+ * pair, using the unaligned load/store forms ulw/usw.
+ *
+ * NOTE(review): tr and ti are declared int16_t but are used inside the asm as
+ * scratch registers that also hold addresses (frfi + offset); confirm this is
+ * intended.
+ */
+void WebRtcSpl_ComplexBitReverse(int16_t frfi[], int stages) {
+ int l;
+ int16_t tr, ti;
+ int32_t tmp1, tmp2, tmp3, tmp4;
+ int32_t* ptr_i;
+ int32_t* ptr_j;
+
+ if (stages == 8) {
+ int16_t* pcoeftable_8 = coefTable_8;
+
+ /* l starts at 120 and drops by 4 per pass: 30 passes, 240 table entries. */
+ __asm __volatile (
+ ".set push \n\t"
+ ".set noreorder \n\t"
+ "addiu %[l], $zero, 120 \n\t"
+ "1: \n\t"
+ "addiu %[l], %[l], -4 \n\t"
+ "lh %[tr], 0(%[pcoeftable_8]) \n\t"
+ "lh %[ti], 2(%[pcoeftable_8]) \n\t"
+ "lh %[tmp3], 4(%[pcoeftable_8]) \n\t"
+ "lh %[tmp4], 6(%[pcoeftable_8]) \n\t"
+ "addu %[ptr_i], %[frfi], %[tr] \n\t"
+ "addu %[ptr_j], %[frfi], %[ti] \n\t"
+ "addu %[tr], %[frfi], %[tmp3] \n\t"
+ "addu %[ti], %[frfi], %[tmp4] \n\t"
+ "ulw %[tmp1], 0(%[ptr_i]) \n\t"
+ "ulw %[tmp2], 0(%[ptr_j]) \n\t"
+ "ulw %[tmp3], 0(%[tr]) \n\t"
+ "ulw %[tmp4], 0(%[ti]) \n\t"
+ "usw %[tmp1], 0(%[ptr_j]) \n\t"
+ "usw %[tmp2], 0(%[ptr_i]) \n\t"
+ "usw %[tmp4], 0(%[tr]) \n\t"
+ "usw %[tmp3], 0(%[ti]) \n\t"
+ "lh %[tmp1], 8(%[pcoeftable_8]) \n\t"
+ "lh %[tmp2], 10(%[pcoeftable_8]) \n\t"
+ "lh %[tr], 12(%[pcoeftable_8]) \n\t"
+ "lh %[ti], 14(%[pcoeftable_8]) \n\t"
+ "addu %[ptr_i], %[frfi], %[tmp1] \n\t"
+ "addu %[ptr_j], %[frfi], %[tmp2] \n\t"
+ "addu %[tr], %[frfi], %[tr] \n\t"
+ "addu %[ti], %[frfi], %[ti] \n\t"
+ "ulw %[tmp1], 0(%[ptr_i]) \n\t"
+ "ulw %[tmp2], 0(%[ptr_j]) \n\t"
+ "ulw %[tmp3], 0(%[tr]) \n\t"
+ "ulw %[tmp4], 0(%[ti]) \n\t"
+ "usw %[tmp1], 0(%[ptr_j]) \n\t"
+ "usw %[tmp2], 0(%[ptr_i]) \n\t"
+ "usw %[tmp4], 0(%[tr]) \n\t"
+ "usw %[tmp3], 0(%[ti]) \n\t"
+ "bgtz %[l], 1b \n\t"
+ " addiu %[pcoeftable_8], %[pcoeftable_8], 16 \n\t"
+ ".set pop \n\t"
+
+ : [tmp1] "=&r" (tmp1), [tmp2] "=&r" (tmp2), [ptr_i] "=&r" (ptr_i),
+ [ptr_j] "=&r" (ptr_j), [tr] "=&r" (tr), [l] "=&r" (l),
+ [tmp3] "=&r" (tmp3), [pcoeftable_8] "+r" (pcoeftable_8),
+ [ti] "=&r" (ti), [tmp4] "=&r" (tmp4)
+ : [frfi] "r" (frfi)
+ : "memory"
+ );
+ } else if (stages == 7) {
+ int16_t* pcoeftable_7 = coefTable_7;
+
+ /* l starts at 56 and drops by 4 per pass: 14 passes, 112 table entries. */
+ __asm __volatile (
+ ".set push \n\t"
+ ".set noreorder \n\t"
+ "addiu %[l], $zero, 56 \n\t"
+ "1: \n\t"
+ "addiu %[l], %[l], -4 \n\t"
+ "lh %[tr], 0(%[pcoeftable_7]) \n\t"
+ "lh %[ti], 2(%[pcoeftable_7]) \n\t"
+ "lh %[tmp3], 4(%[pcoeftable_7]) \n\t"
+ "lh %[tmp4], 6(%[pcoeftable_7]) \n\t"
+ "addu %[ptr_i], %[frfi], %[tr] \n\t"
+ "addu %[ptr_j], %[frfi], %[ti] \n\t"
+ "addu %[tr], %[frfi], %[tmp3] \n\t"
+ "addu %[ti], %[frfi], %[tmp4] \n\t"
+ "ulw %[tmp1], 0(%[ptr_i]) \n\t"
+ "ulw %[tmp2], 0(%[ptr_j]) \n\t"
+ "ulw %[tmp3], 0(%[tr]) \n\t"
+ "ulw %[tmp4], 0(%[ti]) \n\t"
+ "usw %[tmp1], 0(%[ptr_j]) \n\t"
+ "usw %[tmp2], 0(%[ptr_i]) \n\t"
+ "usw %[tmp4], 0(%[tr]) \n\t"
+ "usw %[tmp3], 0(%[ti]) \n\t"
+ "lh %[tmp1], 8(%[pcoeftable_7]) \n\t"
+ "lh %[tmp2], 10(%[pcoeftable_7]) \n\t"
+ "lh %[tr], 12(%[pcoeftable_7]) \n\t"
+ "lh %[ti], 14(%[pcoeftable_7]) \n\t"
+ "addu %[ptr_i], %[frfi], %[tmp1] \n\t"
+ "addu %[ptr_j], %[frfi], %[tmp2] \n\t"
+ "addu %[tr], %[frfi], %[tr] \n\t"
+ "addu %[ti], %[frfi], %[ti] \n\t"
+ "ulw %[tmp1], 0(%[ptr_i]) \n\t"
+ "ulw %[tmp2], 0(%[ptr_j]) \n\t"
+ "ulw %[tmp3], 0(%[tr]) \n\t"
+ "ulw %[tmp4], 0(%[ti]) \n\t"
+ "usw %[tmp1], 0(%[ptr_j]) \n\t"
+ "usw %[tmp2], 0(%[ptr_i]) \n\t"
+ "usw %[tmp4], 0(%[tr]) \n\t"
+ "usw %[tmp3], 0(%[ti]) \n\t"
+ "bgtz %[l], 1b \n\t"
+ " addiu %[pcoeftable_7], %[pcoeftable_7], 16 \n\t"
+ ".set pop \n\t"
+
+ : [tmp1] "=&r" (tmp1), [tmp2] "=&r" (tmp2), [ptr_i] "=&r" (ptr_i),
+ [ptr_j] "=&r" (ptr_j), [ti] "=&r" (ti), [tr] "=&r" (tr),
+ [l] "=&r" (l), [pcoeftable_7] "+r" (pcoeftable_7),
+ [tmp3] "=&r" (tmp3), [tmp4] "=&r" (tmp4)
+ : [frfi] "r" (frfi)
+ : "memory"
+ );
+ }
+}
diff --git a/third_party/webrtc/src/webrtc/common_audio/signal_processing/complex_fft.c b/third_party/webrtc/src/webrtc/common_audio/signal_processing/complex_fft.c
new file mode 100644
index 00000000..97ebacc4
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/complex_fft.c
@@ -0,0 +1,298 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+/*
+ * This file contains the functions WebRtcSpl_ComplexFFT() and
+ * WebRtcSpl_ComplexIFFT().
+ * The description header can be found in signal_processing_library.h
+ *
+ */
+
+#include "webrtc/common_audio/signal_processing/complex_fft_tables.h"
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+
+#define CFFTSFT 14
+#define CFFTRND 1
+#define CFFTRND2 16384
+
+#define CIFFTSFT 14
+#define CIFFTRND 1
+
+
+/*
+ * In-place radix-2 complex FFT on |frfi|, which holds n = 2^stages complex
+ * samples stored interleaved (real part at even index, imaginary part at odd
+ * index). Every stage halves the butterfly outputs, so the result is scaled
+ * down by 1/n in total.
+ *
+ * NOTE(review): the butterfly order looks like a decimation-in-time FFT,
+ * which presumably expects the input in bit-reversed order - confirm against
+ * signal_processing_library.h.
+ *
+ * stages: log2 of the transform length; must be in [0, 10] because the
+ *         twiddle factors are read from the 1024-entry kSinTable1024[].
+ * mode:   0 selects the low-complexity/low-accuracy butterflies, any other
+ *         value the high-complexity/high-accuracy butterflies with rounding.
+ *
+ * Returns 0 on success and -1 if |stages| is out of range.
+ */
+int WebRtcSpl_ComplexFFT(int16_t frfi[], int stages, int mode)
+{
+    int i, j, l, k, istep, n, m;
+    int16_t wr, wi;
+    int32_t tr32, ti32, qr32, qi32;
+
+    /* Validate before shifting: 1 << stages is undefined for a negative or
+     * too large shift count. The upper bound of 10 (n == 1024) is a constant
+     * given from the size of kSinTable1024[], and should not be changed
+     * depending on the input parameter 'stages'.
+     */
+    if (stages < 0 || stages > 10)
+        return -1;
+    n = 1 << stages;
+
+    l = 1;
+    k = 10 - 1; /* Constant for given kSinTable1024[]. Do not change
+                   depending on the input parameter 'stages' */
+
+    if (mode == 0)
+    {
+        // mode==0: Low-complexity and Low-accuracy mode
+        while (l < n)
+        {
+            istep = l << 1;
+
+            for (m = 0; m < l; ++m)
+            {
+                j = m << k;
+
+                /* The 256-value is a constant given as 1/4 of the size of
+                 * kSinTable1024[], and should not be changed depending on the input
+                 * parameter 'stages'. It will result in 0 <= j < N_SINE_WAVE/2
+                 */
+                wr = kSinTable1024[j + 256];
+                wi = -kSinTable1024[j];
+
+                for (i = m; i < n; i += istep)
+                {
+                    j = i + l;
+
+                    /* Complex product (wr + j*wi) * frfi[j], scaled back by 2^15. */
+                    tr32 = (wr * frfi[2 * j] - wi * frfi[2 * j + 1]) >> 15;
+
+                    ti32 = (wr * frfi[2 * j + 1] + wi * frfi[2 * j]) >> 15;
+
+                    qr32 = (int32_t)frfi[2 * i];
+                    qi32 = (int32_t)frfi[2 * i + 1];
+                    frfi[2 * j] = (int16_t)((qr32 - tr32) >> 1);
+                    frfi[2 * j + 1] = (int16_t)((qi32 - ti32) >> 1);
+                    frfi[2 * i] = (int16_t)((qr32 + tr32) >> 1);
+                    frfi[2 * i + 1] = (int16_t)((qi32 + ti32) >> 1);
+                }
+            }
+
+            --k;
+            l = istep;
+
+        }
+
+    } else
+    {
+        // mode==1: High-complexity and High-accuracy mode
+        while (l < n)
+        {
+            istep = l << 1;
+
+            for (m = 0; m < l; ++m)
+            {
+                j = m << k;
+
+                /* The 256-value is a constant given as 1/4 of the size of
+                 * kSinTable1024[], and should not be changed depending on the input
+                 * parameter 'stages'. It will result in 0 <= j < N_SINE_WAVE/2
+                 */
+                wr = kSinTable1024[j + 256];
+                wi = -kSinTable1024[j];
+
+#ifdef WEBRTC_ARCH_ARM_V7
+                int32_t wri = 0;
+                __asm __volatile("pkhbt %0, %1, %2, lsl #16" : "=r"(wri) :
+                    "r"((int32_t)wr), "r"((int32_t)wi));
+#endif
+
+                for (i = m; i < n; i += istep)
+                {
+                    j = i + l;
+
+#ifdef WEBRTC_ARCH_ARM_V7
+                    register int32_t frfi_r;
+                    __asm __volatile(
+                        "pkhbt %[frfi_r], %[frfi_even], %[frfi_odd],"
+                        " lsl #16\n\t"
+                        "smlsd %[tr32], %[wri], %[frfi_r], %[cfftrnd]\n\t"
+                        "smladx %[ti32], %[wri], %[frfi_r], %[cfftrnd]\n\t"
+                        :[frfi_r]"=&r"(frfi_r),
+                         [tr32]"=&r"(tr32),
+                         [ti32]"=r"(ti32)
+                        :[frfi_even]"r"((int32_t)frfi[2*j]),
+                         [frfi_odd]"r"((int32_t)frfi[2*j +1]),
+                         [wri]"r"(wri),
+                         [cfftrnd]"r"(CFFTRND));
+#else
+                    tr32 = wr * frfi[2 * j] - wi * frfi[2 * j + 1] + CFFTRND;
+
+                    ti32 = wr * frfi[2 * j + 1] + wi * frfi[2 * j] + CFFTRND;
+#endif
+
+                    tr32 >>= 15 - CFFTSFT;
+                    ti32 >>= 15 - CFFTSFT;
+
+                    /* Scale by 2^CFFTSFT with a multiplication: left-shifting
+                     * a negative sample is undefined behavior in C.
+                     */
+                    qr32 = ((int32_t)frfi[2 * i]) * (1 << CFFTSFT);
+                    qi32 = ((int32_t)frfi[2 * i + 1]) * (1 << CFFTSFT);
+
+                    /* CFFTRND2 is half of 2^(1 + CFFTSFT): round to nearest. */
+                    frfi[2 * j] = (int16_t)(
+                        (qr32 - tr32 + CFFTRND2) >> (1 + CFFTSFT));
+                    frfi[2 * j + 1] = (int16_t)(
+                        (qi32 - ti32 + CFFTRND2) >> (1 + CFFTSFT));
+                    frfi[2 * i] = (int16_t)(
+                        (qr32 + tr32 + CFFTRND2) >> (1 + CFFTSFT));
+                    frfi[2 * i + 1] = (int16_t)(
+                        (qi32 + ti32 + CFFTRND2) >> (1 + CFFTSFT));
+                }
+            }
+
+            --k;
+            l = istep;
+        }
+    }
+    return 0;
+}
+
+/*
+ * In-place radix-2 complex inverse FFT on |frfi|, which holds n = 2^stages
+ * complex samples stored interleaved (real part at even index, imaginary
+ * part at odd index).
+ *
+ * Scaling is data dependent: before every stage the value returned by
+ * WebRtcSpl_MaxAbsValueW16() for the whole buffer decides whether the stage
+ * output is shifted right by 0, 1 or 2 bits. The shifts of all stages are
+ * summed up and returned so the caller can undo or account for them.
+ *
+ * NOTE(review): the butterfly order presumably expects bit-reversed input -
+ * confirm against signal_processing_library.h.
+ *
+ * stages: log2 of the transform length; must be in [0, 10] because the
+ *         twiddle factors are read from the 1024-entry kSinTable1024[].
+ * mode:   0 selects the low-complexity/low-accuracy butterflies, any other
+ *         value the high-complexity/high-accuracy butterflies with rounding.
+ *
+ * Returns the accumulated number of right shifts, or -1 if |stages| is out
+ * of range.
+ */
+int WebRtcSpl_ComplexIFFT(int16_t frfi[], int stages, int mode)
+{
+    size_t i, j, l, istep, n, m;
+    int k, scale, shift;
+    int16_t wr, wi;
+    int32_t tr32, ti32, qr32, qi32;
+    int32_t tmp32, round2;
+
+    /* Validate before shifting: 1 << stages is undefined for a negative or
+     * too large shift count. The upper bound of 10 (n == 1024) is a constant
+     * given from the size of kSinTable1024[], and should not be changed
+     * depending on the input parameter 'stages'.
+     */
+    if (stages < 0 || stages > 10)
+        return -1;
+    n = (size_t)1 << stages;
+
+    scale = 0;
+
+    l = 1;
+    k = 10 - 1; /* Constant for given kSinTable1024[]. Do not change
+                   depending on the input parameter 'stages' */
+
+    while (l < n)
+    {
+        // variable scaling, depending upon data
+        shift = 0;
+        round2 = 8192;
+
+        tmp32 = WebRtcSpl_MaxAbsValueW16(frfi, 2 * n);
+        if (tmp32 > 13573)
+        {
+            shift++;
+            scale++;
+            round2 <<= 1;
+        }
+        if (tmp32 > 27146)
+        {
+            shift++;
+            scale++;
+            round2 <<= 1;
+        }
+
+        istep = l << 1;
+
+        if (mode == 0)
+        {
+            // mode==0: Low-complexity and Low-accuracy mode
+            for (m = 0; m < l; ++m)
+            {
+                j = m << k;
+
+                /* The 256-value is a constant given as 1/4 of the size of
+                 * kSinTable1024[], and should not be changed depending on the input
+                 * parameter 'stages'. It will result in 0 <= j < N_SINE_WAVE/2
+                 */
+                wr = kSinTable1024[j + 256];
+                wi = kSinTable1024[j];
+
+                for (i = m; i < n; i += istep)
+                {
+                    j = i + l;
+
+                    /* Complex product (wr + j*wi) * frfi[j], scaled back by 2^15. */
+                    tr32 = (wr * frfi[2 * j] - wi * frfi[2 * j + 1]) >> 15;
+
+                    ti32 = (wr * frfi[2 * j + 1] + wi * frfi[2 * j]) >> 15;
+
+                    qr32 = (int32_t)frfi[2 * i];
+                    qi32 = (int32_t)frfi[2 * i + 1];
+                    frfi[2 * j] = (int16_t)((qr32 - tr32) >> shift);
+                    frfi[2 * j + 1] = (int16_t)((qi32 - ti32) >> shift);
+                    frfi[2 * i] = (int16_t)((qr32 + tr32) >> shift);
+                    frfi[2 * i + 1] = (int16_t)((qi32 + ti32) >> shift);
+                }
+            }
+        } else
+        {
+            // mode==1: High-complexity and High-accuracy mode
+
+            for (m = 0; m < l; ++m)
+            {
+                j = m << k;
+
+                /* The 256-value is a constant given as 1/4 of the size of
+                 * kSinTable1024[], and should not be changed depending on the input
+                 * parameter 'stages'. It will result in 0 <= j < N_SINE_WAVE/2
+                 */
+                wr = kSinTable1024[j + 256];
+                wi = kSinTable1024[j];
+
+#ifdef WEBRTC_ARCH_ARM_V7
+                int32_t wri = 0;
+                __asm __volatile("pkhbt %0, %1, %2, lsl #16" : "=r"(wri) :
+                    "r"((int32_t)wr), "r"((int32_t)wi));
+#endif
+
+                for (i = m; i < n; i += istep)
+                {
+                    j = i + l;
+
+#ifdef WEBRTC_ARCH_ARM_V7
+                    register int32_t frfi_r;
+                    __asm __volatile(
+                        "pkhbt %[frfi_r], %[frfi_even], %[frfi_odd], lsl #16\n\t"
+                        "smlsd %[tr32], %[wri], %[frfi_r], %[cifftrnd]\n\t"
+                        "smladx %[ti32], %[wri], %[frfi_r], %[cifftrnd]\n\t"
+                        :[frfi_r]"=&r"(frfi_r),
+                         [tr32]"=&r"(tr32),
+                         [ti32]"=r"(ti32)
+                        :[frfi_even]"r"((int32_t)frfi[2*j]),
+                         [frfi_odd]"r"((int32_t)frfi[2*j +1]),
+                         [wri]"r"(wri),
+                         [cifftrnd]"r"(CIFFTRND)
+                    );
+#else
+
+                    tr32 = wr * frfi[2 * j] - wi * frfi[2 * j + 1] + CIFFTRND;
+
+                    ti32 = wr * frfi[2 * j + 1] + wi * frfi[2 * j] + CIFFTRND;
+#endif
+                    tr32 >>= 15 - CIFFTSFT;
+                    ti32 >>= 15 - CIFFTSFT;
+
+                    /* Scale by 2^CIFFTSFT with a multiplication: left-shifting
+                     * a negative sample is undefined behavior in C.
+                     */
+                    qr32 = ((int32_t)frfi[2 * i]) * (1 << CIFFTSFT);
+                    qi32 = ((int32_t)frfi[2 * i + 1]) * (1 << CIFFTSFT);
+
+                    /* round2 is half of 2^(shift + CIFFTSFT): round to nearest. */
+                    frfi[2 * j] = (int16_t)(
+                        (qr32 - tr32 + round2) >> (shift + CIFFTSFT));
+                    frfi[2 * j + 1] = (int16_t)(
+                        (qi32 - ti32 + round2) >> (shift + CIFFTSFT));
+                    frfi[2 * i] = (int16_t)(
+                        (qr32 + tr32 + round2) >> (shift + CIFFTSFT));
+                    frfi[2 * i + 1] = (int16_t)(
+                        (qi32 + ti32 + round2) >> (shift + CIFFTSFT));
+                }
+            }
+
+        }
+        --k;
+        l = istep;
+    }
+    return scale;
+}
diff --git a/third_party/webrtc/src/webrtc/common_audio/signal_processing/complex_fft_mips.c b/third_party/webrtc/src/webrtc/common_audio/signal_processing/complex_fft_mips.c
new file mode 100644
index 00000000..34c4f232
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/complex_fft_mips.c
@@ -0,0 +1,328 @@
+/*
+ * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#include "webrtc/common_audio/signal_processing/complex_fft_tables.h"
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+
+#define CFFTSFT 14
+#define CFFTRND 1
+#define CFFTRND2 16384
+
+#define CIFFTSFT 14
+#define CIFFTRND 1
+
+/*
+ * MIPS inline-assembly implementation of the in-place radix-2 complex FFT.
+ * |frfi| holds n = 2^stages interleaved complex samples (16-bit real part
+ * followed by 16-bit imaginary part). Returns 0 on success and -1 when
+ * n > 1024.
+ *
+ * |mode| is not referenced anywhere in this implementation; the same
+ * rounding butterflies are used for every value of |mode|.
+ *
+ * NOTE(review): all three assembly loops test their condition at the bottom,
+ * so the butterfly body runs once even when n == 1 (stages == 0) - confirm
+ * that callers never pass stages == 0.
+ */
+int WebRtcSpl_ComplexFFT(int16_t frfi[], int stages, int mode) {
+ int i = 0;
+ int l = 0;
+ int k = 0;
+ int istep = 0;
+ int n = 0;
+ int m = 0;
+ int32_t wr = 0, wi = 0;
+ int32_t tmp1 = 0;
+ int32_t tmp2 = 0;
+ int32_t tmp3 = 0;
+ int32_t tmp4 = 0;
+ int32_t tmp5 = 0;
+ int32_t tmp6 = 0;
+ int32_t tmp = 0;
+ int16_t* ptr_j = NULL;
+ int16_t* ptr_i = NULL;
+
+ n = 1 << stages;
+ if (n > 1024) {
+ return -1;
+ }
+
+ __asm __volatile (
+ ".set push \n\t"
+ ".set noreorder \n\t"
+
+ /* k is the shift that turns m into a byte offset into kSinTable1024;
+  * l is the distance (in complex samples) between butterfly inputs. */
+ "addiu %[k], $zero, 10 \n\t"
+ "addiu %[l], $zero, 1 \n\t"
+ /* 3: one pass per stage. istep = 2 * l, tmp = 4 * l (byte distance
+  * between the two butterfly inputs). */
+ "3: \n\t"
+ "sll %[istep], %[l], 1 \n\t"
+ "move %[m], $zero \n\t"
+ "sll %[tmp], %[l], 2 \n\t"
+ "move %[i], $zero \n\t"
+ /* 2: one pass per twiddle factor. wi is the table entry at byte offset
+  * m << k, wr the entry 512 bytes (256 entries) further on. */
+ "2: \n\t"
+#if defined(MIPS_DSP_R1_LE)
+ "sllv %[tmp3], %[m], %[k] \n\t"
+ "addiu %[tmp2], %[tmp3], 512 \n\t"
+ "addiu %[m], %[m], 1 \n\t"
+ "lhx %[wi], %[tmp3](%[kSinTable1024]) \n\t"
+ "lhx %[wr], %[tmp2](%[kSinTable1024]) \n\t"
+#else // #if defined(MIPS_DSP_R1_LE)
+ "sllv %[tmp3], %[m], %[k] \n\t"
+ "addu %[ptr_j], %[tmp3], %[kSinTable1024] \n\t"
+ "addiu %[ptr_i], %[ptr_j], 512 \n\t"
+ "addiu %[m], %[m], 1 \n\t"
+ "lh %[wi], 0(%[ptr_j]) \n\t"
+ "lh %[wr], 0(%[ptr_i]) \n\t"
+#endif // #if defined(MIPS_DSP_R1_LE)
+ /* 1: butterfly loop. ptr_i addresses complex sample i, ptr_j the
+  * sample l positions later; i then advances by istep. */
+ "1: \n\t"
+ "sll %[tmp1], %[i], 2 \n\t"
+ "addu %[ptr_i], %[frfi], %[tmp1] \n\t"
+ "addu %[ptr_j], %[ptr_i], %[tmp] \n\t"
+ "lh %[tmp6], 0(%[ptr_i]) \n\t"
+ "lh %[tmp5], 2(%[ptr_i]) \n\t"
+ "lh %[tmp3], 0(%[ptr_j]) \n\t"
+ "lh %[tmp4], 2(%[ptr_j]) \n\t"
+ "addu %[i], %[i], %[istep] \n\t"
+#if defined(MIPS_DSP_R2_LE)
+ "mult %[wr], %[tmp3] \n\t"
+ "madd %[wi], %[tmp4] \n\t"
+ "mult $ac1, %[wr], %[tmp4] \n\t"
+ "msub $ac1, %[wi], %[tmp3] \n\t"
+ "mflo %[tmp1] \n\t"
+ "mflo %[tmp2], $ac1 \n\t"
+ "sll %[tmp6], %[tmp6], 14 \n\t"
+ "sll %[tmp5], %[tmp5], 14 \n\t"
+ "shra_r.w %[tmp1], %[tmp1], 1 \n\t"
+ "shra_r.w %[tmp2], %[tmp2], 1 \n\t"
+ "subu %[tmp4], %[tmp6], %[tmp1] \n\t"
+ "addu %[tmp1], %[tmp6], %[tmp1] \n\t"
+ "addu %[tmp6], %[tmp5], %[tmp2] \n\t"
+ "subu %[tmp5], %[tmp5], %[tmp2] \n\t"
+ "shra_r.w %[tmp1], %[tmp1], 15 \n\t"
+ "shra_r.w %[tmp6], %[tmp6], 15 \n\t"
+ "shra_r.w %[tmp4], %[tmp4], 15 \n\t"
+ "shra_r.w %[tmp5], %[tmp5], 15 \n\t"
+#else // #if defined(MIPS_DSP_R2_LE)
+ /* tmp1 = wr * re_j + wi * im_j, tmp2 = wr * im_j - wi * re_j; the
+  * sample at ptr_i is scaled by 2^14 and gets the rounding term 16384
+  * before the final shift by 15. */
+ "mul %[tmp2], %[wr], %[tmp4] \n\t"
+ "mul %[tmp1], %[wr], %[tmp3] \n\t"
+ "mul %[tmp4], %[wi], %[tmp4] \n\t"
+ "mul %[tmp3], %[wi], %[tmp3] \n\t"
+ "sll %[tmp6], %[tmp6], 14 \n\t"
+ "sll %[tmp5], %[tmp5], 14 \n\t"
+ "addiu %[tmp6], %[tmp6], 16384 \n\t"
+ "addiu %[tmp5], %[tmp5], 16384 \n\t"
+ "addu %[tmp1], %[tmp1], %[tmp4] \n\t"
+ "subu %[tmp2], %[tmp2], %[tmp3] \n\t"
+ "addiu %[tmp1], %[tmp1], 1 \n\t"
+ "addiu %[tmp2], %[tmp2], 1 \n\t"
+ "sra %[tmp1], %[tmp1], 1 \n\t"
+ "sra %[tmp2], %[tmp2], 1 \n\t"
+ "subu %[tmp4], %[tmp6], %[tmp1] \n\t"
+ "addu %[tmp1], %[tmp6], %[tmp1] \n\t"
+ "addu %[tmp6], %[tmp5], %[tmp2] \n\t"
+ "subu %[tmp5], %[tmp5], %[tmp2] \n\t"
+ "sra %[tmp4], %[tmp4], 15 \n\t"
+ "sra %[tmp1], %[tmp1], 15 \n\t"
+ "sra %[tmp6], %[tmp6], 15 \n\t"
+ "sra %[tmp5], %[tmp5], 15 \n\t"
+#endif // #if defined(MIPS_DSP_R2_LE)
+ /* Write both butterfly outputs back. With .set noreorder the
+  * instruction following each branch sits in its delay slot. */
+ "sh %[tmp1], 0(%[ptr_i]) \n\t"
+ "sh %[tmp6], 2(%[ptr_i]) \n\t"
+ "sh %[tmp4], 0(%[ptr_j]) \n\t"
+ "blt %[i], %[n], 1b \n\t"
+ " sh %[tmp5], 2(%[ptr_j]) \n\t"
+ "blt %[m], %[l], 2b \n\t"
+ " addu %[i], $zero, %[m] \n\t"
+ "move %[l], %[istep] \n\t"
+ "blt %[l], %[n], 3b \n\t"
+ " addiu %[k], %[k], -1 \n\t"
+
+ ".set pop \n\t"
+
+ : [tmp1] "=&r" (tmp1), [tmp2] "=&r" (tmp2), [tmp3] "=&r" (tmp3),
+ [tmp4] "=&r" (tmp4), [tmp5] "=&r" (tmp5), [tmp6] "=&r" (tmp6),
+ [ptr_i] "=&r" (ptr_i), [i] "=&r" (i), [wi] "=&r" (wi), [wr] "=&r" (wr),
+ [m] "=&r" (m), [istep] "=&r" (istep), [l] "=&r" (l), [k] "=&r" (k),
+ [ptr_j] "=&r" (ptr_j), [tmp] "=&r" (tmp)
+ : [n] "r" (n), [frfi] "r" (frfi), [kSinTable1024] "r" (kSinTable1024)
+ : "hi", "lo", "memory"
+#if defined(MIPS_DSP_R2_LE)
+ , "$ac1hi", "$ac1lo"
+#endif // #if defined(MIPS_DSP_R2_LE)
+ );
+
+ return 0;
+}
+
+/*
+ * MIPS inline-assembly implementation of the in-place radix-2 complex
+ * inverse FFT. |frfi| holds n = 2^stages interleaved complex samples (16-bit
+ * real part followed by 16-bit imaginary part).
+ *
+ * Before every stage the largest absolute value in the buffer is determined
+ * and selects 0, 1 or 2 extra right shifts for that stage. The sum of these
+ * extra shifts over all stages is returned; -1 is returned when n > 1024.
+ *
+ * |mode| is not referenced anywhere in this implementation.
+ *
+ * NOTE(review): the maximum scan reads four 16-bit values per pass and all
+ * loops test their condition at the bottom, so n == 1 (stages == 0) touches
+ * memory past the single sample - confirm that callers never pass
+ * stages == 0.
+ */
+int WebRtcSpl_ComplexIFFT(int16_t frfi[], int stages, int mode) {
+ int i = 0, l = 0, k = 0;
+ int istep = 0, n = 0, m = 0;
+ int scale = 0, shift = 0;
+ int32_t wr = 0, wi = 0;
+ int32_t tmp1 = 0, tmp2 = 0, tmp3 = 0, tmp4 = 0;
+ int32_t tmp5 = 0, tmp6 = 0, tmp = 0, tempMax = 0, round2 = 0;
+ int16_t* ptr_j = NULL;
+ int16_t* ptr_i = NULL;
+
+ n = 1 << stages;
+ if (n > 1024) {
+ return -1;
+ }
+
+ __asm __volatile (
+ ".set push \n\t"
+ ".set noreorder \n\t"
+
+ /* k is the shift that turns m into a byte offset into kSinTable1024;
+  * l is the distance (in complex samples) between butterfly inputs. */
+ "addiu %[k], $zero, 10 \n\t"
+ "addiu %[l], $zero, 1 \n\t"
+ "move %[scale], $zero \n\t"
+ /* 3: one pass per stage. Start from the base shift of 14 and the
+  * matching rounding term 8192, then scan the 2 * n values. */
+ "3: \n\t"
+ "addiu %[shift], $zero, 14 \n\t"
+ "addiu %[round2], $zero, 8192 \n\t"
+ "move %[ptr_i], %[frfi] \n\t"
+ "move %[tempMax], $zero \n\t"
+ "addu %[i], %[n], %[n] \n\t"
+ /* 5: maximum scan, four values per pass; tempMax keeps the largest
+  * absolute value seen so far. */
+ "5: \n\t"
+ "lh %[tmp1], 0(%[ptr_i]) \n\t"
+ "lh %[tmp2], 2(%[ptr_i]) \n\t"
+ "lh %[tmp3], 4(%[ptr_i]) \n\t"
+ "lh %[tmp4], 6(%[ptr_i]) \n\t"
+#if defined(MIPS_DSP_R1_LE)
+ "absq_s.w %[tmp1], %[tmp1] \n\t"
+ "absq_s.w %[tmp2], %[tmp2] \n\t"
+ "absq_s.w %[tmp3], %[tmp3] \n\t"
+ "absq_s.w %[tmp4], %[tmp4] \n\t"
+#else // #if defined(MIPS_DSP_R1_LE)
+ "slt %[tmp5], %[tmp1], $zero \n\t"
+ "subu %[tmp6], $zero, %[tmp1] \n\t"
+ "movn %[tmp1], %[tmp6], %[tmp5] \n\t"
+ "slt %[tmp5], %[tmp2], $zero \n\t"
+ "subu %[tmp6], $zero, %[tmp2] \n\t"
+ "movn %[tmp2], %[tmp6], %[tmp5] \n\t"
+ "slt %[tmp5], %[tmp3], $zero \n\t"
+ "subu %[tmp6], $zero, %[tmp3] \n\t"
+ "movn %[tmp3], %[tmp6], %[tmp5] \n\t"
+ "slt %[tmp5], %[tmp4], $zero \n\t"
+ "subu %[tmp6], $zero, %[tmp4] \n\t"
+ "movn %[tmp4], %[tmp6], %[tmp5] \n\t"
+#endif // #if defined(MIPS_DSP_R1_LE)
+ "slt %[tmp5], %[tempMax], %[tmp1] \n\t"
+ "movn %[tempMax], %[tmp1], %[tmp5] \n\t"
+ "addiu %[i], %[i], -4 \n\t"
+ "slt %[tmp5], %[tempMax], %[tmp2] \n\t"
+ "movn %[tempMax], %[tmp2], %[tmp5] \n\t"
+ "slt %[tmp5], %[tempMax], %[tmp3] \n\t"
+ "movn %[tempMax], %[tmp3], %[tmp5] \n\t"
+ "slt %[tmp5], %[tempMax], %[tmp4] \n\t"
+ "movn %[tempMax], %[tmp4], %[tmp5] \n\t"
+ "bgtz %[i], 5b \n\t"
+ " addiu %[ptr_i], %[ptr_i], 8 \n\t"
+ /* tmp1 = (tempMax > 13573) + (tempMax > 27146): the number of extra
+  * right shifts for this stage, added to shift and scale and applied
+  * to round2. tempMax is first sign-extended from 16 bits.
+  * NOTE(review): a sample of -32768 gives tempMax == 32768, which the
+  * sign extension turns negative so that no extra shift is selected -
+  * confirm this is intended. */
+ "addiu %[tmp1], $zero, 13573 \n\t"
+ "addiu %[tmp2], $zero, 27146 \n\t"
+#if !defined(MIPS32_R2_LE)
+ "sll %[tempMax], %[tempMax], 16 \n\t"
+ "sra %[tempMax], %[tempMax], 16 \n\t"
+#else // #if !defined(MIPS32_R2_LE)
+ "seh %[tempMax] \n\t"
+#endif // #if !defined(MIPS32_R2_LE)
+ "slt %[tmp1], %[tmp1], %[tempMax] \n\t"
+ "slt %[tmp2], %[tmp2], %[tempMax] \n\t"
+ "addu %[tmp1], %[tmp1], %[tmp2] \n\t"
+ "addu %[shift], %[shift], %[tmp1] \n\t"
+ "addu %[scale], %[scale], %[tmp1] \n\t"
+ "sllv %[round2], %[round2], %[tmp1] \n\t"
+ /* istep = 2 * l, tmp = 4 * l (byte distance between the two butterfly
+  * inputs). i is not reset here; it is whatever the maximum scan left
+  * behind (0 whenever 2 * n is a multiple of 4). */
+ "sll %[istep], %[l], 1 \n\t"
+ "move %[m], $zero \n\t"
+ "sll %[tmp], %[l], 2 \n\t"
+ /* 2: one pass per twiddle factor. wi is the table entry at byte offset
+  * m << k, wr the entry 512 bytes (256 entries) further on. */
+ "2: \n\t"
+#if defined(MIPS_DSP_R1_LE)
+ "sllv %[tmp3], %[m], %[k] \n\t"
+ "addiu %[tmp2], %[tmp3], 512 \n\t"
+ "addiu %[m], %[m], 1 \n\t"
+ "lhx %[wi], %[tmp3](%[kSinTable1024]) \n\t"
+ "lhx %[wr], %[tmp2](%[kSinTable1024]) \n\t"
+#else // #if defined(MIPS_DSP_R1_LE)
+ "sllv %[tmp3], %[m], %[k] \n\t"
+ "addu %[ptr_j], %[tmp3], %[kSinTable1024] \n\t"
+ "addiu %[ptr_i], %[ptr_j], 512 \n\t"
+ "addiu %[m], %[m], 1 \n\t"
+ "lh %[wi], 0(%[ptr_j]) \n\t"
+ "lh %[wr], 0(%[ptr_i]) \n\t"
+#endif // #if defined(MIPS_DSP_R1_LE)
+ /* 1: butterfly loop. ptr_i addresses complex sample i, ptr_j the
+  * sample l positions later; i then advances by istep. */
+ "1: \n\t"
+ "sll %[tmp1], %[i], 2 \n\t"
+ "addu %[ptr_i], %[frfi], %[tmp1] \n\t"
+ "addu %[ptr_j], %[ptr_i], %[tmp] \n\t"
+ "lh %[tmp3], 0(%[ptr_j]) \n\t"
+ "lh %[tmp4], 2(%[ptr_j]) \n\t"
+ "lh %[tmp6], 0(%[ptr_i]) \n\t"
+ "lh %[tmp5], 2(%[ptr_i]) \n\t"
+ "addu %[i], %[i], %[istep] \n\t"
+#if defined(MIPS_DSP_R2_LE)
+ "mult %[wr], %[tmp3] \n\t"
+ "msub %[wi], %[tmp4] \n\t"
+ "mult $ac1, %[wr], %[tmp4] \n\t"
+ "madd $ac1, %[wi], %[tmp3] \n\t"
+ "mflo %[tmp1] \n\t"
+ "mflo %[tmp2], $ac1 \n\t"
+ "sll %[tmp6], %[tmp6], 14 \n\t"
+ "sll %[tmp5], %[tmp5], 14 \n\t"
+ "shra_r.w %[tmp1], %[tmp1], 1 \n\t"
+ "shra_r.w %[tmp2], %[tmp2], 1 \n\t"
+ "addu %[tmp6], %[tmp6], %[round2] \n\t"
+ "addu %[tmp5], %[tmp5], %[round2] \n\t"
+ "subu %[tmp4], %[tmp6], %[tmp1] \n\t"
+ "addu %[tmp1], %[tmp6], %[tmp1] \n\t"
+ "addu %[tmp6], %[tmp5], %[tmp2] \n\t"
+ "subu %[tmp5], %[tmp5], %[tmp2] \n\t"
+ "srav %[tmp4], %[tmp4], %[shift] \n\t"
+ "srav %[tmp1], %[tmp1], %[shift] \n\t"
+ "srav %[tmp6], %[tmp6], %[shift] \n\t"
+ "srav %[tmp5], %[tmp5], %[shift] \n\t"
+#else // #if defined(MIPS_DSP_R2_LE)
+ /* tmp1 = wr * re_j - wi * im_j, tmp2 = wr * im_j + wi * re_j; the
+  * sample at ptr_i is scaled by 2^14 and gets round2 added before the
+  * final shift. */
+ "mul %[tmp1], %[wr], %[tmp3] \n\t"
+ "mul %[tmp2], %[wr], %[tmp4] \n\t"
+ "mul %[tmp4], %[wi], %[tmp4] \n\t"
+ "mul %[tmp3], %[wi], %[tmp3] \n\t"
+ "sll %[tmp6], %[tmp6], 14 \n\t"
+ "sll %[tmp5], %[tmp5], 14 \n\t"
+ "sub %[tmp1], %[tmp1], %[tmp4] \n\t"
+ "addu %[tmp2], %[tmp2], %[tmp3] \n\t"
+ "addiu %[tmp1], %[tmp1], 1 \n\t"
+ "addiu %[tmp2], %[tmp2], 1 \n\t"
+ "sra %[tmp2], %[tmp2], 1 \n\t"
+ "sra %[tmp1], %[tmp1], 1 \n\t"
+ "addu %[tmp6], %[tmp6], %[round2] \n\t"
+ "addu %[tmp5], %[tmp5], %[round2] \n\t"
+ "subu %[tmp4], %[tmp6], %[tmp1] \n\t"
+ "addu %[tmp1], %[tmp6], %[tmp1] \n\t"
+ "addu %[tmp6], %[tmp5], %[tmp2] \n\t"
+ "subu %[tmp5], %[tmp5], %[tmp2] \n\t"
+ "sra %[tmp4], %[tmp4], %[shift] \n\t"
+ "sra %[tmp1], %[tmp1], %[shift] \n\t"
+ "sra %[tmp6], %[tmp6], %[shift] \n\t"
+ "sra %[tmp5], %[tmp5], %[shift] \n\t"
+#endif // #if defined(MIPS_DSP_R2_LE)
+ /* Write both butterfly outputs back. With .set noreorder the
+  * instruction following each branch sits in its delay slot. */
+ "sh %[tmp1], 0(%[ptr_i]) \n\t"
+ "sh %[tmp6], 2(%[ptr_i]) \n\t"
+ "sh %[tmp4], 0(%[ptr_j]) \n\t"
+ "blt %[i], %[n], 1b \n\t"
+ " sh %[tmp5], 2(%[ptr_j]) \n\t"
+ "blt %[m], %[l], 2b \n\t"
+ " addu %[i], $zero, %[m] \n\t"
+ "move %[l], %[istep] \n\t"
+ "blt %[l], %[n], 3b \n\t"
+ " addiu %[k], %[k], -1 \n\t"
+
+ ".set pop \n\t"
+
+ : [tmp1] "=&r" (tmp1), [tmp2] "=&r" (tmp2), [tmp3] "=&r" (tmp3),
+ [tmp4] "=&r" (tmp4), [tmp5] "=&r" (tmp5), [tmp6] "=&r" (tmp6),
+ [ptr_i] "=&r" (ptr_i), [i] "=&r" (i), [m] "=&r" (m), [tmp] "=&r" (tmp),
+ [istep] "=&r" (istep), [wi] "=&r" (wi), [wr] "=&r" (wr), [l] "=&r" (l),
+ [k] "=&r" (k), [round2] "=&r" (round2), [ptr_j] "=&r" (ptr_j),
+ [shift] "=&r" (shift), [scale] "=&r" (scale), [tempMax] "=&r" (tempMax)
+ : [n] "r" (n), [frfi] "r" (frfi), [kSinTable1024] "r" (kSinTable1024)
+ : "hi", "lo", "memory"
+#if defined(MIPS_DSP_R2_LE)
+ , "$ac1hi", "$ac1lo"
+#endif // #if defined(MIPS_DSP_R2_LE)
+ );
+
+ return scale;
+
+}
diff --git a/third_party/webrtc/src/webrtc/common_audio/signal_processing/complex_fft_tables.h b/third_party/webrtc/src/webrtc/common_audio/signal_processing/complex_fft_tables.h
new file mode 100644
index 00000000..ca7b7fe3
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/complex_fft_tables.h
@@ -0,0 +1,148 @@
+/*
+ * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#ifndef WEBRTC_COMMON_AUDIO_SIGNAL_PROCESSING_COMPLEX_FFT_TABLES_H_
+#define WEBRTC_COMMON_AUDIO_SIGNAL_PROCESSING_COMPLEX_FFT_TABLES_H_
+
+#include "webrtc/typedefs.h"
+
+/*
+ * Sine lookup table with 1024 entries (128 rows of 8). The values rise from
+ * 0 to 32767 at index 256, return to 0 at index 512, fall to -32767 at index
+ * 768 and end at -201, i.e. one full sine period sampled at 1024 points and
+ * scaled to 32767. An entry 256 positions (a quarter period) ahead of a
+ * sine entry is the matching cosine value.
+ *
+ * The table is 'static' in a header, so every translation unit that includes
+ * this file gets its own copy.
+ */
+static const int16_t kSinTable1024[] = {
+ 0, 201, 402, 603, 804, 1005, 1206, 1406,
+ 1607, 1808, 2009, 2209, 2410, 2610, 2811, 3011,
+ 3211, 3411, 3611, 3811, 4011, 4210, 4409, 4608,
+ 4807, 5006, 5205, 5403, 5601, 5799, 5997, 6195,
+ 6392, 6589, 6786, 6982, 7179, 7375, 7571, 7766,
+ 7961, 8156, 8351, 8545, 8739, 8932, 9126, 9319,
+ 9511, 9703, 9895, 10087, 10278, 10469, 10659, 10849,
+ 11038, 11227, 11416, 11604, 11792, 11980, 12166, 12353,
+ 12539, 12724, 12909, 13094, 13278, 13462, 13645, 13827,
+ 14009, 14191, 14372, 14552, 14732, 14911, 15090, 15268,
+ 15446, 15623, 15799, 15975, 16150, 16325, 16499, 16672,
+ 16845, 17017, 17189, 17360, 17530, 17699, 17868, 18036,
+ 18204, 18371, 18537, 18702, 18867, 19031, 19194, 19357,
+ 19519, 19680, 19840, 20000, 20159, 20317, 20474, 20631,
+ 20787, 20942, 21096, 21249, 21402, 21554, 21705, 21855,
+ 22004, 22153, 22301, 22448, 22594, 22739, 22883, 23027,
+ 23169, 23311, 23452, 23592, 23731, 23869, 24006, 24143,
+ 24278, 24413, 24546, 24679, 24811, 24942, 25072, 25201,
+ 25329, 25456, 25582, 25707, 25831, 25954, 26077, 26198,
+ 26318, 26437, 26556, 26673, 26789, 26905, 27019, 27132,
+ 27244, 27355, 27466, 27575, 27683, 27790, 27896, 28001,
+ 28105, 28208, 28309, 28410, 28510, 28608, 28706, 28802,
+ 28897, 28992, 29085, 29177, 29268, 29358, 29446, 29534,
+ 29621, 29706, 29790, 29873, 29955, 30036, 30116, 30195,
+ 30272, 30349, 30424, 30498, 30571, 30643, 30713, 30783,
+ 30851, 30918, 30984, 31049, 31113, 31175, 31236, 31297,
+ 31356, 31413, 31470, 31525, 31580, 31633, 31684, 31735,
+ 31785, 31833, 31880, 31926, 31970, 32014, 32056, 32097,
+ 32137, 32176, 32213, 32249, 32284, 32318, 32350, 32382,
+ 32412, 32441, 32468, 32495, 32520, 32544, 32567, 32588,
+ 32609, 32628, 32646, 32662, 32678, 32692, 32705, 32717,
+ 32727, 32736, 32744, 32751, 32757, 32761, 32764, 32766,
+ 32767, 32766, 32764, 32761, 32757, 32751, 32744, 32736,
+ 32727, 32717, 32705, 32692, 32678, 32662, 32646, 32628,
+ 32609, 32588, 32567, 32544, 32520, 32495, 32468, 32441,
+ 32412, 32382, 32350, 32318, 32284, 32249, 32213, 32176,
+ 32137, 32097, 32056, 32014, 31970, 31926, 31880, 31833,
+ 31785, 31735, 31684, 31633, 31580, 31525, 31470, 31413,
+ 31356, 31297, 31236, 31175, 31113, 31049, 30984, 30918,
+ 30851, 30783, 30713, 30643, 30571, 30498, 30424, 30349,
+ 30272, 30195, 30116, 30036, 29955, 29873, 29790, 29706,
+ 29621, 29534, 29446, 29358, 29268, 29177, 29085, 28992,
+ 28897, 28802, 28706, 28608, 28510, 28410, 28309, 28208,
+ 28105, 28001, 27896, 27790, 27683, 27575, 27466, 27355,
+ 27244, 27132, 27019, 26905, 26789, 26673, 26556, 26437,
+ 26318, 26198, 26077, 25954, 25831, 25707, 25582, 25456,
+ 25329, 25201, 25072, 24942, 24811, 24679, 24546, 24413,
+ 24278, 24143, 24006, 23869, 23731, 23592, 23452, 23311,
+ 23169, 23027, 22883, 22739, 22594, 22448, 22301, 22153,
+ 22004, 21855, 21705, 21554, 21402, 21249, 21096, 20942,
+ 20787, 20631, 20474, 20317, 20159, 20000, 19840, 19680,
+ 19519, 19357, 19194, 19031, 18867, 18702, 18537, 18371,
+ 18204, 18036, 17868, 17699, 17530, 17360, 17189, 17017,
+ 16845, 16672, 16499, 16325, 16150, 15975, 15799, 15623,
+ 15446, 15268, 15090, 14911, 14732, 14552, 14372, 14191,
+ 14009, 13827, 13645, 13462, 13278, 13094, 12909, 12724,
+ 12539, 12353, 12166, 11980, 11792, 11604, 11416, 11227,
+ 11038, 10849, 10659, 10469, 10278, 10087, 9895, 9703,
+ 9511, 9319, 9126, 8932, 8739, 8545, 8351, 8156,
+ 7961, 7766, 7571, 7375, 7179, 6982, 6786, 6589,
+ 6392, 6195, 5997, 5799, 5601, 5403, 5205, 5006,
+ 4807, 4608, 4409, 4210, 4011, 3811, 3611, 3411,
+ 3211, 3011, 2811, 2610, 2410, 2209, 2009, 1808,
+ 1607, 1406, 1206, 1005, 804, 603, 402, 201,
+ 0, -201, -402, -603, -804, -1005, -1206, -1406,
+ -1607, -1808, -2009, -2209, -2410, -2610, -2811, -3011,
+ -3211, -3411, -3611, -3811, -4011, -4210, -4409, -4608,
+ -4807, -5006, -5205, -5403, -5601, -5799, -5997, -6195,
+ -6392, -6589, -6786, -6982, -7179, -7375, -7571, -7766,
+ -7961, -8156, -8351, -8545, -8739, -8932, -9126, -9319,
+ -9511, -9703, -9895, -10087, -10278, -10469, -10659, -10849,
+ -11038, -11227, -11416, -11604, -11792, -11980, -12166, -12353,
+ -12539, -12724, -12909, -13094, -13278, -13462, -13645, -13827,
+ -14009, -14191, -14372, -14552, -14732, -14911, -15090, -15268,
+ -15446, -15623, -15799, -15975, -16150, -16325, -16499, -16672,
+ -16845, -17017, -17189, -17360, -17530, -17699, -17868, -18036,
+ -18204, -18371, -18537, -18702, -18867, -19031, -19194, -19357,
+ -19519, -19680, -19840, -20000, -20159, -20317, -20474, -20631,
+ -20787, -20942, -21096, -21249, -21402, -21554, -21705, -21855,
+ -22004, -22153, -22301, -22448, -22594, -22739, -22883, -23027,
+ -23169, -23311, -23452, -23592, -23731, -23869, -24006, -24143,
+ -24278, -24413, -24546, -24679, -24811, -24942, -25072, -25201,
+ -25329, -25456, -25582, -25707, -25831, -25954, -26077, -26198,
+ -26318, -26437, -26556, -26673, -26789, -26905, -27019, -27132,
+ -27244, -27355, -27466, -27575, -27683, -27790, -27896, -28001,
+ -28105, -28208, -28309, -28410, -28510, -28608, -28706, -28802,
+ -28897, -28992, -29085, -29177, -29268, -29358, -29446, -29534,
+ -29621, -29706, -29790, -29873, -29955, -30036, -30116, -30195,
+ -30272, -30349, -30424, -30498, -30571, -30643, -30713, -30783,
+ -30851, -30918, -30984, -31049, -31113, -31175, -31236, -31297,
+ -31356, -31413, -31470, -31525, -31580, -31633, -31684, -31735,
+ -31785, -31833, -31880, -31926, -31970, -32014, -32056, -32097,
+ -32137, -32176, -32213, -32249, -32284, -32318, -32350, -32382,
+ -32412, -32441, -32468, -32495, -32520, -32544, -32567, -32588,
+ -32609, -32628, -32646, -32662, -32678, -32692, -32705, -32717,
+ -32727, -32736, -32744, -32751, -32757, -32761, -32764, -32766,
+ -32767, -32766, -32764, -32761, -32757, -32751, -32744, -32736,
+ -32727, -32717, -32705, -32692, -32678, -32662, -32646, -32628,
+ -32609, -32588, -32567, -32544, -32520, -32495, -32468, -32441,
+ -32412, -32382, -32350, -32318, -32284, -32249, -32213, -32176,
+ -32137, -32097, -32056, -32014, -31970, -31926, -31880, -31833,
+ -31785, -31735, -31684, -31633, -31580, -31525, -31470, -31413,
+ -31356, -31297, -31236, -31175, -31113, -31049, -30984, -30918,
+ -30851, -30783, -30713, -30643, -30571, -30498, -30424, -30349,
+ -30272, -30195, -30116, -30036, -29955, -29873, -29790, -29706,
+ -29621, -29534, -29446, -29358, -29268, -29177, -29085, -28992,
+ -28897, -28802, -28706, -28608, -28510, -28410, -28309, -28208,
+ -28105, -28001, -27896, -27790, -27683, -27575, -27466, -27355,
+ -27244, -27132, -27019, -26905, -26789, -26673, -26556, -26437,
+ -26318, -26198, -26077, -25954, -25831, -25707, -25582, -25456,
+ -25329, -25201, -25072, -24942, -24811, -24679, -24546, -24413,
+ -24278, -24143, -24006, -23869, -23731, -23592, -23452, -23311,
+ -23169, -23027, -22883, -22739, -22594, -22448, -22301, -22153,
+ -22004, -21855, -21705, -21554, -21402, -21249, -21096, -20942,
+ -20787, -20631, -20474, -20317, -20159, -20000, -19840, -19680,
+ -19519, -19357, -19194, -19031, -18867, -18702, -18537, -18371,
+ -18204, -18036, -17868, -17699, -17530, -17360, -17189, -17017,
+ -16845, -16672, -16499, -16325, -16150, -15975, -15799, -15623,
+ -15446, -15268, -15090, -14911, -14732, -14552, -14372, -14191,
+ -14009, -13827, -13645, -13462, -13278, -13094, -12909, -12724,
+ -12539, -12353, -12166, -11980, -11792, -11604, -11416, -11227,
+ -11038, -10849, -10659, -10469, -10278, -10087, -9895, -9703,
+ -9511, -9319, -9126, -8932, -8739, -8545, -8351, -8156,
+ -7961, -7766, -7571, -7375, -7179, -6982, -6786, -6589,
+ -6392, -6195, -5997, -5799, -5601, -5403, -5205, -5006,
+ -4807, -4608, -4409, -4210, -4011, -3811, -3611, -3411,
+ -3211, -3011, -2811, -2610, -2410, -2209, -2009, -1808,
+ -1607, -1406, -1206, -1005, -804, -603, -402, -201
+};
+
+#endif // WEBRTC_COMMON_AUDIO_SIGNAL_PROCESSING_COMPLEX_FFT_TABLES_H_
diff --git a/third_party/webrtc/src/webrtc/common_audio/signal_processing/copy_set_operations.c b/third_party/webrtc/src/webrtc/common_audio/signal_processing/copy_set_operations.c
new file mode 100644
index 00000000..9d7cf47e
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/copy_set_operations.c
@@ -0,0 +1,82 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+/*
+ * This file contains the implementation of functions
+ * WebRtcSpl_MemSetW16()
+ * WebRtcSpl_MemSetW32()
+ * WebRtcSpl_MemCpyReversedOrder()
+ * WebRtcSpl_CopyFromEndW16()
+ * WebRtcSpl_ZerosArrayW16()
+ * WebRtcSpl_ZerosArrayW32()
+ *
+ * The description header can be found in signal_processing_library.h
+ *
+ */
+
+#include <string.h>
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+
+
+/* Writes |set_value| into the first |length| 16-bit elements of |ptr|.
+ * Nothing is written when |length| is 0.
+ */
+void WebRtcSpl_MemSetW16(int16_t *ptr, int16_t set_value, size_t length)
+{
+    size_t idx;
+
+    for (idx = 0; idx < length; ++idx)
+    {
+        ptr[idx] = set_value;
+    }
+}
+
+/* Writes |set_value| into the first |length| 32-bit elements of |ptr|.
+ * Nothing is written when |length| is 0.
+ */
+void WebRtcSpl_MemSetW32(int32_t *ptr, int32_t set_value, size_t length)
+{
+    size_t idx;
+
+    for (idx = 0; idx < length; ++idx)
+    {
+        ptr[idx] = set_value;
+    }
+}
+
+/* Copies |length| elements from |source| in reversed order. |source| is read
+ * forwards from its first element while the destination is written
+ * backwards, so |dest| must point at the LAST element of the destination
+ * range: source[k] ends up at dest[-k].
+ */
+void WebRtcSpl_MemCpyReversedOrder(int16_t* dest,
+                                   int16_t* source,
+                                   size_t length)
+{
+    size_t k;
+
+    for (k = 0; k < length; ++k)
+    {
+        *(dest - k) = source[k];
+    }
+}
+
+/* Copies the last |samples| elements of |vector_in|, which holds |length|
+ * elements, to the start of |vector_out|.
+ */
+void WebRtcSpl_CopyFromEndW16(const int16_t *vector_in,
+                              size_t length,
+                              size_t samples,
+                              int16_t *vector_out)
+{
+    /* First element of the trailing segment that is copied. */
+    const int16_t *tail = vector_in + (length - samples);
+
+    WEBRTC_SPL_MEMCPY_W16(vector_out, tail, samples);
+}
+
+/* Sets |length| 16-bit elements of |vector| to zero by calling
+ * WebRtcSpl_MemSetW16() with a fill value of 0.
+ */
+void WebRtcSpl_ZerosArrayW16(int16_t *vector, size_t length)
+{
+ WebRtcSpl_MemSetW16(vector, 0, length);
+}
+
+/* Sets |length| 32-bit elements of |vector| to zero by calling
+ * WebRtcSpl_MemSetW32() with a fill value of 0.
+ */
+void WebRtcSpl_ZerosArrayW32(int32_t *vector, size_t length)
+{
+ WebRtcSpl_MemSetW32(vector, 0, length);
+}
diff --git a/third_party/webrtc/src/webrtc/common_audio/signal_processing/cross_correlation.c b/third_party/webrtc/src/webrtc/common_audio/signal_processing/cross_correlation.c
new file mode 100644
index 00000000..d7c9f2b9
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/cross_correlation.c
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+
+/* C version of WebRtcSpl_CrossCorrelation() for generic platforms.
+ *
+ * Writes |dim_cross_correlation| values to |cross_correlation|. Each value is
+ * the sum over |dim_seq| products seq1[j] * seq2[j], every product shifted
+ * right by |right_shifts| before it is accumulated. After each output value
+ * |seq2| is advanced by |step_seq2| elements (which may be negative).
+ */
+void WebRtcSpl_CrossCorrelationC(int32_t* cross_correlation,
+                                 const int16_t* seq1,
+                                 const int16_t* seq2,
+                                 size_t dim_seq,
+                                 size_t dim_cross_correlation,
+                                 int right_shifts,
+                                 int step_seq2) {
+  size_t remaining = dim_cross_correlation;
+
+  while (remaining > 0) {
+    int32_t acc = 0;
+    size_t n = 0;
+
+    while (n < dim_seq) {
+      acc += (seq1[n] * seq2[n]) >> right_shifts;
+      n++;
+    }
+
+    *cross_correlation = acc;
+    cross_correlation++;
+    seq2 += step_seq2;
+    remaining--;
+  }
+}
diff --git a/third_party/webrtc/src/webrtc/common_audio/signal_processing/cross_correlation_mips.c b/third_party/webrtc/src/webrtc/common_audio/signal_processing/cross_correlation_mips.c
new file mode 100644
index 00000000..b2364026
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/cross_correlation_mips.c
@@ -0,0 +1,104 @@
+/*
+ * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+
+void WebRtcSpl_CrossCorrelation_mips(int32_t* cross_correlation,
+ const int16_t* seq1,
+ const int16_t* seq2,
+ size_t dim_seq,
+ size_t dim_cross_correlation,
+ int right_shifts,
+ int step_seq2) {
+ /* Per output: sum over dim_seq samples of (seq1[j] * seq2[j]) >> right_shifts; seq2 then moves on by step_seq2 samples. */
+ int32_t t0 = 0, t1 = 0, t2 = 0, t3 = 0, sum = 0;
+ int16_t *pseq2 = NULL;
+ int16_t *pseq1 = NULL;
+ int16_t *pseq1_0 = (int16_t*)&seq1[0];  // Start of seq1; not written by the asm below.
+ int16_t *pseq2_0 = (int16_t*)&seq2[0];  // Start of seq2 for the current output.
+ int k = 0;
+ /* NOTE(review): the even-length pair loop and both outer loops test their counters after the body, so they run once even for a zero count - confirm callers never pass 0. */
+ __asm __volatile (
+ ".set push \n\t"
+ ".set noreorder \n\t"
+ "sll %[step_seq2], %[step_seq2], 1 \n\t"  // step_seq2 *= 2: it is later added to a byte address
+ "andi %[t0], %[dim_seq], 1 \n\t"
+ "bgtz %[t0], 3f \n\t"  // odd dim_seq -> label 3
+ " nop \n\t"
+ "1: \n\t"  // even dim_seq: start of one output
+ "move %[pseq1], %[pseq1_0] \n\t"
+ "move %[pseq2], %[pseq2_0] \n\t"
+ "sra %[k], %[dim_seq], 1 \n\t"  // k = dim_seq / 2 sample pairs
+ "addiu %[dim_cc], %[dim_cc], -1 \n\t"
+ "xor %[sum], %[sum], %[sum] \n\t"  // sum = 0
+ "2: \n\t"  // two samples per pass
+ "lh %[t0], 0(%[pseq1]) \n\t"
+ "lh %[t1], 0(%[pseq2]) \n\t"
+ "lh %[t2], 2(%[pseq1]) \n\t"
+ "lh %[t3], 2(%[pseq2]) \n\t"
+ "mul %[t0], %[t0], %[t1] \n\t"
+ "addiu %[k], %[k], -1 \n\t"
+ "mul %[t2], %[t2], %[t3] \n\t"
+ "addiu %[pseq1], %[pseq1], 4 \n\t"
+ "addiu %[pseq2], %[pseq2], 4 \n\t"
+ "srav %[t0], %[t0], %[right_shifts] \n\t"  // each product is shifted before it is added
+ "addu %[sum], %[sum], %[t0] \n\t"
+ "srav %[t2], %[t2], %[right_shifts] \n\t"
+ "bgtz %[k], 2b \n\t"
+ " addu %[sum], %[sum], %[t2] \n\t"  // (branch delay slot)
+ "addu %[pseq2_0], %[pseq2_0], %[step_seq2] \n\t"  // next output: advance the seq2 start
+ "sw %[sum], 0(%[cc]) \n\t"
+ "bgtz %[dim_cc], 1b \n\t"
+ " addiu %[cc], %[cc], 4 \n\t"  // (branch delay slot)
+ "b 6f \n\t"  // even path finished
+ " nop \n\t"
+ "3: \n\t"  // odd dim_seq: start of one output
+ "move %[pseq1], %[pseq1_0] \n\t"
+ "move %[pseq2], %[pseq2_0] \n\t"
+ "sra %[k], %[dim_seq], 1 \n\t"
+ "addiu %[dim_cc], %[dim_cc], -1 \n\t"
+ "beqz %[k], 5f \n\t"  // no full pair: go straight to the single sample
+ " xor %[sum], %[sum], %[sum] \n\t"
+ "4: \n\t"  // two samples per pass
+ "lh %[t0], 0(%[pseq1]) \n\t"
+ "lh %[t1], 0(%[pseq2]) \n\t"
+ "lh %[t2], 2(%[pseq1]) \n\t"
+ "lh %[t3], 2(%[pseq2]) \n\t"
+ "mul %[t0], %[t0], %[t1] \n\t"
+ "addiu %[k], %[k], -1 \n\t"
+ "mul %[t2], %[t2], %[t3] \n\t"
+ "addiu %[pseq1], %[pseq1], 4 \n\t"
+ "addiu %[pseq2], %[pseq2], 4 \n\t"
+ "srav %[t0], %[t0], %[right_shifts] \n\t"
+ "addu %[sum], %[sum], %[t0] \n\t"
+ "srav %[t2], %[t2], %[right_shifts] \n\t"
+ "bgtz %[k], 4b \n\t"
+ " addu %[sum], %[sum], %[t2] \n\t"
+ "5: \n\t"  // last, unpaired sample
+ "lh %[t0], 0(%[pseq1]) \n\t"
+ "lh %[t1], 0(%[pseq2]) \n\t"
+ "mul %[t0], %[t0], %[t1] \n\t"
+ "srav %[t0], %[t0], %[right_shifts] \n\t"
+ "addu %[sum], %[sum], %[t0] \n\t"
+ "addu %[pseq2_0], %[pseq2_0], %[step_seq2] \n\t"
+ "sw %[sum], 0(%[cc]) \n\t"
+ "bgtz %[dim_cc], 3b \n\t"
+ " addiu %[cc], %[cc], 4 \n\t"
+ "6: \n\t"  // common exit
+ ".set pop \n\t"
+ : [step_seq2] "+r" (step_seq2), [t0] "=&r" (t0), [t1] "=&r" (t1),
+ [t2] "=&r" (t2), [t3] "=&r" (t3), [pseq1] "=&r" (pseq1),
+ [pseq2] "=&r" (pseq2), [pseq1_0] "+r" (pseq1_0), [pseq2_0] "+r" (pseq2_0),
+ [k] "=&r" (k), [dim_cc] "+r" (dim_cross_correlation), [sum] "=&r" (sum),
+ [cc] "+r" (cross_correlation)
+ : [dim_seq] "r" (dim_seq), [right_shifts] "r" (right_shifts)
+ : "hi", "lo", "memory"
+ );
+}
diff --git a/third_party/webrtc/src/webrtc/common_audio/signal_processing/cross_correlation_neon.c b/third_party/webrtc/src/webrtc/common_audio/signal_processing/cross_correlation_neon.c
new file mode 100644
index 00000000..918b6715
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/cross_correlation_neon.c
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+
+#include <arm_neon.h>
+
+static inline void DotProductWithScaleNeon(int32_t* cross_correlation,
+ const int16_t* vector1,
+ const int16_t* vector2,
+ size_t length,
+ int scaling) {
+ size_t i = 0;
+ size_t len1 = length >> 3;  // Number of full 8-sample groups.
+ size_t len2 = length & 7;  // Leftover samples (0..7).
+ int64x2_t sum0 = vdupq_n_s64(0);
+ int64x2_t sum1 = vdupq_n_s64(0);
+ // Vector part: 8 products per pass, accumulated into the 64-bit lanes of sum0 and sum1.
+ for (i = len1; i > 0; i -= 1) {
+ int16x8_t seq1_16x8 = vld1q_s16(vector1);
+ int16x8_t seq2_16x8 = vld1q_s16(vector2);
+#if defined(WEBRTC_ARCH_ARM64)
+ int32x4_t tmp0 = vmull_s16(vget_low_s16(seq1_16x8),
+ vget_low_s16(seq2_16x8));
+ int32x4_t tmp1 = vmull_high_s16(seq1_16x8, seq2_16x8);
+#else
+ int32x4_t tmp0 = vmull_s16(vget_low_s16(seq1_16x8),
+ vget_low_s16(seq2_16x8));
+ int32x4_t tmp1 = vmull_s16(vget_high_s16(seq1_16x8),
+ vget_high_s16(seq2_16x8));
+#endif
+ sum0 = vpadalq_s32(sum0, tmp0);
+ sum1 = vpadalq_s32(sum1, tmp1);
+ vector1 += 8;
+ vector2 += 8;
+ }
+
+ // Calculate the rest of the samples with scalar code and a 64-bit accumulator.
+ int64_t sum_res = 0;
+ for (i = len2; i > 0; i -= 1) {
+ sum_res += WEBRTC_SPL_MUL_16_16(*vector1, *vector2);
+ vector1++;
+ vector2++;
+ }
+ // Combine the partial sums; |scaling| is applied once to the total, not to each product.
+ sum0 = vaddq_s64(sum0, sum1);
+#if defined(WEBRTC_ARCH_ARM64)
+ int64_t sum2 = vaddvq_s64(sum0);
+ *cross_correlation = (int32_t)((sum2 + sum_res) >> scaling);
+#else
+ int64x1_t shift = vdup_n_s64(-scaling);  // Negated count, presumably so that vshl shifts right - confirm.
+ int64x1_t sum2 = vadd_s64(vget_low_s64(sum0), vget_high_s64(sum0));
+ sum2 = vadd_s64(sum2, vdup_n_s64(sum_res));
+ sum2 = vshl_s64(sum2, shift);
+ vst1_lane_s32(cross_correlation, vreinterpret_s32_s64(sum2), 0);  // Stores lane 0 of the sum viewed as 32-bit lanes.
+#endif
+}
+
+/* NEON version of WebRtcSpl_CrossCorrelation() for ARM32/64 platforms.
+ * Produces |dim_cross_correlation| outputs; output i is the scaled dot product
+ * of seq1 with seq2 offset by (step_seq2 * i) samples, over |dim_seq| samples.
+ */
+void WebRtcSpl_CrossCorrelationNeon(int32_t* cross_correlation,
+                                    const int16_t* seq1,
+                                    const int16_t* seq2,
+                                    size_t dim_seq,
+                                    size_t dim_cross_correlation,
+                                    int right_shifts,
+                                    int step_seq2) {
+  size_t lag;
+
+  for (lag = 0; lag < dim_cross_correlation; lag++) {
+    /* step_seq2 is multiplied by the unsigned lag before the pointer add. */
+    DotProductWithScaleNeon(&cross_correlation[lag],
+                            seq1,
+                            seq2 + (step_seq2 * lag),
+                            dim_seq,
+                            right_shifts);
+  }
+}
diff --git a/third_party/webrtc/src/webrtc/common_audio/signal_processing/division_operations.c b/third_party/webrtc/src/webrtc/common_audio/signal_processing/division_operations.c
new file mode 100644
index 00000000..eaa06a1f
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/division_operations.c
@@ -0,0 +1,138 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+/*
+ * This file contains implementations of the divisions
+ * WebRtcSpl_DivU32U16()
+ * WebRtcSpl_DivW32W16()
+ * WebRtcSpl_DivW32W16ResW16()
+ * WebRtcSpl_DivResultInQ31()
+ * WebRtcSpl_DivW32HiLow()
+ *
+ * The description header can be found in signal_processing_library.h
+ *
+ */
+
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+
+/* Unsigned 32-bit by 16-bit division. Returns num / den, or 0xFFFFFFFF when
+ * den is zero (guard against division by zero). */
+uint32_t WebRtcSpl_DivU32U16(uint32_t num, uint16_t den)
+{
+    if (den == 0)
+    {
+        return (uint32_t)0xFFFFFFFF;
+    }
+
+    return (uint32_t)(num / den);
+}
+
+/* Signed 32/16-bit division. Returns 0x7FFFFFFF when den == 0, and also for
+ * INT32_MIN / -1, whose true quotient (2^31) does not fit in int32_t and whose
+ * evaluation in C is undefined behavior (it traps on some CPUs). */
+int32_t WebRtcSpl_DivW32W16(int32_t num, int16_t den)
+{
+    if (den == 0 || (den == -1 && num == (-2147483647 - 1)))
+    {
+        return (int32_t)0x7FFFFFFF;
+    }
+    return (int32_t)(num / den);
+}
+
+/* Signed 32/16-bit division with the quotient truncated to 16 bits.
+ * Returns 0x7FFF when den == 0. The division is carried out in 64 bits so that
+ * INT32_MIN / -1 (undefined in 32-bit arithmetic) is well defined. */
+int16_t WebRtcSpl_DivW32W16ResW16(int32_t num, int16_t den)
+{
+    if (den == 0)
+    {
+        return (int16_t)0x7FFF;
+    }
+    return (int16_t)((int64_t)num / den);
+}
+
+/* Bit-serial (shift-and-subtract) division producing 31 quotient bits.
+ * The signs of num and den are removed first and reapplied to the result when
+ * exactly one of them was negative. Returns 0 immediately when num == 0.
+ * NOTE(review): presumably callers keep |num| below |den| and avoid INT32_MIN,
+ * since the negations and left shifts below are unchecked - confirm. */
+int32_t WebRtcSpl_DivResultInQ31(int32_t num, int32_t den)
+{
+    int32_t remainder;
+    int32_t divisor;
+    int32_t quotient = 0;
+    int bit;
+    int negate;
+
+    if (num == 0)
+    {
+        return 0;
+    }
+
+    /* Work on magnitudes; remember whether the result must be negated. */
+    negate = (num < 0) != (den < 0);
+    remainder = (num < 0) ? -num : num;
+    divisor = (den < 0) ? -den : den;
+
+    /* One quotient bit per iteration, most significant bit first. */
+    for (bit = 0; bit < 31; bit++)
+    {
+        quotient <<= 1;
+        remainder <<= 1;
+        if (remainder >= divisor)
+        {
+            remainder -= divisor;
+            quotient++;
+        }
+    }
+
+    return negate ? -quotient : quotient;
+}
+
+int32_t WebRtcSpl_DivW32HiLow(int32_t num, int16_t den_hi, int16_t den_low)
+{
+ int16_t approx, tmp_hi, tmp_low, num_hi, num_low;
+ int32_t tmpW32;
+ // Divides num by a denominator passed as (den_hi, den_low): 1/den is estimated from den_hi, refined once, then multiplied by num.
+ approx = (int16_t)WebRtcSpl_DivW32W16((int32_t)0x1FFFFFFF, den_hi);
+ // result in Q14 (Note: 3FFFFFFF = 0.5 in Q30). NOTE(review): the code divides 0x1FFFFFFF, not 0x3FFFFFFF - confirm.
+
+ // Refinement step: tmpW32 = 1/den = approx * (2.0 - den * approx) (in Q30)
+ tmpW32 = (den_hi * approx << 1) + ((den_low * approx >> 15) << 1);
+ // tmpW32 = den * approx (high-part product plus the low-part product shifted down by 15)
+
+ tmpW32 = (int32_t)0x7fffffffL - tmpW32; // result in Q30 (tmpW32 = 2.0-(den*approx))
+
+ // Store tmpW32 in hi and low format: hi = top 16 bits, low = remaining bits shifted right by 1
+ tmp_hi = (int16_t)(tmpW32 >> 16);
+ tmp_low = (int16_t)((tmpW32 - ((int32_t)tmp_hi << 16)) >> 1);
+
+ // tmpW32 = 1/den in Q29
+ tmpW32 = (tmp_hi * approx + (tmp_low * approx >> 15)) << 1;
+
+ // 1/den in hi and low format
+ tmp_hi = (int16_t)(tmpW32 >> 16);
+ tmp_low = (int16_t)((tmpW32 - ((int32_t)tmp_hi << 16)) >> 1);
+
+ // Store num in hi and low format
+ num_hi = (int16_t)(num >> 16);
+ num_low = (int16_t)((num - ((int32_t)num_hi << 16)) >> 1);
+
+ // num * (1/den) by 32 bit multiplication (result in Q28)
+ // The num_low * tmp_low term is not computed.
+ tmpW32 = num_hi * tmp_hi + (num_hi * tmp_low >> 15) +
+ (num_low * tmp_hi >> 15);
+
+ // Put result in Q31 (convert from Q28)
+ tmpW32 = WEBRTC_SPL_LSHIFT_W32(tmpW32, 3);
+
+ return tmpW32;
+}
diff --git a/third_party/webrtc/src/webrtc/common_audio/signal_processing/dot_product_with_scale.c b/third_party/webrtc/src/webrtc/common_audio/signal_processing/dot_product_with_scale.c
new file mode 100644
index 00000000..1302d625
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/dot_product_with_scale.c
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+
+/* Sum of (vector1[k] * vector2[k]) >> scaling over |length| samples; every
+ * product is shifted before it is added to the 32-bit accumulator. */
+int32_t WebRtcSpl_DotProductWithScale(const int16_t* vector1,
+                                      const int16_t* vector2,
+                                      size_t length, int scaling) {
+  const size_t quads_end = length - (length & 3);  /* Largest multiple of 4. */
+  int32_t acc = 0;
+  size_t k;
+
+  for (k = 0; k < quads_end; k += 4) {  /* Four products per pass. */
+    acc += (vector1[k] * vector2[k]) >> scaling;
+    acc += (vector1[k + 1] * vector2[k + 1]) >> scaling;
+    acc += (vector1[k + 2] * vector2[k + 2]) >> scaling;
+    acc += (vector1[k + 3] * vector2[k + 3]) >> scaling;
+  }
+  for (; k < length; k++)  /* Remaining zero to three products. */
+    acc += (vector1[k] * vector2[k]) >> scaling;
+
+  return acc;
+}
diff --git a/third_party/webrtc/src/webrtc/common_audio/signal_processing/downsample_fast.c b/third_party/webrtc/src/webrtc/common_audio/signal_processing/downsample_fast.c
new file mode 100644
index 00000000..726a8881
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/downsample_fast.c
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+
+// TODO(Bjornv): Change the function parameter order to WebRTC code style.
+// C version of WebRtcSpl_DownsampleFast() for generic platforms.
+// Output n is the Q12 sum of coefficients[j] * data_in[delay + n * factor - j]
+// plus a rounding offset, shifted down to Q0 and passed through
+// WebRtcSpl_SatW32ToW16(). Returns 0 on success, or -1 when either length is
+// zero or |data_in| is too short for the last output.
+int WebRtcSpl_DownsampleFastC(const int16_t* data_in,
+                              size_t data_in_length,
+                              int16_t* data_out,
+                              size_t data_out_length,
+                              const int16_t* __restrict coefficients,
+                              size_t coefficients_length,
+                              int factor,
+                              size_t delay) {
+  // One past the input index used for the final output sample.
+  const size_t stop = delay + factor * (data_out_length - 1) + 1;
+  size_t pos;
+
+  // Reject empty buffers and inputs that end before |stop|.
+  if (data_out_length == 0 || coefficients_length == 0 ||
+      data_in_length < stop) {
+    return -1;
+  }
+
+  for (pos = delay; pos < stop; pos += factor) {
+    int32_t acc = 2048;  // Rounding offset: 0.5 in Q12.
+    size_t tap;
+
+    for (tap = 0; tap < coefficients_length; tap++) {
+      acc += coefficients[tap] * data_in[pos - tap];  // Q12.
+    }
+    // Shift to Q0, then saturate and store the output.
+    *data_out++ = WebRtcSpl_SatW32ToW16(acc >> 12);
+  }
+  return 0;
+}
diff --git a/third_party/webrtc/src/webrtc/common_audio/signal_processing/downsample_fast_mips.c b/third_party/webrtc/src/webrtc/common_audio/signal_processing/downsample_fast_mips.c
new file mode 100644
index 00000000..ac39401a
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/downsample_fast_mips.c
@@ -0,0 +1,169 @@
+/*
+ * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+
+// Version of WebRtcSpl_DownsampleFast() for MIPS platforms (inline assembly; returns 0, or -1 when the length checks fail).
+int WebRtcSpl_DownsampleFast_mips(const int16_t* data_in,
+ size_t data_in_length,
+ int16_t* data_out,
+ size_t data_out_length,
+ const int16_t* __restrict coefficients,
+ size_t coefficients_length,
+ int factor,
+ size_t delay) {
+ int i;  // Remaining input span (endpos - delay), reduced by factor per output.
+ int j;  // Counter of the unrolled coefficient loop.
+ int k;  // Number of coefficients left over after the unrolled loop.
+ int32_t out_s32 = 0;
+ size_t endpos = delay + factor * (data_out_length - 1) + 1;
+ // data_out is advanced by the assembly below, so it is bound as a read-write ("+r") operand, not as an input.
+ int32_t tmp1, tmp2, tmp3, tmp4, factor_2;
+ int16_t* p_coefficients;
+ int16_t* p_data_in;
+ int16_t* p_data_in_0 = (int16_t*)&data_in[delay];  // Input position of the current output.
+ int16_t* p_coefficients_0 = (int16_t*)&coefficients[0];
+#if !defined(MIPS_DSP_R1_LE)
+ int32_t max_16 = 0x7FFF;  // Saturation bounds for the path without DSP instructions.
+ int32_t min_16 = 0xFFFF8000;
+#endif // #if !defined(MIPS_DSP_R1_LE)
+
+ // Return error if any of the running conditions doesn't meet.
+ if (data_out_length == 0 || coefficients_length == 0
+ || data_in_length < endpos) {
+ return -1;
+ }
+#if defined(MIPS_DSP_R2_LE)
+ __asm __volatile (
+ ".set push \n\t"
+ ".set noreorder \n\t"
+ "subu %[i], %[endpos], %[delay] \n\t"
+ "sll %[factor_2], %[factor], 1 \n\t"  // factor in bytes (2 per sample)
+ "1: \n\t"  // start of one output sample
+ "move %[p_data_in], %[p_data_in_0] \n\t"
+ "mult $zero, $zero \n\t"  // clear the accumulator
+ "move %[p_coefs], %[p_coefs_0] \n\t"
+ "sra %[j], %[coef_length], 2 \n\t"  // groups of four coefficients
+ "beq %[j], $zero, 3f \n\t"
+ " andi %[k], %[coef_length], 3 \n\t"  // (delay slot) leftover coefficients
+ "2: \n\t"
+ "lwl %[tmp1], 1(%[p_data_in]) \n\t"
+ "lwl %[tmp2], 3(%[p_coefs]) \n\t"
+ "lwl %[tmp3], -3(%[p_data_in]) \n\t"
+ "lwl %[tmp4], 7(%[p_coefs]) \n\t"
+ "lwr %[tmp1], -2(%[p_data_in]) \n\t"
+ "lwr %[tmp2], 0(%[p_coefs]) \n\t"
+ "lwr %[tmp3], -6(%[p_data_in]) \n\t"
+ "lwr %[tmp4], 4(%[p_coefs]) \n\t"
+ "packrl.ph %[tmp1], %[tmp1], %[tmp1] \n\t"
+ "packrl.ph %[tmp3], %[tmp3], %[tmp3] \n\t"
+ "dpa.w.ph $ac0, %[tmp1], %[tmp2] \n\t"
+ "dpa.w.ph $ac0, %[tmp3], %[tmp4] \n\t"
+ "addiu %[j], %[j], -1 \n\t"
+ "addiu %[p_data_in], %[p_data_in], -8 \n\t"  // input is walked backwards
+ "bgtz %[j], 2b \n\t"
+ " addiu %[p_coefs], %[p_coefs], 8 \n\t"
+ "3: \n\t"
+ "beq %[k], $zero, 5f \n\t"
+ " nop \n\t"
+ "4: \n\t"  // leftover coefficients, one at a time
+ "lhu %[tmp1], 0(%[p_data_in]) \n\t"
+ "lhu %[tmp2], 0(%[p_coefs]) \n\t"
+ "addiu %[p_data_in], %[p_data_in], -2 \n\t"
+ "addiu %[k], %[k], -1 \n\t"
+ "dpa.w.ph $ac0, %[tmp1], %[tmp2] \n\t"
+ "bgtz %[k], 4b \n\t"
+ " addiu %[p_coefs], %[p_coefs], 2 \n\t"
+ "5: \n\t"
+ "extr_r.w %[out_s32], $ac0, 12 \n\t"
+ "addu %[p_data_in_0], %[p_data_in_0], %[factor_2] \n\t"
+ "subu %[i], %[i], %[factor] \n\t"
+ "shll_s.w %[out_s32], %[out_s32], 16 \n\t"
+ "sra %[out_s32], %[out_s32], 16 \n\t"
+ "sh %[out_s32], 0(%[data_out]) \n\t"
+ "bgtz %[i], 1b \n\t"
+ " addiu %[data_out], %[data_out], 2 \n\t"  // (delay slot) writes data_out, hence "+r"
+ ".set pop \n\t"
+ : [tmp1] "=&r" (tmp1), [tmp2] "=&r" (tmp2), [tmp3] "=&r" (tmp3),
+ [tmp4] "=&r" (tmp4), [p_data_in] "=&r" (p_data_in),
+ [p_data_in_0] "+r" (p_data_in_0), [p_coefs] "=&r" (p_coefficients),
+ [j] "=&r" (j), [out_s32] "=&r" (out_s32), [factor_2] "=&r" (factor_2),
+ [i] "=&r" (i), [k] "=&r" (k), [data_out] "+r" (data_out)
+ : [coef_length] "r" (coefficients_length),
+ [p_coefs_0] "r" (p_coefficients_0), [endpos] "r" (endpos),
+ [delay] "r" (delay), [factor] "r" (factor)
+ : "memory", "hi", "lo"
+ );
+#else // #if defined(MIPS_DSP_R2_LE)
+ __asm __volatile (
+ ".set push \n\t"
+ ".set noreorder \n\t"
+ "sll %[factor_2], %[factor], 1 \n\t"  // factor in bytes (2 per sample)
+ "subu %[i], %[endpos], %[delay] \n\t"
+ "1: \n\t"  // start of one output sample
+ "move %[p_data_in], %[p_data_in_0] \n\t"
+ "addiu %[out_s32], $zero, 2048 \n\t"  // start from the rounding offset
+ "move %[p_coefs], %[p_coefs_0] \n\t"
+ "sra %[j], %[coef_length], 1 \n\t"  // pairs of coefficients
+ "beq %[j], $zero, 3f \n\t"
+ " andi %[k], %[coef_length], 1 \n\t"  // (delay slot) one leftover coefficient?
+ "2: \n\t"
+ "lh %[tmp1], 0(%[p_data_in]) \n\t"
+ "lh %[tmp2], 0(%[p_coefs]) \n\t"
+ "lh %[tmp3], -2(%[p_data_in]) \n\t"
+ "lh %[tmp4], 2(%[p_coefs]) \n\t"
+ "mul %[tmp1], %[tmp1], %[tmp2] \n\t"
+ "addiu %[p_coefs], %[p_coefs], 4 \n\t"
+ "mul %[tmp3], %[tmp3], %[tmp4] \n\t"
+ "addiu %[j], %[j], -1 \n\t"
+ "addiu %[p_data_in], %[p_data_in], -4 \n\t"  // input is walked backwards
+ "addu %[tmp1], %[tmp1], %[tmp3] \n\t"
+ "bgtz %[j], 2b \n\t"
+ " addu %[out_s32], %[out_s32], %[tmp1] \n\t"
+ "3: \n\t"
+ "beq %[k], $zero, 4f \n\t"
+ " nop \n\t"
+ "lh %[tmp1], 0(%[p_data_in]) \n\t"
+ "lh %[tmp2], 0(%[p_coefs]) \n\t"
+ "mul %[tmp1], %[tmp1], %[tmp2] \n\t"
+ "addu %[out_s32], %[out_s32], %[tmp1] \n\t"
+ "4: \n\t"
+ "sra %[out_s32], %[out_s32], 12 \n\t"
+ "addu %[p_data_in_0], %[p_data_in_0], %[factor_2] \n\t"
+#if defined(MIPS_DSP_R1_LE)
+ "shll_s.w %[out_s32], %[out_s32], 16 \n\t"
+ "sra %[out_s32], %[out_s32], 16 \n\t"
+#else // #if defined(MIPS_DSP_R1_LE)
+ "slt %[tmp1], %[max_16], %[out_s32] \n\t"  // clamp to max_16
+ "movn %[out_s32], %[max_16], %[tmp1] \n\t"
+ "slt %[tmp1], %[out_s32], %[min_16] \n\t"  // clamp to min_16
+ "movn %[out_s32], %[min_16], %[tmp1] \n\t"
+#endif // #if defined(MIPS_DSP_R1_LE)
+ "subu %[i], %[i], %[factor] \n\t"
+ "sh %[out_s32], 0(%[data_out]) \n\t"
+ "bgtz %[i], 1b \n\t"
+ " addiu %[data_out], %[data_out], 2 \n\t"  // (delay slot) writes data_out, hence "+r"
+ ".set pop \n\t"
+ : [tmp1] "=&r" (tmp1), [tmp2] "=&r" (tmp2), [tmp3] "=&r" (tmp3),
+ [tmp4] "=&r" (tmp4), [p_data_in] "=&r" (p_data_in), [k] "=&r" (k),
+ [p_data_in_0] "+r" (p_data_in_0), [p_coefs] "=&r" (p_coefficients),
+ [j] "=&r" (j), [out_s32] "=&r" (out_s32), [factor_2] "=&r" (factor_2),
+ [i] "=&r" (i), [data_out] "+r" (data_out)
+ : [coef_length] "r" (coefficients_length),
+ [p_coefs_0] "r" (p_coefficients_0), [endpos] "r" (endpos),
+#if !defined(MIPS_DSP_R1_LE)
+ [max_16] "r" (max_16), [min_16] "r" (min_16),
+#endif // #if !defined(MIPS_DSP_R1_LE)
+ [delay] "r" (delay), [factor] "r" (factor)
+ : "memory", "hi", "lo"
+ );
+#endif // #if defined(MIPS_DSP_R2_LE)
+ return 0;
+}
diff --git a/third_party/webrtc/src/webrtc/common_audio/signal_processing/downsample_fast_neon.c b/third_party/webrtc/src/webrtc/common_audio/signal_processing/downsample_fast_neon.c
new file mode 100644
index 00000000..58732dab
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/downsample_fast_neon.c
@@ -0,0 +1,217 @@
+/*
+ * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+
+#include <arm_neon.h>
+
+// NEON intrinsics version of WebRtcSpl_DownsampleFast() for ARM 32-bit/64-bit
+// platforms. Returns 0 on success, -1 when the length checks below fail.
+int WebRtcSpl_DownsampleFastNeon(const int16_t* data_in,
+ size_t data_in_length,
+ int16_t* data_out,
+ size_t data_out_length,
+ const int16_t* __restrict coefficients,
+ size_t coefficients_length,
+ int factor,
+ size_t delay) {
+ size_t i = 0;
+ size_t j = 0;
+ int32_t out_s32 = 0;
+ size_t endpos = delay + factor * (data_out_length - 1) + 1;
+ size_t res = data_out_length & 0x7;  // Outputs left for the scalar second part (0..7).
+ size_t endpos1 = endpos - factor * res;  // End of the 8-outputs-per-pass first part.
+
+ // Return error if any of the running conditions doesn't meet.
+ if (data_out_length == 0 || coefficients_length == 0
+ || data_in_length < endpos) {
+ return -1;
+ }
+ // NOTE(review): the vld2q/vld4q loads below fetch 16/32 samples from data_in[i - j]; the last ones may lie at or beyond endpos - confirm the buffer allows it.
+ // First part, unroll the loop 8 times, with 3 subcases
+ // (factor == 2, 4, others).
+ switch (factor) {
+ case 2: {
+ for (i = delay; i < endpos1; i += 16) {
+ // Round value, 0.5 in Q12.
+ int32x4_t out32x4_0 = vdupq_n_s32(2048);
+ int32x4_t out32x4_1 = vdupq_n_s32(2048);
+
+#if defined(WEBRTC_ARCH_ARM64)
+ // Unroll the loop 2 times.
+ for (j = 0; j + 1 < coefficients_length; j += 2) {
+ int32x2_t coeff32 = vld1_dup_s32((int32_t*)&coefficients[j]);
+ int16x4_t coeff16x4 = vreinterpret_s16_s32(coeff32);
+ int16x8x2_t in16x8x2 = vld2q_s16(&data_in[i - j - 1]);
+
+ // Mul and accumulate low 64-bit data.
+ int16x4_t in16x4_0 = vget_low_s16(in16x8x2.val[0]);
+ int16x4_t in16x4_1 = vget_low_s16(in16x8x2.val[1]);
+ out32x4_0 = vmlal_lane_s16(out32x4_0, in16x4_0, coeff16x4, 1);
+ out32x4_0 = vmlal_lane_s16(out32x4_0, in16x4_1, coeff16x4, 0);
+
+ // Mul and accumulate high 64-bit data.
+ // TODO: vget_high_s16 need extra cost on ARM64. This could be
+ // replaced by vmlal_high_lane_s16. But for the interface of
+ // vmlal_high_lane_s16, there is a bug in gcc 4.9.
+ // This issue need to be tracked in the future.
+ int16x4_t in16x4_2 = vget_high_s16(in16x8x2.val[0]);
+ int16x4_t in16x4_3 = vget_high_s16(in16x8x2.val[1]);
+ out32x4_1 = vmlal_lane_s16(out32x4_1, in16x4_2, coeff16x4, 1);
+ out32x4_1 = vmlal_lane_s16(out32x4_1, in16x4_3, coeff16x4, 0);
+ }
+
+ for (; j < coefficients_length; j++) {
+ int16x4_t coeff16x4 = vld1_dup_s16(&coefficients[j]);
+ int16x8x2_t in16x8x2 = vld2q_s16(&data_in[i - j]);
+
+ // Mul and accumulate low 64-bit data.
+ int16x4_t in16x4_0 = vget_low_s16(in16x8x2.val[0]);
+ out32x4_0 = vmlal_lane_s16(out32x4_0, in16x4_0, coeff16x4, 0);
+
+ // Mul and accumulate high 64-bit data.
+ // TODO: vget_high_s16 need extra cost on ARM64. This could be
+ // replaced by vmlal_high_lane_s16. But for the interface of
+ // vmlal_high_lane_s16, there is a bug in gcc 4.9.
+ // This issue need to be tracked in the future.
+ int16x4_t in16x4_1 = vget_high_s16(in16x8x2.val[0]);
+ out32x4_1 = vmlal_lane_s16(out32x4_1, in16x4_1, coeff16x4, 0);
+ }
+#else
+ // On ARMv7, the loop unrolling 2 times results in performance
+ // regression.
+ for (j = 0; j < coefficients_length; j++) {
+ int16x4_t coeff16x4 = vld1_dup_s16(&coefficients[j]);
+ int16x8x2_t in16x8x2 = vld2q_s16(&data_in[i - j]);
+
+ // Mul and accumulate.
+ int16x4_t in16x4_0 = vget_low_s16(in16x8x2.val[0]);
+ int16x4_t in16x4_1 = vget_high_s16(in16x8x2.val[0]);
+ out32x4_0 = vmlal_lane_s16(out32x4_0, in16x4_0, coeff16x4, 0);
+ out32x4_1 = vmlal_lane_s16(out32x4_1, in16x4_1, coeff16x4, 0);
+ }
+#endif
+
+ // Saturate and store the output.
+ int16x4_t out16x4_0 = vqshrn_n_s32(out32x4_0, 12);
+ int16x4_t out16x4_1 = vqshrn_n_s32(out32x4_1, 12);
+ vst1q_s16(data_out, vcombine_s16(out16x4_0, out16x4_1));
+ data_out += 8;
+ }
+ break;
+ }
+ case 4: {
+ for (i = delay; i < endpos1; i += 32) {
+ // Round value, 0.5 in Q12.
+ int32x4_t out32x4_0 = vdupq_n_s32(2048);
+ int32x4_t out32x4_1 = vdupq_n_s32(2048);
+
+ // Unroll the loop 4 times; "j + 3 <" (not "length - 3") cannot wrap for < 3 taps.
+ for (j = 0; j + 3 < coefficients_length; j += 4) {
+ int16x4_t coeff16x4 = vld1_s16(&coefficients[j]);
+ int16x8x4_t in16x8x4 = vld4q_s16(&data_in[i - j - 3]);
+
+ // Mul and accumulate low 64-bit data.
+ int16x4_t in16x4_0 = vget_low_s16(in16x8x4.val[0]);
+ int16x4_t in16x4_2 = vget_low_s16(in16x8x4.val[1]);
+ int16x4_t in16x4_4 = vget_low_s16(in16x8x4.val[2]);
+ int16x4_t in16x4_6 = vget_low_s16(in16x8x4.val[3]);
+ out32x4_0 = vmlal_lane_s16(out32x4_0, in16x4_0, coeff16x4, 3);
+ out32x4_0 = vmlal_lane_s16(out32x4_0, in16x4_2, coeff16x4, 2);
+ out32x4_0 = vmlal_lane_s16(out32x4_0, in16x4_4, coeff16x4, 1);
+ out32x4_0 = vmlal_lane_s16(out32x4_0, in16x4_6, coeff16x4, 0);
+
+ // Mul and accumulate high 64-bit data.
+ // TODO: vget_high_s16 need extra cost on ARM64. This could be
+ // replaced by vmlal_high_lane_s16. But for the interface of
+ // vmlal_high_lane_s16, there is a bug in gcc 4.9.
+ // This issue need to be tracked in the future.
+ int16x4_t in16x4_1 = vget_high_s16(in16x8x4.val[0]);
+ int16x4_t in16x4_3 = vget_high_s16(in16x8x4.val[1]);
+ int16x4_t in16x4_5 = vget_high_s16(in16x8x4.val[2]);
+ int16x4_t in16x4_7 = vget_high_s16(in16x8x4.val[3]);
+ out32x4_1 = vmlal_lane_s16(out32x4_1, in16x4_1, coeff16x4, 3);
+ out32x4_1 = vmlal_lane_s16(out32x4_1, in16x4_3, coeff16x4, 2);
+ out32x4_1 = vmlal_lane_s16(out32x4_1, in16x4_5, coeff16x4, 1);
+ out32x4_1 = vmlal_lane_s16(out32x4_1, in16x4_7, coeff16x4, 0);
+ }
+
+ for (; j < coefficients_length; j++) {
+ int16x4_t coeff16x4 = vld1_dup_s16(&coefficients[j]);
+ int16x8x4_t in16x8x4 = vld4q_s16(&data_in[i - j]);
+
+ // Mul and accumulate low 64-bit data.
+ int16x4_t in16x4_0 = vget_low_s16(in16x8x4.val[0]);
+ out32x4_0 = vmlal_lane_s16(out32x4_0, in16x4_0, coeff16x4, 0);
+
+ // Mul and accumulate high 64-bit data.
+ // TODO: vget_high_s16 need extra cost on ARM64. This could be
+ // replaced by vmlal_high_lane_s16. But for the interface of
+ // vmlal_high_lane_s16, there is a bug in gcc 4.9.
+ // This issue need to be tracked in the future.
+ int16x4_t in16x4_1 = vget_high_s16(in16x8x4.val[0]);
+ out32x4_1 = vmlal_lane_s16(out32x4_1, in16x4_1, coeff16x4, 0);
+ }
+
+ // Saturate and store the output.
+ int16x4_t out16x4_0 = vqshrn_n_s32(out32x4_0, 12);
+ int16x4_t out16x4_1 = vqshrn_n_s32(out32x4_1, 12);
+ vst1q_s16(data_out, vcombine_s16(out16x4_0, out16x4_1));
+ data_out += 8;
+ }
+ break;
+ }
+ default: {
+ for (i = delay; i < endpos1; i += factor * 8) {
+ // Round value, 0.5 in Q12.
+ int32x4_t out32x4_0 = vdupq_n_s32(2048);
+ int32x4_t out32x4_1 = vdupq_n_s32(2048);
+
+ for (j = 0; j < coefficients_length; j++) {
+ int16x4_t coeff16x4 = vld1_dup_s16(&coefficients[j]);
+ int16x4_t in16x4_0 = vld1_dup_s16(&data_in[i - j]);  // Gather 8 inputs spaced |factor| apart.
+ in16x4_0 = vld1_lane_s16(&data_in[i + factor - j], in16x4_0, 1);
+ in16x4_0 = vld1_lane_s16(&data_in[i + factor * 2 - j], in16x4_0, 2);
+ in16x4_0 = vld1_lane_s16(&data_in[i + factor * 3 - j], in16x4_0, 3);
+ int16x4_t in16x4_1 = vld1_dup_s16(&data_in[i + factor * 4 - j]);
+ in16x4_1 = vld1_lane_s16(&data_in[i + factor * 5 - j], in16x4_1, 1);
+ in16x4_1 = vld1_lane_s16(&data_in[i + factor * 6 - j], in16x4_1, 2);
+ in16x4_1 = vld1_lane_s16(&data_in[i + factor * 7 - j], in16x4_1, 3);
+
+ // Mul and accumulate.
+ out32x4_0 = vmlal_lane_s16(out32x4_0, in16x4_0, coeff16x4, 0);
+ out32x4_1 = vmlal_lane_s16(out32x4_1, in16x4_1, coeff16x4, 0);
+ }
+
+ // Saturate and store the output.
+ int16x4_t out16x4_0 = vqshrn_n_s32(out32x4_0, 12);
+ int16x4_t out16x4_1 = vqshrn_n_s32(out32x4_1, 12);
+ vst1q_s16(data_out, vcombine_s16(out16x4_0, out16x4_1));
+ data_out += 8;
+ }
+ break;
+ }
+ }
+
+ // Second part, do the rest iterations (if any), one output at a time.
+ for (; i < endpos; i += factor) {
+ out_s32 = 2048; // Round value, 0.5 in Q12.
+
+ for (j = 0; j < coefficients_length; j++) {
+ out_s32 = WebRtc_MulAccumW16(coefficients[j], data_in[i - j], out_s32);
+ }
+
+ // Saturate and store the output.
+ out_s32 >>= 12;
+ *data_out++ = WebRtcSpl_SatW32ToW16(out_s32);
+ }
+
+ return 0;
+}
diff --git a/third_party/webrtc/src/webrtc/common_audio/signal_processing/energy.c b/third_party/webrtc/src/webrtc/common_audio/signal_processing/energy.c
new file mode 100644
index 00000000..e83f1a69
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/energy.c
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+/*
+ * This file contains the function WebRtcSpl_Energy().
+ * The description header can be found in signal_processing_library.h
+ *
+ */
+
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+
+/* Sums the squares of |vector_length| samples of |vector|, each square shifted
+ * right by the value returned by WebRtcSpl_GetScalingSquare(); that value is
+ * also reported through |scale_factor|. */
+int32_t WebRtcSpl_Energy(int16_t* vector,
+                         size_t vector_length,
+                         int* scale_factor)
+{
+    const int shift =
+        WebRtcSpl_GetScalingSquare(vector, vector_length, vector_length);
+    int32_t energy = 0;
+    size_t n;
+
+    for (n = 0; n < vector_length; n++)
+    {
+        energy += (vector[n] * vector[n]) >> shift;
+    }
+
+    *scale_factor = shift;
+    return energy;
+}
diff --git a/third_party/webrtc/src/webrtc/common_audio/signal_processing/filter_ar.c b/third_party/webrtc/src/webrtc/common_audio/signal_processing/filter_ar.c
new file mode 100644
index 00000000..dfbc4c2f
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/filter_ar.c
@@ -0,0 +1,89 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+/*
+ * This file contains the function WebRtcSpl_FilterAR().
+ * The description header can be found in signal_processing_library.h
+ *
+ */
+
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+
+/* AR filter whose output is carried in two 16-bit parts (filtered and
+ * filtered_low). The most recent outputs are saved in state / state_low for
+ * the next call. Returns x_length. NOTE(review): presumably state_length ==
+ * state_low_length == a_length - 1; the code indexes state_low with
+ * state_length and copies a_length - 1 samples - confirm with callers. */
+size_t WebRtcSpl_FilterAR(const int16_t* a, size_t a_length,
+                          const int16_t* x, size_t x_length,
+                          int16_t* state, size_t state_length,
+                          int16_t* state_low, size_t state_low_length,
+                          int16_t* filtered,
+                          int16_t* filtered_low, size_t filtered_low_length)
+{
+    int32_t o;
+    int32_t oLOW;
+    size_t i, j, stop;
+    const int16_t* x_ptr = &x[0];
+    int16_t* filteredFINAL_ptr = filtered;
+    int16_t* filteredFINAL_LOW_ptr = filtered_low;
+
+    for (i = 0; i < x_length; i++)
+    {
+        // Calculate filtered[i] and filtered_low[i]
+        const int16_t* a_ptr = &a[1];
+        int16_t* filtered_ptr = &filtered[i - 1];
+        int16_t* filtered_low_ptr = &filtered_low[i - 1];
+        int16_t* state_ptr = &state[state_length - 1];
+        int16_t* state_low_ptr = &state_low[state_length - 1];
+
+        o = (int32_t)(*x_ptr++) << 12;
+        oLOW = (int32_t)0;
+
+        stop = (i < a_length) ? i + 1 : a_length;
+        for (j = 1; j < stop; j++)  // Taps that use outputs of this call.
+        {
+            o -= *a_ptr * *filtered_ptr--;
+            oLOW -= *a_ptr++ * *filtered_low_ptr--;
+        }
+        for (j = i + 1; j < a_length; j++)  // Remaining taps use the saved state.
+        {
+            o -= *a_ptr * *state_ptr--;
+            oLOW -= *a_ptr++ * *state_low_ptr--;
+        }
+
+        o += (oLOW >> 12);
+        *filteredFINAL_ptr = (int16_t)((o + (int32_t)2048) >> 12);
+        *filteredFINAL_LOW_ptr++ = (int16_t)(o - ((int32_t)(*filteredFINAL_ptr++)
+                << 12));
+    }
+
+    // Save the filter state
+    if (x_length >= state_length)
+    {
+        WebRtcSpl_CopyFromEndW16(filtered, x_length, a_length - 1, state);
+        WebRtcSpl_CopyFromEndW16(filtered_low, x_length, a_length - 1, state_low);
+    } else
+    {
+        for (i = 0; i < state_length - x_length; i++)  // Shift out the oldest entries.
+        {
+            state[i] = state[i + x_length];
+            state_low[i] = state_low[i + x_length];
+        }
+        for (i = 0; i < x_length; i++)
+        {
+            state[state_length - x_length + i] = filtered[i];
+            state_low[state_length - x_length + i] = filtered_low[i];  // Was stored into state, clobbering the line above.
+        }
+    }
+
+    return x_length;
+}
diff --git a/third_party/webrtc/src/webrtc/common_audio/signal_processing/filter_ar_fast_q12.c b/third_party/webrtc/src/webrtc/common_audio/signal_processing/filter_ar_fast_q12.c
new file mode 100644
index 00000000..70001a08
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/filter_ar_fast_q12.c
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+#include <assert.h>
+
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+
+// TODO(bjornv): Change the return type to report errors.
+
+// Q12 autoregressive filter. For every n in [0, data_length):
+//   data_out[n] = (coefficients[0] * data_in[n]
+//                  - SUM(k = 1 .. coefficients_length - 1)
+//                        coefficients[k] * data_out[n - k] + 2048) >> 12
+// where the value is first limited by WEBRTC_SPL_SAT. data_out[n - k] is also
+// read for k > n, i.e. from elements located before data_out[0].
+void WebRtcSpl_FilterARFastQ12(const int16_t* data_in,
+                               int16_t* data_out,
+                               const int16_t* __restrict coefficients,
+                               size_t coefficients_length,
+                               size_t data_length) {
+  size_t n;
+
+  assert(data_length > 0);
+  assert(coefficients_length > 1);
+  for (n = 0; n < data_length; ++n) {
+    int32_t feedback = 0;
+    int32_t acc;
+    size_t k = coefficients_length;
+    while (--k > 0) {  // Taps coefficients_length - 1 down to 1.
+      feedback += coefficients[k] * data_out[n - k];
+    }
+    acc = coefficients[0] * data_in[n] - feedback;
+    acc = WEBRTC_SPL_SAT(134215679, acc, -134217728);
+    data_out[n] = (int16_t)((acc + 2048) >> 12);
+  }
+}
diff --git a/third_party/webrtc/src/webrtc/common_audio/signal_processing/filter_ar_fast_q12_armv7.S b/third_party/webrtc/src/webrtc/common_audio/signal_processing/filter_ar_fast_q12_armv7.S
new file mode 100644
index 00000000..76c8eee7
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/filter_ar_fast_q12_armv7.S
@@ -0,0 +1,218 @@
+@
+@ Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+@
+@ Use of this source code is governed by a BSD-style license
+@ that can be found in the LICENSE file in the root of the source
+@ tree. An additional intellectual property rights grant can be found
+@ in the file PATENTS. All contributing project authors may
+@ be found in the AUTHORS file in the root of the source tree.
+@
+
+@ This file contains the function WebRtcSpl_FilterARFastQ12(), optimized for
+@ ARMv7 platform. The description header can be found in
+@ signal_processing_library.h
+@
+@ Output is bit-exact with the generic C code as in filter_ar_fast_q12.c, and
+@ the reference C code at end of this file.
+
+@ Assumptions:
+@ (1) data_length > 0
+@ (2) coefficients_length > 1
+
+@ Register usage:
+@
+@ r0: &data_in[i]
+@ r1: &data_out[i], for result output
+@ r2: &coefficients[0]
+@ r3: coefficients_length
+@ r4: Iteration counter for the outer loop.
+@ r5: data_out[j] as multiplication inputs
+@ r6: Calculated value for output data_out[]; iteration counter for inner loop
+@ r7: Partial sum of a filtering multiplication results
+@ r8: Partial sum of a filtering multiplication results
+@ r9: &data_out[], for filtering input; data_in[i]
+@ r10: coefficients[j]
+@ r11: Scratch
+@ r12: &coefficients[j]
+
+#include "webrtc/system_wrappers/interface/asm_defines.h"
+
+GLOBAL_FUNCTION WebRtcSpl_FilterARFastQ12
+.align  2
+DEFINE_FUNCTION WebRtcSpl_FilterARFastQ12
+  push {r4-r11}
+  @ data_length is the 5th argument: on the stack, above the 8 pushed registers.
+  ldr r12, [sp, #32]           @ data_length (size_t, so load the whole word)
+  subs r4, r12, #1
+  beq ODD_LENGTH               @ jump if data_length == 1
+
+LOOP_LENGTH:
+  add r12, r2, r3, lsl #1
+  sub r12, #4                  @ &coefficients[coefficients_length - 2]
+  sub r9, r1, r3, lsl #1
+  add r9, #2                   @ &data_out[i - coefficients_length + 1]
+  ldr r5, [r9], #4             @ data_out[i - coefficients_length + {1,2}]
+
+  mov r7, #0                   @ sum1
+  mov r8, #0                   @ sum2
+  subs r6, r3, #3              @ Iteration counter for inner loop.
+  beq ODD_A_LENGTH             @ branch if coefficients_length == 3
+  blt POST_LOOP_A_LENGTH       @ branch if coefficients_length == 2
+
+LOOP_A_LENGTH:
+  ldr r10, [r12], #-4          @ coefficients[j - 1], coefficients[j]
+  subs r6, #2
+  smlatt r8, r10, r5, r8       @ sum2 += coefficients[j] * data_out[i - j + 1];
+  smlatb r7, r10, r5, r7       @ sum1 += coefficients[j] * data_out[i - j];
+  smlabt r7, r10, r5, r7       @ sum1 += coefficients[j - 1] * data_out[i - j + 1];
+  ldr r5, [r9], #4             @ data_out[i - j + 2], data_out[i - j + 3]
+  smlabb r8, r10, r5, r8       @ sum2 += coefficients[j - 1] * data_out[i - j + 2];
+  bgt LOOP_A_LENGTH
+  blt POST_LOOP_A_LENGTH
+
+ODD_A_LENGTH:
+  ldrsh r10, [r12, #2]         @ Filter coefficients coefficients[2]
+  sub r12, #2                  @ &coefficients[0]
+  smlabb r7, r10, r5, r7       @ sum1 += coefficients[2] * data_out[i - 2];
+  smlabt r8, r10, r5, r8       @ sum2 += coefficients[2] * data_out[i - 1];
+  ldr r5, [r9, #-2]            @ data_out[i - 1], data_out[i]
+
+POST_LOOP_A_LENGTH:
+  ldr r10, [r12]               @ coefficients[0], coefficients[1]
+  smlatb r7, r10, r5, r7       @ sum1 += coefficients[1] * data_out[i - 1];
+
+  ldr r9, [r0], #4             @ data_in[i], data_in[i + 1]
+  smulbb r6, r10, r9           @ output1 = coefficients[0] * data_in[i];
+  sub r6, r7                   @ output1 -= sum1;
+  @ Add the rounding term 2048 only if (output1 >> 12) fits in 16 bits; saturate.
+  sbfx r11, r6, #12, #16
+  ssat r7, #16, r6, asr #12
+  cmp r7, r11
+  addeq r6, r6, #2048
+  ssat r6, #16, r6, asr #12
+  strh r6, [r1], #2            @ Store data_out[i]
+
+  smlatb r8, r10, r6, r8       @ sum2 += coefficients[1] * data_out[i];
+  smulbt r6, r10, r9           @ output2 = coefficients[0] * data_in[i + 1];
+  sub r6, r8                   @ output2 -= sum2;
+  @ Same rounding and saturation sequence as above, now for output2.
+  sbfx r11, r6, #12, #16
+  ssat r7, #16, r6, asr #12
+  cmp r7, r11
+  addeq r6, r6, #2048
+  ssat r6, #16, r6, asr #12
+  strh r6, [r1], #2            @ Store data_out[i + 1]
+
+  subs r4, #2
+  bgt LOOP_LENGTH
+  blt END                      @ For even data_length, it's done. Jump to END.
+
+@ Process i = data_length -1, for the case of an odd length.
+ODD_LENGTH:
+  add r12, r2, r3, lsl #1
+  sub r12, #4                  @ &coefficients[coefficients_length - 2]
+  sub r9, r1, r3, lsl #1
+  add r9, #2                   @ &data_out[i - coefficients_length + 1]
+  mov r7, #0                   @ sum1, first partial sum
+  mov r8, #0                   @ sum1, second partial sum
+  subs r6, r3, #2              @ inner loop counter
+  beq EVEN_A_LENGTH            @ branch if coefficients_length == 2
+
+LOOP2_A_LENGTH:
+  ldr r10, [r12], #-4          @ coefficients[j - 1], coefficients[j]
+  ldr r5, [r9], #4             @ data_out[i - j], data_out[i - j + 1]
+  subs r6, #2
+  smlatb r7, r10, r5, r7       @ sum1 += coefficients[j] * data_out[i - j];
+  smlabt r8, r10, r5, r8       @ sum1 += coefficients[j - 1] * data_out[i - j + 1];
+  bgt LOOP2_A_LENGTH
+  addlt r12, #2
+  blt POST_LOOP2_A_LENGTH
+
+EVEN_A_LENGTH:
+  ldrsh r10, [r12, #2]         @ Filter coefficients coefficients[1]
+  ldrsh r5, [r9]               @ data_out[i - 1]
+  smlabb r7, r10, r5, r7       @ sum1 += coefficients[1] * data_out[i - 1];
+
+POST_LOOP2_A_LENGTH:
+  ldrsh r10, [r12]             @ Filter coefficients coefficients[0]
+  ldrsh r9, [r0]               @ data_in[i]
+  smulbb r6, r10, r9           @ output1 = coefficients[0] * data_in[i];
+  sub r6, r7                   @ output1 -= first partial sum of sum1;
+  sub r6, r8                   @ output1 -= second partial sum of sum1;
+  sbfx r8, r6, #12, #16
+  ssat r7, #16, r6, asr #12
+  cmp r7, r8
+  addeq r6, r6, #2048          @ round only when (output1 >> 12) fits in 16 bits
+  ssat r6, #16, r6, asr #12
+  strh r6, [r1]                @ Store the data_out[i]
+
+END:
+  pop {r4-r11}
+  bx  lr
+
+@Reference C code:
+@
+@void WebRtcSpl_FilterARFastQ12(int16_t* data_in,
+@ int16_t* data_out,
+@ int16_t* __restrict coefficients,
+@ size_t coefficients_length,
+@ size_t data_length) {
+@ size_t i = 0;
+@ size_t j = 0;
+@
+@ assert(data_length > 0);
+@ assert(coefficients_length > 1);
+@
+@ for (i = 0; i < data_length - 1; i += 2) {
+@ int32_t output1 = 0;
+@ int32_t sum1 = 0;
+@ int32_t output2 = 0;
+@ int32_t sum2 = 0;
+@
+@ for (j = coefficients_length - 1; j > 2; j -= 2) {
+@ sum1 += coefficients[j] * data_out[i - j];
+@ sum1 += coefficients[j - 1] * data_out[i - j + 1];
+@ sum2 += coefficients[j] * data_out[i - j + 1];
+@ sum2 += coefficients[j - 1] * data_out[i - j + 2];
+@ }
+@
+@ if (j == 2) {
+@ sum1 += coefficients[2] * data_out[i - 2];
+@ sum2 += coefficients[2] * data_out[i - 1];
+@ }
+@
+@ sum1 += coefficients[1] * data_out[i - 1];
+@ output1 = coefficients[0] * data_in[i];
+@ output1 -= sum1;
+@ // Saturate and store the output.
+@ output1 = WEBRTC_SPL_SAT(134215679, output1, -134217728);
+@ data_out[i] = (int16_t)((output1 + 2048) >> 12);
+@
+@ sum2 += coefficients[1] * data_out[i];
+@ output2 = coefficients[0] * data_in[i + 1];
+@ output2 -= sum2;
+@ // Saturate and store the output.
+@ output2 = WEBRTC_SPL_SAT(134215679, output2, -134217728);
+@ data_out[i + 1] = (int16_t)((output2 + 2048) >> 12);
+@ }
+@
+@ if (i == data_length - 1) {
+@ int32_t output1 = 0;
+@ int32_t sum1 = 0;
+@
+@ for (j = coefficients_length - 1; j > 1; j -= 2) {
+@ sum1 += coefficients[j] * data_out[i - j];
+@ sum1 += coefficients[j - 1] * data_out[i - j + 1];
+@ }
+@
+@ if (j == 1) {
+@ sum1 += coefficients[1] * data_out[i - 1];
+@ }
+@
+@ output1 = coefficients[0] * data_in[i];
+@ output1 -= sum1;
+@ // Saturate and store the output.
+@ output1 = WEBRTC_SPL_SAT(134215679, output1, -134217728);
+@ data_out[i] = (int16_t)((output1 + 2048) >> 12);
+@ }
+@}
diff --git a/third_party/webrtc/src/webrtc/common_audio/signal_processing/filter_ar_fast_q12_mips.c b/third_party/webrtc/src/webrtc/common_audio/signal_processing/filter_ar_fast_q12_mips.c
new file mode 100644
index 00000000..03847018
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/filter_ar_fast_q12_mips.c
@@ -0,0 +1,140 @@
+/*
+ * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+#include <assert.h>
+
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+
+void WebRtcSpl_FilterARFastQ12(const int16_t* data_in,
+ int16_t* data_out,
+ const int16_t* __restrict coefficients,
+ size_t coefficients_length,
+ size_t data_length) {
+ int r0, r1, r2, r3;  // scratch values used inside the asm
+ int coef0, offset;
+ int i, j, k;
+ int coefptr, outptr, tmpout, inptr;  // addresses, held in int operands
+#if !defined(MIPS_DSP_R1_LE)
+ int max16 = 0x7FFF;  // upper clamp used by the non-DSP path
+ int min16 = 0xFFFF8000;  // lower clamp used by the non-DSP path
+#endif // #if !defined(MIPS_DSP_R1_LE)
+ // MIPS inline asm below: every outer pass (label 1 or 3) writes one output sample.
+ assert(data_length > 0);
+ assert(coefficients_length > 1);
+ // hi/lo accumulates coefficients[0] * input (mult) minus the feedback products (msub).
+ __asm __volatile (
+ ".set push \n\t"
+ ".set noreorder \n\t"
+ "addiu %[i], %[data_length], 0 \n\t"  // i = data_length
+ "lh %[coef0], 0(%[coefficients]) \n\t"  // coef0 = coefficients[0]
+ "addiu %[j], %[coefficients_length], -1 \n\t"  // j = coefficients_length - 1
+ "andi %[k], %[j], 1 \n\t"  // k = j & 1
+ "sll %[offset], %[j], 1 \n\t"  // offset = 2 * j (bytes)
+ "subu %[outptr], %[data_out], %[offset] \n\t"  // outptr = data_out - offset
+ "addiu %[inptr], %[data_in], 0 \n\t"  // inptr = data_in
+ "bgtz %[k], 3f \n\t"  // odd j: use the loop at label 3
+ " addu %[coefptr], %[coefficients], %[offset] \n\t"  // (delay slot) coefptr = coefficients + offset
+ "1: \n\t"  // outer loop, even j
+ "lh %[r0], 0(%[inptr]) \n\t"  // r0 = *inptr
+ "addiu %[i], %[i], -1 \n\t"  // --i
+ "addiu %[tmpout], %[outptr], 0 \n\t"  // tmpout = outptr
+ "mult %[r0], %[coef0] \n\t"  // hi/lo = r0 * coef0
+ "2: \n\t"  // inner loop: two taps per pass
+ "lh %[r0], 0(%[tmpout]) \n\t"
+ "lh %[r1], 0(%[coefptr]) \n\t"
+ "lh %[r2], 2(%[tmpout]) \n\t"
+ "lh %[r3], -2(%[coefptr]) \n\t"
+ "addiu %[tmpout], %[tmpout], 4 \n\t"
+ "msub %[r0], %[r1] \n\t"  // hi/lo -= r0 * r1
+ "msub %[r2], %[r3] \n\t"  // hi/lo -= r2 * r3
+ "addiu %[j], %[j], -2 \n\t"  // j -= 2
+ "bgtz %[j], 2b \n\t"  // repeat while j > 0
+ " addiu %[coefptr], %[coefptr], -4 \n\t"  // (delay slot) coefptr -= 4 bytes
+#if defined(MIPS_DSP_R1_LE)
+ "extr_r.w %[r0], $ac0, 12 \n\t"  // r0 = accumulator >> 12, with rounding
+#else // #if defined(MIPS_DSP_R1_LE)
+ "mflo %[r0] \n\t"  // r0 = lo
+#endif // #if defined(MIPS_DSP_R1_LE)
+ "addu %[coefptr], %[coefficients], %[offset] \n\t"  // rewind coefptr for the next sample
+ "addiu %[inptr], %[inptr], 2 \n\t"  // inptr += 2 bytes
+ "addiu %[j], %[coefficients_length], -1 \n\t"  // reload j
+#if defined(MIPS_DSP_R1_LE)
+ "shll_s.w %[r0], %[r0], 16 \n\t"  // saturating shift left by 16 ...
+ "sra %[r0], %[r0], 16 \n\t"  // ... and arithmetic shift back
+#else // #if defined(MIPS_DSP_R1_LE)
+ "addiu %[r0], %[r0], 2048 \n\t"  // r0 += 2048
+ "sra %[r0], %[r0], 12 \n\t"  // r0 >>= 12 (arithmetic)
+ "slt %[r1], %[max16], %[r0] \n\t"
+ "movn %[r0], %[max16], %[r1] \n\t"  // if (r0 > max16) r0 = max16
+ "slt %[r1], %[r0], %[min16] \n\t"
+ "movn %[r0], %[min16], %[r1] \n\t"  // if (r0 < min16) r0 = min16
+#endif // #if defined(MIPS_DSP_R1_LE)
+ "sh %[r0], 0(%[tmpout]) \n\t"  // store the result at tmpout
+ "bgtz %[i], 1b \n\t"  // repeat while i > 0
+ " addiu %[outptr], %[outptr], 2 \n\t"  // (delay slot) outptr += 2 bytes
+ "b 5f \n\t"  // done
+ " nop \n\t"
+ "3: \n\t"  // outer loop, odd j
+ "lh %[r0], 0(%[inptr]) \n\t"  // r0 = *inptr
+ "addiu %[i], %[i], -1 \n\t"  // --i
+ "addiu %[tmpout], %[outptr], 0 \n\t"  // tmpout = outptr
+ "mult %[r0], %[coef0] \n\t"  // hi/lo = r0 * coef0
+ "4: \n\t"  // inner loop: two taps per pass
+ "lh %[r0], 0(%[tmpout]) \n\t"
+ "lh %[r1], 0(%[coefptr]) \n\t"
+ "lh %[r2], 2(%[tmpout]) \n\t"
+ "lh %[r3], -2(%[coefptr]) \n\t"
+ "addiu %[tmpout], %[tmpout], 4 \n\t"
+ "msub %[r0], %[r1] \n\t"  // hi/lo -= r0 * r1
+ "msub %[r2], %[r3] \n\t"  // hi/lo -= r2 * r3
+ "addiu %[j], %[j], -2 \n\t"  // j -= 2
+ "bgtz %[j], 4b \n\t"  // NOTE(review): odd j leaves at j == -1, i.e. after (j + 1) / 2 passes = j + 1 taps
+ " addiu %[coefptr], %[coefptr], -4 \n\t"  // (delay slot) coefptr -= 4 bytes
+ "lh %[r0], 0(%[tmpout]) \n\t"  // one more tap after the paired loop
+ "lh %[r1], 0(%[coefptr]) \n\t"  // NOTE(review): appears to exceed the j taps of the C version; confirm
+ "msub %[r0], %[r1] \n\t"  // hi/lo -= r0 * r1
+#if defined(MIPS_DSP_R1_LE)
+ "extr_r.w %[r0], $ac0, 12 \n\t"  // r0 = accumulator >> 12, with rounding
+#else // #if defined(MIPS_DSP_R1_LE)
+ "mflo %[r0] \n\t"  // r0 = lo
+#endif // #if defined(MIPS_DSP_R1_LE)
+ "addu %[coefptr], %[coefficients], %[offset] \n\t"  // rewind coefptr for the next sample
+ "addiu %[inptr], %[inptr], 2 \n\t"  // inptr += 2 bytes
+ "addiu %[j], %[coefficients_length], -1 \n\t"  // reload j
+#if defined(MIPS_DSP_R1_LE)
+ "shll_s.w %[r0], %[r0], 16 \n\t"  // saturating shift left by 16 ...
+ "sra %[r0], %[r0], 16 \n\t"  // ... and arithmetic shift back
+#else // #if defined(MIPS_DSP_R1_LE)
+ "addiu %[r0], %[r0], 2048 \n\t"  // r0 += 2048
+ "sra %[r0], %[r0], 12 \n\t"  // r0 >>= 12 (arithmetic)
+ "slt %[r1], %[max16], %[r0] \n\t"
+ "movn %[r0], %[max16], %[r1] \n\t"  // if (r0 > max16) r0 = max16
+ "slt %[r1], %[r0], %[min16] \n\t"
+ "movn %[r0], %[min16], %[r1] \n\t"  // if (r0 < min16) r0 = min16
+#endif // #if defined(MIPS_DSP_R1_LE)
+ "sh %[r0], 2(%[tmpout]) \n\t"  // store the result one element past tmpout
+ "bgtz %[i], 3b \n\t"  // repeat while i > 0
+ " addiu %[outptr], %[outptr], 2 \n\t"  // (delay slot) outptr += 2 bytes
+ "5: \n\t"
+ ".set pop \n\t"
+ : [i] "=&r" (i), [j] "=&r" (j), [k] "=&r" (k), [r0] "=&r" (r0),  // outputs: early-clobber temporaries
+ [r1] "=&r" (r1), [r2] "=&r" (r2), [r3] "=&r" (r3),
+ [coef0] "=&r" (coef0), [offset] "=&r" (offset),
+ [outptr] "=&r" (outptr), [inptr] "=&r" (inptr),
+ [coefptr] "=&r" (coefptr), [tmpout] "=&r" (tmpout)
+ : [coefficients] "r" (coefficients), [data_length] "r" (data_length),  // inputs
+ [coefficients_length] "r" (coefficients_length),
+#if !defined(MIPS_DSP_R1_LE)
+ [max16] "r" (max16), [min16] "r" (min16),
+#endif
+ [data_out] "r" (data_out), [data_in] "r" (data_in)
+ : "hi", "lo", "memory"  // clobbers
+ );
+}
+
diff --git a/third_party/webrtc/src/webrtc/common_audio/signal_processing/filter_ma_fast_q12.c b/third_party/webrtc/src/webrtc/common_audio/signal_processing/filter_ma_fast_q12.c
new file mode 100644
index 00000000..f4d9a3d3
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/filter_ma_fast_q12.c
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+/*
+ * This file contains the function WebRtcSpl_FilterMAFastQ12().
+ * The description header can be found in signal_processing_library.h
+ *
+ */
+
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+
+// Q12 moving-average filter. For every n in [0, length):
+//   out_ptr[n] = (SUM(k = 0 .. B_length - 1) B[k] * in_ptr[n - k] + 2048) >> 12
+// where the sum is first limited by WEBRTC_SPL_SAT; 2^27 = 134217728 is 32768
+// in Q12. in_ptr[n - k] is also read for k > n (elements before in_ptr[0]).
+void WebRtcSpl_FilterMAFastQ12(const int16_t* in_ptr,
+                               int16_t* out_ptr,
+                               const int16_t* B,
+                               size_t B_length,
+                               size_t length)
+{
+    size_t n;
+    for (n = 0; n < length; ++n)
+    {
+        int32_t acc = 0;
+        size_t k = 0;
+
+        while (k < B_length)
+        {
+            acc += B[k] * in_ptr[n - k];
+            ++k;
+        }
+        // Saturate, then round to the nearest integer while leaving Q12.
+        acc = WEBRTC_SPL_SAT((int32_t)134215679, acc, (int32_t)-134217728);
+        out_ptr[n] = (int16_t)((acc + (int32_t)2048) >> 12);
+    }
+}
diff --git a/third_party/webrtc/src/webrtc/common_audio/signal_processing/get_hanning_window.c b/third_party/webrtc/src/webrtc/common_audio/signal_processing/get_hanning_window.c
new file mode 100644
index 00000000..d83ac216
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/get_hanning_window.c
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+/*
+ * This file contains the function WebRtcSpl_GetHanningWindow().
+ * The description header can be found in signal_processing_library.h
+ *
+ */
+
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+
+// Hanning table with 256 entries: values increase from 1 up to 16384 (2^14).
+static const int16_t kHanningTable[] = {
+ 1, 2, 6, 10, 15, 22, 30, 39,
+ 50, 62, 75, 89, 104, 121, 138, 157,
+ 178, 199, 222, 246, 271, 297, 324, 353,
+ 383, 413, 446, 479, 513, 549, 586, 624,
+ 663, 703, 744, 787, 830, 875, 920, 967,
+ 1015, 1064, 1114, 1165, 1218, 1271, 1325, 1381,
+ 1437, 1494, 1553, 1612, 1673, 1734, 1796, 1859,
+ 1924, 1989, 2055, 2122, 2190, 2259, 2329, 2399,
+ 2471, 2543, 2617, 2691, 2765, 2841, 2918, 2995,
+ 3073, 3152, 3232, 3312, 3393, 3475, 3558, 3641,
+ 3725, 3809, 3895, 3980, 4067, 4154, 4242, 4330,
+ 4419, 4509, 4599, 4689, 4781, 4872, 4964, 5057,
+ 5150, 5244, 5338, 5432, 5527, 5622, 5718, 5814,
+ 5910, 6007, 6104, 6202, 6299, 6397, 6495, 6594,
+ 6693, 6791, 6891, 6990, 7090, 7189, 7289, 7389,
+ 7489, 7589, 7690, 7790, 7890, 7991, 8091, 8192,
+ 8293, 8393, 8494, 8594, 8694, 8795, 8895, 8995,
+ 9095, 9195, 9294, 9394, 9493, 9593, 9691, 9790,
+ 9889, 9987, 10085, 10182, 10280, 10377, 10474, 10570,
+10666, 10762, 10857, 10952, 11046, 11140, 11234, 11327,
+11420, 11512, 11603, 11695, 11785, 11875, 11965, 12054,
+12142, 12230, 12317, 12404, 12489, 12575, 12659, 12743,
+12826, 12909, 12991, 13072, 13152, 13232, 13311, 13389,
+13466, 13543, 13619, 13693, 13767, 13841, 13913, 13985,
+14055, 14125, 14194, 14262, 14329, 14395, 14460, 14525,
+14588, 14650, 14711, 14772, 14831, 14890, 14947, 15003,
+15059, 15113, 15166, 15219, 15270, 15320, 15369, 15417,
+15464, 15509, 15554, 15597, 15640, 15681, 15721, 15760,
+15798, 15835, 15871, 15905, 15938, 15971, 16001, 16031,
+16060, 16087, 16113, 16138, 16162, 16185, 16206, 16227,
+16246, 16263, 16280, 16295, 16309, 16322, 16334, 16345,
+16354, 16362, 16369, 16374, 16378, 16382, 16383, 16384
+};
+
+// Fills v[0 .. size - 1] by sampling kHanningTable at evenly spaced positions.
+// |pos| is a table position with 22 fractional bits: (pos >> 22) is the index
+// of the entry copied to the output. It starts at -0x200000 when size < 513
+// and at -0x100000 otherwise, and advances by
+// WebRtcSpl_DivW32W16(0x40000000, (int16_t)size) before every sample.
+// Note that |size| is narrowed to int16_t for that division.
+void WebRtcSpl_GetHanningWindow(int16_t *v, size_t size)
+{
+    // Distance between consecutive samples, in the same units as |pos|.
+    const int32_t step =
+        WebRtcSpl_DivW32W16((int32_t)0x40000000, (int16_t)size);
+    int32_t pos = (size < 513) ? (int32_t)-0x200000 : (int32_t)-0x100000;
+    size_t k;
+
+    // Advance first, then sample: the start offset is never used as an index.
+    for (k = 0; k < size; k++)
+    {
+        pos += step;
+        v[k] = kHanningTable[pos >> 22];
+    }
+
+}
diff --git a/third_party/webrtc/src/webrtc/common_audio/signal_processing/get_scaling_square.c b/third_party/webrtc/src/webrtc/common_audio/signal_processing/get_scaling_square.c
new file mode 100644
index 00000000..82e3c8b0
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/get_scaling_square.c
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+/*
+ * This file contains the function WebRtcSpl_GetScalingSquare().
+ * The description header can be found in signal_processing_library.h
+ *
+ */
+
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+
+// Finds the largest magnitude in in_vector and returns
+//   max(0, WebRtcSpl_GetSizeInBits(times) - WebRtcSpl_NormW32(peak * peak)),
+// or 0 when that largest magnitude is 0.
+// The magnitude is narrowed back to int16_t after negation (matters for -32768).
+int16_t WebRtcSpl_GetScalingSquare(int16_t* in_vector,
+                                   size_t in_vector_length,
+                                   size_t times)
+{
+    const int16_t bits_for_times = WebRtcSpl_GetSizeInBits((uint32_t)times);
+    int16_t peak = -1;
+    int16_t headroom;
+    size_t k;
+
+    for (k = 0; k < in_vector_length; k++)
+    {
+        const int16_t sample = in_vector[k];
+        const int16_t magnitude = (sample > 0) ? sample : -sample;
+        peak = (magnitude > peak) ? magnitude : peak;
+    }
+    headroom = WebRtcSpl_NormW32(WEBRTC_SPL_MUL(peak, peak));
+
+    if (peak == 0)
+    {
+        return 0; // Since norm(0) returns 0
+    }
+    return (headroom > bits_for_times) ? 0 : bits_for_times - headroom;
+}
diff --git a/third_party/webrtc/src/webrtc/common_audio/signal_processing/ilbc_specific_functions.c b/third_party/webrtc/src/webrtc/common_audio/signal_processing/ilbc_specific_functions.c
new file mode 100644
index 00000000..301a922d
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/ilbc_specific_functions.c
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+/*
+ * This file contains implementations of the iLBC specific functions
+ * WebRtcSpl_ReverseOrderMultArrayElements()
+ * WebRtcSpl_ElementwiseVectorMult()
+ * WebRtcSpl_AddVectorsAndShift()
+ * WebRtcSpl_AddAffineVectorToVector()
+ * WebRtcSpl_AffineTransformVector()
+ *
+ */
+
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+
+// out[k] = (int16_t)((in[k] * win[-k]) >> right_shifts) for k < vector_length:
+// |win| is walked backwards, starting at the element it points to.
+void WebRtcSpl_ReverseOrderMultArrayElements(int16_t *out, const int16_t *in,
+                                             const int16_t *win,
+                                             size_t vector_length,
+                                             int16_t right_shifts)
+{
+    const int16_t *w = win;
+    size_t k;
+    for (k = 0; k < vector_length; k++, w--)
+    {
+        out[k] = (int16_t)((in[k] * *w) >> right_shifts);
+    }
+}
+
+// Element-wise product of |in| and |win|, scaled down by right_shifts:
+//   out[k] = (int16_t)((in[k] * win[k]) >> right_shifts), k < vector_length.
+// Each in[k]/win[k] pair is read before out[k] is written.
+void WebRtcSpl_ElementwiseVectorMult(int16_t *out, const int16_t *in,
+                                     const int16_t *win, size_t vector_length,
+                                     int16_t right_shifts)
+{
+    size_t k;
+    for (k = 0; k < vector_length; k++)
+    {
+        out[k] = (int16_t)((in[k] * win[k]) >> right_shifts);
+    }
+}
+
+// out[k] = (int16_t)((in1[k] + in2[k]) >> right_shifts) for k < vector_length.
+// The sum is formed in int (integer promotion) before it is shifted.
+void WebRtcSpl_AddVectorsAndShift(int16_t *out, const int16_t *in1,
+                                  const int16_t *in2, size_t vector_length,
+                                  int16_t right_shifts)
+{
+    size_t k;
+    for (k = 0; k < vector_length; k++)
+    {
+        const int sum = in1[k] + in2[k];
+        out[k] = (int16_t)(sum >> right_shifts);
+    }
+}
+
+// Adds (int16_t)((in[k] * gain + add_constant) >> right_shifts) to out[k], k < vector_length.
+void WebRtcSpl_AddAffineVectorToVector(int16_t *out, int16_t *in,
+                                       int16_t gain, int32_t add_constant,
+                                       int16_t right_shifts,
+                                       size_t vector_length)
+{
+    size_t remaining = vector_length;
+    for (; remaining > 0; --remaining, ++in, ++out)
+    {
+        *out += (int16_t)((*in * gain + add_constant) >> right_shifts);
+    }
+}
+
+// Sets out[k] = (int16_t)((in[k] * gain + add_constant) >> right_shifts), k < vector_length.
+void WebRtcSpl_AffineTransformVector(int16_t *out, int16_t *in,
+                                     int16_t gain, int32_t add_constant,
+                                     int16_t right_shifts, size_t vector_length)
+{
+    size_t remaining = vector_length;
+    for (; remaining > 0; --remaining, ++in, ++out)
+    {
+        *out = (int16_t)((*in * gain + add_constant) >> right_shifts);
+    }
+}
diff --git a/third_party/webrtc/src/webrtc/common_audio/signal_processing/include/real_fft.h b/third_party/webrtc/src/webrtc/common_audio/signal_processing/include/real_fft.h
new file mode 100644
index 00000000..e7942f04
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/include/real_fft.h
@@ -0,0 +1,97 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_COMMON_AUDIO_SIGNAL_PROCESSING_INCLUDE_REAL_FFT_H_
+#define WEBRTC_COMMON_AUDIO_SIGNAL_PROCESSING_INCLUDE_REAL_FFT_H_
+
+#include "webrtc/typedefs.h"
+
+// For ComplexFFT(), the maximum fft order is 10;
+// for OpenMax FFT in ARM, it is 12;
+// WebRTC APM uses orders of only 7 and 8.
+enum {kMaxFFTOrder = 10};
+
+struct RealFFT;
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct RealFFT* WebRtcSpl_CreateRealFFT(int order);
+void WebRtcSpl_FreeRealFFT(struct RealFFT* self);
+
+// Compute an FFT for a real-valued signal of length of 2^order,
+// where 1 < order <= kMaxFFTOrder. Transform length is determined by the
+// specification structure, which must be initialized prior to calling the FFT
+// function with WebRtcSpl_CreateRealFFT().
+// The relationship between the input and output sequences can
+// be expressed in terms of the DFT, i.e.:
+// x[n] = (2^(-scalefactor)/N) . SUM[k=0,...,N-1] X[k].e^(jnk.2.pi/N)
+// n=0,1,2,...N-1
+// N=2^order.
+// The conjugate-symmetric output sequence is represented using a CCS vector,
+// which is of length N+2, and is organized as follows:
+// Index: 0 1 2 3 4 5 . . . N-2 N-1 N N+1
+// Component: R0 0 R1 I1 R2 I2 . . . R[N/2-1] I[N/2-1] R[N/2] 0
+// where R[n] and I[n], respectively, denote the real and imaginary components
+// for FFT bin 'n'. Bins are numbered from 0 to N/2, where N is the FFT length.
+// Bin index 0 corresponds to the DC component, and bin index N/2 corresponds to
+// the foldover frequency.
+//
+// Input Arguments:
+// self - pointer to preallocated and initialized FFT specification structure.
+// real_data_in - the input signal. For an ARM Neon platform, it must be
+// aligned on a 32-byte boundary.
+//
+// Output Arguments:
+// complex_data_out - the output complex signal with (2^order + 2) 16-bit
+// elements. For an ARM Neon platform, it must be different
+// from real_data_in, and aligned on a 32-byte boundary.
+//
+// Return Value:
+// 0 - FFT calculation is successful.
+// -1 - Error with bad arguments (NULL pointers).
+int WebRtcSpl_RealForwardFFT(struct RealFFT* self,
+ const int16_t* real_data_in,
+ int16_t* complex_data_out);
+
+// Compute the inverse FFT for a conjugate-symmetric input sequence of length of
+// 2^order, where 1 < order <= kMaxFFTOrder. Transform length is determined by
+// the specification structure, which must be initialized prior to calling the
+// FFT function with WebRtcSpl_CreateRealFFT().
+// For a transform of length M, the input sequence is represented using a packed
+// CCS vector of length M+2, which is explained in the comments for
+// WebRtcSpl_RealForwardFFT() above.
+//
+// Input Arguments:
+// self - pointer to preallocated and initialized FFT specification structure.
+// complex_data_in - the input complex signal with (2^order + 2) 16-bit
+// elements. For an ARM Neon platform, it must be aligned on
+// a 32-byte boundary.
+//
+// Output Arguments:
+// real_data_out - the output real signal. For an ARM Neon platform, it must
+// be different to complex_data_in, and aligned on a 32-byte
+// boundary.
+//
+// Return Value:
+// 0 or a positive number - a value that the elements in the |real_data_out|
+// should be shifted left with in order to get
+// correct physical values.
+// -1 - Error with bad arguments (NULL pointers).
+int WebRtcSpl_RealInverseFFT(struct RealFFT* self,
+ const int16_t* complex_data_in,
+ int16_t* real_data_out);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // WEBRTC_COMMON_AUDIO_SIGNAL_PROCESSING_INCLUDE_REAL_FFT_H_
diff --git a/third_party/webrtc/src/webrtc/common_audio/signal_processing/include/signal_processing_library.h b/third_party/webrtc/src/webrtc/common_audio/signal_processing/include/signal_processing_library.h
new file mode 100644
index 00000000..2e96883e
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/include/signal_processing_library.h
@@ -0,0 +1,1645 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+/*
+ * This header file includes all of the fix point signal processing library (SPL) function
+ * descriptions and declarations.
+ * For specific function calls, see bottom of file.
+ */
+
+#ifndef WEBRTC_SPL_SIGNAL_PROCESSING_LIBRARY_H_
+#define WEBRTC_SPL_SIGNAL_PROCESSING_LIBRARY_H_
+
+#include <string.h>
+#include "webrtc/typedefs.h"
+
+// Macros specific for the fixed point implementation
+#define WEBRTC_SPL_WORD16_MAX 32767
+#define WEBRTC_SPL_WORD16_MIN -32768
+#define WEBRTC_SPL_WORD32_MAX (int32_t)0x7fffffff
+#define WEBRTC_SPL_WORD32_MIN (int32_t)0x80000000
+#define WEBRTC_SPL_MAX_LPC_ORDER 14
+#define WEBRTC_SPL_MIN(A, B) ((A) < (B) ? (A) : (B)) // Get min value
+#define WEBRTC_SPL_MAX(A, B) ((A) > (B) ? (A) : (B)) // Get max value
+// TODO(kma/bjorn): For the next two macros, investigate how to correct the code
+// for inputs of a = WEBRTC_SPL_WORD16_MIN or WEBRTC_SPL_WORD32_MIN.
+#define WEBRTC_SPL_ABS_W16(a) \
+ (((int16_t)(a) >= 0) ? ((int16_t)(a)) : -((int16_t)(a)))
+#define WEBRTC_SPL_ABS_W32(a) \
+ (((int32_t)(a) >= 0) ? ((int32_t)(a)) : -((int32_t)(a)))
+
+#define WEBRTC_SPL_MUL(a, b) \
+ ((int32_t) ((int32_t)(a) * (int32_t)(b)))
+#define WEBRTC_SPL_UMUL(a, b) \
+ ((uint32_t) ((uint32_t)(a) * (uint32_t)(b)))
+#define WEBRTC_SPL_UMUL_32_16(a, b) \
+ ((uint32_t) ((uint32_t)(a) * (uint16_t)(b)))
+#define WEBRTC_SPL_MUL_16_U16(a, b) \
+ ((int32_t)(int16_t)(a) * (uint16_t)(b))
+
+#ifndef WEBRTC_ARCH_ARM_V7
+// For ARMv7 platforms, these are inline functions in spl_inl_armv7.h
+#ifndef MIPS32_LE
+// For MIPS platforms, these are inline functions in spl_inl_mips.h
+#define WEBRTC_SPL_MUL_16_16(a, b) \
+ ((int32_t) (((int16_t)(a)) * ((int16_t)(b))))
+#define WEBRTC_SPL_MUL_16_32_RSFT16(a, b) \
+ (WEBRTC_SPL_MUL_16_16(a, (b) >> 16) \
+ + ((WEBRTC_SPL_MUL_16_16(a, ((b) & 0xffff) >> 1) + 0x4000) >> 15))
+#endif
+#endif
+
+#define WEBRTC_SPL_MUL_16_32_RSFT11(a, b) \
+ ((WEBRTC_SPL_MUL_16_16(a, (b) >> 16) << 5) \
+ + (((WEBRTC_SPL_MUL_16_U16(a, (uint16_t)(b)) >> 1) + 0x0200) >> 10))
+#define WEBRTC_SPL_MUL_16_32_RSFT14(a, b) \
+ ((WEBRTC_SPL_MUL_16_16(a, (b) >> 16) << 2) \
+ + (((WEBRTC_SPL_MUL_16_U16(a, (uint16_t)(b)) >> 1) + 0x1000) >> 13))
+#define WEBRTC_SPL_MUL_16_32_RSFT15(a, b) \
+ ((WEBRTC_SPL_MUL_16_16(a, (b) >> 16) << 1) \
+ + (((WEBRTC_SPL_MUL_16_U16(a, (uint16_t)(b)) >> 1) + 0x2000) >> 14))
+
+#define WEBRTC_SPL_MUL_16_16_RSFT(a, b, c) \
+ (WEBRTC_SPL_MUL_16_16(a, b) >> (c))
+
+#define WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(a, b, c) \
+ ((WEBRTC_SPL_MUL_16_16(a, b) + ((int32_t) \
+ (((int32_t)1) << ((c) - 1)))) >> (c))
+
+// C + the 32 most significant bits of A * B
+#define WEBRTC_SPL_SCALEDIFF32(A, B, C) \
+ ((C) + ((B) >> 16) * (A) + (((uint32_t)(0x0000FFFF & (B)) * (A)) >> 16))
+// Saturates b to the range [c, a]: a is the upper and c the lower bound.
+#define WEBRTC_SPL_SAT(a, b, c) ((b) > (a) ? (a) : (b) < (c) ? (c) : (b))
+
+// Shifting with negative numbers allowed
+// Positive means left shift
+#define WEBRTC_SPL_SHIFT_W32(x, c) \
+ (((c) >= 0) ? ((x) << (c)) : ((x) >> (-(c))))
+
+// Shifting with negative numbers not allowed
+// We cannot do casting here due to signed/unsigned problem
+#define WEBRTC_SPL_LSHIFT_W32(x, c) ((x) << (c))
+
+#define WEBRTC_SPL_RSHIFT_U32(x, c) ((uint32_t)(x) >> (c))
+
+#define WEBRTC_SPL_RAND(a) \
+ ((int16_t)((((int16_t)(a) * 18816) >> 7) & 0x00007fff))
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define WEBRTC_SPL_MEMCPY_W16(v1, v2, length) \
+ memcpy(v1, v2, (length) * sizeof(int16_t))
+
+// inline functions:
+#include "webrtc/common_audio/signal_processing/include/spl_inl.h"
+
+// Initialize SPL. Currently it contains only function pointer initialization.
+// If the underlying platform is known to be ARM-Neon (WEBRTC_HAS_NEON defined),
+// the pointers will be assigned to code optimized for Neon; otherwise
+// if run-time Neon detection (WEBRTC_DETECT_NEON) is enabled, the pointers
+// will be assigned to either Neon code or generic C code; otherwise, generic C
+// code will be assigned.
+// Note that this function MUST be called in any application that uses SPL
+// functions.
+void WebRtcSpl_Init(void);
+
+int16_t WebRtcSpl_GetScalingSquare(int16_t* in_vector,
+ size_t in_vector_length,
+ size_t times);
+
+// Copy and set operations. Implementation in copy_set_operations.c.
+// Descriptions at bottom of file.
+void WebRtcSpl_MemSetW16(int16_t* vector,
+ int16_t set_value,
+ size_t vector_length);
+void WebRtcSpl_MemSetW32(int32_t* vector,
+ int32_t set_value,
+ size_t vector_length);
+void WebRtcSpl_MemCpyReversedOrder(int16_t* out_vector,
+ int16_t* in_vector,
+ size_t vector_length);
+void WebRtcSpl_CopyFromEndW16(const int16_t* in_vector,
+ size_t in_vector_length,
+ size_t samples,
+ int16_t* out_vector);
+void WebRtcSpl_ZerosArrayW16(int16_t* vector,
+ size_t vector_length);
+void WebRtcSpl_ZerosArrayW32(int32_t* vector,
+ size_t vector_length);
+// End: Copy and set operations.
+
+
+// Minimum and maximum operation functions and their pointers.
+// Implementation in min_max_operations.c.
+
+// Returns the largest absolute value in a signed 16-bit vector.
+//
+// Input:
+// - vector : 16-bit input vector.
+// - length : Number of samples in vector.
+//
+// Return value : Maximum absolute value in vector.
+typedef int16_t (*MaxAbsValueW16)(const int16_t* vector, size_t length);
+extern MaxAbsValueW16 WebRtcSpl_MaxAbsValueW16;
+int16_t WebRtcSpl_MaxAbsValueW16C(const int16_t* vector, size_t length);
+#if (defined WEBRTC_DETECT_NEON) || (defined WEBRTC_HAS_NEON)
+int16_t WebRtcSpl_MaxAbsValueW16Neon(const int16_t* vector, size_t length);
+#endif
+#if defined(MIPS32_LE)
+int16_t WebRtcSpl_MaxAbsValueW16_mips(const int16_t* vector, size_t length);
+#endif
+
+// Returns the largest absolute value in a signed 32-bit vector.
+//
+// Input:
+// - vector : 32-bit input vector.
+// - length : Number of samples in vector.
+//
+// Return value : Maximum absolute value in vector.
+typedef int32_t (*MaxAbsValueW32)(const int32_t* vector, size_t length);
+extern MaxAbsValueW32 WebRtcSpl_MaxAbsValueW32;
+int32_t WebRtcSpl_MaxAbsValueW32C(const int32_t* vector, size_t length);
+#if (defined WEBRTC_DETECT_NEON) || (defined WEBRTC_HAS_NEON)
+int32_t WebRtcSpl_MaxAbsValueW32Neon(const int32_t* vector, size_t length);
+#endif
+#if defined(MIPS_DSP_R1_LE)
+int32_t WebRtcSpl_MaxAbsValueW32_mips(const int32_t* vector, size_t length);
+#endif
+
+// Returns the maximum value of a 16-bit vector.
+//
+// Input:
+// - vector : 16-bit input vector.
+// - length : Number of samples in vector.
+//
+// Return value : Maximum sample value in |vector|.
+typedef int16_t (*MaxValueW16)(const int16_t* vector, size_t length);
+extern MaxValueW16 WebRtcSpl_MaxValueW16;
+int16_t WebRtcSpl_MaxValueW16C(const int16_t* vector, size_t length);
+#if (defined WEBRTC_DETECT_NEON) || (defined WEBRTC_HAS_NEON)
+int16_t WebRtcSpl_MaxValueW16Neon(const int16_t* vector, size_t length);
+#endif
+#if defined(MIPS32_LE)
+int16_t WebRtcSpl_MaxValueW16_mips(const int16_t* vector, size_t length);
+#endif
+
+// Returns the maximum value of a 32-bit vector.
+//
+// Input:
+// - vector : 32-bit input vector.
+// - length : Number of samples in vector.
+//
+// Return value : Maximum sample value in |vector|.
+typedef int32_t (*MaxValueW32)(const int32_t* vector, size_t length);
+extern MaxValueW32 WebRtcSpl_MaxValueW32;
+int32_t WebRtcSpl_MaxValueW32C(const int32_t* vector, size_t length);
+#if (defined WEBRTC_DETECT_NEON) || (defined WEBRTC_HAS_NEON)
+int32_t WebRtcSpl_MaxValueW32Neon(const int32_t* vector, size_t length);
+#endif
+#if defined(MIPS32_LE)
+int32_t WebRtcSpl_MaxValueW32_mips(const int32_t* vector, size_t length);
+#endif
+
+// Returns the minimum value of a 16-bit vector.
+//
+// Input:
+// - vector : 16-bit input vector.
+// - length : Number of samples in vector.
+//
+// Return value : Minimum sample value in |vector|.
+typedef int16_t (*MinValueW16)(const int16_t* vector, size_t length);
+extern MinValueW16 WebRtcSpl_MinValueW16;
+int16_t WebRtcSpl_MinValueW16C(const int16_t* vector, size_t length);
+#if (defined WEBRTC_DETECT_NEON) || (defined WEBRTC_HAS_NEON)
+int16_t WebRtcSpl_MinValueW16Neon(const int16_t* vector, size_t length);
+#endif
+#if defined(MIPS32_LE)
+int16_t WebRtcSpl_MinValueW16_mips(const int16_t* vector, size_t length);
+#endif
+
+// Returns the minimum value of a 32-bit vector.
+//
+// Input:
+// - vector : 32-bit input vector.
+// - length : Number of samples in vector.
+//
+// Return value : Minimum sample value in |vector|.
+typedef int32_t (*MinValueW32)(const int32_t* vector, size_t length);
+extern MinValueW32 WebRtcSpl_MinValueW32;
+int32_t WebRtcSpl_MinValueW32C(const int32_t* vector, size_t length);
+#if (defined WEBRTC_DETECT_NEON) || (defined WEBRTC_HAS_NEON)
+int32_t WebRtcSpl_MinValueW32Neon(const int32_t* vector, size_t length);
+#endif
+#if defined(MIPS32_LE)
+int32_t WebRtcSpl_MinValueW32_mips(const int32_t* vector, size_t length);
+#endif
+
+// Returns the vector index to the largest absolute value of a 16-bit vector.
+//
+// Input:
+// - vector : 16-bit input vector.
+// - length : Number of samples in vector.
+//
+// Return value : Index to the maximum absolute value in vector.
+// If there are multiple equal maxima, return the index of the
+// first. -32768 will always have precedence over 32767 (despite
+// -32768 presenting an int16 absolute value of 32767).
+size_t WebRtcSpl_MaxAbsIndexW16(const int16_t* vector, size_t length);
+
+// Returns the vector index to the maximum sample value of a 16-bit vector.
+//
+// Input:
+// - vector : 16-bit input vector.
+// - length : Number of samples in vector.
+//
+// Return value : Index to the maximum value in vector (if multiple
+// indexes have the maximum, return the first).
+size_t WebRtcSpl_MaxIndexW16(const int16_t* vector, size_t length);
+
+// Returns the vector index to the maximum sample value of a 32-bit vector.
+//
+// Input:
+// - vector : 32-bit input vector.
+// - length : Number of samples in vector.
+//
+// Return value : Index to the maximum value in vector (if multiple
+// indexes have the maximum, return the first).
+size_t WebRtcSpl_MaxIndexW32(const int32_t* vector, size_t length);
+
+// Returns the vector index to the minimum sample value of a 16-bit vector.
+//
+// Input:
+// - vector : 16-bit input vector.
+// - length : Number of samples in vector.
+//
+// Return value : Index to the minimum value in vector (if multiple
+// indexes have the minimum, return the first).
+size_t WebRtcSpl_MinIndexW16(const int16_t* vector, size_t length);
+
+// Returns the vector index to the minimum sample value of a 32-bit vector.
+//
+// Input:
+// - vector : 32-bit input vector.
+// - length : Number of samples in vector.
+//
+// Return value : Index to the minimum value in vector (if multiple
+// indexes have the minimum, return the first).
+size_t WebRtcSpl_MinIndexW32(const int32_t* vector, size_t length);
+
+// End: Minimum and maximum operations.
+
+
+// Vector scaling operations. Implementation in vector_scaling_operations.c.
+// Description at bottom of file.
+void WebRtcSpl_VectorBitShiftW16(int16_t* out_vector,
+ size_t vector_length,
+ const int16_t* in_vector,
+ int16_t right_shifts);
+void WebRtcSpl_VectorBitShiftW32(int32_t* out_vector,
+ size_t vector_length,
+ const int32_t* in_vector,
+ int16_t right_shifts);
+void WebRtcSpl_VectorBitShiftW32ToW16(int16_t* out_vector,
+ size_t vector_length,
+ const int32_t* in_vector,
+ int right_shifts);
+void WebRtcSpl_ScaleVector(const int16_t* in_vector,
+ int16_t* out_vector,
+ int16_t gain,
+ size_t vector_length,
+ int16_t right_shifts);
+void WebRtcSpl_ScaleVectorWithSat(const int16_t* in_vector,
+ int16_t* out_vector,
+ int16_t gain,
+ size_t vector_length,
+ int16_t right_shifts);
+void WebRtcSpl_ScaleAndAddVectors(const int16_t* in_vector1,
+ int16_t gain1, int right_shifts1,
+ const int16_t* in_vector2,
+ int16_t gain2, int right_shifts2,
+ int16_t* out_vector,
+ size_t vector_length);
+
+// The functions (with related pointer) perform the vector operation:
+// out_vector[k] = ((scale1 * in_vector1[k]) + (scale2 * in_vector2[k])
+// + round_value) >> right_shifts,
+// where round_value = (1 << right_shifts) >> 1.
+//
+// Input:
+// - in_vector1 : Input vector 1
+// - in_vector1_scale : Gain to be used for vector 1
+// - in_vector2 : Input vector 2
+// - in_vector2_scale : Gain to be used for vector 2
+// - right_shifts : Number of right bit shifts to be applied
+// - length : Number of elements in the input vectors
+//
+// Output:
+// - out_vector : Output vector
+// Return value : 0 if OK, -1 if (in_vector1 == NULL
+// || in_vector2 == NULL || out_vector == NULL
+// || length == 0 || right_shifts < 0).
+typedef int (*ScaleAndAddVectorsWithRound)(const int16_t* in_vector1,
+ int16_t in_vector1_scale,
+ const int16_t* in_vector2,
+ int16_t in_vector2_scale,
+ int right_shifts,
+ int16_t* out_vector,
+ size_t length);
+extern ScaleAndAddVectorsWithRound WebRtcSpl_ScaleAndAddVectorsWithRound;
+int WebRtcSpl_ScaleAndAddVectorsWithRoundC(const int16_t* in_vector1,
+ int16_t in_vector1_scale,
+ const int16_t* in_vector2,
+ int16_t in_vector2_scale,
+ int right_shifts,
+ int16_t* out_vector,
+ size_t length);
+#if defined(MIPS_DSP_R1_LE)
+int WebRtcSpl_ScaleAndAddVectorsWithRound_mips(const int16_t* in_vector1,
+ int16_t in_vector1_scale,
+ const int16_t* in_vector2,
+ int16_t in_vector2_scale,
+ int right_shifts,
+ int16_t* out_vector,
+ size_t length);
+#endif
+// End: Vector scaling operations.
+
+// iLBC specific functions. Implementations in ilbc_specific_functions.c.
+// Description at bottom of file.
+void WebRtcSpl_ReverseOrderMultArrayElements(int16_t* out_vector,
+ const int16_t* in_vector,
+ const int16_t* window,
+ size_t vector_length,
+ int16_t right_shifts);
+void WebRtcSpl_ElementwiseVectorMult(int16_t* out_vector,
+ const int16_t* in_vector,
+ const int16_t* window,
+ size_t vector_length,
+ int16_t right_shifts);
+void WebRtcSpl_AddVectorsAndShift(int16_t* out_vector,
+ const int16_t* in_vector1,
+ const int16_t* in_vector2,
+ size_t vector_length,
+ int16_t right_shifts);
+void WebRtcSpl_AddAffineVectorToVector(int16_t* out_vector,
+ int16_t* in_vector,
+ int16_t gain,
+ int32_t add_constant,
+ int16_t right_shifts,
+ size_t vector_length);
+void WebRtcSpl_AffineTransformVector(int16_t* out_vector,
+ int16_t* in_vector,
+ int16_t gain,
+ int32_t add_constant,
+ int16_t right_shifts,
+ size_t vector_length);
+// End: iLBC specific functions.
+
+// Signal processing operations.
+
+// A 32-bit fix-point implementation of auto-correlation computation
+//
+// Input:
+// - in_vector : Vector to calculate autocorrelation upon
+// - in_vector_length : Length (in samples) of |in_vector|
+// - order : The order up to which the autocorrelation should be
+// calculated
+//
+// Output:
+// - result : auto-correlation values (values should be seen
+// relative to each other since the absolute values
+// might have been down shifted to avoid overflow)
+//
+// - scale : The number of left shifts required to obtain the
+// auto-correlation in Q0
+//
+// Return value : Number of samples in |result|, i.e. (order+1)
+size_t WebRtcSpl_AutoCorrelation(const int16_t* in_vector,
+ size_t in_vector_length,
+ size_t order,
+ int32_t* result,
+ int* scale);
+
+// A 32-bit fix-point implementation of the Levinson-Durbin algorithm that
+// does NOT use the 64 bit class
+//
+// Input:
+// - auto_corr : Vector with autocorrelation values of length >= |order|+1
+// - order : The LPC filter order (support up to order 20)
+//
+// Output:
+// - lpc_coef : lpc_coef[0..order] LPC coefficients in Q12
+// - refl_coef : refl_coef[0..order-1] Reflection coefficients in Q15
+//
+// Return value : 1 for stable 0 for unstable
+int16_t WebRtcSpl_LevinsonDurbin(const int32_t* auto_corr,
+ int16_t* lpc_coef,
+ int16_t* refl_coef,
+ size_t order);
+
+// Converts reflection coefficients |refl_coef| to LPC coefficients |lpc_coef|.
+// This version is a 16 bit operation.
+//
+// NOTE: The 16 bit refl_coef -> lpc_coef conversion might result in a
+// "slightly unstable" filter (i.e., a pole just outside the unit circle) in
+// "rare" cases even if the reflection coefficients are stable.
+//
+// Input:
+// - refl_coef : Reflection coefficients in Q15 that should be converted
+// to LPC coefficients
+// - use_order : Number of coefficients in |refl_coef|
+//
+// Output:
+// - lpc_coef : LPC coefficients in Q12
+void WebRtcSpl_ReflCoefToLpc(const int16_t* refl_coef,
+ int use_order,
+ int16_t* lpc_coef);
+
+// Converts LPC coefficients |lpc_coef| to reflection coefficients |refl_coef|.
+// This version is a 16 bit operation.
+// The conversion is implemented by the step-down algorithm.
+//
+// Input:
+// - lpc_coef : LPC coefficients in Q12, that should be converted to
+// reflection coefficients
+// - use_order : Number of coefficients in |lpc_coef|
+//
+// Output:
+// - refl_coef : Reflection coefficients in Q15.
+void WebRtcSpl_LpcToReflCoef(int16_t* lpc_coef,
+ int use_order,
+ int16_t* refl_coef);
+
+// Calculates reflection coefficients (16 bit) from auto-correlation values
+//
+// Input:
+// - auto_corr : Auto-correlation values
+// - use_order : Number of coefficients to be calculated
+//
+// Output:
+// - refl_coef : Reflection coefficients in Q15.
+void WebRtcSpl_AutoCorrToReflCoef(const int32_t* auto_corr,
+ int use_order,
+ int16_t* refl_coef);
+
+// The functions (with related pointer) calculate the cross-correlation between
+// two sequences |seq1| and |seq2|.
+// |seq1| is fixed and |seq2| slides as the pointer is increased with the
+// amount |step_seq2|. Note the arguments should obey the relationship:
+// |dim_seq| - 1 + |step_seq2| * (|dim_cross_correlation| - 1) <
+// buffer size of |seq2|
+//
+// Input:
+// - seq1 : First sequence (fixed throughout the correlation)
+// - seq2 : Second sequence (slides |step_seq2| for each
+// new correlation)
+// - dim_seq : Number of samples to use in the cross-correlation
+// - dim_cross_correlation : Number of cross-correlations to calculate (the
+// start position for |seq2| is updated for each
+// new one)
+// - right_shifts : Number of right bit shifts to use. This will
+// become the output Q-domain.
+// - step_seq2 : How many (positive or negative) steps the
+// |seq2| pointer should be updated for each new
+// cross-correlation value.
+//
+// Output:
+// - cross_correlation : The cross-correlation in Q(-right_shifts)
+typedef void (*CrossCorrelation)(int32_t* cross_correlation,
+ const int16_t* seq1,
+ const int16_t* seq2,
+ size_t dim_seq,
+ size_t dim_cross_correlation,
+ int right_shifts,
+ int step_seq2);
+extern CrossCorrelation WebRtcSpl_CrossCorrelation;
+void WebRtcSpl_CrossCorrelationC(int32_t* cross_correlation,
+ const int16_t* seq1,
+ const int16_t* seq2,
+ size_t dim_seq,
+ size_t dim_cross_correlation,
+ int right_shifts,
+ int step_seq2);
+#if (defined WEBRTC_DETECT_NEON) || (defined WEBRTC_HAS_NEON)
+void WebRtcSpl_CrossCorrelationNeon(int32_t* cross_correlation,
+ const int16_t* seq1,
+ const int16_t* seq2,
+ size_t dim_seq,
+ size_t dim_cross_correlation,
+ int right_shifts,
+ int step_seq2);
+#endif
+#if defined(MIPS32_LE)
+void WebRtcSpl_CrossCorrelation_mips(int32_t* cross_correlation,
+ const int16_t* seq1,
+ const int16_t* seq2,
+ size_t dim_seq,
+ size_t dim_cross_correlation,
+ int right_shifts,
+ int step_seq2);
+#endif
+
+// Creates (the first half of) a Hanning window. Size must be at least 1 and
+// at most 512.
+//
+// Input:
+// - size : Length of the requested Hanning window (1 to 512)
+//
+// Output:
+// - window : Hanning vector in Q14.
+void WebRtcSpl_GetHanningWindow(int16_t* window, size_t size);
+
+// Calculates y[k] = sqrt(1 - x[k]^2) for each element of the input vector
+// |in_vector|. Input and output values are in Q15.
+//
+// Inputs:
+// - in_vector : Values to calculate sqrt(1 - x^2) of
+// - vector_length : Length of vector |in_vector|
+//
+// Output:
+// - out_vector : Output values in Q15
+void WebRtcSpl_SqrtOfOneMinusXSquared(int16_t* in_vector,
+ size_t vector_length,
+ int16_t* out_vector);
+// End: Signal processing operations.
+
+// Randomization functions. Implementations collected in
+// randomization_functions.c and descriptions at bottom of this file.
+int16_t WebRtcSpl_RandU(uint32_t* seed);
+int16_t WebRtcSpl_RandN(uint32_t* seed);
+int16_t WebRtcSpl_RandUArray(int16_t* vector,
+ int16_t vector_length,
+ uint32_t* seed);
+// End: Randomization functions.
+
+// Math functions
+int32_t WebRtcSpl_Sqrt(int32_t value);
+int32_t WebRtcSpl_SqrtFloor(int32_t value);
+
+// Divisions. Implementations collected in division_operations.c and
+// descriptions at bottom of this file.
+uint32_t WebRtcSpl_DivU32U16(uint32_t num, uint16_t den);
+int32_t WebRtcSpl_DivW32W16(int32_t num, int16_t den);
+int16_t WebRtcSpl_DivW32W16ResW16(int32_t num, int16_t den);
+int32_t WebRtcSpl_DivResultInQ31(int32_t num, int32_t den);
+int32_t WebRtcSpl_DivW32HiLow(int32_t num, int16_t den_hi, int16_t den_low);
+// End: Divisions.
+
+int32_t WebRtcSpl_Energy(int16_t* vector,
+ size_t vector_length,
+ int* scale_factor);
+
+// Calculates the dot product between two (int16_t) vectors.
+//
+// Input:
+// - vector1 : Vector 1
+// - vector2 : Vector 2
+// - length : Number of samples used in the dot product
+// - scaling : The number of right bit shifts to apply on each term
+// during calculation to avoid overflow, i.e., the
+// output will be in Q(-|scaling|)
+//
+// Return value : The dot product in Q(-scaling)
+int32_t WebRtcSpl_DotProductWithScale(const int16_t* vector1,
+ const int16_t* vector2,
+ size_t length,
+ int scaling);
+
+// Filter operations.
+size_t WebRtcSpl_FilterAR(const int16_t* ar_coef,
+ size_t ar_coef_length,
+ const int16_t* in_vector,
+ size_t in_vector_length,
+ int16_t* filter_state,
+ size_t filter_state_length,
+ int16_t* filter_state_low,
+ size_t filter_state_low_length,
+ int16_t* out_vector,
+ int16_t* out_vector_low,
+ size_t out_vector_low_length);
+
+// WebRtcSpl_FilterMAFastQ12(...)
+//
+// Performs MA filtering on a vector in Q12
+//
+// Input:
+// - in_vector : Input samples (state in positions
+// in_vector[-order] .. in_vector[-1])
+// - ma_coef : Filter coefficients (in Q12)
+// - ma_coef_length : Number of B coefficients (order+1)
+// - vector_length : Number of samples to be filtered
+//
+// Output:
+// - out_vector : Filtered samples
+//
+void WebRtcSpl_FilterMAFastQ12(const int16_t* in_vector,
+ int16_t* out_vector,
+ const int16_t* ma_coef,
+ size_t ma_coef_length,
+ size_t vector_length);
+
+// Performs AR filtering on a vector in Q12
+// Input:
+// - data_in : Input samples
+// - data_out : State information in positions
+// data_out[-order] .. data_out[-1]
+// - coefficients : Filter coefficients (in Q12)
+// - coefficients_length: Number of coefficients (order+1)
+// - data_length : Number of samples to be filtered
+// Output:
+// - data_out : Filtered samples
+void WebRtcSpl_FilterARFastQ12(const int16_t* data_in,
+ int16_t* data_out,
+ const int16_t* __restrict coefficients,
+ size_t coefficients_length,
+ size_t data_length);
+
+// The functions (with related pointer) perform a MA down sampling filter
+// on a vector.
+// Input:
+// - data_in : Input samples (state in positions
+// data_in[-order] .. data_in[-1])
+// - data_in_length : Number of samples in |data_in| to be filtered.
+// This must be at least
+// |delay| + |factor| * (|data_out_length| - 1) + 1
+// - data_out_length : Number of down sampled samples desired
+// - coefficients : Filter coefficients (in Q12)
+// - coefficients_length: Number of coefficients (order+1)
+// - factor : Decimation factor
+// - delay : Delay of filter (compensated for in |data_out|)
+// Output:
+// - data_out : Filtered samples
+// Return value : 0 if OK, -1 if |data_in| is too short
+typedef int (*DownsampleFast)(const int16_t* data_in,
+ size_t data_in_length,
+ int16_t* data_out,
+ size_t data_out_length,
+ const int16_t* __restrict coefficients,
+ size_t coefficients_length,
+ int factor,
+ size_t delay);
+extern DownsampleFast WebRtcSpl_DownsampleFast;
+int WebRtcSpl_DownsampleFastC(const int16_t* data_in,
+ size_t data_in_length,
+ int16_t* data_out,
+ size_t data_out_length,
+ const int16_t* __restrict coefficients,
+ size_t coefficients_length,
+ int factor,
+ size_t delay);
+#if (defined WEBRTC_DETECT_NEON) || (defined WEBRTC_HAS_NEON)
+int WebRtcSpl_DownsampleFastNeon(const int16_t* data_in,
+ size_t data_in_length,
+ int16_t* data_out,
+ size_t data_out_length,
+ const int16_t* __restrict coefficients,
+ size_t coefficients_length,
+ int factor,
+ size_t delay);
+#endif
+#if defined(MIPS32_LE)
+int WebRtcSpl_DownsampleFast_mips(const int16_t* data_in,
+ size_t data_in_length,
+ int16_t* data_out,
+ size_t data_out_length,
+ const int16_t* __restrict coefficients,
+ size_t coefficients_length,
+ int factor,
+ size_t delay);
+#endif
+
+// End: Filter operations.
+
+// FFT operations
+
+int WebRtcSpl_ComplexFFT(int16_t vector[], int stages, int mode);
+int WebRtcSpl_ComplexIFFT(int16_t vector[], int stages, int mode);
+
+// Treat a 16-bit complex data buffer |complex_data| as an array of 32-bit
+// values, and swap elements whose indexes are bit-reverses of each other.
+//
+// Input:
+// - complex_data : Complex data buffer containing 2^|stages| real
+// elements interleaved with 2^|stages| imaginary
+// elements: [Re Im Re Im Re Im....]
+// - stages : Number of FFT stages. Must be at least 3 and at most
+// 10, since the table WebRtcSpl_kSinTable1024[] is 1024
+// elements long.
+//
+// Output:
+// - complex_data : The complex data buffer.
+
+void WebRtcSpl_ComplexBitReverse(int16_t* __restrict complex_data, int stages);
+
+// End: FFT operations
+
+/************************************************************
+ *
+ * RESAMPLING FUNCTIONS AND THEIR STRUCTS ARE DEFINED BELOW
+ *
+ ************************************************************/
+
+/*******************************************************************
+ * resample.c
+ *
+ * Includes the following resampling combinations
+ * 22 kHz -> 16 kHz
+ * 16 kHz -> 22 kHz
+ * 22 kHz -> 8 kHz
+ * 8 kHz -> 22 kHz
+ *
+ ******************************************************************/
+
+// state structure for 22 -> 16 resampler
+typedef struct {
+ int32_t S_22_44[8];
+ int32_t S_44_32[8];
+ int32_t S_32_16[8];
+} WebRtcSpl_State22khzTo16khz;
+
+void WebRtcSpl_Resample22khzTo16khz(const int16_t* in,
+ int16_t* out,
+ WebRtcSpl_State22khzTo16khz* state,
+ int32_t* tmpmem);
+
+void WebRtcSpl_ResetResample22khzTo16khz(WebRtcSpl_State22khzTo16khz* state);
+
+// state structure for 16 -> 22 resampler
+typedef struct {
+ int32_t S_16_32[8];
+ int32_t S_32_22[8];
+} WebRtcSpl_State16khzTo22khz;
+
+void WebRtcSpl_Resample16khzTo22khz(const int16_t* in,
+ int16_t* out,
+ WebRtcSpl_State16khzTo22khz* state,
+ int32_t* tmpmem);
+
+void WebRtcSpl_ResetResample16khzTo22khz(WebRtcSpl_State16khzTo22khz* state);
+
+// state structure for 22 -> 8 resampler
+typedef struct {
+ int32_t S_22_22[16];
+ int32_t S_22_16[8];
+ int32_t S_16_8[8];
+} WebRtcSpl_State22khzTo8khz;
+
+void WebRtcSpl_Resample22khzTo8khz(const int16_t* in, int16_t* out,
+ WebRtcSpl_State22khzTo8khz* state,
+ int32_t* tmpmem);
+
+void WebRtcSpl_ResetResample22khzTo8khz(WebRtcSpl_State22khzTo8khz* state);
+
+// state structure for 8 -> 22 resampler
+typedef struct {
+ int32_t S_8_16[8];
+ int32_t S_16_11[8];
+ int32_t S_11_22[8];
+} WebRtcSpl_State8khzTo22khz;
+
+void WebRtcSpl_Resample8khzTo22khz(const int16_t* in, int16_t* out,
+ WebRtcSpl_State8khzTo22khz* state,
+ int32_t* tmpmem);
+
+void WebRtcSpl_ResetResample8khzTo22khz(WebRtcSpl_State8khzTo22khz* state);
+
+/*******************************************************************
+ * resample_fractional.c
+ * Functions for internal use in the other resample functions
+ *
+ * Includes the following resampling combinations
+ * 48 kHz -> 32 kHz
+ * 32 kHz -> 24 kHz
+ * 44 kHz -> 32 kHz
+ *
+ ******************************************************************/
+
+void WebRtcSpl_Resample48khzTo32khz(const int32_t* In, int32_t* Out, size_t K);
+
+void WebRtcSpl_Resample32khzTo24khz(const int32_t* In, int32_t* Out, size_t K);
+
+void WebRtcSpl_Resample44khzTo32khz(const int32_t* In, int32_t* Out, size_t K);
+
+/*******************************************************************
+ * resample_48khz.c
+ *
+ * Includes the following resampling combinations
+ * 48 kHz -> 16 kHz
+ * 16 kHz -> 48 kHz
+ * 48 kHz -> 8 kHz
+ * 8 kHz -> 48 kHz
+ *
+ ******************************************************************/
+
+typedef struct {
+ int32_t S_48_48[16];
+ int32_t S_48_32[8];
+ int32_t S_32_16[8];
+} WebRtcSpl_State48khzTo16khz;
+
+void WebRtcSpl_Resample48khzTo16khz(const int16_t* in, int16_t* out,
+ WebRtcSpl_State48khzTo16khz* state,
+ int32_t* tmpmem);
+
+void WebRtcSpl_ResetResample48khzTo16khz(WebRtcSpl_State48khzTo16khz* state);
+
+typedef struct {
+ int32_t S_16_32[8];
+ int32_t S_32_24[8];
+ int32_t S_24_48[8];
+} WebRtcSpl_State16khzTo48khz;
+
+void WebRtcSpl_Resample16khzTo48khz(const int16_t* in, int16_t* out,
+ WebRtcSpl_State16khzTo48khz* state,
+ int32_t* tmpmem);
+
+void WebRtcSpl_ResetResample16khzTo48khz(WebRtcSpl_State16khzTo48khz* state);
+
+typedef struct {
+ int32_t S_48_24[8];
+ int32_t S_24_24[16];
+ int32_t S_24_16[8];
+ int32_t S_16_8[8];
+} WebRtcSpl_State48khzTo8khz;
+
+void WebRtcSpl_Resample48khzTo8khz(const int16_t* in, int16_t* out,
+ WebRtcSpl_State48khzTo8khz* state,
+ int32_t* tmpmem);
+
+void WebRtcSpl_ResetResample48khzTo8khz(WebRtcSpl_State48khzTo8khz* state);
+
+typedef struct {
+ int32_t S_8_16[8];
+ int32_t S_16_12[8];
+ int32_t S_12_24[8];
+ int32_t S_24_48[8];
+} WebRtcSpl_State8khzTo48khz;
+
+void WebRtcSpl_Resample8khzTo48khz(const int16_t* in, int16_t* out,
+ WebRtcSpl_State8khzTo48khz* state,
+ int32_t* tmpmem);
+
+void WebRtcSpl_ResetResample8khzTo48khz(WebRtcSpl_State8khzTo48khz* state);
+
+/*******************************************************************
+ * resample_by_2.c
+ *
+ * Includes down and up sampling by a factor of two.
+ *
+ ******************************************************************/
+
+void WebRtcSpl_DownsampleBy2(const int16_t* in, size_t len,
+ int16_t* out, int32_t* filtState);
+
+void WebRtcSpl_UpsampleBy2(const int16_t* in, size_t len,
+ int16_t* out, int32_t* filtState);
+
+/************************************************************
+ * END OF RESAMPLING FUNCTIONS
+ ************************************************************/
+void WebRtcSpl_AnalysisQMF(const int16_t* in_data,
+ size_t in_data_length,
+ int16_t* low_band,
+ int16_t* high_band,
+ int32_t* filter_state1,
+ int32_t* filter_state2);
+void WebRtcSpl_SynthesisQMF(const int16_t* low_band,
+ const int16_t* high_band,
+ size_t band_length,
+ int16_t* out_data,
+ int32_t* filter_state1,
+ int32_t* filter_state2);
+
+#ifdef __cplusplus
+}
+#endif // __cplusplus
+#endif // WEBRTC_SPL_SIGNAL_PROCESSING_LIBRARY_H_
+
+//
+// WebRtcSpl_AddSatW16(...)
+// WebRtcSpl_AddSatW32(...)
+//
+// Returns the result of a saturated 16-bit, respectively 32-bit, addition of
+// the numbers specified by the |var1| and |var2| parameters.
+//
+// Input:
+// - var1 : Input variable 1
+// - var2 : Input variable 2
+//
+// Return value : Added and saturated value
+//
+
+//
+// WebRtcSpl_SubSatW16(...)
+// WebRtcSpl_SubSatW32(...)
+//
+// Returns the result of a saturated 16-bit, respectively 32-bit, subtraction
+// of the numbers specified by the |var1| and |var2| parameters.
+//
+// Input:
+// - var1 : Input variable 1
+// - var2 : Input variable 2
+//
+// Returned value : Subtracted and saturated value
+//
+
+//
+// WebRtcSpl_GetSizeInBits(...)
+//
+// Returns the # of bits that are needed at the most to represent the number
+// specified by the |value| parameter.
+//
+// Input:
+// - value : Input value
+//
+// Return value : Number of bits needed to represent |value|
+//
+
+//
+// WebRtcSpl_NormW32(...)
+//
+// Norm returns the # of left shifts required to 32-bit normalize the 32-bit
+// signed number specified by the |value| parameter.
+//
+// Input:
+// - value : Input value
+//
+// Return value : Number of bit shifts needed to 32-bit normalize |value|
+//
+
+//
+// WebRtcSpl_NormW16(...)
+//
+// Norm returns the # of left shifts required to 16-bit normalize the 16-bit
+// signed number specified by the |value| parameter.
+//
+// Input:
+// - value : Input value
+//
+// Return value : Number of bit shifts needed to 32-bit normalize |value|
+//
+
+//
+// WebRtcSpl_NormU32(...)
+//
+// Norm returns the # of left shifts required to 32-bit normalize the unsigned
+// 32-bit number specified by the |value| parameter.
+//
+// Input:
+// - value : Input value
+//
+// Return value : Number of bit shifts needed to 32-bit normalize |value|
+//
+
+//
+// WebRtcSpl_GetScalingSquare(...)
+//
+// Returns the # of bits required to scale the samples specified in the
+// |in_vector| parameter so that, if the squares of the samples are added the
+// # of times specified by the |times| parameter, the 32-bit addition will not
+// overflow (result in int32_t).
+//
+// Input:
+// - in_vector : Input vector to check scaling on
+// - in_vector_length : Samples in |in_vector|
+// - times : Number of additions to be performed
+//
+// Return value : Number of right bit shifts needed to avoid
+// overflow in the addition calculation
+//
+
+//
+// WebRtcSpl_MemSetW16(...)
+//
+// Sets all the values in the int16_t vector |vector| of length
+// |vector_length| to the specified value |set_value|
+//
+// Input:
+// - vector : Pointer to the int16_t vector
+// - set_value : Value specified
+// - vector_length : Length of vector
+//
+
+//
+// WebRtcSpl_MemSetW32(...)
+//
+// Sets all the values in the int32_t vector |vector| of length
+// |vector_length| to the specified value |set_value|
+//
+// Input:
+// - vector : Pointer to the int32_t vector
+// - set_value : Value specified
+// - vector_length : Length of vector
+//
+
+//
+// WebRtcSpl_MemCpyReversedOrder(...)
+//
+// Copies all the values from the source int16_t vector |in_vector| to a
+// destination int16_t vector |out_vector|. It is done in reversed order,
+// meaning that the first sample of |in_vector| is copied to the last sample of
+// the |out_vector|. The procedure continues until the last sample of
+// |in_vector| has been copied to the first sample of |out_vector|. This
+// creates a reversed vector. Used in e.g. prediction in iLBC.
+//
+// Input:
+// - in_vector : Pointer to the first sample in a int16_t vector
+// of length |length|
+// - vector_length : Number of elements to copy
+//
+// Output:
+// - out_vector : Pointer to the last sample in a int16_t vector
+// of length |length|
+//
+
+//
+// WebRtcSpl_CopyFromEndW16(...)
+//
+// Copies the rightmost |samples| of |in_vector| (of length |in_vector_length|)
+// to the vector |out_vector|.
+//
+// Input:
+// - in_vector : Input vector
+// - in_vector_length : Number of samples in |in_vector|
+// - samples : Number of samples to extract (from right side)
+// from |in_vector|
+//
+// Output:
+// - out_vector : Vector with the requested samples
+//
+
+//
+// WebRtcSpl_ZerosArrayW16(...)
+// WebRtcSpl_ZerosArrayW32(...)
+//
+// Inserts the value "zero" in all positions of a w16 and a w32 vector
+// respectively.
+//
+// Input:
+// - vector_length : Number of samples in vector
+//
+// Output:
+// - vector : Vector containing all zeros
+//
+
+//
+// WebRtcSpl_VectorBitShiftW16(...)
+// WebRtcSpl_VectorBitShiftW32(...)
+//
+// Bit shifts all the values in a vector up or downwards. Different calls for
+// int16_t and int32_t vectors respectively.
+//
+// Input:
+// - vector_length : Length of vector
+// - in_vector : Pointer to the vector that should be bit shifted
+// - right_shifts : Number of right bit shifts (negative value gives left
+// shifts)
+//
+// Output:
+// - out_vector : Pointer to the result vector (can be the same as
+// |in_vector|)
+//
+
+//
+// WebRtcSpl_VectorBitShiftW32ToW16(...)
+//
+// Bit shifts all the values in a int32_t vector up or downwards and
+// stores the result as an int16_t vector. The function will saturate the
+// signal if needed, before storing in the output vector.
+//
+// Input:
+// - vector_length : Length of vector
+// - in_vector : Pointer to the vector that should be bit shifted
+// - right_shifts : Number of right bit shifts (negative value gives left
+// shifts)
+//
+// Output:
+// - out_vector : Pointer to the result vector (can be the same as
+// |in_vector|)
+//
+
+//
+// WebRtcSpl_ScaleVector(...)
+//
+// Performs the vector operation:
+// out_vector[k] = (gain*in_vector[k])>>right_shifts
+//
+// Input:
+// - in_vector : Input vector
+// - gain : Scaling gain
+// - vector_length : Elements in the |in_vector|
+// - right_shifts : Number of right bit shifts applied
+//
+// Output:
+// - out_vector : Output vector (can be the same as |in_vector|)
+//
+
+//
+// WebRtcSpl_ScaleVectorWithSat(...)
+//
+// Performs the vector operation:
+// out_vector[k] = SATURATE( (gain*in_vector[k])>>right_shifts )
+//
+// Input:
+// - in_vector : Input vector
+// - gain : Scaling gain
+// - vector_length : Elements in the |in_vector|
+// - right_shifts : Number of right bit shifts applied
+//
+// Output:
+// - out_vector : Output vector (can be the same as |in_vector|)
+//
+
+//
+// WebRtcSpl_ScaleAndAddVectors(...)
+//
+// Performs the vector operation:
+// out_vector[k] = (gain1*in_vector1[k])>>right_shifts1
+// + (gain2*in_vector2[k])>>right_shifts2
+//
+// Input:
+// - in_vector1 : Input vector 1
+// - gain1 : Gain to be used for vector 1
+// - right_shifts1 : Right bit shift to be used for vector 1
+// - in_vector2 : Input vector 2
+// - gain2 : Gain to be used for vector 2
+// - right_shifts2 : Right bit shift to be used for vector 2
+// - vector_length : Elements in the input vectors
+//
+// Output:
+// - out_vector : Output vector
+//
+
+//
+// WebRtcSpl_ReverseOrderMultArrayElements(...)
+//
+// Performs the vector operation:
+// out_vector[n] = (in_vector[n]*window[-n])>>right_shifts
+//
+// Input:
+// - in_vector : Input vector
+// - window : Window vector (should be reversed). The pointer
+// should be set to the last value in the vector
+// - right_shifts : Number of right bit shift to be applied after the
+// multiplication
+// - vector_length : Number of elements in |in_vector|
+//
+// Output:
+// - out_vector : Output vector (can be same as |in_vector|)
+//
+
+//
+// WebRtcSpl_ElementwiseVectorMult(...)
+//
+// Performs the vector operation:
+// out_vector[n] = (in_vector[n]*window[n])>>right_shifts
+//
+// Input:
+// - in_vector : Input vector
+// - window : Window vector.
+// - right_shifts : Number of right bit shift to be applied after the
+// multiplication
+// - vector_length : Number of elements in |in_vector|
+//
+// Output:
+// - out_vector : Output vector (can be same as |in_vector|)
+//
+
+//
+// WebRtcSpl_AddVectorsAndShift(...)
+//
+// Performs the vector operation:
+// out_vector[k] = (in_vector1[k] + in_vector2[k])>>right_shifts
+//
+// Input:
+// - in_vector1 : Input vector 1
+// - in_vector2 : Input vector 2
+// - right_shifts : Number of right bit shifts to be applied after the
+// addition
+// - vector_length : Number of elements in |in_vector1| and |in_vector2|
+//
+// Output:
+// - out_vector : Output vector (can be same as |in_vector1|)
+//
+
+//
+// WebRtcSpl_AddAffineVectorToVector(...)
+//
+// Adds an affine transformed vector to another vector |out_vector|, i.e,
+// performs
+// out_vector[k] += (in_vector[k]*gain+add_constant)>>right_shifts
+//
+// Input:
+// - in_vector : Input vector
+// - gain : Gain value, used to multiply the in vector with
+// - add_constant : Constant value to add (usually 1<<(right_shifts-1),
+// but others can be used as well
+// - right_shifts : Number of right bit shifts (0-16)
+// - vector_length : Number of samples in |in_vector| and |out_vector|
+//
+// Output:
+// - out_vector : Vector with the output
+//
+
+//
+// WebRtcSpl_AffineTransformVector(...)
+//
+// Affine transforms a vector, i.e, performs
+// out_vector[k] = (in_vector[k]*gain+add_constant)>>right_shifts
+//
+// Input:
+// - in_vector : Input vector
+// - gain : Gain value, used to multiply the in vector with
+// - add_constant : Constant value to add (usually 1<<(right_shifts-1),
+// but others can be used as well
+// - right_shifts : Number of right bit shifts (0-16)
+// - vector_length : Number of samples in |in_vector| and |out_vector|
+//
+// Output:
+// - out_vector : Vector with the output
+//
+
+//
+// WebRtcSpl_IncreaseSeed(...)
+//
+// Increases the seed (and returns the new value)
+//
+// Input:
+// - seed : Seed for random calculation
+//
+// Output:
+// - seed : Updated seed value
+//
+// Return value : The new seed value
+//
+
+//
+// WebRtcSpl_RandU(...)
+//
+// Produces a uniformly distributed value in the int16_t range
+//
+// Input:
+// - seed : Seed for random calculation
+//
+// Output:
+// - seed : Updated seed value
+//
+// Return value : Uniformly distributed value in the range
+// [Word16_MIN...Word16_MAX]
+//
+
+//
+// WebRtcSpl_RandN(...)
+//
+// Produces a normally distributed value in the int16_t range
+//
+// Input:
+// - seed : Seed for random calculation
+//
+// Output:
+// - seed : Updated seed value
+//
+// Return value : N(0,1) value in the Q13 domain
+//
+
+//
+// WebRtcSpl_RandUArray(...)
+//
+// Produces a uniformly distributed vector with elements in the int16_t
+// range
+//
+// Input:
+// - vector_length : Samples wanted in the vector
+// - seed : Seed for random calculation
+//
+// Output:
+// - vector : Vector with the uniform values
+// - seed : Updated seed value
+//
+// Return value : Number of samples in vector, i.e., |vector_length|
+//
+
+//
+// WebRtcSpl_Sqrt(...)
+//
+// Returns the square root of the input value |value|. The precision of this
+// function is integer precision, i.e., sqrt(8) gives 2 as answer.
+// If |value| is a negative number then 0 is returned.
+//
+// Algorithm:
+//
+// A sixth order Taylor Series expansion is used here to compute the square
+// root of a number y^0.5 = (1+x)^0.5
+// where
+// x = y-1
+// = 1+(x/2)-0.5*((x/2)^2)+0.5*((x/2)^3)-0.625*((x/2)^4)+0.875*((x/2)^5)
+// 0.5 <= x < 1
+//
+// Input:
+// - value : Value to calculate sqrt of
+//
+// Return value : Result of the sqrt calculation
+//
+
+//
+// WebRtcSpl_SqrtFloor(...)
+//
+// Returns the square root of the input value |value|. The precision of this
+// function is rounding down integer precision, i.e., sqrt(8) gives 2 as answer.
+// If |value| is a negative number then 0 is returned.
+//
+// Algorithm:
+//
+// An iterative 4 cycle/bit routine
+//
+// Input:
+// - value : Value to calculate sqrt of
+//
+// Return value : Result of the sqrt calculation
+//
+
+//
+// WebRtcSpl_DivU32U16(...)
+//
+// Divides a uint32_t |num| by a uint16_t |den|.
+//
+// If |den|==0, (uint32_t)0xFFFFFFFF is returned.
+//
+// Input:
+// - num : Numerator
+// - den : Denominator
+//
+// Return value : Result of the division (as a uint32_t), i.e., the
+// integer part of num/den.
+//
+
+//
+// WebRtcSpl_DivW32W16(...)
+//
+// Divides a int32_t |num| by a int16_t |den|.
+//
+// If |den|==0, (int32_t)0x7FFFFFFF is returned.
+//
+// Input:
+// - num : Numerator
+// - den : Denominator
+//
+// Return value : Result of the division (as a int32_t), i.e., the
+// integer part of num/den.
+//
+
+//
+// WebRtcSpl_DivW32W16ResW16(...)
+//
+// Divides a int32_t |num| by a int16_t |den|, assuming that the
+// result is less than 32768, otherwise an unpredictable result will occur.
+//
+// If |den|==0, (int16_t)0x7FFF is returned.
+//
+// Input:
+// - num : Numerator
+// - den : Denominator
+//
+// Return value : Result of the division (as a int16_t), i.e., the
+// integer part of num/den.
+//
+
+//
+// WebRtcSpl_DivResultInQ31(...)
+//
+// Divides a int32_t |num| by a int16_t |den|, assuming that the
+// absolute value of the denominator is larger than the numerator, otherwise
+// an unpredictable result will occur.
+//
+// Input:
+// - num : Numerator
+// - den : Denominator
+//
+// Return value : Result of the division in Q31.
+//
+
+//
+// WebRtcSpl_DivW32HiLow(...)
+//
+// Divides a int32_t |num| by a denominator in hi, low format. The
+// absolute value of the denominator has to be larger (or equal to) the
+// numerator.
+//
+// Input:
+// - num : Numerator
+// - den_hi : High part of denominator
+// - den_low : Low part of denominator
+//
+// Return value : Divided value in Q31
+//
+
+//
+// WebRtcSpl_Energy(...)
+//
+// Calculates the energy of a vector
+//
+// Input:
+// - vector : Vector which the energy should be calculated on
+// - vector_length : Number of samples in vector
+//
+// Output:
+// - scale_factor : Number of left bit shifts needed to get the physical
+// energy value, i.e, to get the Q0 value
+//
+// Return value : Energy value in Q(-|scale_factor|)
+//
+
+//
+// WebRtcSpl_FilterAR(...)
+//
+// Performs a 32-bit AR filtering on a vector in Q12
+//
+// Input:
+// - ar_coef : AR-coefficient vector (values in Q12),
+// ar_coef[0] must be 4096.
+// - ar_coef_length : Number of coefficients in |ar_coef|.
+// - in_vector : Vector to be filtered.
+// - in_vector_length : Number of samples in |in_vector|.
+// - filter_state : Current state (higher part) of the filter.
+// - filter_state_length : Length (in samples) of |filter_state|.
+// - filter_state_low : Current state (lower part) of the filter.
+// - filter_state_low_length : Length (in samples) of |filter_state_low|.
+// - out_vector_low_length : Maximum length (in samples) of
+// |out_vector_low|.
+//
+// Output:
+// - filter_state : Updated state (upper part) vector.
+// - filter_state_low : Updated state (lower part) vector.
+// - out_vector : Vector containing the upper part of the
+// filtered values.
+// - out_vector_low : Vector containing the lower part of the
+// filtered values.
+//
+// Return value : Number of samples in the |out_vector|.
+//
+
+//
+// WebRtcSpl_ComplexIFFT(...)
+//
+// Complex Inverse FFT
+//
+// Computes an inverse complex 2^|stages|-point FFT on the input vector, which
+// is in bit-reversed order. The original content of the vector is destroyed in
+// the process, since the input is overwritten by the output, normal-ordered,
+// FFT vector. With X as the input complex vector, y as the output complex
+// vector and with M = 2^|stages|, the following is computed:
+//
+// M-1
+// y(k) = sum[X(i)*[cos(2*pi*i*k/M) + j*sin(2*pi*i*k/M)]]
+// i=0
+//
+// The implementations are optimized for speed, not for code size. It uses the
+// decimation-in-time algorithm with radix-2 butterfly technique.
+//
+// Input:
+// - vector : In pointer to complex vector containing 2^|stages|
+// real elements interleaved with 2^|stages| imaginary
+// elements.
+// [ReImReImReIm....]
+// The elements are in Q(-scale) domain, see more on Return
+// Value below.
+//
+// - stages : Number of FFT stages. Must be at least 3 and at most 10,
+// since the table WebRtcSpl_kSinTable1024[] is 1024
+// elements long.
+//
+// - mode : This parameter lets the user choose how the FFT
+// should work.
+// mode==0: Low-complexity and Low-accuracy mode
+// mode==1: High-complexity and High-accuracy mode
+//
+// Output:
+// - vector : Out pointer to the FFT vector (the same as input).
+//
+// Return Value : The scale value that tells the number of left bit shifts
+// that the elements in the |vector| should be shifted with
+// in order to get Q0 values, i.e. the physically correct
+// values. The scale parameter is always 0 or positive,
+// except if N>1024 (|stages|>10), which returns a scale
+// value of -1, indicating error.
+//
+
+//
+// WebRtcSpl_ComplexFFT(...)
+//
+// Complex FFT
+//
+// Computes a complex 2^|stages|-point FFT on the input vector, which is in
+// bit-reversed order. The original content of the vector is destroyed in
+// the process, since the input is overwritten by the output, normal-ordered,
+// FFT vector. With x as the input complex vector, Y as the output complex
+// vector and with M = 2^|stages|, the following is computed:
+//
+// M-1
+// Y(k) = 1/M * sum[x(i)*[cos(2*pi*i*k/M) + j*sin(2*pi*i*k/M)]]
+// i=0
+//
+// The implementations are optimized for speed, not for code size. It uses the
+// decimation-in-time algorithm with radix-2 butterfly technique.
+//
+// This routine prevents overflow by scaling by 2 before each FFT stage. This is
+// a fixed scaling, for proper normalization - there will be log2(n) passes, so
+// this results in an overall factor of 1/n, distributed to maximize arithmetic
+// accuracy.
+//
+// Input:
+// - vector : In pointer to complex vector containing 2^|stages| real
+// elements interleaved with 2^|stages| imaginary elements.
+// [ReImReImReIm....]
+// The output is in the Q0 domain.
+//
+// - stages : Number of FFT stages. Must be at least 3 and at most 10,
+// since the table WebRtcSpl_kSinTable1024[] is 1024
+// elements long.
+//
+// - mode : This parameter lets the user choose how the FFT
+// should work.
+// mode==0: Low-complexity and Low-accuracy mode
+// mode==1: High-complexity and High-accuracy mode
+//
+// Output:
+// - vector : The output FFT vector is in the Q0 domain.
+//
+// Return value : The scale parameter is always 0, except if N>1024,
+// which returns a scale value of -1, indicating error.
+//
+
+//
+// WebRtcSpl_AnalysisQMF(...)
+//
+// Splits a 0-2*F Hz signal into two sub bands: 0-F Hz and F-2*F Hz. The
+// current version has F = 8000, therefore, a super-wideband audio signal is
+// split to lower-band 0-8 kHz and upper-band 8-16 kHz.
+//
+// Input:
+// - in_data : Wide band speech signal, 320 samples (10 ms)
+//
+// Input & Output:
+// - filter_state1 : Filter state for first All-pass filter
+// - filter_state2 : Filter state for second All-pass filter
+//
+// Output:
+// - low_band : Lower-band signal 0-8 kHz band, 160 samples (10 ms)
+// - high_band : Upper-band signal 8-16 kHz band (flipped in frequency
+// domain), 160 samples (10 ms)
+//
+
+//
+// WebRtcSpl_SynthesisQMF(...)
+//
+// Combines the two sub bands (0-F and F-2*F Hz) into a signal of 0-2*F
+// Hz, (current version has F = 8000 Hz). So the filter combines lower-band
+// (0-8 kHz) and upper-band (8-16 kHz) channels to obtain super-wideband 0-16
+// kHz audio.
+//
+// Input:
+// - low_band : The signal with the 0-8 kHz band, 160 samples (10 ms)
+// - high_band : The signal with the 8-16 kHz band, 160 samples (10 ms)
+//
+// Input & Output:
+// - filter_state1 : Filter state for first All-pass filter
+// - filter_state2 : Filter state for second All-pass filter
+//
+// Output:
+// - out_data : Super-wideband speech signal, 0-16 kHz
+//
+
+// int16_t WebRtcSpl_SatW32ToW16(...)
+//
+// This function saturates a 32-bit word into a 16-bit word.
+//
+// Input:
+// - value32 : The value of a 32-bit word.
+//
+// Output:
+// - out16 : the saturated 16-bit word.
+//
+
+// int32_t WebRtc_MulAccumW16(...)
+//
+// This function multiplies a 16-bit word by a 16-bit word, and accumulates
+// the product into a 32-bit integer.
+//
+// Input:
+// - a : The value of the first 16-bit word.
+// - b : The value of the second 16-bit word.
+// - c : The value of a 32-bit integer.
+//
+// Return Value: The value of a * b + c.
+//
diff --git a/third_party/webrtc/src/webrtc/common_audio/signal_processing/include/spl_inl.h b/third_party/webrtc/src/webrtc/common_audio/signal_processing/include/spl_inl.h
new file mode 100644
index 00000000..d3cc6dee
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/include/spl_inl.h
@@ -0,0 +1,173 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+// This header file includes the inline functions in
+// the fix point signal processing library.
+
+#ifndef WEBRTC_SPL_SPL_INL_H_
+#define WEBRTC_SPL_SPL_INL_H_
+
+#ifdef WEBRTC_ARCH_ARM_V7
+#include "webrtc/common_audio/signal_processing/include/spl_inl_armv7.h"
+#else
+
+#if defined(MIPS32_LE)
+#include "webrtc/common_audio/signal_processing/include/spl_inl_mips.h"
+#endif
+
+#if !defined(MIPS_DSP_R1_LE)
+// Clamps |value32| to the int16_t range [-32768, 32767] and returns it as a
+// 16-bit value.
+static __inline int16_t WebRtcSpl_SatW32ToW16(int32_t value32) {
+  if (value32 > 32767) {
+    return 32767;
+  }
+  if (value32 < -32768) {
+    return -32768;
+  }
+  // Already representable in 16 bits; the narrowing cast is lossless.
+  return (int16_t)value32;
+}
+
+// Saturated 32-bit addition.
+//
+// Input:
+//      - l_var1 : First addend
+//      - l_var2 : Second addend
+//
+// Return value  : l_var1 + l_var2, clamped to 0x7FFFFFFF on positive
+//                 overflow and to 0x80000000 on negative overflow.
+static __inline int32_t WebRtcSpl_AddSatW32(int32_t l_var1, int32_t l_var2) {
+  // Add in unsigned arithmetic: signed overflow is undefined behavior in C,
+  // whereas unsigned addition is defined to wrap modulo 2^32.
+  const int32_t l_sum = (int32_t)((uint32_t)l_var1 + (uint32_t)l_var2);
+
+  // The addition can only overflow when both operands have the same sign,
+  // and it did overflow iff the wrapped sum has the opposite sign.
+  if (((l_var1 < 0) == (l_var2 < 0)) && ((l_var1 < 0) != (l_sum < 0))) {
+    // A negative wrapped sum means two non-negative operands overflowed.
+    return (l_sum < 0) ? (int32_t)0x7FFFFFFF : (int32_t)0x80000000;
+  }
+
+  return l_sum;
+}
+
+// Saturated 32-bit subtraction.
+//
+// Input:
+//      - l_var1 : Minuend
+//      - l_var2 : Subtrahend
+//
+// Return value  : l_var1 - l_var2, clamped to 0x7FFFFFFF on positive
+//                 overflow and to 0x80000000 on negative overflow.
+static __inline int32_t WebRtcSpl_SubSatW32(int32_t l_var1, int32_t l_var2) {
+  // Subtract in unsigned arithmetic: signed overflow is undefined behavior
+  // in C, whereas unsigned subtraction is defined to wrap modulo 2^32.
+  const int32_t l_diff = (int32_t)((uint32_t)l_var1 - (uint32_t)l_var2);
+
+  // The subtraction can only overflow when the operands have different
+  // signs, and it did overflow iff the wrapped difference does not have the
+  // sign of |l_var1|.
+  if (((l_var1 < 0) != (l_var2 < 0)) && ((l_var1 < 0) != (l_diff < 0))) {
+    // A negative wrapped difference means the true result was too large.
+    return (l_diff < 0) ? (int32_t)0x7FFFFFFF : (int32_t)0x80000000;
+  }
+
+  return l_diff;
+}
+
+// Saturated 16-bit addition: the sum is formed in 32 bits, where it cannot
+// overflow, and then clamped to the int16_t range.
+static __inline int16_t WebRtcSpl_AddSatW16(int16_t a, int16_t b) {
+  const int32_t wide_sum = (int32_t)a + (int32_t)b;
+  return WebRtcSpl_SatW32ToW16(wide_sum);
+}
+
+// Saturated 16-bit subtraction: the difference is formed in 32 bits, where
+// it cannot overflow, and then clamped to the int16_t range.
+static __inline int16_t WebRtcSpl_SubSatW16(int16_t var1, int16_t var2) {
+  const int32_t wide_diff = (int32_t)var1 - (int32_t)var2;
+  return WebRtcSpl_SatW32ToW16(wide_diff);
+}
+#endif // #if !defined(MIPS_DSP_R1_LE)
+
+#if !defined(MIPS32_LE)
+// Returns the number of bits needed to represent |n|, i.e. the 1-based
+// position of its highest set bit (0 when |n| is 0).
+static __inline int16_t WebRtcSpl_GetSizeInBits(uint32_t n) {
+  // Binary search for the highest set bit: each step tests the upper part of
+  // what remains after shifting out the bits already accounted for.
+  int16_t width = ((n & 0xFFFF0000) != 0) ? 16 : 0;
+
+  if (((n >> width) & 0x0000FF00) != 0) {
+    width += 8;
+  }
+  if (((n >> width) & 0x000000F0) != 0) {
+    width += 4;
+  }
+  if (((n >> width) & 0x0000000C) != 0) {
+    width += 2;
+  }
+  if (((n >> width) & 0x00000002) != 0) {
+    width += 1;
+  }
+  // Final step counts the remaining lowest bit itself.
+  if (((n >> width) & 0x00000001) != 0) {
+    width += 1;
+  }
+
+  return width;
+}
+
+// Returns the number of left shifts needed to 32-bit normalize the signed
+// value |a|. Returns 0 when |a| is 0.
+static __inline int16_t WebRtcSpl_NormW32(int32_t a) {
+  int16_t shift;
+
+  if (a == 0) {
+    return 0;
+  }
+  if (a < 0) {
+    // Work on the one's complement so that only leading zeros are counted.
+    a = ~a;
+  }
+
+  // Binary search for the number of redundant leading bits (sign bit
+  // excluded, hence the masks are one bit wider than the step size).
+  shift = (0xFFFF8000 & a) ? 0 : 16;
+  if ((0xFF800000 & (a << shift)) == 0) {
+    shift += 8;
+  }
+  if ((0xF8000000 & (a << shift)) == 0) {
+    shift += 4;
+  }
+  if ((0xE0000000 & (a << shift)) == 0) {
+    shift += 2;
+  }
+  if ((0xC0000000 & (a << shift)) == 0) {
+    shift += 1;
+  }
+
+  return shift;
+}
+
+// Returns the number of left shifts needed to 32-bit normalize the unsigned
+// value |a|, i.e. its count of leading zero bits. Returns 0 when |a| is 0.
+static __inline int16_t WebRtcSpl_NormU32(uint32_t a) {
+  int16_t shift;
+
+  if (a == 0) {
+    return 0;
+  }
+
+  // Binary search for the number of leading zero bits.
+  shift = (0xFFFF0000 & a) ? 0 : 16;
+  if ((0xFF000000 & (a << shift)) == 0) {
+    shift += 8;
+  }
+  if ((0xF0000000 & (a << shift)) == 0) {
+    shift += 4;
+  }
+  if ((0xC0000000 & (a << shift)) == 0) {
+    shift += 2;
+  }
+  if ((0x80000000 & (a << shift)) == 0) {
+    shift += 1;
+  }
+
+  return shift;
+}
+
+// Returns the number of left shifts needed to 16-bit normalize the signed
+// value |a|. Returns 0 when |a| is 0.
+static __inline int16_t WebRtcSpl_NormW16(int16_t a) {
+  int16_t shift;
+
+  if (a == 0) {
+    return 0;
+  }
+  if (a < 0) {
+    // Work on the one's complement so that only leading zeros are counted.
+    a = ~a;
+  }
+
+  // Binary search for the number of redundant leading bits (sign bit
+  // excluded, hence the masks are one bit wider than the step size).
+  shift = (0xFF80 & a) ? 0 : 8;
+  if ((0xF800 & (a << shift)) == 0) {
+    shift += 4;
+  }
+  if ((0xE000 & (a << shift)) == 0) {
+    shift += 2;
+  }
+  if ((0xC000 & (a << shift)) == 0) {
+    shift += 1;
+  }
+
+  return shift;
+}
+
+// Multiply-accumulate: returns a * b + c, where the 16-bit factors are
+// multiplied as 32-bit integers.
+static __inline int32_t WebRtc_MulAccumW16(int16_t a, int16_t b, int32_t c) {
+  const int32_t product = a * b;
+  return product + c;
+}
+#endif // #if !defined(MIPS32_LE)
+
+#endif // WEBRTC_ARCH_ARM_V7
+
+#endif // WEBRTC_SPL_SPL_INL_H_
diff --git a/third_party/webrtc/src/webrtc/common_audio/signal_processing/include/spl_inl_armv7.h b/third_party/webrtc/src/webrtc/common_audio/signal_processing/include/spl_inl_armv7.h
new file mode 100644
index 00000000..27188011
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/include/spl_inl_armv7.h
@@ -0,0 +1,136 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+/* This header file includes the inline functions for ARM processors in
+ * the fix point signal processing library.
+ */
+
+#ifndef WEBRTC_SPL_SPL_INL_ARMV7_H_
+#define WEBRTC_SPL_SPL_INL_ARMV7_H_
+
+/* TODO(kma): Replace some assembly code with GCC intrinsics
+ * (e.g. __builtin_clz).
+ */
+
+/* This function produces a result that is not bit-exact with that of the
+ * generic C version in some cases, although the former is at least as
+ * accurate as the latter.
+ */
+// Multiplies the 32-bit |b| by the 16-bit |a| with a single SMULWB
+// instruction and returns its 32-bit result. Per the ARM instruction set,
+// SMULWB keeps the top 32 bits of the 48-bit signed product, i.e.
+// (a * b) >> 16.
+static __inline int32_t WEBRTC_SPL_MUL_16_32_RSFT16(int16_t a, int32_t b) {
+ int32_t tmp = 0;
+ // Operand order matters: %1 is the 32-bit word, %2 supplies the halfword.
+ __asm __volatile ("smulwb %0, %1, %2":"=r"(tmp):"r"(b), "r"(a));
+ return tmp;
+}
+
+// Returns the 32-bit product of the 16-bit values |a| and |b|, computed with
+// a single SMULBB instruction (signed multiply of the bottom halfwords of
+// both operands, per the ARM instruction set).
+static __inline int32_t WEBRTC_SPL_MUL_16_16(int16_t a, int16_t b) {
+ int32_t tmp = 0;
+ __asm __volatile ("smulbb %0, %1, %2":"=r"(tmp):"r"(a), "r"(b));
+ return tmp;
+}
+
+// TODO(kma): add unit test.
+// Multiply-accumulate: returns a * b + c, computed with a single SMLABB
+// instruction (signed multiply of the bottom halfwords of |a| and |b|, plus
+// the 32-bit accumulator |c|, per the ARM instruction set).
+static __inline int32_t WebRtc_MulAccumW16(int16_t a, int16_t b, int32_t c) {
+ int32_t tmp = 0;
+ __asm __volatile ("smlabb %0, %1, %2, %3":"=r"(tmp):"r"(a), "r"(b), "r"(c));
+ return tmp;
+}
+
+// Saturated 16-bit addition of |a| and |b| using the QADD16 instruction
+// (saturating add applied to each halfword of the operands, per the ARM
+// instruction set). Only the low halfword of the result is returned.
+static __inline int16_t WebRtcSpl_AddSatW16(int16_t a, int16_t b) {
+ int32_t s_sum = 0;
+
+ __asm __volatile ("qadd16 %0, %1, %2":"=r"(s_sum):"r"(a), "r"(b));
+
+ // The cast discards the upper halfword produced by the packed operation.
+ return (int16_t) s_sum;
+}
+
+// Saturated 32-bit addition of |l_var1| and |l_var2| using the QADD
+// instruction (signed saturating add, per the ARM instruction set).
+static __inline int32_t WebRtcSpl_AddSatW32(int32_t l_var1, int32_t l_var2) {
+ int32_t l_sum = 0;
+
+ __asm __volatile ("qadd %0, %1, %2":"=r"(l_sum):"r"(l_var1), "r"(l_var2));
+
+ return l_sum;
+}
+
+// Saturated 32-bit subtraction |l_var1| - |l_var2| using the QSUB
+// instruction (signed saturating subtract, per the ARM instruction set).
+static __inline int32_t WebRtcSpl_SubSatW32(int32_t l_var1, int32_t l_var2) {
+ int32_t l_sub = 0;
+
+ __asm __volatile ("qsub %0, %1, %2":"=r"(l_sub):"r"(l_var1), "r"(l_var2));
+
+ return l_sub;
+}
+
+// Saturated 16-bit subtraction |var1| - |var2| using the QSUB16 instruction
+// (saturating subtract applied to each halfword of the operands, per the ARM
+// instruction set). Only the low halfword of the result is returned.
+static __inline int16_t WebRtcSpl_SubSatW16(int16_t var1, int16_t var2) {
+ int32_t s_sub = 0;
+
+ __asm __volatile ("qsub16 %0, %1, %2":"=r"(s_sub):"r"(var1), "r"(var2));
+
+ // The cast discards the upper halfword produced by the packed operation.
+ return (int16_t)s_sub;
+}
+
+// Returns the number of bits needed to represent |n|, computed as 32 minus
+// the leading-zero count reported by the CLZ instruction.
+static __inline int16_t WebRtcSpl_GetSizeInBits(uint32_t n) {
+ int32_t tmp = 0;
+
+ __asm __volatile ("clz %0, %1":"=r"(tmp):"r"(n));
+
+ return (int16_t)(32 - tmp);
+}
+
+// Returns the number of left shifts needed to 32-bit normalize the signed
+// value |a|. Returns 0 when |a| is 0.
+static __inline int16_t WebRtcSpl_NormW32(int32_t a) {
+ int32_t tmp = 0;
+
+ if (a == 0) {
+ return 0;
+ }
+ else if (a < 0) {
+ // Invert all bits so that the leading ones of a negative value are
+ // counted as leading zeros below.
+ a ^= 0xFFFFFFFF;
+ }
+
+ __asm __volatile ("clz %0, %1":"=r"(tmp):"r"(a));
+
+ // One leading bit must remain as the sign bit, hence the -1.
+ return (int16_t)(tmp - 1);
+}
+
+// Returns the number of left shifts needed to 32-bit normalize the unsigned
+// value |a|, i.e. the leading-zero count reported by the CLZ instruction.
+// Returns 0 when |a| is 0.
+static __inline int16_t WebRtcSpl_NormU32(uint32_t a) {
+ int tmp = 0;
+
+ if (a == 0) return 0;
+
+ __asm __volatile ("clz %0, %1":"=r"(tmp):"r"(a));
+
+ return (int16_t)tmp;
+}
+
+// Returns the number of left shifts needed to 16-bit normalize the signed
+// value |a|. Returns 0 when |a| is 0.
+static __inline int16_t WebRtcSpl_NormW16(int16_t a) {
+ int32_t tmp = 0;
+ // Widen to 32 bits so that CLZ can be applied to the value.
+ int32_t a_32 = a;
+
+ if (a_32 == 0) {
+ return 0;
+ }
+ else if (a_32 < 0) {
+ // Invert all bits so that the leading ones of a negative value are
+ // counted as leading zeros below.
+ a_32 ^= 0xFFFFFFFF;
+ }
+
+ __asm __volatile ("clz %0, %1":"=r"(tmp):"r"(a_32));
+
+ // Subtract the 16 extra bits from widening plus one bit for the sign.
+ return (int16_t)(tmp - 17);
+}
+
+// TODO(kma): add unit test.
+// Saturates the 32-bit |value32| to the signed 16-bit range using the SSAT
+// instruction with a 16-bit saturation width, and returns it as int16_t.
+static __inline int16_t WebRtcSpl_SatW32ToW16(int32_t value32) {
+ int32_t out = 0;
+
+ __asm __volatile ("ssat %0, #16, %1" : "=r"(out) : "r"(value32));
+
+ return (int16_t)out;
+}
+
+#endif // WEBRTC_SPL_SPL_INL_ARMV7_H_
diff --git a/third_party/webrtc/src/webrtc/common_audio/signal_processing/include/spl_inl_mips.h b/third_party/webrtc/src/webrtc/common_audio/signal_processing/include/spl_inl_mips.h
new file mode 100644
index 00000000..cd04bddc
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/include/spl_inl_mips.h
@@ -0,0 +1,225 @@
+/*
+ * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+// This header file includes the inline functions in
+// the fix point signal processing library.
+
+#ifndef WEBRTC_SPL_SPL_INL_MIPS_H_
+#define WEBRTC_SPL_SPL_INL_MIPS_H_
+
+// Returns the 32-bit product of the low 16 bits of |a| and the low 16 bits
+// of |b|, each interpreted as a signed 16-bit value.
+static __inline int32_t WEBRTC_SPL_MUL_16_16(int32_t a,
+ int32_t b) {
+ int32_t value32 = 0;
+ int32_t a1 = 0, b1 = 0;
+
+ __asm __volatile(
+#if defined(MIPS32_R2_LE)
+ // Sign-extend the low halfword of each operand into a scratch register.
+ "seh %[a1], %[a] \n\t"
+ "seh %[b1], %[b] \n\t"
+#else
+ // Same sign extension done with a shift-left / arithmetic-shift-right pair.
+ "sll %[a1], %[a], 16 \n\t"
+ "sll %[b1], %[b], 16 \n\t"
+ "sra %[a1], %[a1], 16 \n\t"
+ "sra %[b1], %[b1], 16 \n\t"
+#endif
+ "mul %[value32], %[a1], %[b1] \n\t"
+ // a1/b1 are early-clobber: they are written before all inputs are read.
+ : [value32] "=r" (value32), [a1] "=&r" (a1), [b1] "=&r" (b1)
+ : [a] "r" (a), [b] "r" (b)
+ : "hi", "lo"
+ );
+ return value32;
+}
+
+// Approximates (a * b) >> 16 by splitting |b| into its upper and lower
+// halfwords. The value computed is
+//   a * (b >> 16) + ((a * ((b & 0xFFFF) >> 1) + 0x4000) >> 15).
+static __inline int32_t WEBRTC_SPL_MUL_16_32_RSFT16(int16_t a,
+ int32_t b) {
+ int32_t value32 = 0, b1 = 0, b2 = 0;
+ int32_t a1 = 0;
+
+ __asm __volatile(
+ // a1 = sign-extended |a|.
+#if defined(MIPS32_R2_LE)
+ "seh %[a1], %[a] \n\t"
+#else
+ "sll %[a1], %[a], 16 \n\t"
+ "sra %[a1], %[a1], 16 \n\t"
+#endif
+ // b2 = low halfword of |b| shifted right by one; b1 = high halfword.
+ "andi %[b2], %[b], 0xFFFF \n\t"
+ "sra %[b1], %[b], 16 \n\t"
+ "sra %[b2], %[b2], 1 \n\t"
+ // High-part product, then the rounded low-part contribution.
+ "mul %[value32], %[a1], %[b1] \n\t"
+ "mul %[b2], %[a1], %[b2] \n\t"
+ "addiu %[b2], %[b2], 0x4000 \n\t"
+ "sra %[b2], %[b2], 15 \n\t"
+ "addu %[value32], %[value32], %[b2] \n\t"
+ : [value32] "=&r" (value32), [b1] "=&r" (b1), [b2] "=&r" (b2),
+ [a1] "=&r" (a1)
+ : [a] "r" (a), [b] "r" (b)
+ : "hi", "lo"
+ );
+ return value32;
+}
+
+#if defined(MIPS_DSP_R1_LE)
+// Saturates the 32-bit |value32| to the int16_t range: a saturating left
+// shift by 16 (shll_s.w, MIPS DSP ASE) followed by an arithmetic right shift
+// by 16 leaves the clamped value in the low halfword.
+static __inline int16_t WebRtcSpl_SatW32ToW16(int32_t value32) {
+ __asm __volatile(
+ "shll_s.w %[value32], %[value32], 16 \n\t"
+ "sra %[value32], %[value32], 16 \n\t"
+ // "+r": |value32| is both read and overwritten by the sequence.
+ : [value32] "+r" (value32)
+ :
+ );
+ int16_t out16 = (int16_t)value32;
+ return out16;
+}
+
+// Saturated 16-bit addition of |a| and |b| using addq_s.ph (saturating add
+// on paired halfwords, MIPS DSP ASE). Only the low halfword of the result is
+// returned.
+static __inline int16_t WebRtcSpl_AddSatW16(int16_t a, int16_t b) {
+ int32_t value32 = 0;
+
+ __asm __volatile(
+ "addq_s.ph %[value32], %[a], %[b] \n\t"
+ : [value32] "=r" (value32)
+ : [a] "r" (a), [b] "r" (b)
+ );
+ return (int16_t)value32;
+}
+
+// Saturated 32-bit addition of |l_var1| and |l_var2| using addq_s.w
+// (saturating word add, MIPS DSP ASE).
+static __inline int32_t WebRtcSpl_AddSatW32(int32_t l_var1, int32_t l_var2) {
+ int32_t l_sum;
+
+ __asm __volatile(
+ "addq_s.w %[l_sum], %[l_var1], %[l_var2] \n\t"
+ : [l_sum] "=r" (l_sum)
+ : [l_var1] "r" (l_var1), [l_var2] "r" (l_var2)
+ );
+
+ return l_sum;
+}
+
+// Saturated 16-bit subtraction |var1| - |var2| using subq_s.ph (saturating
+// subtract on paired halfwords, MIPS DSP ASE). Only the low halfword of the
+// result is returned.
+static __inline int16_t WebRtcSpl_SubSatW16(int16_t var1, int16_t var2) {
+ int32_t value32;
+
+ __asm __volatile(
+ "subq_s.ph %[value32], %[var1], %[var2] \n\t"
+ : [value32] "=r" (value32)
+ : [var1] "r" (var1), [var2] "r" (var2)
+ );
+
+ return (int16_t)value32;
+}
+
+// Saturated 32-bit subtraction |l_var1| - |l_var2| using subq_s.w
+// (saturating word subtract, MIPS DSP ASE).
+static __inline int32_t WebRtcSpl_SubSatW32(int32_t l_var1, int32_t l_var2) {
+ int32_t l_diff;
+
+ __asm __volatile(
+ "subq_s.w %[l_diff], %[l_var1], %[l_var2] \n\t"
+ : [l_diff] "=r" (l_diff)
+ : [l_var1] "r" (l_var1), [l_var2] "r" (l_var2)
+ );
+
+ return l_diff;
+}
+#endif
+
+// Returns the number of bits needed to represent |n|, computed as 32 minus
+// the leading-zero count of |n|.
+static __inline int16_t WebRtcSpl_GetSizeInBits(uint32_t n) {
+ int bits = 0;
+ int i32 = 32;
+
+ __asm __volatile(
+ // bits = clz(n); bits = 32 - bits.
+ "clz %[bits], %[n] \n\t"
+ "subu %[bits], %[i32], %[bits] \n\t"
+ // Early-clobber: |bits| is written before |i32| is read.
+ : [bits] "=&r" (bits)
+ : [n] "r" (n), [i32] "r" (i32)
+ );
+
+ return (int16_t)bits;
+}
+
+// Returns the number of left shifts needed to 32-bit normalize the signed
+// value |a|. Returns 0 when |a| is 0.
+static __inline int16_t WebRtcSpl_NormW32(int32_t a) {
+ int zeros = 0;
+
+ __asm __volatile(
+ // noreorder: the instruction after each branch is its delay slot and is
+ // written out explicitly (shown with an extra leading space).
+ ".set push \n\t"
+ ".set noreorder \n\t"
+ "bnez %[a], 1f \n\t"
+ // Delay slot: zeros = sign mask of |a| (all ones if negative, else 0).
+ " sra %[zeros], %[a], 31 \n\t"
+ // a == 0: result is 0.
+ "b 2f \n\t"
+ " move %[zeros], $zero \n\t"
+ "1: \n\t"
+ // XOR with the sign mask inverts negative values, then count the
+ // leading zeros and drop one for the sign bit.
+ "xor %[zeros], %[a], %[zeros] \n\t"
+ "clz %[zeros], %[zeros] \n\t"
+ "addiu %[zeros], %[zeros], -1 \n\t"
+ "2: \n\t"
+ ".set pop \n\t"
+ : [zeros]"=&r"(zeros)
+ : [a] "r" (a)
+ );
+
+ return (int16_t)zeros;
+}
+
+// Returns the number of left shifts needed to 32-bit normalize the unsigned
+// value |a|, i.e. its leading-zero count.
+static __inline int16_t WebRtcSpl_NormU32(uint32_t a) {
+ int zeros = 0;
+
+ __asm __volatile(
+ "clz %[zeros], %[a] \n\t"
+ : [zeros] "=r" (zeros)
+ : [a] "r" (a)
+ );
+
+ // Masking with 0x1f keeps counts 0..31 unchanged and maps a count of 32
+ // (presumably what clz reports for a == 0) to 0.
+ return (int16_t)(zeros & 0x1f);
+}
+
+// Returns the number of left shifts needed to 16-bit normalize the signed
+// value |a|. Returns 0 when |a| is 0.
+static __inline int16_t WebRtcSpl_NormW16(int16_t a) {
+ int zeros = 0;
+ // Move |a| into the upper halfword so the 32-bit sequence below applies.
+ // NOTE(review): left-shifting a negative value is undefined behavior in
+ // ISO C; this relies on the compiler's two's-complement shift semantics.
+ int a0 = a << 16;
+
+ __asm __volatile(
+ // noreorder: the instruction after each branch is its delay slot and is
+ // written out explicitly (shown with an extra leading space).
+ ".set push \n\t"
+ ".set noreorder \n\t"
+ "bnez %[a0], 1f \n\t"
+ // Delay slot: zeros = sign mask of |a0| (all ones if negative, else 0).
+ " sra %[zeros], %[a0], 31 \n\t"
+ // a0 == 0: result is 0.
+ "b 2f \n\t"
+ " move %[zeros], $zero \n\t"
+ "1: \n\t"
+ // XOR with the sign mask inverts negative values, then count the
+ // leading zeros and drop one for the sign bit.
+ "xor %[zeros], %[a0], %[zeros] \n\t"
+ "clz %[zeros], %[zeros] \n\t"
+ "addiu %[zeros], %[zeros], -1 \n\t"
+ "2: \n\t"
+ ".set pop \n\t"
+ : [zeros]"=&r"(zeros)
+ : [a0] "r" (a0)
+ );
+
+ return (int16_t)zeros;
+}
+
+// Multiply-accumulate: returns a * b + c, where |a| and |b| are multiplied
+// as sign-extended 16-bit values.
+//
+// Input:
+//      - a : First 16-bit factor
+//      - b : Second 16-bit factor
+//      - c : 32-bit value added to the product
+//
+// Return value : a * b + c
+static __inline int32_t WebRtc_MulAccumW16(int16_t a,
+                                           int16_t b,
+                                           int32_t c) {
+  int32_t res = 0, c1 = 0;
+  // Scratch registers for the sign-extended factors. The asm must not write
+  // to %[a] / %[b]: they are input-only operands, and the compiler assumes
+  // their registers are unchanged after the statement.
+  int32_t a1 = 0, b1 = 0;
+  __asm __volatile(
+#if defined(MIPS32_R2_LE)
+    "seh %[a1], %[a] \n\t"
+    "seh %[b1], %[b] \n\t"
+#else
+    "sll %[a1], %[a], 16 \n\t"
+    "sll %[b1], %[b], 16 \n\t"
+    "sra %[a1], %[a1], 16 \n\t"
+    "sra %[b1], %[b1], 16 \n\t"
+#endif
+    "mul %[res], %[a1], %[b1] \n\t"
+    "addu %[c1], %[c], %[res] \n\t"
+    // a1, b1 and res are early-clobber: each is written before the last
+    // input operand has been read.
+    : [c1] "=r" (c1), [res] "=&r" (res), [a1] "=&r" (a1), [b1] "=&r" (b1)
+    : [a] "r" (a), [b] "r" (b), [c] "r" (c)
+    : "hi", "lo"
+  );
+  return (c1);
+}
+
+#endif // WEBRTC_SPL_SPL_INL_MIPS_H_
diff --git a/third_party/webrtc/src/webrtc/common_audio/signal_processing/levinson_durbin.c b/third_party/webrtc/src/webrtc/common_audio/signal_processing/levinson_durbin.c
new file mode 100644
index 00000000..d46e5513
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/levinson_durbin.c
@@ -0,0 +1,246 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+/*
+ * This file contains the function WebRtcSpl_LevinsonDurbin().
+ * The description header can be found in signal_processing_library.h
+ *
+ */
+
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+
+#define SPL_LEVINSON_MAXORDER 20
+
+int16_t WebRtcSpl_LevinsonDurbin(const int32_t* R, int16_t* A, int16_t* K, // R: autocorrelation R[0..order]; A: out, LPC A[0..order] in Q12; K: out, reflection coefficients K[0..order-1] in Q15.
+ size_t order) // Expected range 1..SPL_LEVINSON_MAXORDER. NOTE(review): order == 0 is not handled here; larger values return 0.
+{ // Returns 1 for a stable filter; 0 if a reflection coefficient magnitude exceeds 32750 or order is too large.
+ size_t i, j;
+ // Auto-correlation coefficients in high precision
+ int16_t R_hi[SPL_LEVINSON_MAXORDER + 1], R_low[SPL_LEVINSON_MAXORDER + 1];
+ // LPC coefficients in high precision
+ int16_t A_hi[SPL_LEVINSON_MAXORDER + 1], A_low[SPL_LEVINSON_MAXORDER + 1];
+ // LPC coefficients for next iteration
+ int16_t A_upd_hi[SPL_LEVINSON_MAXORDER + 1], A_upd_low[SPL_LEVINSON_MAXORDER + 1];
+ // Reflection coefficient in high precision
+ int16_t K_hi, K_low;
+ // Prediction gain Alpha in high precision and with scale factor
+ int16_t Alpha_hi, Alpha_low, Alpha_exp;
+ int16_t tmp_hi, tmp_low;
+ int32_t temp1W32, temp2W32, temp3W32;
+ int16_t norm;
+ if (order > SPL_LEVINSON_MAXORDER) return 0; // The work arrays above hold SPL_LEVINSON_MAXORDER + 1 entries; refuse orders that would overrun them.
+ // Normalize the autocorrelation R[0]...R[order]
+
+ norm = WebRtcSpl_NormW32(R[0]);
+
+ for (i = 0; i <= order; ++i)
+ {
+ temp1W32 = WEBRTC_SPL_LSHIFT_W32(R[i], norm);
+ // Put R in hi and low format
+ R_hi[i] = (int16_t)(temp1W32 >> 16);
+ R_low[i] = (int16_t)((temp1W32 - ((int32_t)R_hi[i] << 16)) >> 1);
+ }
+
+ // K = A[1] = -R[1] / R[0]
+
+ temp2W32 = WEBRTC_SPL_LSHIFT_W32((int32_t)R_hi[1],16)
+ + WEBRTC_SPL_LSHIFT_W32((int32_t)R_low[1],1); // R[1] in Q31
+ temp3W32 = WEBRTC_SPL_ABS_W32(temp2W32); // abs R[1]
+ temp1W32 = WebRtcSpl_DivW32HiLow(temp3W32, R_hi[0], R_low[0]); // abs(R[1])/R[0] in Q31
+ // Put back the sign on R[1]
+ if (temp2W32 > 0)
+ {
+ temp1W32 = -temp1W32;
+ }
+
+ // Put K in hi and low format
+ K_hi = (int16_t)(temp1W32 >> 16);
+ K_low = (int16_t)((temp1W32 - ((int32_t)K_hi << 16)) >> 1);
+
+ // Store first reflection coefficient
+ K[0] = K_hi;
+
+ temp1W32 >>= 4; // A[1] in Q27.
+
+ // Put A[1] in hi and low format
+ A_hi[1] = (int16_t)(temp1W32 >> 16);
+ A_low[1] = (int16_t)((temp1W32 - ((int32_t)A_hi[1] << 16)) >> 1);
+
+ // Alpha = R[0] * (1-K^2)
+
+ temp1W32 = ((K_hi * K_low >> 14) + K_hi * K_hi) << 1; // = k^2 in Q31
+
+ temp1W32 = WEBRTC_SPL_ABS_W32(temp1W32); // Guard against <0
+ temp1W32 = (int32_t)0x7fffffffL - temp1W32; // temp1W32 = (1 - K[0]*K[0]) in Q31
+
+ // Store temp1W32 = 1 - K[0]*K[0] on hi and low format
+ tmp_hi = (int16_t)(temp1W32 >> 16);
+ tmp_low = (int16_t)((temp1W32 - ((int32_t)tmp_hi << 16)) >> 1);
+
+ // Calculate Alpha in Q31
+ temp1W32 = (R_hi[0] * tmp_hi + (R_hi[0] * tmp_low >> 15) +
+ (R_low[0] * tmp_hi >> 15)) << 1;
+
+ // Normalize Alpha and put it in hi and low format
+
+ Alpha_exp = WebRtcSpl_NormW32(temp1W32);
+ temp1W32 = WEBRTC_SPL_LSHIFT_W32(temp1W32, Alpha_exp);
+ Alpha_hi = (int16_t)(temp1W32 >> 16);
+ Alpha_low = (int16_t)((temp1W32 - ((int32_t)Alpha_hi << 16)) >> 1);
+
+ // Perform the iterative calculations in the Levinson-Durbin algorithm
+
+ for (i = 2; i <= order; i++)
+ {
+ /* ----
+ temp1W32 = R[i] + > R[j]*A[i-j]
+ /
+ ----
+ j=1..i-1
+ */
+
+ temp1W32 = 0;
+
+ for (j = 1; j < i; j++)
+ {
+ // temp1W32 is in Q31
+ temp1W32 += (R_hi[j] * A_hi[i - j] << 1) +
+ (((R_hi[j] * A_low[i - j] >> 15) +
+ (R_low[j] * A_hi[i - j] >> 15)) << 1);
+ }
+
+ temp1W32 = WEBRTC_SPL_LSHIFT_W32(temp1W32, 4);
+ temp1W32 += (WEBRTC_SPL_LSHIFT_W32((int32_t)R_hi[i], 16)
+ + WEBRTC_SPL_LSHIFT_W32((int32_t)R_low[i], 1));
+
+ // K = -temp1W32 / Alpha
+ temp2W32 = WEBRTC_SPL_ABS_W32(temp1W32); // abs(temp1W32)
+ temp3W32 = WebRtcSpl_DivW32HiLow(temp2W32, Alpha_hi, Alpha_low); // abs(temp1W32)/Alpha
+
+ // Put the sign of temp1W32 back again
+ if (temp1W32 > 0)
+ {
+ temp3W32 = -temp3W32;
+ }
+
+ // Use the Alpha shifts from earlier to de-normalize
+ norm = WebRtcSpl_NormW32(temp3W32);
+ if ((Alpha_exp <= norm) || (temp3W32 == 0))
+ {
+ temp3W32 = WEBRTC_SPL_LSHIFT_W32(temp3W32, Alpha_exp);
+ } else
+ { // The shift would overflow: saturate K to the extreme value of matching sign.
+ if (temp3W32 > 0)
+ {
+ temp3W32 = (int32_t)0x7fffffffL;
+ } else
+ {
+ temp3W32 = (int32_t)0x80000000L;
+ }
+ }
+
+ // Put K on hi and low format
+ K_hi = (int16_t)(temp3W32 >> 16);
+ K_low = (int16_t)((temp3W32 - ((int32_t)K_hi << 16)) >> 1);
+
+ // Store Reflection coefficient in Q15
+ K[i - 1] = K_hi;
+
+ // Test for unstable filter.
+ // If unstable return 0 and let the user decide what to do in that case
+
+ if ((int32_t)WEBRTC_SPL_ABS_W16(K_hi) > (int32_t)32750)
+ {
+ return 0; // Unstable filter
+ }
+
+ /*
+ Compute updated LPC coefficient: Anew[i]
+ Anew[j]= A[j] + K*A[i-j] for j=1..i-1
+ Anew[i]= K
+ */
+
+ for (j = 1; j < i; j++)
+ {
+ // temp1W32 = A[j] in Q27
+ temp1W32 = WEBRTC_SPL_LSHIFT_W32((int32_t)A_hi[j],16)
+ + WEBRTC_SPL_LSHIFT_W32((int32_t)A_low[j],1);
+
+ // temp1W32 += K*A[i-j] in Q27
+ temp1W32 += (K_hi * A_hi[i - j] + (K_hi * A_low[i - j] >> 15) +
+ (K_low * A_hi[i - j] >> 15)) << 1;
+
+ // Put Anew in hi and low format
+ A_upd_hi[j] = (int16_t)(temp1W32 >> 16);
+ A_upd_low[j] = (int16_t)(
+ (temp1W32 - ((int32_t)A_upd_hi[j] << 16)) >> 1);
+ }
+
+ // temp3W32 = K in Q27 (Convert from Q31 to Q27)
+ temp3W32 >>= 4;
+
+ // Store Anew in hi and low format
+ A_upd_hi[i] = (int16_t)(temp3W32 >> 16);
+ A_upd_low[i] = (int16_t)(
+ (temp3W32 - ((int32_t)A_upd_hi[i] << 16)) >> 1);
+
+ // Alpha = Alpha * (1-K^2)
+
+ temp1W32 = ((K_hi * K_low >> 14) + K_hi * K_hi) << 1; // K*K in Q31
+
+ temp1W32 = WEBRTC_SPL_ABS_W32(temp1W32); // Guard against <0
+ temp1W32 = (int32_t)0x7fffffffL - temp1W32; // 1 - K*K in Q31
+
+ // Convert 1- K^2 in hi and low format
+ tmp_hi = (int16_t)(temp1W32 >> 16);
+ tmp_low = (int16_t)((temp1W32 - ((int32_t)tmp_hi << 16)) >> 1);
+
+ // Calculate Alpha = Alpha * (1-K^2) in Q31
+ temp1W32 = (Alpha_hi * tmp_hi + (Alpha_hi * tmp_low >> 15) +
+ (Alpha_low * tmp_hi >> 15)) << 1;
+
+ // Normalize Alpha and store it on hi and low format
+
+ norm = WebRtcSpl_NormW32(temp1W32);
+ temp1W32 = WEBRTC_SPL_LSHIFT_W32(temp1W32, norm);
+
+ Alpha_hi = (int16_t)(temp1W32 >> 16);
+ Alpha_low = (int16_t)((temp1W32 - ((int32_t)Alpha_hi << 16)) >> 1);
+
+ // Update the total normalization of Alpha
+ Alpha_exp = Alpha_exp + norm;
+
+ // Update A[]
+
+ for (j = 1; j <= i; j++)
+ {
+ A_hi[j] = A_upd_hi[j];
+ A_low[j] = A_upd_low[j];
+ }
+ }
+
+ /*
+ Set A[0] to 1.0 and store the A[i] i=1...order in Q12
+ (Convert from Q27 and use rounding)
+ */
+
+ A[0] = 4096;
+
+ for (i = 1; i <= order; i++)
+ {
+ // temp1W32 in Q27
+ temp1W32 = WEBRTC_SPL_LSHIFT_W32((int32_t)A_hi[i], 16)
+ + WEBRTC_SPL_LSHIFT_W32((int32_t)A_low[i], 1);
+ // Round and store upper word
+ A[i] = (int16_t)(((temp1W32 << 1) + 32768) >> 16);
+ }
+ return 1; // Stable filters
+}
diff --git a/third_party/webrtc/src/webrtc/common_audio/signal_processing/lpc_to_refl_coef.c b/third_party/webrtc/src/webrtc/common_audio/signal_processing/lpc_to_refl_coef.c
new file mode 100644
index 00000000..edcebd4e
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/lpc_to_refl_coef.c
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+/*
+ * This file contains the function WebRtcSpl_LpcToReflCoef().
+ * The description header can be found in signal_processing_library.h
+ *
+ */
+
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+
+#define SPL_LPC_TO_REFL_COEF_MAX_AR_MODEL_ORDER 50
+
+void WebRtcSpl_LpcToReflCoef(int16_t* a16, int use_order, int16_t* k16) // Writes k16[0..use_order-1] (Q15) from a16[1..use_order] (Q12); a16 is overwritten along the way.
+{
+ int m, k;
+ int32_t tmp32[SPL_LPC_TO_REFL_COEF_MAX_AR_MODEL_ORDER]; // Indexed up to use_order - 1, so use_order must not exceed the array size.
+ int32_t tmp_inv_denom32;
+ int16_t tmp_inv_denom16;
+
+ k16[use_order - 1] = a16[use_order] << 3; // Q12<<3 => Q15
+ for (m = use_order - 1; m > 0; m--) // Step down one model order per iteration, producing k16[m - 1].
+ {
+ // (1 - k^2) in Q30
+ tmp_inv_denom32 = 1073741823 - k16[m] * k16[m];
+ // (1 - k^2) in Q15
+ tmp_inv_denom16 = (int16_t)(tmp_inv_denom32 >> 15);
+
+ for (k = 1; k <= m; k++)
+ {
+ // tmp[k] = (a[k] - RC[m] * a[m-k+1]) / (1.0 - RC[m]*RC[m]);
+
+ // [Q12<<16 - (Q15*Q12)<<1] = [Q28 - Q28] = Q28
+ tmp32[k] = (a16[k] << 16) - (k16[m] * a16[m - k + 1] << 1);
+
+ tmp32[k] = WebRtcSpl_DivW32W16(tmp32[k], tmp_inv_denom16); //Q28/Q15 = Q13
+ }
+
+ for (k = 1; k < m; k++) // Written back only after all tmp32[] are computed, since each tmp32[k] reads a16[m - k + 1].
+ {
+ a16[k] = (int16_t)(tmp32[k] >> 1); // Q13>>1 => Q12
+ }
+
+ tmp32[m] = WEBRTC_SPL_SAT(8191, tmp32[m], -8191); // Saturate so the << 2 below fits in int16_t.
+ k16[m - 1] = (int16_t)WEBRTC_SPL_LSHIFT_W32(tmp32[m], 2); //Q13<<2 => Q15
+ }
+ return;
+}
diff --git a/third_party/webrtc/src/webrtc/common_audio/signal_processing/min_max_operations.c b/third_party/webrtc/src/webrtc/common_audio/signal_processing/min_max_operations.c
new file mode 100644
index 00000000..4a962f86
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/min_max_operations.c
@@ -0,0 +1,224 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+/*
+ * This file contains the implementation of functions
+ * WebRtcSpl_MaxAbsValueW16C()
+ * WebRtcSpl_MaxAbsValueW32C()
+ * WebRtcSpl_MaxValueW16C()
+ * WebRtcSpl_MaxValueW32C()
+ * WebRtcSpl_MinValueW16C()
+ * WebRtcSpl_MinValueW32C()
+ * WebRtcSpl_MaxAbsIndexW16()
+ * WebRtcSpl_MaxIndexW16()
+ * WebRtcSpl_MaxIndexW32()
+ * WebRtcSpl_MinIndexW16()
+ * WebRtcSpl_MinIndexW32()
+ *
+ */
+
+#include <assert.h>
+#include <stdlib.h>
+
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+
+// TODO(bjorn/kma): Consolidate function pairs (e.g. combine
+// WebRtcSpl_MaxAbsValueW16C and WebRtcSpl_MaxAbsIndexW16 into a single one.)
+// TODO(kma): Move the next six functions into min_max_operations_c.c.
+
+// Maximum absolute value of word16 vector. C version for generic platforms.
+// Returns max |vector[i]| over length elements, clamped to WEBRTC_SPL_WORD16_MAX.
+int16_t WebRtcSpl_MaxAbsValueW16C(const int16_t* vector, size_t length) {
+ size_t i = 0;
+ int absolute = 0, maximum = 0; // int, so that abs(-32768) = 32768 is representable.
+
+ assert(length > 0);
+
+ for (i = 0; i < length; i++) {
+ absolute = abs((int)vector[i]);
+
+ if (absolute > maximum) {
+ maximum = absolute;
+ }
+ }
+
+ // Guard the case for abs(-32768).
+ if (maximum > WEBRTC_SPL_WORD16_MAX) {
+ maximum = WEBRTC_SPL_WORD16_MAX;
+ }
+
+ return (int16_t)maximum;
+}
+
+// Maximum absolute value of word32 vector. C version for generic platforms.
+int32_t WebRtcSpl_MaxAbsValueW32C(const int32_t* vector, size_t length) {
+ // Use uint32_t for the local variables, so that the magnitude of
+ // 0x80000000 (2^31) is representable; the result is clamped before returning.
+
+ uint32_t absolute = 0, maximum = 0;
+ size_t i = 0;
+
+ assert(length > 0);
+
+ for (i = 0; i < length; i++) {
+ absolute = vector[i] < 0 ? (uint32_t)0 - (uint32_t)vector[i] : (uint32_t)vector[i]; // Unsigned negate; abs() on the most negative int is undefined behaviour.
+ if (absolute > maximum) {
+ maximum = absolute;
+ }
+ }
+
+ maximum = WEBRTC_SPL_MIN(maximum, WEBRTC_SPL_WORD32_MAX); // Clamp 2^31 down to the largest int32_t.
+
+ return (int32_t)maximum;
+}
+
+// Maximum value of word16 vector. C version for generic platforms.
+int16_t WebRtcSpl_MaxValueW16C(const int16_t* vector, size_t length) { // Returns the largest element of vector[0..length-1].
+ int16_t maximum = WEBRTC_SPL_WORD16_MIN; // Start at the lowest value so any element can replace it.
+ size_t i = 0;
+
+ assert(length > 0);
+
+ for (i = 0; i < length; i++) {
+ if (vector[i] > maximum)
+ maximum = vector[i];
+ }
+ return maximum;
+}
+
+// Maximum value of word32 vector. C version for generic platforms.
+int32_t WebRtcSpl_MaxValueW32C(const int32_t* vector, size_t length) { // Returns the largest element of vector[0..length-1].
+ int32_t maximum = WEBRTC_SPL_WORD32_MIN; // Start at the lowest value so any element can replace it.
+ size_t i = 0;
+
+ assert(length > 0);
+
+ for (i = 0; i < length; i++) {
+ if (vector[i] > maximum)
+ maximum = vector[i];
+ }
+ return maximum;
+}
+
+// Minimum value of word16 vector. C version for generic platforms.
+int16_t WebRtcSpl_MinValueW16C(const int16_t* vector, size_t length) { // Returns the smallest element of vector[0..length-1].
+ int16_t minimum = WEBRTC_SPL_WORD16_MAX; // Start at the highest value so any element can replace it.
+ size_t i = 0;
+
+ assert(length > 0);
+
+ for (i = 0; i < length; i++) {
+ if (vector[i] < minimum)
+ minimum = vector[i];
+ }
+ return minimum;
+}
+
+// Minimum value of word32 vector. C version for generic platforms.
+int32_t WebRtcSpl_MinValueW32C(const int32_t* vector, size_t length) { // Returns the smallest element of vector[0..length-1].
+ int32_t minimum = WEBRTC_SPL_WORD32_MAX; // Start at the highest value so any element can replace it.
+ size_t i = 0;
+
+ assert(length > 0);
+
+ for (i = 0; i < length; i++) {
+ if (vector[i] < minimum)
+ minimum = vector[i];
+ }
+ return minimum;
+}
+
+// Index of maximum absolute value in a word16 vector.
+size_t WebRtcSpl_MaxAbsIndexW16(const int16_t* vector, size_t length) {
+ // Use type int for local variables, to accommodate the value of abs(-32768).
+
+ size_t i = 0, index = 0; // index stays 0 when every element is zero.
+ int absolute = 0, maximum = 0;
+
+ assert(length > 0);
+
+ for (i = 0; i < length; i++) {
+ absolute = abs((int)vector[i]);
+
+ if (absolute > maximum) { // Strict comparison: on ties the lowest index is kept.
+ maximum = absolute;
+ index = i;
+ }
+ }
+
+ return index;
+}
+
+// Index of maximum value in a word16 vector.
+size_t WebRtcSpl_MaxIndexW16(const int16_t* vector, size_t length) { // Returns the index of the largest element.
+ size_t i = 0, index = 0;
+ int16_t maximum = WEBRTC_SPL_WORD16_MIN;
+
+ assert(length > 0);
+
+ for (i = 0; i < length; i++) {
+ if (vector[i] > maximum) { // Strict comparison: on ties the lowest index is kept.
+ maximum = vector[i];
+ index = i;
+ }
+ }
+
+ return index;
+}
+
+// Index of maximum value in a word32 vector.
+size_t WebRtcSpl_MaxIndexW32(const int32_t* vector, size_t length) { // Returns the index of the largest element.
+ size_t i = 0, index = 0;
+ int32_t maximum = WEBRTC_SPL_WORD32_MIN;
+
+ assert(length > 0);
+
+ for (i = 0; i < length; i++) {
+ if (vector[i] > maximum) { // Strict comparison: on ties the lowest index is kept.
+ maximum = vector[i];
+ index = i;
+ }
+ }
+
+ return index;
+}
+
+// Index of minimum value in a word16 vector.
+size_t WebRtcSpl_MinIndexW16(const int16_t* vector, size_t length) { // Returns the index of the smallest element.
+ size_t i = 0, index = 0;
+ int16_t minimum = WEBRTC_SPL_WORD16_MAX;
+
+ assert(length > 0);
+
+ for (i = 0; i < length; i++) {
+ if (vector[i] < minimum) { // Strict comparison: on ties the lowest index is kept.
+ minimum = vector[i];
+ index = i;
+ }
+ }
+
+ return index;
+}
+
+// Index of minimum value in a word32 vector.
+size_t WebRtcSpl_MinIndexW32(const int32_t* vector, size_t length) { // Returns the index of the smallest element.
+ size_t i = 0, index = 0;
+ int32_t minimum = WEBRTC_SPL_WORD32_MAX;
+
+ assert(length > 0);
+
+ for (i = 0; i < length; i++) {
+ if (vector[i] < minimum) { // Strict comparison: on ties the lowest index is kept.
+ minimum = vector[i];
+ index = i;
+ }
+ }
+
+ return index;
+}
diff --git a/third_party/webrtc/src/webrtc/common_audio/signal_processing/min_max_operations_mips.c b/third_party/webrtc/src/webrtc/common_audio/signal_processing/min_max_operations_mips.c
new file mode 100644
index 00000000..28de45b3
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/min_max_operations_mips.c
@@ -0,0 +1,376 @@
+/*
+ * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+/*
+ * This file contains the implementation of function
+ * WebRtcSpl_MaxAbsValueW16()
+ *
+ * The description header can be found in signal_processing_library.h.
+ *
+ */
+
+#include <assert.h>
+
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+
+// Maximum absolute value of word16 vector.
+int16_t WebRtcSpl_MaxAbsValueW16_mips(const int16_t* vector, size_t length) { // Returns max |vector[i]|, clamped to WEBRTC_SPL_WORD16_MAX.
+ int32_t totMax = 0;
+ int32_t tmp32_0, tmp32_1, tmp32_2, tmp32_3;
+ size_t i, loop_size;
+
+ assert(length > 0);
+
+#if defined(MIPS_DSP_R1)
+ const int32_t* tmpvec32 = (int32_t*)vector; // NOTE(review): the lw loads below presumably need vector to be 4-byte aligned - confirm with callers.
+ loop_size = length >> 4; // 16 samples (8 words holding two halfwords each) per iteration.
+
+ for (i = 0; i < loop_size; i++) {
+ __asm__ volatile (
+ "lw %[tmp32_0], 0(%[tmpvec32]) \n\t"
+ "lw %[tmp32_1], 4(%[tmpvec32]) \n\t"
+ "lw %[tmp32_2], 8(%[tmpvec32]) \n\t"
+ "lw %[tmp32_3], 12(%[tmpvec32]) \n\t"
+
+ "absq_s.ph %[tmp32_0], %[tmp32_0] \n\t"
+ "absq_s.ph %[tmp32_1], %[tmp32_1] \n\t"
+ "cmp.lt.ph %[totMax], %[tmp32_0] \n\t"
+ "pick.ph %[totMax], %[tmp32_0], %[totMax] \n\t"
+
+ "lw %[tmp32_0], 16(%[tmpvec32]) \n\t"
+ "absq_s.ph %[tmp32_2], %[tmp32_2] \n\t"
+ "cmp.lt.ph %[totMax], %[tmp32_1] \n\t"
+ "pick.ph %[totMax], %[tmp32_1], %[totMax] \n\t"
+
+ "lw %[tmp32_1], 20(%[tmpvec32]) \n\t"
+ "absq_s.ph %[tmp32_3], %[tmp32_3] \n\t"
+ "cmp.lt.ph %[totMax], %[tmp32_2] \n\t"
+ "pick.ph %[totMax], %[tmp32_2], %[totMax] \n\t"
+
+ "lw %[tmp32_2], 24(%[tmpvec32]) \n\t"
+ "cmp.lt.ph %[totMax], %[tmp32_3] \n\t"
+ "pick.ph %[totMax], %[tmp32_3], %[totMax] \n\t"
+
+ "lw %[tmp32_3], 28(%[tmpvec32]) \n\t"
+ "absq_s.ph %[tmp32_0], %[tmp32_0] \n\t"
+ "absq_s.ph %[tmp32_1], %[tmp32_1] \n\t"
+ "cmp.lt.ph %[totMax], %[tmp32_0] \n\t"
+ "pick.ph %[totMax], %[tmp32_0], %[totMax] \n\t"
+
+ "absq_s.ph %[tmp32_2], %[tmp32_2] \n\t"
+ "cmp.lt.ph %[totMax], %[tmp32_1] \n\t"
+ "pick.ph %[totMax], %[tmp32_1], %[totMax] \n\t"
+ "absq_s.ph %[tmp32_3], %[tmp32_3] \n\t"
+ "cmp.lt.ph %[totMax], %[tmp32_2] \n\t"
+ "pick.ph %[totMax], %[tmp32_2], %[totMax] \n\t"
+
+ "cmp.lt.ph %[totMax], %[tmp32_3] \n\t"
+ "pick.ph %[totMax], %[tmp32_3], %[totMax] \n\t"
+
+ "addiu %[tmpvec32], %[tmpvec32], 32 \n\t"
+ : [tmp32_0] "=&r" (tmp32_0), [tmp32_1] "=&r" (tmp32_1),
+ [tmp32_2] "=&r" (tmp32_2), [tmp32_3] "=&r" (tmp32_3),
+ [totMax] "+r" (totMax), [tmpvec32] "+r" (tmpvec32)
+ :
+ : "memory"
+ );
+ }
+ __asm__ volatile ( // Reduce the two packed halfword maxima in totMax to one value in the low half.
+ "rotr %[tmp32_0], %[totMax], 16 \n\t"
+ "cmp.lt.ph %[totMax], %[tmp32_0] \n\t"
+ "pick.ph %[totMax], %[tmp32_0], %[totMax] \n\t"
+ "packrl.ph %[totMax], $0, %[totMax] \n\t"
+ : [tmp32_0] "=&r" (tmp32_0), [totMax] "+r" (totMax)
+ :
+ );
+ loop_size = length & 0xf; // Remaining samples, handled one at a time.
+ for (i = 0; i < loop_size; i++) {
+ __asm__ volatile (
+ "lh %[tmp32_0], 0(%[tmpvec32]) \n\t"
+ "addiu %[tmpvec32], %[tmpvec32], 2 \n\t"
+ "absq_s.w %[tmp32_0], %[tmp32_0] \n\t"
+ "slt %[tmp32_1], %[totMax], %[tmp32_0] \n\t"
+ "movn %[totMax], %[tmp32_0], %[tmp32_1] \n\t"
+ : [tmp32_0] "=&r" (tmp32_0), [tmp32_1] "=&r" (tmp32_1),
+ [tmpvec32] "+r" (tmpvec32), [totMax] "+r" (totMax)
+ :
+ : "memory"
+ );
+ }
+ // Guard the case for abs(-32768): absq_s.w in the tail loop saturates at
+ // 32 bits only, so a -32768 sample yields 32768, which would not fit the
+ // int16_t return value.
+ if (totMax > WEBRTC_SPL_WORD16_MAX) {
+ totMax = WEBRTC_SPL_WORD16_MAX;
+ }
+#else // #if defined(MIPS_DSP_R1)
+ int32_t v16MaxMax = WEBRTC_SPL_WORD16_MAX;
+ int32_t r, r1, r2, r3;
+ const int16_t* tmpvector = vector;
+ loop_size = length >> 4; // 16 samples per iteration, in four groups of four.
+ for (i = 0; i < loop_size; i++) {
+ __asm__ volatile (
+ "lh %[tmp32_0], 0(%[tmpvector]) \n\t"
+ "lh %[tmp32_1], 2(%[tmpvector]) \n\t"
+ "lh %[tmp32_2], 4(%[tmpvector]) \n\t"
+ "lh %[tmp32_3], 6(%[tmpvector]) \n\t"
+
+ "abs %[tmp32_0], %[tmp32_0] \n\t"
+ "abs %[tmp32_1], %[tmp32_1] \n\t"
+ "abs %[tmp32_2], %[tmp32_2] \n\t"
+ "abs %[tmp32_3], %[tmp32_3] \n\t"
+
+ "slt %[r], %[totMax], %[tmp32_0] \n\t"
+ "movn %[totMax], %[tmp32_0], %[r] \n\t"
+ "slt %[r1], %[totMax], %[tmp32_1] \n\t"
+ "movn %[totMax], %[tmp32_1], %[r1] \n\t"
+ "slt %[r2], %[totMax], %[tmp32_2] \n\t"
+ "movn %[totMax], %[tmp32_2], %[r2] \n\t"
+ "slt %[r3], %[totMax], %[tmp32_3] \n\t"
+ "movn %[totMax], %[tmp32_3], %[r3] \n\t"
+
+ "lh %[tmp32_0], 8(%[tmpvector]) \n\t"
+ "lh %[tmp32_1], 10(%[tmpvector]) \n\t"
+ "lh %[tmp32_2], 12(%[tmpvector]) \n\t"
+ "lh %[tmp32_3], 14(%[tmpvector]) \n\t"
+
+ "abs %[tmp32_0], %[tmp32_0] \n\t"
+ "abs %[tmp32_1], %[tmp32_1] \n\t"
+ "abs %[tmp32_2], %[tmp32_2] \n\t"
+ "abs %[tmp32_3], %[tmp32_3] \n\t"
+
+ "slt %[r], %[totMax], %[tmp32_0] \n\t"
+ "movn %[totMax], %[tmp32_0], %[r] \n\t"
+ "slt %[r1], %[totMax], %[tmp32_1] \n\t"
+ "movn %[totMax], %[tmp32_1], %[r1] \n\t"
+ "slt %[r2], %[totMax], %[tmp32_2] \n\t"
+ "movn %[totMax], %[tmp32_2], %[r2] \n\t"
+ "slt %[r3], %[totMax], %[tmp32_3] \n\t"
+ "movn %[totMax], %[tmp32_3], %[r3] \n\t"
+
+ "lh %[tmp32_0], 16(%[tmpvector]) \n\t"
+ "lh %[tmp32_1], 18(%[tmpvector]) \n\t"
+ "lh %[tmp32_2], 20(%[tmpvector]) \n\t"
+ "lh %[tmp32_3], 22(%[tmpvector]) \n\t"
+
+ "abs %[tmp32_0], %[tmp32_0] \n\t"
+ "abs %[tmp32_1], %[tmp32_1] \n\t"
+ "abs %[tmp32_2], %[tmp32_2] \n\t"
+ "abs %[tmp32_3], %[tmp32_3] \n\t"
+
+ "slt %[r], %[totMax], %[tmp32_0] \n\t"
+ "movn %[totMax], %[tmp32_0], %[r] \n\t"
+ "slt %[r1], %[totMax], %[tmp32_1] \n\t"
+ "movn %[totMax], %[tmp32_1], %[r1] \n\t"
+ "slt %[r2], %[totMax], %[tmp32_2] \n\t"
+ "movn %[totMax], %[tmp32_2], %[r2] \n\t"
+ "slt %[r3], %[totMax], %[tmp32_3] \n\t"
+ "movn %[totMax], %[tmp32_3], %[r3] \n\t"
+
+ "lh %[tmp32_0], 24(%[tmpvector]) \n\t"
+ "lh %[tmp32_1], 26(%[tmpvector]) \n\t"
+ "lh %[tmp32_2], 28(%[tmpvector]) \n\t"
+ "lh %[tmp32_3], 30(%[tmpvector]) \n\t"
+
+ "abs %[tmp32_0], %[tmp32_0] \n\t"
+ "abs %[tmp32_1], %[tmp32_1] \n\t"
+ "abs %[tmp32_2], %[tmp32_2] \n\t"
+ "abs %[tmp32_3], %[tmp32_3] \n\t"
+
+ "slt %[r], %[totMax], %[tmp32_0] \n\t"
+ "movn %[totMax], %[tmp32_0], %[r] \n\t"
+ "slt %[r1], %[totMax], %[tmp32_1] \n\t"
+ "movn %[totMax], %[tmp32_1], %[r1] \n\t"
+ "slt %[r2], %[totMax], %[tmp32_2] \n\t"
+ "movn %[totMax], %[tmp32_2], %[r2] \n\t"
+ "slt %[r3], %[totMax], %[tmp32_3] \n\t"
+ "movn %[totMax], %[tmp32_3], %[r3] \n\t"
+
+ "addiu %[tmpvector], %[tmpvector], 32 \n\t"
+ : [tmp32_0] "=&r" (tmp32_0), [tmp32_1] "=&r" (tmp32_1),
+ [tmp32_2] "=&r" (tmp32_2), [tmp32_3] "=&r" (tmp32_3),
+ [totMax] "+r" (totMax), [r] "=&r" (r), [tmpvector] "+r" (tmpvector),
+ [r1] "=&r" (r1), [r2] "=&r" (r2), [r3] "=&r" (r3)
+ :
+ : "memory"
+ );
+ }
+ loop_size = length & 0xf; // Remaining samples, handled one at a time.
+ for (i = 0; i < loop_size; i++) {
+ __asm__ volatile (
+ "lh %[tmp32_0], 0(%[tmpvector]) \n\t"
+ "addiu %[tmpvector], %[tmpvector], 2 \n\t"
+ "abs %[tmp32_0], %[tmp32_0] \n\t"
+ "slt %[tmp32_1], %[totMax], %[tmp32_0] \n\t"
+ "movn %[totMax], %[tmp32_0], %[tmp32_1] \n\t"
+ : [tmp32_0] "=&r" (tmp32_0), [tmp32_1] "=&r" (tmp32_1),
+ [tmpvector] "+r" (tmpvector), [totMax] "+r" (totMax)
+ :
+ : "memory"
+ );
+ }
+
+ __asm__ volatile ( // Clamp totMax to WEBRTC_SPL_WORD16_MAX (covers abs(-32768) = 32768).
+ "slt %[r], %[v16MaxMax], %[totMax] \n\t"
+ "movn %[totMax], %[v16MaxMax], %[r] \n\t"
+ : [totMax] "+r" (totMax), [r] "=&r" (r)
+ : [v16MaxMax] "r" (v16MaxMax)
+ );
+#endif // #if defined(MIPS_DSP_R1)
+ return (int16_t)totMax;
+}
+
+#if defined(MIPS_DSP_R1_LE)
+// Maximum absolute value of word32 vector. Version for MIPS platform.
+int32_t WebRtcSpl_MaxAbsValueW32_mips(const int32_t* vector, size_t length) { // Returns max |vector[i]|, at most 0x7fffffff.
+ // Use uint32_t for the local variables, to accommodate the return value
+ // of abs(0x80000000), which is 0x80000000.
+
+ uint32_t absolute = 0, maximum = 0;
+ int tmp1 = 0, max_value = 0x7fffffff;
+
+ assert(length > 0);
+
+ __asm__ volatile (
+ ".set push \n\t"
+ ".set noreorder \n\t" // Keep the assembler from reordering: the branch delay slot is filled by hand.
+
+ "1: \n\t"
+ "lw %[absolute], 0(%[vector]) \n\t"
+ "absq_s.w %[absolute], %[absolute] \n\t"
+ "addiu %[length], %[length], -1 \n\t"
+ "slt %[tmp1], %[maximum], %[absolute] \n\t"
+ "movn %[maximum], %[absolute], %[tmp1] \n\t"
+ "bgtz %[length], 1b \n\t"
+ " addiu %[vector], %[vector], 4 \n\t" // Delay slot: advance to the next element.
+ "slt %[tmp1], %[max_value], %[maximum] \n\t"
+ "movn %[maximum], %[max_value], %[tmp1] \n\t" // Clamp the result to max_value.
+
+ ".set pop \n\t"
+
+ : [tmp1] "=&r" (tmp1), [maximum] "+r" (maximum), [absolute] "+r" (absolute),
+ [vector] "+r" (vector), [length] "+r" (length) // Both are modified by the loop, so they are read-write operands rather than inputs.
+ : [max_value] "r" (max_value) : "memory"
+ );
+
+ return (int32_t)maximum;
+}
+#endif // #if defined(MIPS_DSP_R1_LE)
+
+// Maximum value of word16 vector. Version for MIPS platform.
+int16_t WebRtcSpl_MaxValueW16_mips(const int16_t* vector, size_t length) { // Returns the largest element of vector[0..length-1].
+ int16_t maximum = WEBRTC_SPL_WORD16_MIN;
+ int tmp1;
+ int16_t value;
+
+ assert(length > 0);
+
+ __asm__ volatile (
+ ".set push \n\t"
+ ".set noreorder \n\t" // Keep the assembler from reordering: the branch delay slot is filled by hand.
+
+ "1: \n\t"
+ "lh %[value], 0(%[vector]) \n\t"
+ "addiu %[length], %[length], -1 \n\t"
+ "slt %[tmp1], %[maximum], %[value] \n\t"
+ "movn %[maximum], %[value], %[tmp1] \n\t"
+ "bgtz %[length], 1b \n\t"
+ " addiu %[vector], %[vector], 2 \n\t" // Delay slot: advance to the next element.
+ ".set pop \n\t"
+
+ : [tmp1] "=&r" (tmp1), [maximum] "+r" (maximum), [value] "=&r" (value),
+ [vector] "+r" (vector), [length] "+r" (length) // Both are modified by the loop, so they are read-write operands rather than inputs.
+ : : "memory"
+ );
+
+ return maximum;
+}
+
+// Maximum value of word32 vector. Version for MIPS platform.
+int32_t WebRtcSpl_MaxValueW32_mips(const int32_t* vector, size_t length) { // Returns the largest element of vector[0..length-1].
+ int32_t maximum = WEBRTC_SPL_WORD32_MIN;
+ int tmp1, value;
+
+ assert(length > 0);
+
+ __asm__ volatile (
+ ".set push \n\t"
+ ".set noreorder \n\t" // Keep the assembler from reordering: the branch delay slot is filled by hand.
+
+ "1: \n\t"
+ "lw %[value], 0(%[vector]) \n\t"
+ "addiu %[length], %[length], -1 \n\t"
+ "slt %[tmp1], %[maximum], %[value] \n\t"
+ "movn %[maximum], %[value], %[tmp1] \n\t"
+ "bgtz %[length], 1b \n\t"
+ " addiu %[vector], %[vector], 4 \n\t" // Delay slot: advance to the next element.
+
+ ".set pop \n\t"
+
+ : [tmp1] "=&r" (tmp1), [maximum] "+r" (maximum), [value] "=&r" (value),
+ [vector] "+r" (vector), [length] "+r" (length) // Both are modified by the loop, so they are read-write operands rather than inputs.
+ : : "memory"
+ );
+
+ return maximum;
+}
+
+// Minimum value of word16 vector. Version for MIPS platform.
+int16_t WebRtcSpl_MinValueW16_mips(const int16_t* vector, size_t length) { // Returns the smallest element of vector[0..length-1].
+ int16_t minimum = WEBRTC_SPL_WORD16_MAX;
+ int tmp1;
+ int16_t value;
+
+ assert(length > 0);
+
+ __asm__ volatile (
+ ".set push \n\t"
+ ".set noreorder \n\t" // Keep the assembler from reordering: the branch delay slot is filled by hand.
+
+ "1: \n\t"
+ "lh %[value], 0(%[vector]) \n\t"
+ "addiu %[length], %[length], -1 \n\t"
+ "slt %[tmp1], %[value], %[minimum] \n\t"
+ "movn %[minimum], %[value], %[tmp1] \n\t"
+ "bgtz %[length], 1b \n\t"
+ " addiu %[vector], %[vector], 2 \n\t" // Delay slot: advance to the next element.
+
+ ".set pop \n\t"
+
+ : [tmp1] "=&r" (tmp1), [minimum] "+r" (minimum), [value] "=&r" (value),
+ [vector] "+r" (vector), [length] "+r" (length) // Both are modified by the loop, so they are read-write operands rather than inputs.
+ : : "memory"
+ );
+
+ return minimum;
+}
+
+// Minimum value of word32 vector. Version for MIPS platform.
+int32_t WebRtcSpl_MinValueW32_mips(const int32_t* vector, size_t length) { // Returns the smallest element of vector[0..length-1].
+ int32_t minimum = WEBRTC_SPL_WORD32_MAX;
+ int tmp1, value;
+
+ assert(length > 0);
+
+ __asm__ volatile (
+ ".set push \n\t"
+ ".set noreorder \n\t" // Keep the assembler from reordering: the branch delay slot is filled by hand.
+
+ "1: \n\t"
+ "lw %[value], 0(%[vector]) \n\t"
+ "addiu %[length], %[length], -1 \n\t"
+ "slt %[tmp1], %[value], %[minimum] \n\t"
+ "movn %[minimum], %[value], %[tmp1] \n\t"
+ "bgtz %[length], 1b \n\t"
+ " addiu %[vector], %[vector], 4 \n\t" // Delay slot: advance to the next element.
+
+ ".set pop \n\t"
+
+ : [tmp1] "=&r" (tmp1), [minimum] "+r" (minimum), [value] "=&r" (value),
+ [vector] "+r" (vector), [length] "+r" (length) // Both are modified by the loop, so they are read-write operands rather than inputs.
+ : : "memory"
+ );
+
+ return minimum;
+}
diff --git a/third_party/webrtc/src/webrtc/common_audio/signal_processing/min_max_operations_neon.c b/third_party/webrtc/src/webrtc/common_audio/signal_processing/min_max_operations_neon.c
new file mode 100644
index 00000000..6fbbf94e
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/min_max_operations_neon.c
@@ -0,0 +1,283 @@
+/*
+ * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <arm_neon.h>
+#include <assert.h>
+#include <stdlib.h>
+
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+
+// Maximum absolute value of word16 vector. NEON intrinsics version for ARM platforms.
+int16_t WebRtcSpl_MaxAbsValueW16Neon(const int16_t* vector, size_t length) {
+ int absolute = 0, maximum = 0;
+
+ assert(length > 0);
+
+ const int16_t* p_start = vector;
+ size_t rest = length & 7; // Elements left over after the 8-wide vector loop.
+ const int16_t* p_end = vector + length - rest;
+
+ int16x8_t v;
+ uint16x8_t max_qv;
+ max_qv = vdupq_n_u16(0);
+
+ while (p_start < p_end) {
+ v = vld1q_s16(p_start);
+ // Note vabs doesn't change the value of -32768.
+ v = vabsq_s16(v);
+ // Use u16 so we don't lose the value -32768.
+ max_qv = vmaxq_u16(max_qv, vreinterpretq_u16_s16(v));
+ p_start += 8;
+ }
+
+#ifdef WEBRTC_ARCH_ARM64
+ maximum = (int)vmaxvq_u16(max_qv); // Across-vector maximum in one intrinsic.
+#else
+ uint16x4_t max_dv; // Fold the two halves together, then two pairwise-max steps leave the maximum in lane 0.
+ max_dv = vmax_u16(vget_low_u16(max_qv), vget_high_u16(max_qv));
+ max_dv = vpmax_u16(max_dv, max_dv);
+ max_dv = vpmax_u16(max_dv, max_dv);
+
+ maximum = (int)vget_lane_u16(max_dv, 0);
+#endif
+
+ p_end = vector + length; // Scalar pass over the leftover elements.
+ while (p_start < p_end) {
+ absolute = abs((int)(*p_start));
+
+ if (absolute > maximum) {
+ maximum = absolute;
+ }
+ p_start++;
+ }
+
+ // Guard the case for abs(-32768).
+ if (maximum > WEBRTC_SPL_WORD16_MAX) {
+ maximum = WEBRTC_SPL_WORD16_MAX;
+ }
+
+ return (int16_t)maximum;
+}
+
+// Maximum absolute value of word32 vector. NEON intrinsics version for
+// ARM 32-bit/64-bit platforms.
+int32_t WebRtcSpl_MaxAbsValueW32Neon(const int32_t* vector, size_t length) { // Returns max |vector[i]|, clamped to WEBRTC_SPL_WORD32_MAX.
+ // Use uint32_t for the local variables, so that the magnitude of
+ // 0x80000000 (2^31) is representable; the result is clamped before returning.
+
+ uint32_t absolute = 0, maximum = 0;
+ size_t i = 0;
+ size_t residual = length & 0x7; // Elements left over after the 8-wide vector loop.
+
+ assert(length > 0);
+
+ const int32_t* p_start = vector;
+ uint32x4_t max32x4_0 = vdupq_n_u32(0);
+ uint32x4_t max32x4_1 = vdupq_n_u32(0);
+
+ // First part, unroll the loop 8 times.
+ for (i = 0; i < length - residual; i += 8) {
+ int32x4_t in32x4_0 = vld1q_s32(p_start);
+ p_start += 4;
+ int32x4_t in32x4_1 = vld1q_s32(p_start);
+ p_start += 4;
+ in32x4_0 = vabsq_s32(in32x4_0);
+ in32x4_1 = vabsq_s32(in32x4_1);
+ // vabs doesn't change the value of 0x80000000.
+ // Use u32 so we don't lose the value 0x80000000.
+ max32x4_0 = vmaxq_u32(max32x4_0, vreinterpretq_u32_s32(in32x4_0));
+ max32x4_1 = vmaxq_u32(max32x4_1, vreinterpretq_u32_s32(in32x4_1));
+ }
+
+ uint32x4_t max32x4 = vmaxq_u32(max32x4_0, max32x4_1);
+#if defined(WEBRTC_ARCH_ARM64)
+ maximum = vmaxvq_u32(max32x4);
+#else
+ uint32x2_t max32x2 = vmax_u32(vget_low_u32(max32x4), vget_high_u32(max32x4));
+ max32x2 = vpmax_u32(max32x2, max32x2);
+
+ maximum = vget_lane_u32(max32x2, 0);
+#endif
+
+ // Second part, do the remaining iterations (if any).
+ for (i = residual; i > 0; i--) {
+ absolute = *p_start < 0 ? (uint32_t)0 - (uint32_t)(*p_start) : (uint32_t)(*p_start); // Unsigned negate; abs() on the most negative int is undefined behaviour.
+ if (absolute > maximum) {
+ maximum = absolute;
+ }
+ p_start++;
+ }
+
+ // Guard against the case for 0x80000000.
+ maximum = WEBRTC_SPL_MIN(maximum, WEBRTC_SPL_WORD32_MAX);
+
+ return (int32_t)maximum;
+}
+
+// Maximum value of word16 vector. NEON intrinsics version for
+// ARM 32-bit/64-bit platforms.
+int16_t WebRtcSpl_MaxValueW16Neon(const int16_t* vector, size_t length) { // Returns the largest element of vector[0..length-1].
+ int16_t maximum = WEBRTC_SPL_WORD16_MIN;
+ size_t i = 0;
+ size_t residual = length & 0x7; // Elements left over after the 8-wide vector loop.
+
+ assert(length > 0);
+
+ const int16_t* p_start = vector;
+ int16x8_t max16x8 = vdupq_n_s16(WEBRTC_SPL_WORD16_MIN);
+
+ // First part, unroll the loop 8 times.
+ for (i = 0; i < length - residual; i += 8) {
+ int16x8_t in16x8 = vld1q_s16(p_start);
+ max16x8 = vmaxq_s16(max16x8, in16x8);
+ p_start += 8;
+ }
+
+#if defined(WEBRTC_ARCH_ARM64)
+ maximum = vmaxvq_s16(max16x8); // Across-vector maximum in one intrinsic.
+#else
+ int16x4_t max16x4 = vmax_s16(vget_low_s16(max16x8), vget_high_s16(max16x8)); // Fold halves, then two pairwise-max steps leave the maximum in lane 0.
+ max16x4 = vpmax_s16(max16x4, max16x4);
+ max16x4 = vpmax_s16(max16x4, max16x4);
+
+ maximum = vget_lane_s16(max16x4, 0);
+#endif
+
+ // Second part, do the remaining iterations (if any).
+ for (i = residual; i > 0; i--) {
+ if (*p_start > maximum)
+ maximum = *p_start;
+ p_start++;
+ }
+ return maximum;
+}
+
+// Maximum value of word32 vector. NEON intrinsics version for
+// ARM 32-bit/64-bit platforms.
+int32_t WebRtcSpl_MaxValueW32Neon(const int32_t* vector, size_t length) { // Returns the largest element of vector[0..length-1].
+ int32_t maximum = WEBRTC_SPL_WORD32_MIN;
+ size_t i = 0;
+ size_t residual = length & 0x7; // Elements left over after the 8-wide vector loop.
+
+ assert(length > 0);
+
+ const int32_t* p_start = vector;
+ int32x4_t max32x4_0 = vdupq_n_s32(WEBRTC_SPL_WORD32_MIN); // Two 4-lane accumulators, one per half of each 8-element step.
+ int32x4_t max32x4_1 = vdupq_n_s32(WEBRTC_SPL_WORD32_MIN);
+
+ // First part, unroll the loop 8 times.
+ for (i = 0; i < length - residual; i += 8) {
+ int32x4_t in32x4_0 = vld1q_s32(p_start);
+ p_start += 4;
+ int32x4_t in32x4_1 = vld1q_s32(p_start);
+ p_start += 4;
+ max32x4_0 = vmaxq_s32(max32x4_0, in32x4_0);
+ max32x4_1 = vmaxq_s32(max32x4_1, in32x4_1);
+ }
+
+ int32x4_t max32x4 = vmaxq_s32(max32x4_0, max32x4_1);
+#if defined(WEBRTC_ARCH_ARM64)
+ maximum = vmaxvq_s32(max32x4); // Across-vector maximum in one intrinsic.
+#else
+ int32x2_t max32x2 = vmax_s32(vget_low_s32(max32x4), vget_high_s32(max32x4)); // Fold halves, then one pairwise-max step leaves the maximum in lane 0.
+ max32x2 = vpmax_s32(max32x2, max32x2);
+
+ maximum = vget_lane_s32(max32x2, 0);
+#endif
+
+ // Second part, do the remaining iterations (if any).
+ for (i = residual; i > 0; i--) {
+ if (*p_start > maximum)
+ maximum = *p_start;
+ p_start++;
+ }
+ return maximum;
+}
+
+// Minimum value of word16 vector. NEON intrinsics version for
+// ARM 32-bit/64-bit platforms.
+int16_t WebRtcSpl_MinValueW16Neon(const int16_t* vector, size_t length) { // Returns the smallest element of vector[0..length-1].
+ int16_t minimum = WEBRTC_SPL_WORD16_MAX;
+ size_t i = 0;
+ size_t residual = length & 0x7; // Elements left over after the 8-wide vector loop.
+
+ assert(length > 0);
+
+ const int16_t* p_start = vector;
+ int16x8_t min16x8 = vdupq_n_s16(WEBRTC_SPL_WORD16_MAX);
+
+ // First part, unroll the loop 8 times.
+ for (i = 0; i < length - residual; i += 8) {
+ int16x8_t in16x8 = vld1q_s16(p_start);
+ min16x8 = vminq_s16(min16x8, in16x8);
+ p_start += 8;
+ }
+
+#if defined(WEBRTC_ARCH_ARM64)
+ minimum = vminvq_s16(min16x8); // Across-vector minimum in one intrinsic.
+#else
+ int16x4_t min16x4 = vmin_s16(vget_low_s16(min16x8), vget_high_s16(min16x8)); // Fold halves, then two pairwise-min steps leave the minimum in lane 0.
+ min16x4 = vpmin_s16(min16x4, min16x4);
+ min16x4 = vpmin_s16(min16x4, min16x4);
+
+ minimum = vget_lane_s16(min16x4, 0);
+#endif
+
+ // Second part, do the remaining iterations (if any).
+ for (i = residual; i > 0; i--) {
+ if (*p_start < minimum)
+ minimum = *p_start;
+ p_start++;
+ }
+ return minimum;
+}
+
+// Minimum value of word32 vector. NEON intrinsics version for
+// ARM 32-bit/64-bit platforms.
+int32_t WebRtcSpl_MinValueW32Neon(const int32_t* vector, size_t length) { // Returns the smallest element of vector[0..length-1].
+ int32_t minimum = WEBRTC_SPL_WORD32_MAX;
+ size_t i = 0;
+ size_t residual = length & 0x7; // Elements left over after the 8-wide vector loop.
+
+ assert(length > 0);
+
+ const int32_t* p_start = vector;
+ int32x4_t min32x4_0 = vdupq_n_s32(WEBRTC_SPL_WORD32_MAX); // Two 4-lane accumulators, one per half of each 8-element step.
+ int32x4_t min32x4_1 = vdupq_n_s32(WEBRTC_SPL_WORD32_MAX);
+
+ // First part, unroll the loop 8 times.
+ for (i = 0; i < length - residual; i += 8) {
+ int32x4_t in32x4_0 = vld1q_s32(p_start);
+ p_start += 4;
+ int32x4_t in32x4_1 = vld1q_s32(p_start);
+ p_start += 4;
+ min32x4_0 = vminq_s32(min32x4_0, in32x4_0);
+ min32x4_1 = vminq_s32(min32x4_1, in32x4_1);
+ }
+
+ int32x4_t min32x4 = vminq_s32(min32x4_0, min32x4_1);
+#if defined(WEBRTC_ARCH_ARM64)
+ minimum = vminvq_s32(min32x4); // Across-vector minimum in one intrinsic.
+#else
+ int32x2_t min32x2 = vmin_s32(vget_low_s32(min32x4), vget_high_s32(min32x4)); // Fold halves, then one pairwise-min step leaves the minimum in lane 0.
+ min32x2 = vpmin_s32(min32x2, min32x2);
+
+ minimum = vget_lane_s32(min32x2, 0);
+#endif
+
+ // Second part, do the remaining iterations (if any).
+ for (i = residual; i > 0; i--) {
+ if (*p_start < minimum)
+ minimum = *p_start;
+ p_start++;
+ }
+ return minimum;
+}
+
diff --git a/third_party/webrtc/src/webrtc/common_audio/signal_processing/randomization_functions.c b/third_party/webrtc/src/webrtc/common_audio/signal_processing/randomization_functions.c
new file mode 100644
index 00000000..73f24093
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/randomization_functions.c
@@ -0,0 +1,115 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+/*
+ * This file contains implementations of the randomization functions
+ * WebRtcSpl_RandU()
+ * WebRtcSpl_RandN()
+ * WebRtcSpl_RandUArray()
+ *
+ * The description header can be found in signal_processing_library.h
+ *
+ */
+
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+
+// Exclusive upper bound for seed values: IncreaseSeed() masks the seed with
+// kMaxSeedUsed - 1, which keeps only its low 31 bits.
+static const uint32_t kMaxSeedUsed = 0x80000000;
+
+static const int16_t kRandNTable[] = {
+ 9178, -7260, 40, 10189, 4894, -3531, -13779, 14764,
+ -4008, -8884, -8990, 1008, 7368, 5184, 3251, -5817,
+ -9786, 5963, 1770, 8066, -7135, 10772, -2298, 1361,
+ 6484, 2241, -8633, 792, 199, -3344, 6553, -10079,
+ -15040, 95, 11608, -12469, 14161, -4176, 2476, 6403,
+ 13685, -16005, 6646, 2239, 10916, -3004, -602, -3141,
+ 2142, 14144, -5829, 5305, 8209, 4713, 2697, -5112,
+ 16092, -1210, -2891, -6631, -5360, -11878, -6781, -2739,
+ -6392, 536, 10923, 10872, 5059, -4748, -7770, 5477,
+ 38, -1025, -2892, 1638, 6304, 14375, -11028, 1553,
+ -1565, 10762, -393, 4040, 5257, 12310, 6554, -4799,
+ 4899, -6354, 1603, -1048, -2220, 8247, -186, -8944,
+ -12004, 2332, 4801, -4933, 6371, 131, 8614, -5927,
+ -8287, -22760, 4033, -15162, 3385, 3246, 3153, -5250,
+ 3766, 784, 6494, -62, 3531, -1582, 15572, 662,
+ -3952, -330, -3196, 669, 7236, -2678, -6569, 23319,
+ -8645, -741, 14830, -15976, 4903, 315, -11342, 10311,
+ 1858, -7777, 2145, 5436, 5677, -113, -10033, 826,
+ -1353, 17210, 7768, 986, -1471, 8291, -4982, 8207,
+ -14911, -6255, -2449, -11881, -7059, -11703, -4338, 8025,
+ 7538, -2823, -12490, 9470, -1613, -2529, -10092, -7807,
+ 9480, 6970, -12844, 5123, 3532, 4816, 4803, -8455,
+ -5045, 14032, -4378, -1643, 5756, -11041, -2732, -16618,
+ -6430, -18375, -3320, 6098, 5131, -4269, -8840, 2482,
+ -7048, 1547, -21890, -6505, -7414, -424, -11722, 7955,
+ 1653, -17299, 1823, 473, -9232, 3337, 1111, 873,
+ 4018, -8982, 9889, 3531, -11763, -3799, 7373, -4539,
+ 3231, 7054, -8537, 7616, 6244, 16635, 447, -2915,
+ 13967, 705, -2669, -1520, -1771, -16188, 5956, 5117,
+ 6371, -9936, -1448, 2480, 5128, 7550, -8130, 5236,
+ 8213, -6443, 7707, -1950, -13811, 7218, 7031, -3883,
+ 67, 5731, -2874, 13480, -3743, 9298, -3280, 3552,
+ -4425, -18, -3785, -9988, -5357, 5477, -11794, 2117,
+ 1416, -9935, 3376, 802, -5079, -8243, 12652, 66,
+ 3653, -2368, 6781, -21895, -7227, 2487, 7839, -385,
+ 6646, -7016, -4658, 5531, -1705, 834, 129, 3694,
+ -1343, 2238, -22640, -6417, -11139, 11301, -2945, -3494,
+ -5626, 185, -3615, -2041, -7972, -3106, -60, -23497,
+ -1566, 17064, 3519, 2518, 304, -6805, -10269, 2105,
+ 1936, -426, -736, -8122, -1467, 4238, -6939, -13309,
+ 360, 7402, -7970, 12576, 3287, 12194, -6289, -16006,
+ 9171, 4042, -9193, 9123, -2512, 6388, -4734, -8739,
+ 1028, -5406, -1696, 5889, -666, -4736, 4971, 3565,
+ 9362, -6292, 3876, -3652, -19666, 7523, -4061, 391,
+ -11773, 7502, -3763, 4929, -9478, 13278, 2805, 4496,
+ 7814, 16419, 12455, -14773, 2127, -2746, 3763, 4847,
+ 3698, 6978, 4751, -6957, -3581, -45, 6252, 1513,
+ -4797, -7925, 11270, 16188, -2359, -5269, 9376, -10777,
+ 7262, 20031, -6515, -2208, -5353, 8085, -1341, -1303,
+ 7333, 5576, 3625, 5763, -7931, 9833, -3371, -10305,
+ 6534, -13539, -9971, 997, 8464, -4064, -1495, 1857,
+ 13624, 5458, 9490, -11086, -4524, 12022, -550, -198,
+ 408, -8455, -7068, 10289, 9712, -3366, 9028, -7621,
+ -5243, 2362, 6909, 4672, -4933, -1799, 4709, -4563,
+ -62, -566, 1624, -7010, 14730, -17791, -3697, -2344,
+ -1741, 7099, -9509, -6855, -1989, 3495, -2289, 2031,
+ 12784, 891, 14189, -3963, -5683, 421, -12575, 1724,
+ -12682, -5970, -8169, 3143, -1824, -5488, -5130, 8536,
+ 12799, 794, 5738, 3459, -11689, -258, -3738, -3775,
+ -8742, 2333, 8312, -9383, 10331, 13119, 8398, 10644,
+ -19433, -6446, -16277, -11793, 16284, 9345, 15222, 15834,
+ 2009, -7349, 130, -14547, 338, -5998, 3337, 21492,
+ 2406, 7703, -951, 11196, -564, 3406, 2217, 4806,
+ 2374, -5797, 11839, 8940, -11874, 18213, 2855, 10492
+};
+
+// Advances |*seed| one step: multiplies by 69069, adds 1 and masks the result
+// with kMaxSeedUsed - 1. Stores the new value in |*seed| and returns it.
+static uint32_t IncreaseSeed(uint32_t* seed) {
+  const uint32_t next = *seed * ((int32_t)69069) + 1;
+
+  *seed = next & (kMaxSeedUsed - 1);
+  return *seed;
+}
+
+// Advances |seed| and returns bits 16 and up of the new seed value as an
+// int16_t.
+int16_t WebRtcSpl_RandU(uint32_t* seed) {
+  const uint32_t state = IncreaseSeed(seed);
+
+  return (int16_t)(state >> 16);
+}
+
+// Advances |seed| and returns the kRandNTable entry selected by the new seed.
+// IncreaseSeed() yields a value masked to 31 bits, so after the shift by 23
+// the index lies in [0, 255].
+int16_t WebRtcSpl_RandN(uint32_t* seed) {
+  const uint32_t index = IncreaseSeed(seed) >> 23;
+
+  return kRandNTable[index];
+}
+
+// Fills |vector| with |vector_length| values drawn from WebRtcSpl_RandU(),
+// advancing |seed| once per element. Returns |vector_length| unchanged;
+// nothing is written when |vector_length| is zero or negative.
+int16_t WebRtcSpl_RandUArray(int16_t* vector,
+                             int16_t vector_length,
+                             uint32_t* seed) {
+  int16_t* out = vector;
+  int remaining = vector_length;
+
+  while (remaining-- > 0) {
+    *out++ = WebRtcSpl_RandU(seed);
+  }
+  return vector_length;
+}
diff --git a/third_party/webrtc/src/webrtc/common_audio/signal_processing/real_fft.c b/third_party/webrtc/src/webrtc/common_audio/signal_processing/real_fft.c
new file mode 100644
index 00000000..92daae4d
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/real_fft.c
@@ -0,0 +1,102 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/common_audio/signal_processing/include/real_fft.h"
+
+#include <stdlib.h>
+
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+
+// State for the real-valued FFT wrappers; allocated by
+// WebRtcSpl_CreateRealFFT() and released by WebRtcSpl_FreeRealFFT().
+struct RealFFT {
+  int order;  // log2 of the FFT length: the wrappers use n = 1 << order.
+};
+
+// Allocates a RealFFT instance for the given |order|.
+// Returns NULL when |order| is negative or larger than kMaxFFTOrder, or when
+// the allocation fails. Release the result with WebRtcSpl_FreeRealFFT().
+struct RealFFT* WebRtcSpl_CreateRealFFT(int order) {
+  struct RealFFT* fft;
+
+  if (order < 0 || order > kMaxFFTOrder) {
+    return NULL;
+  }
+
+  fft = malloc(sizeof *fft);
+  if (fft != NULL) {
+    fft->order = order;
+  }
+  return fft;
+}
+
+// Releases an instance obtained from WebRtcSpl_CreateRealFFT().
+// Passing NULL is allowed: free() ignores a null pointer.
+void WebRtcSpl_FreeRealFFT(struct RealFFT* self) {
+  free(self);
+}
+
+// The C version FFT functions (i.e. WebRtcSpl_RealForwardFFT and
+// WebRtcSpl_RealInverseFFT) are real-valued FFT wrappers for complex-valued
+// FFT implementation in SPL.
+
+// Forward FFT of n = 1 << self->order real samples.
+// Reads n values from |real_data_in| and writes n + 2 interleaved
+// (real, imaginary) values to |complex_data_out|. Returns whatever
+// WebRtcSpl_ComplexFFT() returns. |self| must not be NULL.
+int WebRtcSpl_RealForwardFFT(struct RealFFT* self,
+                             const int16_t* real_data_in,
+                             int16_t* complex_data_out) {
+  const int n = 1 << self->order;
+  // The complex-value FFT implementation needs a buffer to hold 2^order
+  // 16-bit COMPLEX numbers, for both time and frequency data.
+  int16_t complex_buffer[2 << kMaxFFTOrder];
+  int k;
+  int result;
+
+  // Interleave the real input with zero imaginary parts.
+  for (k = 0; k < n; k++) {
+    complex_buffer[2 * k] = real_data_in[k];
+    complex_buffer[2 * k + 1] = 0;
+  }
+
+  WebRtcSpl_ComplexBitReverse(complex_buffer, self->order);
+  result = WebRtcSpl_ComplexFFT(complex_buffer, self->order, 1);
+
+  // For real FFT output, use only the first N + 2 elements from
+  // complex forward FFT.
+  memcpy(complex_data_out, complex_buffer, sizeof(int16_t) * (n + 2));
+
+  return result;
+}
+
+// Inverse FFT producing n = 1 << self->order real samples.
+// Reads n + 2 interleaved (real, imaginary) values from |complex_data_in|
+// and writes n values to |real_data_out|. Returns whatever
+// WebRtcSpl_ComplexIFFT() returns. |self| must not be NULL.
+int WebRtcSpl_RealInverseFFT(struct RealFFT* self,
+                             const int16_t* complex_data_in,
+                             int16_t* real_data_out) {
+  const int n = 1 << self->order;
+  // Create the buffer specific to complex-valued FFT implementation.
+  int16_t complex_buffer[2 << kMaxFFTOrder];
+  int dst;
+  int src;
+  int k;
+  int result;
+
+  // The first n + 2 elements are taken as they are.
+  memcpy(complex_buffer, complex_data_in, sizeof(int16_t) * (n + 2));
+
+  // The remaining n - 2 elements follow from conjugate symmetry: walk the
+  // input backwards while filling the buffer forwards, negating the
+  // imaginary parts.
+  for (dst = n + 2, src = n - 2; dst < 2 * n; dst += 2, src -= 2) {
+    complex_buffer[dst] = complex_data_in[src];
+    complex_buffer[dst + 1] = -complex_data_in[src + 1];
+  }
+
+  WebRtcSpl_ComplexBitReverse(complex_buffer, self->order);
+  result = WebRtcSpl_ComplexIFFT(complex_buffer, self->order, 1);
+
+  // Keep only the real parts of the complex inverse FFT output.
+  for (k = 0; k < n; k++) {
+    real_data_out[k] = complex_buffer[2 * k];
+  }
+
+  return result;
+}
diff --git a/third_party/webrtc/src/webrtc/common_audio/signal_processing/real_fft_unittest.cc b/third_party/webrtc/src/webrtc/common_audio/signal_processing/real_fft_unittest.cc
new file mode 100644
index 00000000..9bd35cd6
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/real_fft_unittest.cc
@@ -0,0 +1,108 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/common_audio/signal_processing/include/real_fft.h"
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+#include "webrtc/test/testsupport/gtest_disable.h"
+#include "webrtc/typedefs.h"
+
+#include "testing/gtest/include/gtest/gtest.h"
+
+namespace webrtc {
+namespace {
+
+// FFT order.
+const int kOrder = 5;
+// Lengths for real FFT's time and frequency buffers.
+// For N-point FFT, the length requirements from API are N and N+2 respectively.
+const int kTimeDataLength = 1 << kOrder;
+const int kFreqDataLength = (1 << kOrder) + 2;
+// For complex FFT's time and freq buffer. The implementation requires
+// 2*N 16-bit words.
+const int kComplexFftDataLength = 2 << kOrder;
+// Reference data for time signal.
+const int16_t kRefData[kTimeDataLength] = {
+ 11739, 6848, -8688, 31980, -30295, 25242, 27085, 19410,
+ -26299, 15607, -10791, 11778, -23819, 14498, -25772, 10076,
+ 1173, 6848, -8688, 31980, -30295, 2522, 27085, 19410,
+ -2629, 5607, -3, 1178, -23819, 1498, -25772, 10076
+};
+
+// Test fixture: calls WebRtcSpl_Init() before each test in this file runs.
+class RealFFTTest : public ::testing::Test {
+ protected:
+ RealFFTTest() {
+ WebRtcSpl_Init();
+ }
+};
+
+// Creation must fail (return NULL) for orders outside the accepted range.
+TEST_F(RealFFTTest, CreateFailsOnBadInput) {
+  // Presumably above kMaxFFTOrder -- confirm against the header.
+  RealFFT* too_large = WebRtcSpl_CreateRealFFT(11);
+  EXPECT_TRUE(too_large == NULL);
+
+  // Negative orders are rejected.
+  RealFFT* negative = WebRtcSpl_CreateRealFFT(-1);
+  EXPECT_TRUE(negative == NULL);
+}
+
+// Runs the real-valued FFT wrappers and the complex FFT on the same reference
+// signal and checks that they agree: exactly for the forward transform,
+// within +/-1 for the inverse transform.
+TEST_F(RealFFTTest, RealAndComplexMatch) {
+  int i = 0;
+  int j = 0;
+  int16_t real_fft_time[kTimeDataLength] = {0};
+  int16_t real_fft_freq[kFreqDataLength] = {0};
+  // One common buffer for complex FFT's time and frequency data.
+  int16_t complex_fft_buff[kComplexFftDataLength] = {0};
+
+  // Prepare the inputs to forward FFT's.
+  memcpy(real_fft_time, kRefData, sizeof(kRefData));
+  for (i = 0, j = 0; i < kTimeDataLength; i += 1, j += 2) {
+    complex_fft_buff[j] = kRefData[i];
+    complex_fft_buff[j + 1] = 0;  // Insert zero's to imaginary parts.
+  }
+
+  // Create and run real forward FFT.
+  RealFFT* fft = WebRtcSpl_CreateRealFFT(kOrder);
+  // Fatal assertion: the FFT calls below dereference |fft|, so the test must
+  // stop here instead of crashing when creation fails.
+  ASSERT_TRUE(fft != NULL);
+  EXPECT_EQ(0, WebRtcSpl_RealForwardFFT(fft, real_fft_time, real_fft_freq));
+
+  // Run complex forward FFT.
+  WebRtcSpl_ComplexBitReverse(complex_fft_buff, kOrder);
+  EXPECT_EQ(0, WebRtcSpl_ComplexFFT(complex_fft_buff, kOrder, 1));
+
+  // Verify the results between complex and real forward FFT.
+  for (i = 0; i < kFreqDataLength; i++) {
+    EXPECT_EQ(real_fft_freq[i], complex_fft_buff[i]);
+  }
+
+  // Prepare the inputs to inverse real FFT.
+  // We use whatever data in complex_fft_buff[] since we don't care
+  // about data contents. Only kFreqDataLength 16-bit words are copied
+  // from complex_fft_buff to real_fft_freq since remaining words (2nd half)
+  // are conjugate-symmetric to the first half in theory.
+  memcpy(real_fft_freq, complex_fft_buff, sizeof(real_fft_freq));
+
+  // Run real inverse FFT.
+  int real_scale = WebRtcSpl_RealInverseFFT(fft, real_fft_freq, real_fft_time);
+  EXPECT_GE(real_scale, 0);
+
+  // Run complex inverse FFT.
+  WebRtcSpl_ComplexBitReverse(complex_fft_buff, kOrder);
+  int complex_scale = WebRtcSpl_ComplexIFFT(complex_fft_buff, kOrder, 1);
+
+  // Verify the results between complex and real inverse FFT.
+  // They are not bit-exact, since complex IFFT doesn't produce
+  // exactly conjugate-symmetric data (between first and second half).
+  EXPECT_EQ(real_scale, complex_scale);
+  for (i = 0, j = 0; i < kTimeDataLength; i += 1, j += 2) {
+    EXPECT_LE(abs(real_fft_time[i] - complex_fft_buff[j]), 1);
+  }
+
+  WebRtcSpl_FreeRealFFT(fft);
+}
+
+} // namespace
+} // namespace webrtc
diff --git a/third_party/webrtc/src/webrtc/common_audio/signal_processing/refl_coef_to_lpc.c b/third_party/webrtc/src/webrtc/common_audio/signal_processing/refl_coef_to_lpc.c
new file mode 100644
index 00000000..06a29b66
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/refl_coef_to_lpc.c
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+/*
+ * This file contains the function WebRtcSpl_ReflCoefToLpc().
+ * The description header can be found in signal_processing_library.h
+ *
+ */
+
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+
+// Converts the reflection coefficients in |k| into LPC coefficients in |a|.
+// a[0] is always set to 4096 and a[1] to k[0] >> 3; each further order m
+// recomputes a[1..m] from the previous values and appends a[m + 1].
+// |a| receives use_order + 1 values. The scratch array holds
+// WEBRTC_SPL_MAX_LPC_ORDER + 1 entries and is indexed up to |use_order|, so
+// |use_order| must not exceed WEBRTC_SPL_MAX_LPC_ORDER.
+void WebRtcSpl_ReflCoefToLpc(const int16_t *k, int use_order, int16_t *a)
+{
+    int16_t any[WEBRTC_SPL_MAX_LPC_ORDER + 1];
+    int m, i;
+
+    a[0] = 4096; // i.e., (Word16_MAX >> 3)+1.
+    any[0] = a[0];
+    a[1] = k[0] >> 3;
+
+    for (m = 1; m < use_order; m++)
+    {
+        const int16_t refl = k[m];
+
+        // new highest-order coefficient
+        any[m + 1] = refl >> 3;
+
+        // update the lower coefficients, pairing a[i] with a[m + 1 - i]
+        for (i = 1; i <= m; i++)
+        {
+            any[i] = a[i] + (int16_t)((a[m + 1 - i] * refl) >> 15);
+        }
+
+        // copy the scratch values back (m + 2 entries)
+        for (i = 0; i <= m + 1; i++)
+        {
+            a[i] = any[i];
+        }
+    }
+}
diff --git a/third_party/webrtc/src/webrtc/common_audio/signal_processing/resample.c b/third_party/webrtc/src/webrtc/common_audio/signal_processing/resample.c
new file mode 100644
index 00000000..45fe52aa
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/resample.c
@@ -0,0 +1,505 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+/*
+ * This file contains the resampling functions for 22 kHz.
+ * The description header can be found in signal_processing_library.h
+ *
+ */
+
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+#include "webrtc/common_audio/signal_processing/resample_by_2_internal.h"
+
+// Declaration of internally used functions; both are defined later in this
+// file. Only the IntToShort variant is declared static here.
+static void WebRtcSpl_32khzTo22khzIntToShort(const int32_t *In, int16_t *Out,
+ int32_t K);
+
+void WebRtcSpl_32khzTo22khzIntToInt(const int32_t *In, int32_t *Out,
+ int32_t K);
+
+// interpolation coefficients
+// Five rows of nine filter taps. In the 32 -> 22 kHz resamplers, row j is
+// applied forwards to produce Out[1 + j] and backwards (mirrored input walk)
+// to produce Out[10 - j].
+static const int16_t kCoefficients32To22[5][9] = {
+ {127, -712, 2359, -6333, 23456, 16775, -3695, 945, -154},
+ {-39, 230, -830, 2785, 32366, -2324, 760, -218, 38},
+ {117, -663, 2222, -6133, 26634, 13070, -3174, 831, -137},
+ {-77, 457, -1677, 5958, 31175, -4136, 1405, -408, 71},
+ { 98, -560, 1900, -5406, 29240, 9423, -2480, 663, -110}
+};
+
+//////////////////////
+// 22 kHz -> 16 kHz //
+//////////////////////
+
+// number of subblocks; options: 1, 2, 4, 5, 10
+#define SUB_BLOCKS_22_16 5
+
+// 22 -> 16 resampler
+// Consumes 220 samples from |in| and produces 160 samples in |out|, working
+// in SUB_BLOCKS_22_16 sub-blocks (to reduce temp buffer size). Each sub-block
+// runs the chain 22 -> 44 -> 32 -> 16 kHz through the scratch buffer |tmpmem|;
+// the filter history of every stage is carried in |state|.
+void WebRtcSpl_Resample22khzTo16khz(const int16_t* in, int16_t* out,
+                                    WebRtcSpl_State22khzTo16khz* state, int32_t* tmpmem)
+{
+    // samples handled per sub-block at each rate
+    const int len22 = 220 / SUB_BLOCKS_22_16;
+    const int len44 = 440 / SUB_BLOCKS_22_16;
+    const int len32 = 320 / SUB_BLOCKS_22_16;
+    const int len16 = 160 / SUB_BLOCKS_22_16;
+    int block;
+    int s;
+
+    for (block = 0; block < SUB_BLOCKS_22_16; block++)
+    {
+        ///// 22 --> 44 /////
+        // int16_t in[len22] -> int32_t tmpmem[16 .. 16 + len44)
+        WebRtcSpl_UpBy2ShortToInt(in, len22, tmpmem + 16, state->S_22_44);
+
+        ///// 44 --> 32 /////
+        // int32_t in[len44] -> int32_t out[len32]
+        // put the 8 saved samples in front of the new data ...
+        for (s = 0; s < 8; s++)
+        {
+            tmpmem[8 + s] = state->S_44_32[s];
+        }
+        // ... and save the last 8 new samples for the next sub-block
+        for (s = 0; s < 8; s++)
+        {
+            state->S_44_32[s] = tmpmem[len44 + 8 + s];
+        }
+
+        WebRtcSpl_Resample44khzTo32khz(tmpmem + 8, tmpmem, 40 / SUB_BLOCKS_22_16);
+
+        ///// 32 --> 16 /////
+        // int32_t in[len32] -> int16_t out[len16]
+        WebRtcSpl_DownBy2IntToShort(tmpmem, len32, out, state->S_32_16);
+
+        // advance input/output pointers by one sub-block
+        in += len22;
+        out += len16;
+    }
+}
+
+// initialize state of 22 -> 16 resampler
+// Zeroes the first 8 entries of each filter-state array in |state|.
+void WebRtcSpl_ResetResample22khzTo16khz(WebRtcSpl_State22khzTo16khz* state)
+{
+    int idx = 8;
+
+    while (idx-- > 0)
+    {
+        state->S_22_44[idx] = 0;
+        state->S_44_32[idx] = 0;
+        state->S_32_16[idx] = 0;
+    }
+}
+
+//////////////////////
+// 16 kHz -> 22 kHz //
+//////////////////////
+
+// number of subblocks; options: 1, 2, 4, 5, 10
+#define SUB_BLOCKS_16_22 4
+
+// 16 -> 22 resampler
+// Consumes 160 samples from |in| and produces 220 samples in |out|, working
+// in SUB_BLOCKS_16_22 sub-blocks (to reduce temp buffer size). Each sub-block
+// runs the chain 16 -> 32 -> 22 kHz through the scratch buffer |tmpmem|; the
+// filter history of both stages is carried in |state|.
+void WebRtcSpl_Resample16khzTo22khz(const int16_t* in, int16_t* out,
+                                    WebRtcSpl_State16khzTo22khz* state, int32_t* tmpmem)
+{
+    // samples handled per sub-block at each rate
+    const int len16 = 160 / SUB_BLOCKS_16_22;
+    const int len32 = 320 / SUB_BLOCKS_16_22;
+    const int len22 = 220 / SUB_BLOCKS_16_22;
+    int block;
+    int s;
+
+    for (block = 0; block < SUB_BLOCKS_16_22; block++)
+    {
+        ///// 16 --> 32 /////
+        // int16_t in[len16] -> int32_t tmpmem[8 .. 8 + len32)
+        WebRtcSpl_UpBy2ShortToInt(in, len16, tmpmem + 8, state->S_16_32);
+
+        ///// 32 --> 22 /////
+        // int32_t in[len32] -> int16_t out[len22]
+        // put the 8 saved samples in front of the new data ...
+        for (s = 0; s < 8; s++)
+        {
+            tmpmem[s] = state->S_32_22[s];
+        }
+        // ... and save the last 8 new samples for the next sub-block
+        for (s = 0; s < 8; s++)
+        {
+            state->S_32_22[s] = tmpmem[len32 + s];
+        }
+
+        WebRtcSpl_32khzTo22khzIntToShort(tmpmem, out, 20 / SUB_BLOCKS_16_22);
+
+        // advance input/output pointers by one sub-block
+        in += len16;
+        out += len22;
+    }
+}
+
+// initialize state of 16 -> 22 resampler
+// Zeroes the first 8 entries of each filter-state array in |state|.
+void WebRtcSpl_ResetResample16khzTo22khz(WebRtcSpl_State16khzTo22khz* state)
+{
+    int idx = 8;
+
+    while (idx-- > 0)
+    {
+        state->S_16_32[idx] = 0;
+        state->S_32_22[idx] = 0;
+    }
+}
+
+//////////////////////
+// 22 kHz -> 8 kHz //
+//////////////////////
+
+// number of subblocks; options: 1, 2, 5, 10
+#define SUB_BLOCKS_22_8 2
+
+// 22 -> 8 resampler
+// Consumes 220 samples from |in| and produces 80 samples in |out|, working
+// in SUB_BLOCKS_22_8 sub-blocks (to reduce temp buffer size). Each sub-block
+// runs a 22 kHz low-pass, then 22 -> 16 -> 8 kHz, through the scratch buffer
+// |tmpmem|; the filter history of every stage is carried in |state|.
+void WebRtcSpl_Resample22khzTo8khz(const int16_t* in, int16_t* out,
+                                   WebRtcSpl_State22khzTo8khz* state, int32_t* tmpmem)
+{
+    // samples handled per sub-block at each rate
+    const int len22 = 220 / SUB_BLOCKS_22_8;
+    const int len16 = 160 / SUB_BLOCKS_22_8;
+    const int len8 = 80 / SUB_BLOCKS_22_8;
+    int block;
+    int s;
+
+    for (block = 0; block < SUB_BLOCKS_22_8; block++)
+    {
+        ///// 22 --> 22 lowpass /////
+        // int16_t in[len22] -> int32_t tmpmem[16 .. 16 + len22)
+        WebRtcSpl_LPBy2ShortToInt(in, len22, tmpmem + 16, state->S_22_22);
+
+        ///// 22 --> 16 /////
+        // int32_t in[len22] -> int32_t out[len16]
+        // put the 8 saved samples in front of the new data ...
+        for (s = 0; s < 8; s++)
+        {
+            tmpmem[8 + s] = state->S_22_16[s];
+        }
+        // ... and save the last 8 new samples for the next sub-block
+        for (s = 0; s < 8; s++)
+        {
+            state->S_22_16[s] = tmpmem[len22 + 8 + s];
+        }
+
+        WebRtcSpl_Resample44khzTo32khz(tmpmem + 8, tmpmem, 20 / SUB_BLOCKS_22_8);
+
+        ///// 16 --> 8 /////
+        // int32_t in[len16] -> int16_t out[len8]
+        WebRtcSpl_DownBy2IntToShort(tmpmem, len16, out, state->S_16_8);
+
+        // advance input/output pointers by one sub-block
+        in += len22;
+        out += len8;
+    }
+}
+
+// initialize state of 22 -> 8 resampler
+// Zeroes 16 entries of S_22_22 and the first 8 entries of the other
+// filter-state arrays in |state|.
+void WebRtcSpl_ResetResample22khzTo8khz(WebRtcSpl_State22khzTo8khz* state)
+{
+    int idx = 8;
+
+    while (idx-- > 0)
+    {
+        state->S_22_22[idx] = 0;
+        state->S_22_22[idx + 8] = 0;
+        state->S_22_16[idx] = 0;
+        state->S_16_8[idx] = 0;
+    }
+}
+
+//////////////////////
+// 8 kHz -> 22 kHz //
+//////////////////////
+
+// number of subblocks; options: 1, 2, 5, 10
+#define SUB_BLOCKS_8_22 2
+
+// 8 -> 22 resampler
+// Consumes 80 samples from |in| and produces 220 samples in |out|, working
+// in SUB_BLOCKS_8_22 sub-blocks (to reduce temp buffer size). Each sub-block
+// runs the chain 8 -> 16 -> 11 -> 22 kHz through the scratch buffer |tmpmem|;
+// the filter history of every stage is carried in |state|.
+void WebRtcSpl_Resample8khzTo22khz(const int16_t* in, int16_t* out,
+                                   WebRtcSpl_State8khzTo22khz* state, int32_t* tmpmem)
+{
+    // samples handled per sub-block at each rate
+    const int len8 = 80 / SUB_BLOCKS_8_22;
+    const int len16 = 160 / SUB_BLOCKS_8_22;
+    const int len11 = 110 / SUB_BLOCKS_8_22;
+    const int len22 = 220 / SUB_BLOCKS_8_22;
+    int block;
+    int s;
+
+    for (block = 0; block < SUB_BLOCKS_8_22; block++)
+    {
+        ///// 8 --> 16 /////
+        // int16_t in[len8] -> int32_t tmpmem[18 .. 18 + len16)
+        WebRtcSpl_UpBy2ShortToInt(in, len8, tmpmem + 18, state->S_8_16);
+
+        ///// 16 --> 11 /////
+        // int32_t in[len16] -> int32_t out[len11]
+        // put the 8 saved samples in front of the new data ...
+        for (s = 0; s < 8; s++)
+        {
+            tmpmem[10 + s] = state->S_16_11[s];
+        }
+        // ... and save the last 8 new samples for the next sub-block
+        for (s = 0; s < 8; s++)
+        {
+            state->S_16_11[s] = tmpmem[len16 + 10 + s];
+        }
+
+        WebRtcSpl_32khzTo22khzIntToInt(tmpmem + 10, tmpmem, 10 / SUB_BLOCKS_8_22);
+
+        ///// 11 --> 22 /////
+        // int32_t in[len11] -> int16_t out[len22]
+        WebRtcSpl_UpBy2IntToShort(tmpmem, len11, out, state->S_11_22);
+
+        // advance input/output pointers by one sub-block
+        in += len8;
+        out += len22;
+    }
+}
+
+// initialize state of 8 -> 22 resampler
+// Zeroes the first 8 entries of each filter-state array in |state|.
+void WebRtcSpl_ResetResample8khzTo22khz(WebRtcSpl_State8khzTo22khz* state)
+{
+    int idx = 8;
+
+    while (idx-- > 0)
+    {
+        state->S_8_16[idx] = 0;
+        state->S_16_11[idx] = 0;
+        state->S_11_22[idx] = 0;
+    }
+}
+
+// compute two inner-products and store them to output array
+// Both sums use the same nine coefficients and start from a 16384 offset:
+//   *out1 = 16384 + sum(coef_ptr[i] * in1[i]),   i = 0..8 (forward walk)
+//   *out2 = 16384 + sum(coef_ptr[i] * in2[-i]),  i = 0..8 (backward walk)
+// No scaling or saturation is applied.
+static void WebRtcSpl_DotProdIntToInt(const int32_t* in1, const int32_t* in2,
+                                      const int16_t* coef_ptr, int32_t* out1,
+                                      int32_t* out2)
+{
+    int32_t acc_fwd = 16384;
+    int32_t acc_bwd = 16384;
+    int16_t last;
+    int tap;
+
+    // taps 0..7 are accumulated into the running sums
+    for (tap = 0; tap < 8; tap++)
+    {
+        const int16_t coef = coef_ptr[tap];
+
+        acc_fwd += coef * in1[tap];
+        acc_bwd += coef * in2[-tap];
+    }
+
+    // the last tap is folded in while storing the results
+    last = coef_ptr[8];
+    *out1 = acc_fwd + last * in1[8];
+    *out2 = acc_bwd + last * in2[-8];
+}
+
+// compute two inner-products and store them to output array
+// Both sums use the same nine coefficients and start from a 16384 rounding
+// offset:
+//   sum1 = 16384 + sum(coef_ptr[i] * in1[i]),   i = 0..8 (forward walk)
+//   sum2 = 16384 + sum(coef_ptr[i] * in2[-i]),  i = 0..8 (backward walk)
+// Each sum is shifted right by 15 and saturated to the int16_t range before
+// being stored in |*out1| / |*out2|.
+static void WebRtcSpl_DotProdIntToShort(const int32_t* in1, const int32_t* in2,
+                                        const int16_t* coef_ptr, int16_t* out1,
+                                        int16_t* out2)
+{
+    int32_t acc_fwd = 16384;
+    int32_t acc_bwd = 16384;
+    int tap;
+
+    for (tap = 0; tap < 9; tap++)
+    {
+        const int16_t coef = coef_ptr[tap];
+
+        acc_fwd += coef * in1[tap];
+        acc_bwd += coef * in2[-tap];
+    }
+
+    // scale down, round and saturate
+    // The bounds are written as plain signed constants: converting the
+    // unsigned literal 0xFFFF8000 to int32_t is implementation-defined.
+    acc_fwd >>= 15;
+    if (acc_fwd > 32767)
+        acc_fwd = 32767;
+    if (acc_fwd < -32768)
+        acc_fwd = -32768;
+    acc_bwd >>= 15;
+    if (acc_bwd > 32767)
+        acc_bwd = 32767;
+    if (acc_bwd < -32768)
+        acc_bwd = -32768;
+    *out1 = (int16_t)acc_fwd;
+    *out2 = (int16_t)acc_bwd;
+}
+
+// Resampling ratio: 11/16
+// input: int32_t (normalized, not saturated) :: size 16 * K
+// output: int32_t (shifted 15 positions to the left, + offset 16384) :: size 11 * K
+// K: Number of blocks
+//
+// Note: the filters read In[0] .. In[22] for every block, i.e. a few samples
+// past the 16 that the block advances by.
+
+void WebRtcSpl_32khzTo22khzIntToInt(const int32_t* In,
+                                    int32_t* Out,
+                                    int32_t K)
+{
+    /////////////////////////////////////////////////////////////
+    // Filter operation:
+    //
+    // Perform resampling (16 input samples -> 11 output samples);
+    // process in sub blocks of size 16 samples.
+
+    // Start offsets of the forward and the mirrored (backward) input walk for
+    // each coefficient row.
+    static const int kFwdStart[5] = {0, 2, 3, 5, 6};
+    static const int kBwdStart[5] = {22, 20, 19, 17, 16};
+    int32_t m;
+    int row;
+
+    for (m = 0; m < K; m++)
+    {
+        // first output sample: In[3] scaled by 2^15 plus the 2^14 offset.
+        // Multiplication is used instead of "<< 15" because left-shifting a
+        // negative signed value is undefined behaviour in C.
+        Out[0] = In[3] * 32768 + (1 << 14);
+
+        // sum and accumulate filter coefficients and input samples;
+        // row j yields Out[1 + j] (forward) and Out[10 - j] (backward)
+        for (row = 0; row < 5; row++)
+        {
+            WebRtcSpl_DotProdIntToInt(&In[kFwdStart[row]], &In[kBwdStart[row]],
+                                      kCoefficients32To22[row],
+                                      &Out[1 + row], &Out[10 - row]);
+        }
+
+        // update pointers
+        In += 16;
+        Out += 11;
+    }
+}
+
+// Resampling ratio: 11/16
+// input: int32_t (normalized, not saturated) :: size 16 * K
+// output: int16_t (saturated) :: size 11 * K
+// K: Number of blocks
+//
+// Note: the filters read In[0] .. In[22] for every block, i.e. a few samples
+// past the 16 that the block advances by.
+
+void WebRtcSpl_32khzTo22khzIntToShort(const int32_t *In,
+                                      int16_t *Out,
+                                      int32_t K)
+{
+    /////////////////////////////////////////////////////////////
+    // Filter operation:
+    //
+    // Perform resampling (16 input samples -> 11 output samples);
+    // process in sub blocks of size 16 samples.
+
+    // Start offsets of the forward and the mirrored (backward) input walk for
+    // each coefficient row.
+    static const int kFwdStart[5] = {0, 2, 3, 5, 6};
+    static const int kBwdStart[5] = {22, 20, 19, 17, 16};
+    int32_t tmp;
+    int32_t m;
+    int row;
+
+    for (m = 0; m < K; m++)
+    {
+        // first output sample: In[3] saturated to the int16_t range.
+        // The bounds are written as plain signed constants: converting the
+        // unsigned literal 0xFFFF8000 to int32_t is implementation-defined.
+        tmp = In[3];
+        if (tmp > 32767)
+            tmp = 32767;
+        if (tmp < -32768)
+            tmp = -32768;
+        Out[0] = (int16_t)tmp;
+
+        // sum and accumulate filter coefficients and input samples;
+        // row j yields Out[1 + j] (forward) and Out[10 - j] (backward)
+        for (row = 0; row < 5; row++)
+        {
+            WebRtcSpl_DotProdIntToShort(&In[kFwdStart[row]], &In[kBwdStart[row]],
+                                        kCoefficients32To22[row],
+                                        &Out[1 + row], &Out[10 - row]);
+        }
+
+        // update pointers
+        In += 16;
+        Out += 11;
+    }
+}
diff --git a/third_party/webrtc/src/webrtc/common_audio/signal_processing/resample_48khz.c b/third_party/webrtc/src/webrtc/common_audio/signal_processing/resample_48khz.c
new file mode 100644
index 00000000..2220cc33
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/resample_48khz.c
@@ -0,0 +1,186 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+/*
+ * This file contains resampling functions between 48 kHz and nb/wb.
+ * The description header can be found in signal_processing_library.h
+ *
+ */
+
+#include <string.h>
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+#include "webrtc/common_audio/signal_processing/resample_by_2_internal.h"
+
+////////////////////////////
+///// 48 kHz -> 16 kHz /////
+////////////////////////////
+
+// 48 -> 16 resampler
+void WebRtcSpl_Resample48khzTo16khz(const int16_t* in, int16_t* out,
+ WebRtcSpl_State48khzTo16khz* state, int32_t* tmpmem)
+{
+ ///// 48 --> 48(LP) /////
+ // int16_t in[480]
+ // int32_t out[480]
+ /////
+ WebRtcSpl_LPBy2ShortToInt(in, 480, tmpmem + 16, state->S_48_48);
+
+ ///// 48 --> 32 /////
+ // int32_t in[480]
+ // int32_t out[320]
+ /////
+ // copy state to and from input array
+ memcpy(tmpmem + 8, state->S_48_32, 8 * sizeof(int32_t));
+ memcpy(state->S_48_32, tmpmem + 488, 8 * sizeof(int32_t));
+ WebRtcSpl_Resample48khzTo32khz(tmpmem + 8, tmpmem, 160);
+
+ ///// 32 --> 16 /////
+ // int32_t in[320]
+ // int16_t out[160]
+ /////
+ WebRtcSpl_DownBy2IntToShort(tmpmem, 320, out, state->S_32_16);
+}
+
+// initialize state of 48 -> 16 resampler
+void WebRtcSpl_ResetResample48khzTo16khz(WebRtcSpl_State48khzTo16khz* state)
+{
+ memset(state->S_48_48, 0, 16 * sizeof(int32_t));
+ memset(state->S_48_32, 0, 8 * sizeof(int32_t));
+ memset(state->S_32_16, 0, 8 * sizeof(int32_t));
+}
+
+////////////////////////////
+///// 16 kHz -> 48 kHz /////
+////////////////////////////
+
+// 16 -> 48 resampler
+void WebRtcSpl_Resample16khzTo48khz(const int16_t* in, int16_t* out,
+ WebRtcSpl_State16khzTo48khz* state, int32_t* tmpmem)
+{
+ ///// 16 --> 32 /////
+ // int16_t in[160]
+ // int32_t out[320]
+ /////
+ WebRtcSpl_UpBy2ShortToInt(in, 160, tmpmem + 16, state->S_16_32);
+
+ ///// 32 --> 24 /////
+ // int32_t in[320]
+ // int32_t out[240]
+ // copy state to and from input array
+ /////
+ memcpy(tmpmem + 8, state->S_32_24, 8 * sizeof(int32_t));
+ memcpy(state->S_32_24, tmpmem + 328, 8 * sizeof(int32_t));
+ WebRtcSpl_Resample32khzTo24khz(tmpmem + 8, tmpmem, 80);
+
+ ///// 24 --> 48 /////
+ // int32_t in[240]
+ // int16_t out[480]
+ /////
+ WebRtcSpl_UpBy2IntToShort(tmpmem, 240, out, state->S_24_48);
+}
+
+// initialize state of 16 -> 48 resampler
+void WebRtcSpl_ResetResample16khzTo48khz(WebRtcSpl_State16khzTo48khz* state)
+{
+ memset(state->S_16_32, 0, 8 * sizeof(int32_t));
+ memset(state->S_32_24, 0, 8 * sizeof(int32_t));
+ memset(state->S_24_48, 0, 8 * sizeof(int32_t));
+}
+
+////////////////////////////
+///// 48 kHz -> 8 kHz /////
+////////////////////////////
+
+// 48 -> 8 resampler
+void WebRtcSpl_Resample48khzTo8khz(const int16_t* in, int16_t* out,
+ WebRtcSpl_State48khzTo8khz* state, int32_t* tmpmem)
+{
+ ///// 48 --> 24 /////
+ // int16_t in[480]
+ // int32_t out[240]
+ /////
+ WebRtcSpl_DownBy2ShortToInt(in, 480, tmpmem + 256, state->S_48_24);
+
+ ///// 24 --> 24(LP) /////
+ // int32_t in[240]
+ // int32_t out[240]
+ /////
+ WebRtcSpl_LPBy2IntToInt(tmpmem + 256, 240, tmpmem + 16, state->S_24_24);
+
+ ///// 24 --> 16 /////
+ // int32_t in[240]
+ // int32_t out[160]
+ /////
+ // copy state to and from input array
+ memcpy(tmpmem + 8, state->S_24_16, 8 * sizeof(int32_t));
+ memcpy(state->S_24_16, tmpmem + 248, 8 * sizeof(int32_t));
+ WebRtcSpl_Resample48khzTo32khz(tmpmem + 8, tmpmem, 80);
+
+ ///// 16 --> 8 /////
+ // int32_t in[160]
+ // int16_t out[80]
+ /////
+ WebRtcSpl_DownBy2IntToShort(tmpmem, 160, out, state->S_16_8);
+}
+
+// initialize state of 48 -> 8 resampler
+void WebRtcSpl_ResetResample48khzTo8khz(WebRtcSpl_State48khzTo8khz* state)
+{
+ memset(state->S_48_24, 0, 8 * sizeof(int32_t));
+ memset(state->S_24_24, 0, 16 * sizeof(int32_t));
+ memset(state->S_24_16, 0, 8 * sizeof(int32_t));
+ memset(state->S_16_8, 0, 8 * sizeof(int32_t));
+}
+
+////////////////////////////
+///// 8 kHz -> 48 kHz /////
+////////////////////////////
+
+// 8 -> 48 resampler
+void WebRtcSpl_Resample8khzTo48khz(const int16_t* in, int16_t* out,
+ WebRtcSpl_State8khzTo48khz* state, int32_t* tmpmem)
+{
+ ///// 8 --> 16 /////
+ // int16_t in[80]
+ // int32_t out[160]
+ /////
+ WebRtcSpl_UpBy2ShortToInt(in, 80, tmpmem + 264, state->S_8_16);
+
+ ///// 16 --> 12 /////
+ // int32_t in[160]
+ // int32_t out[120]
+ /////
+ // copy state to and from input array
+ memcpy(tmpmem + 256, state->S_16_12, 8 * sizeof(int32_t));
+ memcpy(state->S_16_12, tmpmem + 416, 8 * sizeof(int32_t));
+ WebRtcSpl_Resample32khzTo24khz(tmpmem + 256, tmpmem + 240, 40);
+
+ ///// 12 --> 24 /////
+ // int32_t in[120]
+ // int32_t out[240]
+ /////
+ WebRtcSpl_UpBy2IntToInt(tmpmem + 240, 120, tmpmem, state->S_12_24);
+
+ ///// 24 --> 48 /////
+ // int32_t in[240]
+ // int16_t out[480]
+ /////
+ WebRtcSpl_UpBy2IntToShort(tmpmem, 240, out, state->S_24_48);
+}
+
+// initialize state of 8 -> 48 resampler
+void WebRtcSpl_ResetResample8khzTo48khz(WebRtcSpl_State8khzTo48khz* state)
+{
+ memset(state->S_8_16, 0, 8 * sizeof(int32_t));
+ memset(state->S_16_12, 0, 8 * sizeof(int32_t));
+ memset(state->S_12_24, 0, 8 * sizeof(int32_t));
+ memset(state->S_24_48, 0, 8 * sizeof(int32_t));
+}
diff --git a/third_party/webrtc/src/webrtc/common_audio/signal_processing/resample_by_2.c b/third_party/webrtc/src/webrtc/common_audio/signal_processing/resample_by_2.c
new file mode 100644
index 00000000..dcba82e3
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/resample_by_2.c
@@ -0,0 +1,183 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+/*
+ * This file contains the resampling by two functions.
+ * The description header can be found in signal_processing_library.h
+ *
+ */
+
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+
+#ifdef WEBRTC_ARCH_ARM_V7
+
+// allpass filter coefficients.
+static const uint32_t kResampleAllpass1[3] = {3284, 24441, 49528 << 15};
+static const uint32_t kResampleAllpass2[3] =
+ {12199, 37471 << 15, 60255 << 15};
+
+// Multiply two 32-bit values and accumulate to another input value.
+// Return: state + ((diff * tbl_value) >> 16)
+
+static __inline int32_t MUL_ACCUM_1(int32_t tbl_value,
+ int32_t diff,
+ int32_t state) {
+ int32_t result;
+ __asm __volatile ("smlawb %0, %1, %2, %3": "=r"(result): "r"(diff),
+ "r"(tbl_value), "r"(state));
+ return result;
+}
+
+// Multiply two 32-bit values and accumulate to another input value.
+// Return: state + (((diff << 1) * tbl_value) >> 32)
+//
+// The reason to introduce this function is that, in case we can't use smlawb
+// instruction (in MUL_ACCUM_1) due to input value range, we can still use
+// smmla to save some cycles.
+
+static __inline int32_t MUL_ACCUM_2(int32_t tbl_value,
+ int32_t diff,
+ int32_t state) {
+ int32_t result;
+ __asm __volatile ("smmla %0, %1, %2, %3": "=r"(result): "r"(diff << 1),
+ "r"(tbl_value), "r"(state));
+ return result;
+}
+
+#else
+
+// allpass filter coefficients.
+static const uint16_t kResampleAllpass1[3] = {3284, 24441, 49528};
+static const uint16_t kResampleAllpass2[3] = {12199, 37471, 60255};
+
+// Multiply a 32-bit value with a 16-bit value and accumulate to another input:
+#define MUL_ACCUM_1(a, b, c) WEBRTC_SPL_SCALEDIFF32(a, b, c)
+#define MUL_ACCUM_2(a, b, c) WEBRTC_SPL_SCALEDIFF32(a, b, c)
+
+#endif // WEBRTC_ARCH_ARM_V7
+
+
+// decimator
+#if !defined(MIPS32_LE)
+void WebRtcSpl_DownsampleBy2(const int16_t* in, size_t len,
+ int16_t* out, int32_t* filtState) {
+ int32_t tmp1, tmp2, diff, in32, out32;
+ size_t i;
+
+ register int32_t state0 = filtState[0];
+ register int32_t state1 = filtState[1];
+ register int32_t state2 = filtState[2];
+ register int32_t state3 = filtState[3];
+ register int32_t state4 = filtState[4];
+ register int32_t state5 = filtState[5];
+ register int32_t state6 = filtState[6];
+ register int32_t state7 = filtState[7];
+
+ for (i = (len >> 1); i > 0; i--) {
+ // lower allpass filter
+ in32 = (int32_t)(*in++) << 10;
+ diff = in32 - state1;
+ tmp1 = MUL_ACCUM_1(kResampleAllpass2[0], diff, state0);
+ state0 = in32;
+ diff = tmp1 - state2;
+ tmp2 = MUL_ACCUM_2(kResampleAllpass2[1], diff, state1);
+ state1 = tmp1;
+ diff = tmp2 - state3;
+ state3 = MUL_ACCUM_2(kResampleAllpass2[2], diff, state2);
+ state2 = tmp2;
+
+ // upper allpass filter
+ in32 = (int32_t)(*in++) << 10;
+ diff = in32 - state5;
+ tmp1 = MUL_ACCUM_1(kResampleAllpass1[0], diff, state4);
+ state4 = in32;
+ diff = tmp1 - state6;
+ tmp2 = MUL_ACCUM_1(kResampleAllpass1[1], diff, state5);
+ state5 = tmp1;
+ diff = tmp2 - state7;
+ state7 = MUL_ACCUM_2(kResampleAllpass1[2], diff, state6);
+ state6 = tmp2;
+
+ // add two allpass outputs, divide by two and round
+ out32 = (state3 + state7 + 1024) >> 11;
+
+ // limit amplitude to prevent wrap-around, and write to output array
+ *out++ = WebRtcSpl_SatW32ToW16(out32);
+ }
+
+ filtState[0] = state0;
+ filtState[1] = state1;
+ filtState[2] = state2;
+ filtState[3] = state3;
+ filtState[4] = state4;
+ filtState[5] = state5;
+ filtState[6] = state6;
+ filtState[7] = state7;
+}
+#endif // #if !defined(MIPS32_LE)
+
+
+void WebRtcSpl_UpsampleBy2(const int16_t* in, size_t len,
+ int16_t* out, int32_t* filtState) {
+ int32_t tmp1, tmp2, diff, in32, out32;
+ size_t i;
+
+ register int32_t state0 = filtState[0];
+ register int32_t state1 = filtState[1];
+ register int32_t state2 = filtState[2];
+ register int32_t state3 = filtState[3];
+ register int32_t state4 = filtState[4];
+ register int32_t state5 = filtState[5];
+ register int32_t state6 = filtState[6];
+ register int32_t state7 = filtState[7];
+
+ for (i = len; i > 0; i--) {
+ // lower allpass filter
+ in32 = (int32_t)(*in++) << 10;
+ diff = in32 - state1;
+ tmp1 = MUL_ACCUM_1(kResampleAllpass1[0], diff, state0);
+ state0 = in32;
+ diff = tmp1 - state2;
+ tmp2 = MUL_ACCUM_1(kResampleAllpass1[1], diff, state1);
+ state1 = tmp1;
+ diff = tmp2 - state3;
+ state3 = MUL_ACCUM_2(kResampleAllpass1[2], diff, state2);
+ state2 = tmp2;
+
+ // round; limit amplitude to prevent wrap-around; write to output array
+ out32 = (state3 + 512) >> 10;
+ *out++ = WebRtcSpl_SatW32ToW16(out32);
+
+ // upper allpass filter
+ diff = in32 - state5;
+ tmp1 = MUL_ACCUM_1(kResampleAllpass2[0], diff, state4);
+ state4 = in32;
+ diff = tmp1 - state6;
+ tmp2 = MUL_ACCUM_2(kResampleAllpass2[1], diff, state5);
+ state5 = tmp1;
+ diff = tmp2 - state7;
+ state7 = MUL_ACCUM_2(kResampleAllpass2[2], diff, state6);
+ state6 = tmp2;
+
+ // round; limit amplitude to prevent wrap-around; write to output array
+ out32 = (state7 + 512) >> 10;
+ *out++ = WebRtcSpl_SatW32ToW16(out32);
+ }
+
+ filtState[0] = state0;
+ filtState[1] = state1;
+ filtState[2] = state2;
+ filtState[3] = state3;
+ filtState[4] = state4;
+ filtState[5] = state5;
+ filtState[6] = state6;
+ filtState[7] = state7;
+}
diff --git a/third_party/webrtc/src/webrtc/common_audio/signal_processing/resample_by_2_internal.c b/third_party/webrtc/src/webrtc/common_audio/signal_processing/resample_by_2_internal.c
new file mode 100644
index 00000000..085069c8
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/resample_by_2_internal.c
@@ -0,0 +1,679 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+/*
+ * This file contains some internal resampling functions.
+ *
+ */
+
+#include "webrtc/common_audio/signal_processing/resample_by_2_internal.h"
+
+// allpass filter coefficients.
+static const int16_t kResampleAllpass[2][3] = {
+ {821, 6110, 12382},
+ {3050, 9368, 15063}
+};
+
+//
+// decimator
+// input: int32_t (shifted 15 positions to the left, + offset 16384) OVERWRITTEN!
+// output: int16_t (saturated) (of length len/2)
+// state: filter state array; length = 8
+
+void WebRtcSpl_DownBy2IntToShort(int32_t *in, int32_t len, int16_t *out,
+ int32_t *state)
+{
+ int32_t tmp0, tmp1, diff;
+ int32_t i;
+
+ len >>= 1;
+
+ // lower allpass filter (operates on even input samples)
+ for (i = 0; i < len; i++)
+ {
+ tmp0 = in[i << 1];
+ diff = tmp0 - state[1];
+ // scale down and round
+ diff = (diff + (1 << 13)) >> 14;
+ tmp1 = state[0] + diff * kResampleAllpass[1][0];
+ state[0] = tmp0;
+ diff = tmp1 - state[2];
+ // scale down and truncate
+ diff = diff >> 14;
+ if (diff < 0)
+ diff += 1;
+ tmp0 = state[1] + diff * kResampleAllpass[1][1];
+ state[1] = tmp1;
+ diff = tmp0 - state[3];
+ // scale down and truncate
+ diff = diff >> 14;
+ if (diff < 0)
+ diff += 1;
+ state[3] = state[2] + diff * kResampleAllpass[1][2];
+ state[2] = tmp0;
+
+ // divide by two and store temporarily
+ in[i << 1] = (state[3] >> 1);
+ }
+
+ in++;
+
+ // upper allpass filter (operates on odd input samples)
+ for (i = 0; i < len; i++)
+ {
+ tmp0 = in[i << 1];
+ diff = tmp0 - state[5];
+ // scale down and round
+ diff = (diff + (1 << 13)) >> 14;
+ tmp1 = state[4] + diff * kResampleAllpass[0][0];
+ state[4] = tmp0;
+ diff = tmp1 - state[6];
+ // scale down and truncate
+ diff = diff >> 14;
+ if (diff < 0)
+ diff += 1;
+ tmp0 = state[5] + diff * kResampleAllpass[0][1];
+ state[5] = tmp1;
+ diff = tmp0 - state[7];
+ // scale down and truncate
+ diff = diff >> 14;
+ if (diff < 0)
+ diff += 1;
+ state[7] = state[6] + diff * kResampleAllpass[0][2];
+ state[6] = tmp0;
+
+ // divide by two and store temporarily
+ in[i << 1] = (state[7] >> 1);
+ }
+
+ in--;
+
+ // combine allpass outputs
+ for (i = 0; i < len; i += 2)
+ {
+ // divide by two, add both allpass outputs and round
+ tmp0 = (in[i << 1] + in[(i << 1) + 1]) >> 15;
+ tmp1 = (in[(i << 1) + 2] + in[(i << 1) + 3]) >> 15;
+ if (tmp0 > (int32_t)0x00007FFF)
+ tmp0 = 0x00007FFF;
+ if (tmp0 < (int32_t)0xFFFF8000)
+ tmp0 = 0xFFFF8000;
+ out[i] = (int16_t)tmp0;
+ if (tmp1 > (int32_t)0x00007FFF)
+ tmp1 = 0x00007FFF;
+ if (tmp1 < (int32_t)0xFFFF8000)
+ tmp1 = 0xFFFF8000;
+ out[i + 1] = (int16_t)tmp1;
+ }
+}
+
+//
+// decimator
+// input: int16_t
+// output: int32_t (shifted 15 positions to the left, + offset 16384) (of length len/2)
+// state: filter state array; length = 8
+
+void WebRtcSpl_DownBy2ShortToInt(const int16_t *in,
+ int32_t len,
+ int32_t *out,
+ int32_t *state)
+{
+ int32_t tmp0, tmp1, diff;
+ int32_t i;
+
+ len >>= 1;
+
+ // lower allpass filter (operates on even input samples)
+ for (i = 0; i < len; i++)
+ {
+ tmp0 = ((int32_t)in[i << 1] << 15) + (1 << 14);
+ diff = tmp0 - state[1];
+ // scale down and round
+ diff = (diff + (1 << 13)) >> 14;
+ tmp1 = state[0] + diff * kResampleAllpass[1][0];
+ state[0] = tmp0;
+ diff = tmp1 - state[2];
+ // scale down and truncate
+ diff = diff >> 14;
+ if (diff < 0)
+ diff += 1;
+ tmp0 = state[1] + diff * kResampleAllpass[1][1];
+ state[1] = tmp1;
+ diff = tmp0 - state[3];
+ // scale down and truncate
+ diff = diff >> 14;
+ if (diff < 0)
+ diff += 1;
+ state[3] = state[2] + diff * kResampleAllpass[1][2];
+ state[2] = tmp0;
+
+ // divide by two and store temporarily
+ out[i] = (state[3] >> 1);
+ }
+
+ in++;
+
+ // upper allpass filter (operates on odd input samples)
+ for (i = 0; i < len; i++)
+ {
+ tmp0 = ((int32_t)in[i << 1] << 15) + (1 << 14);
+ diff = tmp0 - state[5];
+ // scale down and round
+ diff = (diff + (1 << 13)) >> 14;
+ tmp1 = state[4] + diff * kResampleAllpass[0][0];
+ state[4] = tmp0;
+ diff = tmp1 - state[6];
+ // scale down and truncate
+ diff = diff >> 14;
+ if (diff < 0)
+ diff += 1;
+ tmp0 = state[5] + diff * kResampleAllpass[0][1];
+ state[5] = tmp1;
+ diff = tmp0 - state[7];
+ // scale down and truncate
+ diff = diff >> 14;
+ if (diff < 0)
+ diff += 1;
+ state[7] = state[6] + diff * kResampleAllpass[0][2];
+ state[6] = tmp0;
+
+ // divide by two and add to the lower allpass output
+ out[i] += (state[7] >> 1);
+ }
+
+ in--;
+}
+
+//
+// interpolator
+// input: int16_t
+// output: int32_t (normalized, not saturated) (of length len*2)
+// state: filter state array; length = 8
+void WebRtcSpl_UpBy2ShortToInt(const int16_t *in, int32_t len, int32_t *out,
+ int32_t *state)
+{
+ int32_t tmp0, tmp1, diff;
+ int32_t i;
+
+ // upper allpass filter (generates odd output samples)
+ for (i = 0; i < len; i++)
+ {
+ tmp0 = ((int32_t)in[i] << 15) + (1 << 14);
+ diff = tmp0 - state[5];
+ // scale down and round
+ diff = (diff + (1 << 13)) >> 14;
+ tmp1 = state[4] + diff * kResampleAllpass[0][0];
+ state[4] = tmp0;
+ diff = tmp1 - state[6];
+ // scale down and truncate
+ diff = diff >> 14;
+ if (diff < 0)
+ diff += 1;
+ tmp0 = state[5] + diff * kResampleAllpass[0][1];
+ state[5] = tmp1;
+ diff = tmp0 - state[7];
+ // scale down and truncate
+ diff = diff >> 14;
+ if (diff < 0)
+ diff += 1;
+ state[7] = state[6] + diff * kResampleAllpass[0][2];
+ state[6] = tmp0;
+
+ // scale down, round and store
+ out[i << 1] = state[7] >> 15;
+ }
+
+ out++;
+
+ // lower allpass filter (generates even output samples)
+ for (i = 0; i < len; i++)
+ {
+ tmp0 = ((int32_t)in[i] << 15) + (1 << 14);
+ diff = tmp0 - state[1];
+ // scale down and round
+ diff = (diff + (1 << 13)) >> 14;
+ tmp1 = state[0] + diff * kResampleAllpass[1][0];
+ state[0] = tmp0;
+ diff = tmp1 - state[2];
+ // scale down and truncate
+ diff = diff >> 14;
+ if (diff < 0)
+ diff += 1;
+ tmp0 = state[1] + diff * kResampleAllpass[1][1];
+ state[1] = tmp1;
+ diff = tmp0 - state[3];
+ // scale down and truncate
+ diff = diff >> 14;
+ if (diff < 0)
+ diff += 1;
+ state[3] = state[2] + diff * kResampleAllpass[1][2];
+ state[2] = tmp0;
+
+ // scale down, round and store
+ out[i << 1] = state[3] >> 15;
+ }
+}
+
+//
+// interpolator
+// input: int32_t (shifted 15 positions to the left, + offset 16384)
+// output: int32_t (shifted 15 positions to the left, + offset 16384) (of length len*2)
+// state: filter state array; length = 8
+void WebRtcSpl_UpBy2IntToInt(const int32_t *in, int32_t len, int32_t *out,
+ int32_t *state)
+{
+ int32_t tmp0, tmp1, diff;
+ int32_t i;
+
+ // upper allpass filter (generates odd output samples)
+ for (i = 0; i < len; i++)
+ {
+ tmp0 = in[i];
+ diff = tmp0 - state[5];
+ // scale down and round
+ diff = (diff + (1 << 13)) >> 14;
+ tmp1 = state[4] + diff * kResampleAllpass[0][0];
+ state[4] = tmp0;
+ diff = tmp1 - state[6];
+ // scale down and truncate
+ diff = diff >> 14;
+ if (diff < 0)
+ diff += 1;
+ tmp0 = state[5] + diff * kResampleAllpass[0][1];
+ state[5] = tmp1;
+ diff = tmp0 - state[7];
+ // scale down and truncate
+ diff = diff >> 14;
+ if (diff < 0)
+ diff += 1;
+ state[7] = state[6] + diff * kResampleAllpass[0][2];
+ state[6] = tmp0;
+
+ // store unscaled (output stays shifted 15 positions to the left)
+ out[i << 1] = state[7];
+ }
+
+ out++;
+
+ // lower allpass filter (generates even output samples)
+ for (i = 0; i < len; i++)
+ {
+ tmp0 = in[i];
+ diff = tmp0 - state[1];
+ // scale down and round
+ diff = (diff + (1 << 13)) >> 14;
+ tmp1 = state[0] + diff * kResampleAllpass[1][0];
+ state[0] = tmp0;
+ diff = tmp1 - state[2];
+ // scale down and truncate
+ diff = diff >> 14;
+ if (diff < 0)
+ diff += 1;
+ tmp0 = state[1] + diff * kResampleAllpass[1][1];
+ state[1] = tmp1;
+ diff = tmp0 - state[3];
+ // scale down and truncate
+ diff = diff >> 14;
+ if (diff < 0)
+ diff += 1;
+ state[3] = state[2] + diff * kResampleAllpass[1][2];
+ state[2] = tmp0;
+
+ // store unscaled (output stays shifted 15 positions to the left)
+ out[i << 1] = state[3];
+ }
+}
+
+//
+// interpolator
+// input: int32_t (shifted 15 positions to the left, + offset 16384)
+// output: int16_t (saturated) (of length len*2)
+// state: filter state array; length = 8
+void WebRtcSpl_UpBy2IntToShort(const int32_t *in, int32_t len, int16_t *out,
+ int32_t *state)
+{
+ int32_t tmp0, tmp1, diff;
+ int32_t i;
+
+ // upper allpass filter (generates odd output samples)
+ for (i = 0; i < len; i++)
+ {
+ tmp0 = in[i];
+ diff = tmp0 - state[5];
+ // scale down and round
+ diff = (diff + (1 << 13)) >> 14;
+ tmp1 = state[4] + diff * kResampleAllpass[0][0];
+ state[4] = tmp0;
+ diff = tmp1 - state[6];
+ // scale down and truncate
+ diff = diff >> 14;
+ if (diff < 0)
+ diff += 1;
+ tmp0 = state[5] + diff * kResampleAllpass[0][1];
+ state[5] = tmp1;
+ diff = tmp0 - state[7];
+ // scale down and truncate
+ diff = diff >> 14;
+ if (diff < 0)
+ diff += 1;
+ state[7] = state[6] + diff * kResampleAllpass[0][2];
+ state[6] = tmp0;
+
+ // scale down, saturate and store
+ tmp1 = state[7] >> 15;
+ if (tmp1 > (int32_t)0x00007FFF)
+ tmp1 = 0x00007FFF;
+ if (tmp1 < (int32_t)0xFFFF8000)
+ tmp1 = 0xFFFF8000;
+ out[i << 1] = (int16_t)tmp1;
+ }
+
+ out++;
+
+ // lower allpass filter (generates even output samples)
+ for (i = 0; i < len; i++)
+ {
+ tmp0 = in[i];
+ diff = tmp0 - state[1];
+ // scale down and round
+ diff = (diff + (1 << 13)) >> 14;
+ tmp1 = state[0] + diff * kResampleAllpass[1][0];
+ state[0] = tmp0;
+ diff = tmp1 - state[2];
+ // scale down and truncate
+ diff = diff >> 14;
+ if (diff < 0)
+ diff += 1;
+ tmp0 = state[1] + diff * kResampleAllpass[1][1];
+ state[1] = tmp1;
+ diff = tmp0 - state[3];
+ // scale down and truncate
+ diff = diff >> 14;
+ if (diff < 0)
+ diff += 1;
+ state[3] = state[2] + diff * kResampleAllpass[1][2];
+ state[2] = tmp0;
+
+ // scale down, saturate and store
+ tmp1 = state[3] >> 15;
+ if (tmp1 > (int32_t)0x00007FFF)
+ tmp1 = 0x00007FFF;
+ if (tmp1 < (int32_t)0xFFFF8000)
+ tmp1 = 0xFFFF8000;
+ out[i << 1] = (int16_t)tmp1;
+ }
+}
+
+// lowpass filter
+// input: int16_t
+// output: int32_t (normalized, not saturated)
+// state: filter state array; length = 16
+void WebRtcSpl_LPBy2ShortToInt(const int16_t* in, int32_t len, int32_t* out,
+ int32_t* state)
+{
+ int32_t tmp0, tmp1, diff;
+ int32_t i;
+
+ len >>= 1;
+
+ // lower allpass filter: odd input -> even output samples
+ in++;
+ // initial state of polyphase delay element
+ tmp0 = state[12];
+ for (i = 0; i < len; i++)
+ {
+ diff = tmp0 - state[1];
+ // scale down and round
+ diff = (diff + (1 << 13)) >> 14;
+ tmp1 = state[0] + diff * kResampleAllpass[1][0];
+ state[0] = tmp0;
+ diff = tmp1 - state[2];
+ // scale down and truncate
+ diff = diff >> 14;
+ if (diff < 0)
+ diff += 1;
+ tmp0 = state[1] + diff * kResampleAllpass[1][1];
+ state[1] = tmp1;
+ diff = tmp0 - state[3];
+ // scale down and truncate
+ diff = diff >> 14;
+ if (diff < 0)
+ diff += 1;
+ state[3] = state[2] + diff * kResampleAllpass[1][2];
+ state[2] = tmp0;
+
+ // scale down, round and store
+ out[i << 1] = state[3] >> 1;
+ tmp0 = ((int32_t)in[i << 1] << 15) + (1 << 14);
+ }
+ in--;
+
+ // upper allpass filter: even input -> even output samples
+ for (i = 0; i < len; i++)
+ {
+ tmp0 = ((int32_t)in[i << 1] << 15) + (1 << 14);
+ diff = tmp0 - state[5];
+ // scale down and round
+ diff = (diff + (1 << 13)) >> 14;
+ tmp1 = state[4] + diff * kResampleAllpass[0][0];
+ state[4] = tmp0;
+ diff = tmp1 - state[6];
+ // scale down and truncate
+ diff = diff >> 14;
+ if (diff < 0)
+ diff += 1;
+ tmp0 = state[5] + diff * kResampleAllpass[0][1];
+ state[5] = tmp1;
+ diff = tmp0 - state[7];
+ // scale down and truncate
+ diff = diff >> 14;
+ if (diff < 0)
+ diff += 1;
+ state[7] = state[6] + diff * kResampleAllpass[0][2];
+ state[6] = tmp0;
+
+ // average the two allpass outputs, scale down and store
+ out[i << 1] = (out[i << 1] + (state[7] >> 1)) >> 15;
+ }
+
+ // switch to odd output samples
+ out++;
+
+ // lower allpass filter: even input -> odd output samples
+ for (i = 0; i < len; i++)
+ {
+ tmp0 = ((int32_t)in[i << 1] << 15) + (1 << 14);
+ diff = tmp0 - state[9];
+ // scale down and round
+ diff = (diff + (1 << 13)) >> 14;
+ tmp1 = state[8] + diff * kResampleAllpass[1][0];
+ state[8] = tmp0;
+ diff = tmp1 - state[10];
+ // scale down and truncate
+ diff = diff >> 14;
+ if (diff < 0)
+ diff += 1;
+ tmp0 = state[9] + diff * kResampleAllpass[1][1];
+ state[9] = tmp1;
+ diff = tmp0 - state[11];
+ // scale down and truncate
+ diff = diff >> 14;
+ if (diff < 0)
+ diff += 1;
+ state[11] = state[10] + diff * kResampleAllpass[1][2];
+ state[10] = tmp0;
+
+ // scale down, round and store
+ out[i << 1] = state[11] >> 1;
+ }
+
+ // upper allpass filter: odd input -> odd output samples
+ in++;
+ for (i = 0; i < len; i++)
+ {
+ tmp0 = ((int32_t)in[i << 1] << 15) + (1 << 14);
+ diff = tmp0 - state[13];
+ // scale down and round
+ diff = (diff + (1 << 13)) >> 14;
+ tmp1 = state[12] + diff * kResampleAllpass[0][0];
+ state[12] = tmp0;
+ diff = tmp1 - state[14];
+ // scale down and truncate
+ diff = diff >> 14;
+ if (diff < 0)
+ diff += 1;
+ tmp0 = state[13] + diff * kResampleAllpass[0][1];
+ state[13] = tmp1;
+ diff = tmp0 - state[15];
+ // scale down and truncate
+ diff = diff >> 14;
+ if (diff < 0)
+ diff += 1;
+ state[15] = state[14] + diff * kResampleAllpass[0][2];
+ state[14] = tmp0;
+
+ // average the two allpass outputs, scale down and store
+ out[i << 1] = (out[i << 1] + (state[15] >> 1)) >> 15;
+ }
+}
+
+// lowpass filter
+// input: int32_t (shifted 15 positions to the left, + offset 16384)
+// output: int32_t (normalized, not saturated)
+// state: filter state array; length = 16
+void WebRtcSpl_LPBy2IntToInt(const int32_t* in, int32_t len, int32_t* out,
+ int32_t* state)
+{
+ int32_t tmp0, tmp1, diff;
+ int32_t i;
+
+ len >>= 1;
+
+ // lower allpass filter: odd input -> even output samples
+ in++;
+ // initial state of polyphase delay element
+ tmp0 = state[12];
+ for (i = 0; i < len; i++)
+ {
+ diff = tmp0 - state[1];
+ // scale down and round
+ diff = (diff + (1 << 13)) >> 14;
+ tmp1 = state[0] + diff * kResampleAllpass[1][0];
+ state[0] = tmp0;
+ diff = tmp1 - state[2];
+ // scale down and truncate
+ diff = diff >> 14;
+ if (diff < 0)
+ diff += 1;
+ tmp0 = state[1] + diff * kResampleAllpass[1][1];
+ state[1] = tmp1;
+ diff = tmp0 - state[3];
+ // scale down and truncate
+ diff = diff >> 14;
+ if (diff < 0)
+ diff += 1;
+ state[3] = state[2] + diff * kResampleAllpass[1][2];
+ state[2] = tmp0;
+
+ // scale down, round and store
+ out[i << 1] = state[3] >> 1;
+ tmp0 = in[i << 1];
+ }
+ in--;
+
+ // upper allpass filter: even input -> even output samples
+ for (i = 0; i < len; i++)
+ {
+ tmp0 = in[i << 1];
+ diff = tmp0 - state[5];
+ // scale down and round
+ diff = (diff + (1 << 13)) >> 14;
+ tmp1 = state[4] + diff * kResampleAllpass[0][0];
+ state[4] = tmp0;
+ diff = tmp1 - state[6];
+ // scale down and truncate
+ diff = diff >> 14;
+ if (diff < 0)
+ diff += 1;
+ tmp0 = state[5] + diff * kResampleAllpass[0][1];
+ state[5] = tmp1;
+ diff = tmp0 - state[7];
+ // scale down and truncate
+ diff = diff >> 14;
+ if (diff < 0)
+ diff += 1;
+ state[7] = state[6] + diff * kResampleAllpass[0][2];
+ state[6] = tmp0;
+
+ // average the two allpass outputs, scale down and store
+ out[i << 1] = (out[i << 1] + (state[7] >> 1)) >> 15;
+ }
+
+ // switch to odd output samples
+ out++;
+
+ // lower allpass filter: even input -> odd output samples
+ for (i = 0; i < len; i++)
+ {
+ tmp0 = in[i << 1];
+ diff = tmp0 - state[9];
+ // scale down and round
+ diff = (diff + (1 << 13)) >> 14;
+ tmp1 = state[8] + diff * kResampleAllpass[1][0];
+ state[8] = tmp0;
+ diff = tmp1 - state[10];
+ // scale down and truncate
+ diff = diff >> 14;
+ if (diff < 0)
+ diff += 1;
+ tmp0 = state[9] + diff * kResampleAllpass[1][1];
+ state[9] = tmp1;
+ diff = tmp0 - state[11];
+ // scale down and truncate
+ diff = diff >> 14;
+ if (diff < 0)
+ diff += 1;
+ state[11] = state[10] + diff * kResampleAllpass[1][2];
+ state[10] = tmp0;
+
+ // scale down, round and store
+ out[i << 1] = state[11] >> 1;
+ }
+
+ // upper allpass filter: odd input -> odd output samples
+ in++;
+ for (i = 0; i < len; i++)
+ {
+ tmp0 = in[i << 1];
+ diff = tmp0 - state[13];
+ // scale down and round
+ diff = (diff + (1 << 13)) >> 14;
+ tmp1 = state[12] + diff * kResampleAllpass[0][0];
+ state[12] = tmp0;
+ diff = tmp1 - state[14];
+ // scale down and truncate
+ diff = diff >> 14;
+ if (diff < 0)
+ diff += 1;
+ tmp0 = state[13] + diff * kResampleAllpass[0][1];
+ state[13] = tmp1;
+ diff = tmp0 - state[15];
+ // scale down and truncate
+ diff = diff >> 14;
+ if (diff < 0)
+ diff += 1;
+ state[15] = state[14] + diff * kResampleAllpass[0][2];
+ state[14] = tmp0;
+
+ // average the two allpass outputs, scale down and store
+ out[i << 1] = (out[i << 1] + (state[15] >> 1)) >> 15;
+ }
+}
diff --git a/third_party/webrtc/src/webrtc/common_audio/signal_processing/resample_by_2_internal.h b/third_party/webrtc/src/webrtc/common_audio/signal_processing/resample_by_2_internal.h
new file mode 100644
index 00000000..5c9533ee
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/resample_by_2_internal.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+/*
+ * This header file contains some internal resampling functions.
+ *
+ */
+
+#ifndef WEBRTC_SPL_RESAMPLE_BY_2_INTERNAL_H_
+#define WEBRTC_SPL_RESAMPLE_BY_2_INTERNAL_H_
+
+#include "webrtc/typedefs.h"
+
+/*******************************************************************
+ * resample_by_2_internal.c
+ * Functions for internal use in the other resample functions
+ ******************************************************************/
+void WebRtcSpl_DownBy2IntToShort(int32_t *in, int32_t len, int16_t *out,
+ int32_t *state);
+
+void WebRtcSpl_DownBy2ShortToInt(const int16_t *in, int32_t len,
+ int32_t *out, int32_t *state);
+
+void WebRtcSpl_UpBy2ShortToInt(const int16_t *in, int32_t len,
+ int32_t *out, int32_t *state);
+
+void WebRtcSpl_UpBy2IntToInt(const int32_t *in, int32_t len, int32_t *out,
+ int32_t *state);
+
+void WebRtcSpl_UpBy2IntToShort(const int32_t *in, int32_t len,
+ int16_t *out, int32_t *state);
+
+void WebRtcSpl_LPBy2ShortToInt(const int16_t* in, int32_t len,
+ int32_t* out, int32_t* state);
+
+void WebRtcSpl_LPBy2IntToInt(const int32_t* in, int32_t len, int32_t* out,
+ int32_t* state);
+
+#endif // WEBRTC_SPL_RESAMPLE_BY_2_INTERNAL_H_
diff --git a/third_party/webrtc/src/webrtc/common_audio/signal_processing/resample_by_2_mips.c b/third_party/webrtc/src/webrtc/common_audio/signal_processing/resample_by_2_mips.c
new file mode 100644
index 00000000..ec5fc8b3
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/resample_by_2_mips.c
@@ -0,0 +1,290 @@
+/*
+ * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+/*
+ * This file contains the resampling by two functions.
+ * The description header can be found in signal_processing_library.h
+ *
+ */
+
+#if defined(MIPS32_LE)
+
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+
+// allpass filter coefficients.
+// In the plain-C path of WebRtcSpl_DownsampleBy2 below, kResampleAllpass2 is
+// applied to the first sample of each input pair (states 0..3) and
+// kResampleAllpass1 to the second (states 4..7). The DSP R2 path carries the
+// same six values as the local constants k2Res0..2 and k1Res0..2.
+static const uint16_t kResampleAllpass1[3] = {3284, 24441, 49528};
+static const uint16_t kResampleAllpass2[3] = {12199, 37471, 60255};
+
+// Multiply a 32-bit value with a 16-bit value and accumulate to another input:
+// Both names expand to the same WEBRTC_SPL_SCALEDIFF32 macro (declared
+// outside this file); the two spellings are interchangeable here.
+#define MUL_ACCUM_1(a, b, c) WEBRTC_SPL_SCALEDIFF32(a, b, c)
+#define MUL_ACCUM_2(a, b, c) WEBRTC_SPL_SCALEDIFF32(a, b, c)
+
+// decimator
+void WebRtcSpl_DownsampleBy2(const int16_t* in,
+ size_t len,
+ int16_t* out,
+ int32_t* filtState) {
+ int32_t out32;
+ size_t i, len1;
+
+ register int32_t state0 = filtState[0];
+ register int32_t state1 = filtState[1];
+ register int32_t state2 = filtState[2];
+ register int32_t state3 = filtState[3];
+ register int32_t state4 = filtState[4];
+ register int32_t state5 = filtState[5];
+ register int32_t state6 = filtState[6];
+ register int32_t state7 = filtState[7];
+
+#if defined(MIPS_DSP_R2_LE)
+ int32_t k1Res0, k1Res1, k1Res2, k2Res0, k2Res1, k2Res2;
+
+ k1Res0= 3284;
+ k1Res1= 24441;
+ k1Res2= 49528;
+ k2Res0= 12199;
+ k2Res1= 37471;
+ k2Res2= 60255;
+ len1 = (len >> 1);
+
+ const int32_t* inw = (int32_t*)in;
+ int32_t tmp11, tmp12, tmp21, tmp22;
+ int32_t in322, in321;
+ int32_t diff1, diff2;
+ for (i = len1; i > 0; i--) {
+ __asm__ volatile (
+ "lh %[in321], 0(%[inw]) \n\t"
+ "lh %[in322], 2(%[inw]) \n\t"
+
+ "sll %[in321], %[in321], 10 \n\t"
+ "sll %[in322], %[in322], 10 \n\t"
+
+ "addiu %[inw], %[inw], 4 \n\t"
+
+ "subu %[diff1], %[in321], %[state1] \n\t"
+ "subu %[diff2], %[in322], %[state5] \n\t"
+
+ : [in322] "=&r" (in322), [in321] "=&r" (in321),
+ [diff1] "=&r" (diff1), [diff2] "=r" (diff2), [inw] "+r" (inw)
+ : [state1] "r" (state1), [state5] "r" (state5)
+ : "memory"
+ );
+
+ __asm__ volatile (
+ "mult $ac0, %[diff1], %[k2Res0] \n\t"
+ "mult $ac1, %[diff2], %[k1Res0] \n\t"
+
+ "extr.w %[tmp11], $ac0, 16 \n\t"
+ "extr.w %[tmp12], $ac1, 16 \n\t"
+
+ "addu %[tmp11], %[state0], %[tmp11] \n\t"
+ "addu %[tmp12], %[state4], %[tmp12] \n\t"
+
+ "addiu %[state0], %[in321], 0 \n\t"
+ "addiu %[state4], %[in322], 0 \n\t"
+
+ "subu %[diff1], %[tmp11], %[state2] \n\t"
+ "subu %[diff2], %[tmp12], %[state6] \n\t"
+
+ "mult $ac0, %[diff1], %[k2Res1] \n\t"
+ "mult $ac1, %[diff2], %[k1Res1] \n\t"
+
+ "extr.w %[tmp21], $ac0, 16 \n\t"
+ "extr.w %[tmp22], $ac1, 16 \n\t"
+
+ "addu %[tmp21], %[state1], %[tmp21] \n\t"
+ "addu %[tmp22], %[state5], %[tmp22] \n\t"
+
+ "addiu %[state1], %[tmp11], 0 \n\t"
+ "addiu %[state5], %[tmp12], 0 \n\t"
+ : [tmp22] "=r" (tmp22), [tmp21] "=&r" (tmp21),
+ [tmp11] "=&r" (tmp11), [state0] "+r" (state0),
+ [state1] "+r" (state1),
+ [state2] "+r" (state2),
+ [state4] "+r" (state4), [tmp12] "=&r" (tmp12),
+ [state6] "+r" (state6), [state5] "+r" (state5)
+ : [k1Res1] "r" (k1Res1), [k2Res1] "r" (k2Res1), [k2Res0] "r" (k2Res0),
+ [diff2] "r" (diff2), [diff1] "r" (diff1), [in322] "r" (in322),
+ [in321] "r" (in321), [k1Res0] "r" (k1Res0)
+ : "hi", "lo", "$ac1hi", "$ac1lo"
+ );
+
+ // upper allpass filter
+ __asm__ volatile (
+ "subu %[diff1], %[tmp21], %[state3] \n\t"
+ "subu %[diff2], %[tmp22], %[state7] \n\t"
+
+ "mult $ac0, %[diff1], %[k2Res2] \n\t"
+ "mult $ac1, %[diff2], %[k1Res2] \n\t"
+ "extr.w %[state3], $ac0, 16 \n\t"
+ "extr.w %[state7], $ac1, 16 \n\t"
+ "addu %[state3], %[state2], %[state3] \n\t"
+ "addu %[state7], %[state6], %[state7] \n\t"
+
+ "addiu %[state2], %[tmp21], 0 \n\t"
+ "addiu %[state6], %[tmp22], 0 \n\t"
+
+ // add two allpass outputs, divide by two and round
+ "addu %[out32], %[state3], %[state7] \n\t"
+ "addiu %[out32], %[out32], 1024 \n\t"
+ "sra %[out32], %[out32], 11 \n\t"
+ : [state3] "+r" (state3), [state6] "+r" (state6),
+ [state2] "+r" (state2), [diff2] "=&r" (diff2),
+ [out32] "=r" (out32), [diff1] "=&r" (diff1), [state7] "+r" (state7)
+ : [tmp22] "r" (tmp22), [tmp21] "r" (tmp21),
+ [k1Res2] "r" (k1Res2), [k2Res2] "r" (k2Res2)
+ : "hi", "lo", "$ac1hi", "$ac1lo"
+ );
+
+ // limit amplitude to prevent wrap-around, and write to output array
+ *out++ = WebRtcSpl_SatW32ToW16(out32);
+ }
+#else // #if defined(MIPS_DSP_R2_LE)
+ int32_t tmp1, tmp2, diff;
+ int32_t in32;
+ len1 = (len >> 1)/4;
+ for (i = len1; i > 0; i--) {
+ // lower allpass filter
+ in32 = (int32_t)(*in++) << 10;
+ diff = in32 - state1;
+ tmp1 = MUL_ACCUM_1(kResampleAllpass2[0], diff, state0);
+ state0 = in32;
+ diff = tmp1 - state2;
+ tmp2 = MUL_ACCUM_2(kResampleAllpass2[1], diff, state1);
+ state1 = tmp1;
+ diff = tmp2 - state3;
+ state3 = MUL_ACCUM_2(kResampleAllpass2[2], diff, state2);
+ state2 = tmp2;
+
+ // upper allpass filter
+ in32 = (int32_t)(*in++) << 10;
+ diff = in32 - state5;
+ tmp1 = MUL_ACCUM_1(kResampleAllpass1[0], diff, state4);
+ state4 = in32;
+ diff = tmp1 - state6;
+ tmp2 = MUL_ACCUM_1(kResampleAllpass1[1], diff, state5);
+ state5 = tmp1;
+ diff = tmp2 - state7;
+ state7 = MUL_ACCUM_2(kResampleAllpass1[2], diff, state6);
+ state6 = tmp2;
+
+ // add two allpass outputs, divide by two and round
+ out32 = (state3 + state7 + 1024) >> 11;
+
+ // limit amplitude to prevent wrap-around, and write to output array
+ *out++ = WebRtcSpl_SatW32ToW16(out32);
+ // lower allpass filter
+ in32 = (int32_t)(*in++) << 10;
+ diff = in32 - state1;
+ tmp1 = MUL_ACCUM_1(kResampleAllpass2[0], diff, state0);
+ state0 = in32;
+ diff = tmp1 - state2;
+ tmp2 = MUL_ACCUM_2(kResampleAllpass2[1], diff, state1);
+ state1 = tmp1;
+ diff = tmp2 - state3;
+ state3 = MUL_ACCUM_2(kResampleAllpass2[2], diff, state2);
+ state2 = tmp2;
+
+ // upper allpass filter
+ in32 = (int32_t)(*in++) << 10;
+ diff = in32 - state5;
+ tmp1 = MUL_ACCUM_1(kResampleAllpass1[0], diff, state4);
+ state4 = in32;
+ diff = tmp1 - state6;
+ tmp2 = MUL_ACCUM_1(kResampleAllpass1[1], diff, state5);
+ state5 = tmp1;
+ diff = tmp2 - state7;
+ state7 = MUL_ACCUM_2(kResampleAllpass1[2], diff, state6);
+ state6 = tmp2;
+
+ // add two allpass outputs, divide by two and round
+ out32 = (state3 + state7 + 1024) >> 11;
+
+ // limit amplitude to prevent wrap-around, and write to output array
+ *out++ = WebRtcSpl_SatW32ToW16(out32);
+ // lower allpass filter
+ in32 = (int32_t)(*in++) << 10;
+ diff = in32 - state1;
+ tmp1 = MUL_ACCUM_1(kResampleAllpass2[0], diff, state0);
+ state0 = in32;
+ diff = tmp1 - state2;
+ tmp2 = MUL_ACCUM_2(kResampleAllpass2[1], diff, state1);
+ state1 = tmp1;
+ diff = tmp2 - state3;
+ state3 = MUL_ACCUM_2(kResampleAllpass2[2], diff, state2);
+ state2 = tmp2;
+
+ // upper allpass filter
+ in32 = (int32_t)(*in++) << 10;
+ diff = in32 - state5;
+ tmp1 = MUL_ACCUM_1(kResampleAllpass1[0], diff, state4);
+ state4 = in32;
+ diff = tmp1 - state6;
+ tmp2 = MUL_ACCUM_1(kResampleAllpass1[1], diff, state5);
+ state5 = tmp1;
+ diff = tmp2 - state7;
+ state7 = MUL_ACCUM_2(kResampleAllpass1[2], diff, state6);
+ state6 = tmp2;
+
+ // add two allpass outputs, divide by two and round
+ out32 = (state3 + state7 + 1024) >> 11;
+
+ // limit amplitude to prevent wrap-around, and write to output array
+ *out++ = WebRtcSpl_SatW32ToW16(out32);
+ // lower allpass filter
+ in32 = (int32_t)(*in++) << 10;
+ diff = in32 - state1;
+ tmp1 = MUL_ACCUM_1(kResampleAllpass2[0], diff, state0);
+ state0 = in32;
+ diff = tmp1 - state2;
+ tmp2 = MUL_ACCUM_2(kResampleAllpass2[1], diff, state1);
+ state1 = tmp1;
+ diff = tmp2 - state3;
+ state3 = MUL_ACCUM_2(kResampleAllpass2[2], diff, state2);
+ state2 = tmp2;
+
+ // upper allpass filter
+ in32 = (int32_t)(*in++) << 10;
+ diff = in32 - state5;
+ tmp1 = MUL_ACCUM_1(kResampleAllpass1[0], diff, state4);
+ state4 = in32;
+ diff = tmp1 - state6;
+ tmp2 = MUL_ACCUM_1(kResampleAllpass1[1], diff, state5);
+ state5 = tmp1;
+ diff = tmp2 - state7;
+ state7 = MUL_ACCUM_2(kResampleAllpass1[2], diff, state6);
+ state6 = tmp2;
+
+ // add two allpass outputs, divide by two and round
+ out32 = (state3 + state7 + 1024) >> 11;
+
+ // limit amplitude to prevent wrap-around, and write to output array
+ *out++ = WebRtcSpl_SatW32ToW16(out32);
+ }
+#endif // #if defined(MIPS_DSP_R2_LE)
+ __asm__ volatile (
+ "sw %[state0], 0(%[filtState]) \n\t"
+ "sw %[state1], 4(%[filtState]) \n\t"
+ "sw %[state2], 8(%[filtState]) \n\t"
+ "sw %[state3], 12(%[filtState]) \n\t"
+ "sw %[state4], 16(%[filtState]) \n\t"
+ "sw %[state5], 20(%[filtState]) \n\t"
+ "sw %[state6], 24(%[filtState]) \n\t"
+ "sw %[state7], 28(%[filtState]) \n\t"
+ :
+ : [state0] "r" (state0), [state1] "r" (state1), [state2] "r" (state2),
+ [state3] "r" (state3), [state4] "r" (state4), [state5] "r" (state5),
+ [state6] "r" (state6), [state7] "r" (state7), [filtState] "r" (filtState)
+ : "memory"
+ );
+}
+
+#endif // #if defined(MIPS32_LE)
diff --git a/third_party/webrtc/src/webrtc/common_audio/signal_processing/resample_fractional.c b/third_party/webrtc/src/webrtc/common_audio/signal_processing/resample_fractional.c
new file mode 100644
index 00000000..6409fbac
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/resample_fractional.c
@@ -0,0 +1,239 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+/*
+ * This file contains the resampling functions between 48, 44, 32 and 24 kHz.
+ * The description headers can be found in signal_processing_library.h
+ *
+ */
+
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+
+// interpolation coefficients
+// Each table holds one row of filter taps per output sample computed by
+// filtering; the resampling functions below multiply a row tap-by-tap with a
+// window of consecutive input samples.
+
+// 48 kHz -> 32 kHz: 2 rows of 8 taps. Row 1 is row 0 in reverse order.
+static const int16_t kCoefficients48To32[2][8] = {
+ {778, -2050, 1087, 23285, 12903, -3783, 441, 222},
+ {222, 441, -3783, 12903, 23285, 1087, -2050, 778}
+};
+
+// 32 kHz -> 24 kHz: 3 rows of 8 taps. Row 2 is row 0 in reverse order and
+// row 1 is symmetric.
+static const int16_t kCoefficients32To24[3][8] = {
+ {767, -2362, 2434, 24406, 10620, -3838, 721, 90},
+ {386, -381, -2646, 19062, 19062, -2646, -381, 386},
+ {90, 721, -3838, 10620, 24406, 2434, -2362, 767}
+};
+
+// 44 kHz -> 32 kHz: 4 rows of 9 taps. Rows 0..2 are each used for two output
+// samples (one forward and one backward window, see
+// WebRtcSpl_ResampDotProduct); row 3 is used for a single output sample.
+static const int16_t kCoefficients44To32[4][9] = {
+ {117, -669, 2245, -6183, 26267, 13529, -3245, 845, -138},
+ {-101, 612, -2283, 8532, 29790, -5138, 1789, -524, 91},
+ {50, -292, 1016, -3064, 32010, 3933, -1147, 315, -53},
+ {-156, 974, -3863, 18603, 21691, -6246, 2353, -712, 126}
+};
+
+// Resampling ratio: 2/3 (48 kHz -> 32 kHz).
+//
+// In:  int32_t (normalized, not saturated). The read position advances by 3
+//      samples per block, but the filter window of the last block reaches
+//      8 samples past its start, so 3 * K + 6 samples are read in total.
+// Out: int32_t (shifted 15 positions to the left, + offset 16384),
+//      2 * K samples.
+// K:   number of blocks.
+void WebRtcSpl_Resample48khzTo32khz(const int32_t *In, int32_t *Out, size_t K)
+{
+    const int32_t *src = In;
+    int32_t *dst = Out;
+    size_t block;
+
+    // One block = 3 input samples in, 2 output samples out.
+    for (block = 0; block < K; block++)
+    {
+        int phase;
+
+        // Output |phase| applies coefficient row |phase| to the 8 input
+        // samples starting at src[phase].
+        for (phase = 0; phase < 2; phase++)
+        {
+            int32_t acc = 1 << 14;
+            int tap;
+
+            for (tap = 0; tap < 8; tap++)
+            {
+                acc += kCoefficients48To32[phase][tap] * src[phase + tap];
+            }
+            dst[phase] = acc;
+        }
+
+        // update pointers
+        src += 3;
+        dst += 2;
+    }
+}
+
+// Resampling ratio: 3/4 (32 kHz -> 24 kHz).
+//
+// In:  int32_t (normalized, not saturated). The read position advances by 4
+//      samples per block, but the filter window of the last block reaches
+//      9 samples past its start, so 4 * K + 6 samples are read in total.
+// Out: int32_t (shifted 15 positions to the left, + offset 16384),
+//      3 * K samples.
+// K:   number of blocks.
+void WebRtcSpl_Resample32khzTo24khz(const int32_t *In, int32_t *Out, size_t K)
+{
+    const int32_t *src = In;
+    int32_t *dst = Out;
+    size_t block;
+
+    // One block = 4 input samples in, 3 output samples out.
+    for (block = 0; block < K; block++)
+    {
+        int phase;
+
+        // Output |phase| applies coefficient row |phase| to the 8 input
+        // samples starting at src[phase].
+        for (phase = 0; phase < 3; phase++)
+        {
+            int32_t acc = 1 << 14;
+            int tap;
+
+            for (tap = 0; tap < 8; tap++)
+            {
+                acc += kCoefficients32To24[phase][tap] * src[phase + tap];
+            }
+            dst[phase] = acc;
+        }
+
+        // update pointers
+        src += 4;
+        dst += 3;
+    }
+}
+
+//
+// fractional resampling filters
+// Fout = 11/16 * Fin
+// Fout = 8/11 * Fin
+//
+
+// Computes two 9-tap inner products that share one coefficient set and
+// stores them through |out1| and |out2|.
+//
+// |in1| is walked forwards (in1[0] .. in1[8]) and |in2| backwards
+// (in2[0] .. in2[-8]); coef_ptr[k] multiplies in1[k] and in2[-k]. Both sums
+// start from the offset 16384.
+static void WebRtcSpl_ResampDotProduct(const int32_t *in1, const int32_t *in2,
+                                       const int16_t *coef_ptr, int32_t *out1,
+                                       int32_t *out2)
+{
+    int32_t forward_sum = 16384;
+    int32_t backward_sum = 16384;
+    int16_t last_coef;
+    int k;
+
+    // Taps 0..7 are accumulated; the ninth tap is folded into the stores.
+    for (k = 0; k < 8; k++)
+    {
+        const int16_t c = coef_ptr[k];
+
+        forward_sum += c * in1[k];
+        backward_sum += c * in2[-k];
+    }
+
+    last_coef = coef_ptr[8];
+    *out1 = forward_sum + last_coef * in1[8];
+    *out2 = backward_sum + last_coef * in2[-8];
+}
+
+// Resampling ratio: 8/11 (44 kHz -> 32 kHz).
+//
+// In:  int32_t (normalized, not saturated). The read position advances by
+//      11 samples per block, but the last block reads up to 17 samples past
+//      its start, so 11 * K + 7 samples are read in total.
+// Out: int32_t (shifted 15 positions to the left, + offset 16384),
+//      8 * K samples.
+// K:   number of blocks.
+void WebRtcSpl_Resample44khzTo32khz(const int32_t *In, int32_t *Out, size_t K)
+{
+    const int32_t *src = In;
+    int32_t *dst = Out;
+    size_t blocks_left;
+
+    // One block = 11 input samples in, 8 output samples out.
+    for (blocks_left = K; blocks_left > 0; blocks_left--)
+    {
+        int32_t acc = 1 << 14;
+        int tap;
+
+        // first output sample: src[3] passed through unfiltered, only
+        // scaled and offset
+        dst[0] = ((int32_t)src[3] << 15) + acc;
+
+        // fifth output sample: coefficient row 3 over src[5] .. src[13]
+        for (tap = 0; tap < 9; tap++)
+        {
+            acc += kCoefficients44To32[3][tap] * src[5 + tap];
+        }
+        dst[4] = acc;
+
+        // The remaining six outputs come in pairs: each coefficient row is
+        // applied once to a forward window and once to a backward window.
+        WebRtcSpl_ResampDotProduct(&src[0], &src[17], kCoefficients44To32[0],
+                                   &dst[1], &dst[7]);
+        WebRtcSpl_ResampDotProduct(&src[2], &src[15], kCoefficients44To32[1],
+                                   &dst[2], &dst[6]);
+        WebRtcSpl_ResampDotProduct(&src[3], &src[14], kCoefficients44To32[2],
+                                   &dst[3], &dst[5]);
+
+        // update pointers
+        src += 11;
+        dst += 8;
+    }
+}
diff --git a/third_party/webrtc/src/webrtc/common_audio/signal_processing/signal_processing_unittest.cc b/third_party/webrtc/src/webrtc/common_audio/signal_processing/signal_processing_unittest.cc
new file mode 100644
index 00000000..108f459c
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/signal_processing_unittest.cc
@@ -0,0 +1,579 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "testing/gtest/include/gtest/gtest.h"
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+
+// Shared 9-element int16_t test vector, including the int16_t maximum and a
+// value five above the int16_t minimum. Used by the dot-product,
+// cross-correlation and auto-correlation tests below.
+static const size_t kVector16Size = 9;
+static const int16_t vector16[kVector16Size] = {1, -15511, 4323, 1963,
+ WEBRTC_SPL_WORD16_MAX, 0, WEBRTC_SPL_WORD16_MIN + 5, -3333, 345};
+
+// Test fixture for every TEST_F in this file. Its constructor calls
+// WebRtcSpl_Init() so the library is initialized before a test body runs.
+class SplTest : public testing::Test {
+ protected:
+  SplTest() { WebRtcSpl_Init(); }
+
+  virtual ~SplTest() {}
+};
+
+// Checks the WEBRTC_SPL_* arithmetic, saturation, shift and random-number
+// macros against fixed expected values. |a| and |b| are reassigned part-way
+// through, so the assertions depend on the order in which they appear.
+TEST_F(SplTest, MacroTest) {
+ // Macros with inputs.
+ int A = 10;
+ int B = 21;
+ int a = -3;
+ int b = WEBRTC_SPL_WORD32_MAX;
+
+ EXPECT_EQ(10, WEBRTC_SPL_MIN(A, B));
+ EXPECT_EQ(21, WEBRTC_SPL_MAX(A, B));
+
+ EXPECT_EQ(3, WEBRTC_SPL_ABS_W16(a));
+ EXPECT_EQ(3, WEBRTC_SPL_ABS_W32(a));
+
+ EXPECT_EQ(-63, WEBRTC_SPL_MUL(a, B));
+ EXPECT_EQ(-2147483645, WEBRTC_SPL_MUL(a, b));
+ EXPECT_EQ(2147483651u, WEBRTC_SPL_UMUL(a, b));
+ // From here on b is half the int16_t maximum (the -49149 expectation
+ // below equals -3 * 16383).
+ b = WEBRTC_SPL_WORD16_MAX >> 1;
+ EXPECT_EQ(4294918147u, WEBRTC_SPL_UMUL_32_16(a, b));
+ EXPECT_EQ(-49149, WEBRTC_SPL_MUL_16_U16(a, b));
+
+ // Swap roles: a takes b's positive value and b becomes -3.
+ a = b;
+ b = -3;
+
+ EXPECT_EQ(-1, WEBRTC_SPL_MUL_16_32_RSFT16(a, b));
+ EXPECT_EQ(-1, WEBRTC_SPL_MUL_16_32_RSFT15(a, b));
+ EXPECT_EQ(-3, WEBRTC_SPL_MUL_16_32_RSFT14(a, b));
+ EXPECT_EQ(-24, WEBRTC_SPL_MUL_16_32_RSFT11(a, b));
+
+ EXPECT_EQ(-12288, WEBRTC_SPL_MUL_16_16_RSFT(a, b, 2));
+ EXPECT_EQ(-12287, WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(a, b, 2));
+
+ // Both argument orders for the bounds are expected to give 21.
+ EXPECT_EQ(21, WEBRTC_SPL_SAT(a, A, B));
+ EXPECT_EQ(21, WEBRTC_SPL_SAT(a, B, A));
+
+ // Shifting with negative numbers allowed
+ int shift_amount = 1; // Workaround compiler warning using variable here.
+ // Positive means left shift
+ EXPECT_EQ(32766, WEBRTC_SPL_SHIFT_W32(a, shift_amount));
+
+ // Shifting with negative numbers not allowed
+ // We cannot do casting here due to signed/unsigned problem
+ EXPECT_EQ(32766, WEBRTC_SPL_LSHIFT_W32(a, 1));
+
+ EXPECT_EQ(8191u, WEBRTC_SPL_RSHIFT_U32(a, 1));
+
+ EXPECT_EQ(1470, WEBRTC_SPL_RAND(A));
+
+ // Multiplications at the extremes of the 16- and 32-bit ranges.
+ EXPECT_EQ(-49149, WEBRTC_SPL_MUL_16_16(a, b));
+ EXPECT_EQ(1073676289, WEBRTC_SPL_MUL_16_16(WEBRTC_SPL_WORD16_MAX,
+ WEBRTC_SPL_WORD16_MAX));
+ EXPECT_EQ(1073709055, WEBRTC_SPL_MUL_16_32_RSFT16(WEBRTC_SPL_WORD16_MAX,
+ WEBRTC_SPL_WORD32_MAX));
+ EXPECT_EQ(1073741824, WEBRTC_SPL_MUL_16_32_RSFT16(WEBRTC_SPL_WORD16_MIN,
+ WEBRTC_SPL_WORD32_MIN));
+ // The ARMv7 build is expected to produce a result that differs by one from
+ // the other builds for this input.
+#ifdef WEBRTC_ARCH_ARM_V7
+ EXPECT_EQ(-1073741824,
+ WEBRTC_SPL_MUL_16_32_RSFT16(WEBRTC_SPL_WORD16_MIN,
+ WEBRTC_SPL_WORD32_MAX));
+#else
+ EXPECT_EQ(-1073741823,
+ WEBRTC_SPL_MUL_16_32_RSFT16(WEBRTC_SPL_WORD16_MIN,
+ WEBRTC_SPL_WORD32_MAX));
+#endif
+}
+
+// Checks the inline helpers (size in bits, normalization counts, saturating
+// add/subtract) on a few fixed values and at the limits of the 16- and
+// 32-bit ranges.
+TEST_F(SplTest, InlineTest) {
+ int16_t a16 = 121;
+ int16_t b16 = -17;
+ int32_t a32 = 111121;
+ int32_t b32 = -1711;
+
+ EXPECT_EQ(17, WebRtcSpl_GetSizeInBits(a32));
+
+ EXPECT_EQ(0, WebRtcSpl_NormW32(0));
+ EXPECT_EQ(31, WebRtcSpl_NormW32(-1));
+ EXPECT_EQ(0, WebRtcSpl_NormW32(WEBRTC_SPL_WORD32_MIN));
+ EXPECT_EQ(14, WebRtcSpl_NormW32(a32));
+
+ EXPECT_EQ(0, WebRtcSpl_NormW16(0));
+ EXPECT_EQ(15, WebRtcSpl_NormW16(-1));
+ EXPECT_EQ(0, WebRtcSpl_NormW16(WEBRTC_SPL_WORD16_MIN));
+ EXPECT_EQ(4, WebRtcSpl_NormW16(b32));
+ // Every power of two from 2^0 to 2^14, and its negation.
+ for (int ii = 0; ii < 15; ++ii) {
+ int16_t value = 1 << ii;
+ EXPECT_EQ(14 - ii, WebRtcSpl_NormW16(value));
+ EXPECT_EQ(15 - ii, WebRtcSpl_NormW16(-value));
+ }
+
+ EXPECT_EQ(0, WebRtcSpl_NormU32(0u));
+ EXPECT_EQ(0, WebRtcSpl_NormU32(0xffffffff));
+ EXPECT_EQ(15, WebRtcSpl_NormU32(static_cast<uint32_t>(a32)));
+
+ EXPECT_EQ(104, WebRtcSpl_AddSatW16(a16, b16));
+ EXPECT_EQ(138, WebRtcSpl_SubSatW16(a16, b16));
+
+ EXPECT_EQ(109410, WebRtcSpl_AddSatW32(a32, b32));
+ EXPECT_EQ(112832, WebRtcSpl_SubSatW32(a32, b32));
+
+ // Saturation cases: results that would leave the 32-bit range are expected
+ // to be clamped to 0x80000000 or 0x7fffffff.
+ a32 = 0x80000000;
+ b32 = 0x80000000;
+ // Cast to signed int to avoid compiler complaint on gtest.h.
+ EXPECT_EQ(static_cast<int>(0x80000000), WebRtcSpl_AddSatW32(a32, b32));
+ a32 = 0x7fffffff;
+ b32 = 0x7fffffff;
+ EXPECT_EQ(0x7fffffff, WebRtcSpl_AddSatW32(a32, b32));
+ a32 = 0;
+ b32 = 0x80000000;
+ EXPECT_EQ(0x7fffffff, WebRtcSpl_SubSatW32(a32, b32));
+ a32 = 0x7fffffff;
+ b32 = 0x80000000;
+ EXPECT_EQ(0x7fffffff, WebRtcSpl_SubSatW32(a32, b32));
+ a32 = 0x80000000;
+ b32 = 0x7fffffff;
+ EXPECT_EQ(static_cast<int>(0x80000000), WebRtcSpl_SubSatW32(a32, b32));
+}
+
+// Checks the square-root and division helpers against precomputed results.
+TEST_F(SplTest, MathOperationsTest) {
+  int radicand = 1134567892;
+  int32_t dividend = 117;
+  int32_t divisor = -5;
+  uint16_t unsigned_divisor = 5;
+
+  // Square roots.
+  EXPECT_EQ(33700, WebRtcSpl_Sqrt(radicand));
+  EXPECT_EQ(33683, WebRtcSpl_SqrtFloor(radicand));
+
+  // Divisions. Note that WebRtcSpl_DivResultInQ31 is called with -5 as its
+  // first argument and 117 as its second.
+  EXPECT_EQ(-91772805, WebRtcSpl_DivResultInQ31(divisor, dividend));
+  EXPECT_EQ(-23, WebRtcSpl_DivW32W16ResW16(dividend, (int16_t)divisor));
+  EXPECT_EQ(-23, WebRtcSpl_DivW32W16(dividend, (int16_t)divisor));
+  EXPECT_EQ(23u, WebRtcSpl_DivU32U16(dividend, unsigned_divisor));
+  EXPECT_EQ(0, WebRtcSpl_DivW32HiLow(128, 0, 256));
+}
+
+// Exercises the set/zero/copy helpers and the vector bit-shift helpers on
+// 4-element int16_t and int32_t arrays. Later steps reuse the buffers filled
+// by earlier ones.
+TEST_F(SplTest, BasicArrayOperationsTest) {
+ const size_t kVectorSize = 4;
+ int B[] = {4, 12, 133, 1100};
+ int16_t b16[kVectorSize];
+ int32_t b32[kVectorSize];
+
+ int16_t bTmp16[kVectorSize];
+ int32_t bTmp32[kVectorSize];
+
+ // Fill with a constant, then zero, for both element widths.
+ WebRtcSpl_MemSetW16(b16, 3, kVectorSize);
+ for (size_t kk = 0; kk < kVectorSize; ++kk) {
+ EXPECT_EQ(3, b16[kk]);
+ }
+ WebRtcSpl_ZerosArrayW16(b16, kVectorSize);
+ for (size_t kk = 0; kk < kVectorSize; ++kk) {
+ EXPECT_EQ(0, b16[kk]);
+ }
+ WebRtcSpl_MemSetW32(b32, 3, kVectorSize);
+ for (size_t kk = 0; kk < kVectorSize; ++kk) {
+ EXPECT_EQ(3, b32[kk]);
+ }
+ WebRtcSpl_ZerosArrayW32(b32, kVectorSize);
+ for (size_t kk = 0; kk < kVectorSize; ++kk) {
+ EXPECT_EQ(0, b32[kk]);
+ }
+ // bTmp16 / bTmp32 = {0, 1, 2, 3}.
+ for (size_t kk = 0; kk < kVectorSize; ++kk) {
+ bTmp16[kk] = (int16_t)kk;
+ bTmp32[kk] = (int32_t)kk;
+ }
+ WEBRTC_SPL_MEMCPY_W16(b16, bTmp16, kVectorSize);
+ for (size_t kk = 0; kk < kVectorSize; ++kk) {
+ EXPECT_EQ(b16[kk], bTmp16[kk]);
+ }
+ // NOTE(review): the 32-bit copy check below is disabled in the original;
+ // the reason is not visible here.
+// WEBRTC_SPL_MEMCPY_W32(b32, bTmp32, kVectorSize);
+// for (int kk = 0; kk < kVectorSize; ++kk) {
+// EXPECT_EQ(b32[kk], bTmp32[kk]);
+// }
+ // The last two elements of b16 ({2, 3}) are expected at the start of bTmp16.
+ WebRtcSpl_CopyFromEndW16(b16, kVectorSize, 2, bTmp16);
+ for (size_t kk = 0; kk < 2; ++kk) {
+ EXPECT_EQ(static_cast<int16_t>(kk+2), bTmp16[kk]);
+ }
+
+ // Reload both inputs from B and check a right shift by one for each of the
+ // three shift helpers.
+ for (size_t kk = 0; kk < kVectorSize; ++kk) {
+ b32[kk] = B[kk];
+ b16[kk] = (int16_t)B[kk];
+ }
+ WebRtcSpl_VectorBitShiftW32ToW16(bTmp16, kVectorSize, b32, 1);
+ for (size_t kk = 0; kk < kVectorSize; ++kk) {
+ EXPECT_EQ((B[kk]>>1), bTmp16[kk]);
+ }
+ WebRtcSpl_VectorBitShiftW16(bTmp16, kVectorSize, b16, 1);
+ for (size_t kk = 0; kk < kVectorSize; ++kk) {
+ EXPECT_EQ((B[kk]>>1), bTmp16[kk]);
+ }
+ WebRtcSpl_VectorBitShiftW32(bTmp32, kVectorSize, b32, 1);
+ for (size_t kk = 0; kk < kVectorSize; ++kk) {
+ EXPECT_EQ((B[kk]>>1), bTmp32[kk]);
+ }
+
+ // The destination pointer is passed as the address of the LAST element;
+ // bTmp16 is then expected to hold b16 in reverse order.
+ WebRtcSpl_MemCpyReversedOrder(&bTmp16[3], b16, kVectorSize);
+ for (size_t kk = 0; kk < kVectorSize; ++kk) {
+ EXPECT_EQ(b16[3-kk], bTmp16[kk]);
+ }
+}
+
+// Checks the min/max value and index helpers on 17-element vectors, first
+// with the extreme value in the last slot and then with several equal
+// extremes present. The local |vector16| shadows the file-level array of the
+// same name.
+TEST_F(SplTest, MinMaxOperationsTest) {
+ const size_t kVectorSize = 17;
+
+ // Vectors to test the cases where minimum values have to be caught
+ // outside of the unrolled loops in ARM-Neon.
+ int16_t vector16[kVectorSize] = {-1, 7485, 0, 3333,
+ -18283, 0, 12334, -29871, 988, -3333,
+ 345, -456, 222, 999, 888, 8774, WEBRTC_SPL_WORD16_MIN};
+ int32_t vector32[kVectorSize] = {-1, 0, 283211, 3333,
+ 8712345, 0, -3333, 89345, -374585456, 222, 999, 122345334,
+ -12389756, -987329871, 888, -2, WEBRTC_SPL_WORD32_MIN};
+
+ EXPECT_EQ(WEBRTC_SPL_WORD16_MIN,
+ WebRtcSpl_MinValueW16(vector16, kVectorSize));
+ EXPECT_EQ(WEBRTC_SPL_WORD32_MIN,
+ WebRtcSpl_MinValueW32(vector32, kVectorSize));
+ EXPECT_EQ(kVectorSize - 1, WebRtcSpl_MinIndexW16(vector16, kVectorSize));
+ EXPECT_EQ(kVectorSize - 1, WebRtcSpl_MinIndexW32(vector32, kVectorSize));
+
+ // Test the cases where maximum values have to be caught
+ // outside of the unrolled loops in ARM-Neon.
+ vector16[kVectorSize - 1] = WEBRTC_SPL_WORD16_MAX;
+ vector32[kVectorSize - 1] = WEBRTC_SPL_WORD32_MAX;
+
+ EXPECT_EQ(WEBRTC_SPL_WORD16_MAX,
+ WebRtcSpl_MaxAbsValueW16(vector16, kVectorSize));
+ EXPECT_EQ(WEBRTC_SPL_WORD16_MAX,
+ WebRtcSpl_MaxValueW16(vector16, kVectorSize));
+ EXPECT_EQ(WEBRTC_SPL_WORD32_MAX,
+ WebRtcSpl_MaxAbsValueW32(vector32, kVectorSize));
+ EXPECT_EQ(WEBRTC_SPL_WORD32_MAX,
+ WebRtcSpl_MaxValueW32(vector32, kVectorSize));
+ EXPECT_EQ(kVectorSize - 1, WebRtcSpl_MaxAbsIndexW16(vector16, kVectorSize));
+ EXPECT_EQ(kVectorSize - 1, WebRtcSpl_MaxIndexW16(vector16, kVectorSize));
+ EXPECT_EQ(kVectorSize - 1, WebRtcSpl_MaxIndexW32(vector32, kVectorSize));
+
+ // Test the cases where multiple maximum and minimum values are present.
+ // After these writes the maximum sits at indices 1 and 16 and the minimum
+ // at indices 6 and 11; the index expectations below are the first
+ // occurrence in each case.
+ vector16[1] = WEBRTC_SPL_WORD16_MAX;
+ vector16[6] = WEBRTC_SPL_WORD16_MIN;
+ vector16[11] = WEBRTC_SPL_WORD16_MIN;
+ vector32[1] = WEBRTC_SPL_WORD32_MAX;
+ vector32[6] = WEBRTC_SPL_WORD32_MIN;
+ vector32[11] = WEBRTC_SPL_WORD32_MIN;
+
+ EXPECT_EQ(WEBRTC_SPL_WORD16_MAX,
+ WebRtcSpl_MaxAbsValueW16(vector16, kVectorSize));
+ EXPECT_EQ(WEBRTC_SPL_WORD16_MAX,
+ WebRtcSpl_MaxValueW16(vector16, kVectorSize));
+ EXPECT_EQ(WEBRTC_SPL_WORD16_MIN,
+ WebRtcSpl_MinValueW16(vector16, kVectorSize));
+ EXPECT_EQ(WEBRTC_SPL_WORD32_MAX,
+ WebRtcSpl_MaxAbsValueW32(vector32, kVectorSize));
+ EXPECT_EQ(WEBRTC_SPL_WORD32_MAX,
+ WebRtcSpl_MaxValueW32(vector32, kVectorSize));
+ EXPECT_EQ(WEBRTC_SPL_WORD32_MIN,
+ WebRtcSpl_MinValueW32(vector32, kVectorSize));
+ // The largest-magnitude 16-bit entry is expected at index 6, i.e. the first
+ // WEBRTC_SPL_WORD16_MIN, not at index 1.
+ EXPECT_EQ(6u, WebRtcSpl_MaxAbsIndexW16(vector16, kVectorSize));
+ EXPECT_EQ(1u, WebRtcSpl_MaxIndexW16(vector16, kVectorSize));
+ EXPECT_EQ(1u, WebRtcSpl_MaxIndexW32(vector32, kVectorSize));
+ EXPECT_EQ(6u, WebRtcSpl_MinIndexW16(vector16, kVectorSize));
+ EXPECT_EQ(6u, WebRtcSpl_MinIndexW32(vector32, kVectorSize));
+}
+
+// Exercises the element-wise vector helpers (affine transform, scaling,
+// scaled addition, element-wise and reversed multiplication, and more) on
+// 4-element vectors. Each expectation recomputes the reference value inline
+// from B, a16 or b16.
+TEST_F(SplTest, VectorOperationsTest) {
+ const size_t kVectorSize = 4;
+ int B[] = {4, 12, 133, 1100};
+ int16_t a16[kVectorSize];
+ int16_t b16[kVectorSize];
+ int16_t bTmp16[kVectorSize];
+
+ // a16 and b16 both start as copies of B.
+ for (size_t kk = 0; kk < kVectorSize; ++kk) {
+ a16[kk] = B[kk];
+ b16[kk] = B[kk];
+ }
+
+ WebRtcSpl_AffineTransformVector(bTmp16, b16, 3, 7, 2, kVectorSize);
+ for (size_t kk = 0; kk < kVectorSize; ++kk) {
+ EXPECT_EQ((B[kk]*3+7)>>2, bTmp16[kk]);
+ }
+ WebRtcSpl_ScaleAndAddVectorsWithRound(b16, 3, b16, 2, 2, bTmp16, kVectorSize);
+ for (size_t kk = 0; kk < kVectorSize; ++kk) {
+ EXPECT_EQ((B[kk]*3+B[kk]*2+2)>>2, bTmp16[kk]);
+ }
+
+ // This call accumulates into bTmp16, so the expectation adds the result of
+ // the previous step to the new affine term.
+ WebRtcSpl_AddAffineVectorToVector(bTmp16, b16, 3, 7, 2, kVectorSize);
+ for (size_t kk = 0; kk < kVectorSize; ++kk) {
+ EXPECT_EQ(((B[kk]*3+B[kk]*2+2)>>2)+((b16[kk]*3+7)>>2), bTmp16[kk]);
+ }
+
+ WebRtcSpl_ScaleVector(b16, bTmp16, 13, kVectorSize, 2);
+ for (size_t kk = 0; kk < kVectorSize; ++kk) {
+ EXPECT_EQ((b16[kk]*13)>>2, bTmp16[kk]);
+ }
+ WebRtcSpl_ScaleVectorWithSat(b16, bTmp16, 13, kVectorSize, 2);
+ for (size_t kk = 0; kk < kVectorSize; ++kk) {
+ EXPECT_EQ((b16[kk]*13)>>2, bTmp16[kk]);
+ }
+ WebRtcSpl_ScaleAndAddVectors(a16, 13, 2, b16, 7, 2, bTmp16, kVectorSize);
+ for (size_t kk = 0; kk < kVectorSize; ++kk) {
+ EXPECT_EQ(((a16[kk]*13)>>2)+((b16[kk]*7)>>2), bTmp16[kk]);
+ }
+
+ WebRtcSpl_AddVectorsAndShift(bTmp16, a16, b16, kVectorSize, 2);
+ for (size_t kk = 0; kk < kVectorSize; ++kk) {
+ EXPECT_EQ(B[kk] >> 1, bTmp16[kk]);
+ }
+ // The second input is passed as a pointer to its LAST element; the
+ // expectation pairs a16[kk] with b16[3 - kk].
+ WebRtcSpl_ReverseOrderMultArrayElements(bTmp16, a16, &b16[3], kVectorSize, 2);
+ for (size_t kk = 0; kk < kVectorSize; ++kk) {
+ EXPECT_EQ((a16[kk]*b16[3-kk])>>2, bTmp16[kk]);
+ }
+ WebRtcSpl_ElementwiseVectorMult(bTmp16, a16, b16, kVectorSize, 6);
+ for (size_t kk = 0; kk < kVectorSize; ++kk) {
+ EXPECT_EQ((a16[kk]*b16[kk])>>6, bTmp16[kk]);
+ }
+
+ // The first three inputs are expected to give 32767; only the last input
+ // (1100) is expected to give a different result, 32749.
+ WebRtcSpl_SqrtOfOneMinusXSquared(b16, kVectorSize, bTmp16);
+ for (size_t kk = 0; kk < kVectorSize - 1; ++kk) {
+ EXPECT_EQ(32767, bTmp16[kk]);
+ }
+ EXPECT_EQ(32749, bTmp16[kVectorSize - 1]);
+
+ EXPECT_EQ(0, WebRtcSpl_GetScalingSquare(b16, kVectorSize, 1));
+}
+
+// Runs WebRtcSpl_LevinsonDurbin on two inputs: one for which the call is
+// expected to return 0 and one for which it is expected to return 1. After
+// the second call the LPC and reflection-coefficient outputs are compared
+// with reference values.
+TEST_F(SplTest, EstimatorsTest) {
+  const size_t kOrder = 2;
+  const int32_t unstable_input[] = { 4, 12, 133, 1100 };
+  const int32_t stable_input[] = { 1100, 133, 12, 4 };
+  const int16_t kExpectedLpc[] = { 4096, -497, 15, 0 };
+  const int16_t kExpectedRefl[] = { -3962, 123, 0, 0 };
+  int16_t lpc_out[kOrder + 2] = { 0 };
+  int16_t refl_out[kOrder + 2] = { 0 };
+
+  EXPECT_EQ(0,
+            WebRtcSpl_LevinsonDurbin(unstable_input, lpc_out, refl_out, kOrder));
+  EXPECT_EQ(1,
+            WebRtcSpl_LevinsonDurbin(stable_input, lpc_out, refl_out, kOrder));
+
+  for (size_t idx = 0; idx < kOrder + 2; ++idx) {
+    EXPECT_EQ(kExpectedLpc[idx], lpc_out[idx]);
+    EXPECT_EQ(kExpectedRefl[idx], refl_out[idx]);
+  }
+}
+
+// Smoke-tests the fast Q12 MA and AR filters on a single sample and checks
+// the return value of WebRtcSpl_FilterAR for a 4-sample input.
+TEST_F(SplTest, FilterTest) {
+ const size_t kVectorSize = 4;
+ const size_t kFilterOrder = 3;
+ int16_t A[] = {1, 2, 33, 100};
+ int16_t A5[] = {1, 2, 33, 100, -5};
+ int16_t B[] = {4, 12, 133, 110};
+ int16_t data_in[kVectorSize];
+ int16_t data_out[kVectorSize];
+ int16_t bTmp16Low[kVectorSize];
+ int16_t bState[kVectorSize];
+ int16_t bStateLow[kVectorSize];
+
+ WebRtcSpl_ZerosArrayW16(bState, kVectorSize);
+ WebRtcSpl_ZerosArrayW16(bStateLow, kVectorSize);
+
+ // data_in is a copy of A; data_out starts zeroed.
+ for (size_t kk = 0; kk < kVectorSize; ++kk) {
+ data_in[kk] = A[kk];
+ data_out[kk] = 0;
+ }
+
+ // MA filters.
+ // Note that the input data has |kFilterOrder| states before the actual
+ // data (one sample).
+ WebRtcSpl_FilterMAFastQ12(&data_in[kFilterOrder], data_out, B,
+ kFilterOrder + 1, 1);
+ EXPECT_EQ(0, data_out[0]);
+ // AR filters.
+ // Note that the output data has |kFilterOrder| states before the actual
+ // data (one sample).
+ WebRtcSpl_FilterARFastQ12(data_in, &data_out[kFilterOrder], A,
+ kFilterOrder + 1, 1);
+ EXPECT_EQ(0, data_out[kFilterOrder]);
+
+ // Only the return value is checked here; it is expected to equal the
+ // number of input samples.
+ EXPECT_EQ(kVectorSize, WebRtcSpl_FilterAR(A5,
+ 5,
+ data_in,
+ kVectorSize,
+ bState,
+ kVectorSize,
+ bStateLow,
+ kVectorSize,
+ data_out,
+ bTmp16Low,
+ kVectorSize));
+}
+
+// Checks that the random generators produce a fixed sequence for a fixed
+// seed. The calls share one seed variable that each call may update, so
+// their order matters.
+TEST_F(SplTest, RandTest) {
+  const int kVectorSize = 4;
+  int16_t expected_uniform[] = {3653, 12446, 8525, 30691};
+  int16_t generated[kVectorSize];
+  uint32_t seed = 100000;
+
+  // Scalar draws.
+  EXPECT_EQ(7086, WebRtcSpl_RandU(&seed));
+  EXPECT_EQ(31565, WebRtcSpl_RandU(&seed));
+  EXPECT_EQ(-9786, WebRtcSpl_RandN(&seed));
+
+  // Array draw: the return value is expected to equal the requested length.
+  EXPECT_EQ(kVectorSize, WebRtcSpl_RandUArray(generated, kVectorSize, &seed));
+  for (int idx = 0; idx < kVectorSize; ++idx) {
+    EXPECT_EQ(expected_uniform[idx], generated[idx]);
+  }
+}
+
+// Dot product of the shared test vector with itself, called with a scaling
+// argument of 2, compared with a precomputed value.
+TEST_F(SplTest, DotProductWithScaleTest) {
+  EXPECT_EQ(605362796,
+            WebRtcSpl_DotProductWithScale(vector16, vector16,
+                                          kVector16Size, 2));
+}
+
+// Cross-correlates the file-level |vector16| with a local second sequence
+// and compares the three results with reference values. A second set of
+// reference values is used when WebRtcSpl_CrossCorrelation does not point at
+// the C implementation.
+TEST_F(SplTest, CrossCorrelationTest) {
+ // Note the function arguments relation specified by API.
+ const size_t kCrossCorrelationDimension = 3;
+ const int kShift = 2;
+ const int kStep = 1;
+ const size_t kSeqDimension = 6;
+
+ const int16_t kVector16[kVector16Size] = {1, 4323, 1963,
+ WEBRTC_SPL_WORD16_MAX, WEBRTC_SPL_WORD16_MIN + 5, -3333, -876, 8483, 142};
+ int32_t vector32[kCrossCorrelationDimension] = {0};
+
+ WebRtcSpl_CrossCorrelation(vector32, vector16, kVector16, kSeqDimension,
+ kCrossCorrelationDimension, kShift, kStep);
+
+ // WebRtcSpl_CrossCorrelationC() and WebRtcSpl_CrossCorrelationNeon()
+ // are not bit-exact.
+ const int32_t kExpected[kCrossCorrelationDimension] =
+ {-266947903, -15579555, -171282001};
+ const int32_t* expected = kExpected;
+ // On non-MIPS builds, switch to the alternative reference values whenever
+ // the active implementation is not the C one.
+#if !defined(MIPS32_LE)
+ const int32_t kExpectedNeon[kCrossCorrelationDimension] =
+ {-266947901, -15579553, -171281999};
+ if (WebRtcSpl_CrossCorrelation != WebRtcSpl_CrossCorrelationC) {
+ expected = kExpectedNeon;
+ }
+#endif
+ for (size_t i = 0; i < kCrossCorrelationDimension; ++i) {
+ EXPECT_EQ(expected[i], vector32[i]);
+ }
+}
+
+TEST_F(SplTest, AutoCorrelationTest) {
+  const int32_t kReference[kVector16Size] = {302681398, 14223410, -121705063,
+      -85221647, -17104971, 61806945, 6644603, -669329, 43};
+  int32_t result[kVector16Size];
+  int scale = 0;
+
+  // The call is made with order |kVector16Size| - 1; it is expected to return
+  // |kVector16Size| and to report a scale of 3.
+  EXPECT_EQ(kVector16Size,
+            WebRtcSpl_AutoCorrelation(vector16, kVector16Size,
+                                      kVector16Size - 1, result, &scale));
+  EXPECT_EQ(3, scale);
+
+  size_t idx = 0;
+  while (idx < kVector16Size) {
+    EXPECT_EQ(kReference[idx], result[idx]);
+    ++idx;
+  }
+}
+
+TEST_F(SplTest, SignalProcessingTest) {
+  const size_t kVectorSize = 4;
+  const int16_t kHanning[4] = { 2399, 8192, 13985, 16384 };
+  int input[] = {1, 2, 33, 100};
+  int16_t b16[kVectorSize];
+  int16_t bTmp16[kVectorSize];
+  int energy_scale = 0;
+
+  // Load the input values into the 16-bit work vector.
+  for (size_t n = 0; n != kVectorSize; ++n) {
+    b16[n] = input[n];
+  }
+
+  // TODO(bjornv): Activate the Reflection Coefficient tests when refactoring.
+// WebRtcSpl_ReflCoefToLpc(b16, kVectorSize, bTmp16);
+//// for (int kk = 0; kk < kVectorSize; ++kk) {
+//// EXPECT_EQ(aTmp16[kk], bTmp16[kk]);
+//// }
+// WebRtcSpl_LpcToReflCoef(bTmp16, kVectorSize, b16);
+//// for (int kk = 0; kk < kVectorSize; ++kk) {
+//// EXPECT_EQ(a16[kk], b16[kk]);
+//// }
+// WebRtcSpl_AutoCorrToReflCoef(b32, kVectorSize, bTmp16);
+//// for (int kk = 0; kk < kVectorSize; ++kk) {
+//// EXPECT_EQ(aTmp16[kk], bTmp16[kk]);
+//// }
+
+  // The generated window must match the reference table entry by entry.
+  WebRtcSpl_GetHanningWindow(bTmp16, kVectorSize);
+  for (size_t n = 0; n != kVectorSize; ++n) {
+    EXPECT_EQ(kHanning[n], bTmp16[n]);
+  }
+
+  // Reload the input before measuring its energy.
+  for (size_t n = 0; n != kVectorSize; ++n) {
+    b16[n] = input[n];
+  }
+  EXPECT_EQ(11094 , WebRtcSpl_Energy(b16, kVectorSize, &energy_scale));
+  EXPECT_EQ(0, energy_scale);
+}
+
+TEST_F(SplTest, FFTTest) {
+  // 16 input values, transformed in place with a second argument of 3.
+  // NOTE(review): presumably 8 interleaved complex samples (2^3 points) -
+  // confirm against the WebRtcSpl_ComplexFFT() documentation.
+  int16_t B[] = {1, 2, 33, 100,
+                 2, 3, 34, 101,
+                 3, 4, 35, 102,
+                 4, 5, 36, 103};
+
+  // TODO: Only the return codes are verified. The contents of |B| after each
+  // call are not compared against reference data yet.
+  EXPECT_EQ(0, WebRtcSpl_ComplexFFT(B, 3, 1));
+  EXPECT_EQ(0, WebRtcSpl_ComplexIFFT(B, 3, 1));
+  WebRtcSpl_ComplexBitReverse(B, 3);
+}
+
+TEST_F(SplTest, Resample48WithSaturationTest) {
+  // Resamples 3 * kBlockSize input samples down to 2 * kBlockSize output
+  // samples.
+  const size_t kBlockSize = 16;
+
+  // Saturated input: 24 samples at -32768 followed by 31 samples at 32767
+  // (3 * kBlockSize + 7 values in total).
+  const int32_t kVectorSaturated[3 * kBlockSize + 7] = {
+    -32768, -32768, -32768, -32768, -32768, -32768, -32768, -32768,
+    -32768, -32768, -32768, -32768, -32768, -32768, -32768, -32768,
+    -32768, -32768, -32768, -32768, -32768, -32768, -32768, -32768,
+    32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767,
+    32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767,
+    32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767,
+    32767, 32767, 32767, 32767, 32767, 32767, 32767
+  };
+
+  // Expected 32-bit outputs: value 1 for the first part of the output,
+  // value 2 for the last part.
+  const int32_t kRefValue32kHz1 = -1077493760;
+  const int32_t kRefValue32kHz2 = 1077493645;
+
+  // Expected 16-bit outputs after the bit shift with saturation; both are
+  // saturated.
+  const int16_t kRefValue16kHz1 = -32768;
+  const int16_t kRefValue16kHz2 = 32767;
+
+  // Output storage.
+  int32_t resampled[2 * kBlockSize];
+  int16_t resampled_w16[2 * kBlockSize];
+
+  WebRtcSpl_Resample48khzTo32khz(kVectorSaturated, resampled, kBlockSize);
+  WebRtcSpl_VectorBitShiftW32ToW16(resampled_w16, 2 * kBlockSize, resampled,
+                                   15);
+
+  // Compare the outputs against the references. Positions 12-15 are skipped
+  // to account for the filter lag.
+  for (size_t i = 0; i < 2 * kBlockSize; ++i) {
+    if (i < 12) {
+      EXPECT_EQ(kRefValue32kHz1, resampled[i]);
+      EXPECT_EQ(kRefValue16kHz1, resampled_w16[i]);
+    } else if (i >= 16) {
+      EXPECT_EQ(kRefValue32kHz2, resampled[i]);
+      EXPECT_EQ(kRefValue16kHz2, resampled_w16[i]);
+    }
+  }
+}
diff --git a/third_party/webrtc/src/webrtc/common_audio/signal_processing/spl_init.c b/third_party/webrtc/src/webrtc/common_audio/signal_processing/spl_init.c
new file mode 100644
index 00000000..73c2039e
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/spl_init.c
@@ -0,0 +1,140 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+/* The global function contained in this file initializes SPL function
+ * pointers to the generic C, ARM NEON or MIPS implementations.
+ *
+ * Some code came from common/rtcd.c in the WebM project.
+ */
+
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+#include "webrtc/system_wrappers/interface/cpu_features_wrapper.h"
+
+/* Define the SPL function pointers. They carry no initializer here; the
+ * InitPointersTo*() functions below assign them when WebRtcSpl_Init() runs.
+ */
+MaxAbsValueW16 WebRtcSpl_MaxAbsValueW16;
+MaxAbsValueW32 WebRtcSpl_MaxAbsValueW32;
+MaxValueW16 WebRtcSpl_MaxValueW16;
+MaxValueW32 WebRtcSpl_MaxValueW32;
+MinValueW16 WebRtcSpl_MinValueW16;
+MinValueW32 WebRtcSpl_MinValueW32;
+CrossCorrelation WebRtcSpl_CrossCorrelation;
+DownsampleFast WebRtcSpl_DownsampleFast;
+ScaleAndAddVectorsWithRound WebRtcSpl_ScaleAndAddVectorsWithRound;
+
+#if (defined(WEBRTC_DETECT_NEON) || !defined(WEBRTC_HAS_NEON)) && \
+ !defined(MIPS32_LE)
+/* Initialize function pointers to the generic C version.
+ * Declared with (void) so the definition is a prototype, matching
+ * InitFunctionPointers(void) in this file.
+ */
+static void InitPointersToC(void) {
+  WebRtcSpl_MaxAbsValueW16 = WebRtcSpl_MaxAbsValueW16C;
+  WebRtcSpl_MaxAbsValueW32 = WebRtcSpl_MaxAbsValueW32C;
+  WebRtcSpl_MaxValueW16 = WebRtcSpl_MaxValueW16C;
+  WebRtcSpl_MaxValueW32 = WebRtcSpl_MaxValueW32C;
+  WebRtcSpl_MinValueW16 = WebRtcSpl_MinValueW16C;
+  WebRtcSpl_MinValueW32 = WebRtcSpl_MinValueW32C;
+  WebRtcSpl_CrossCorrelation = WebRtcSpl_CrossCorrelationC;
+  WebRtcSpl_DownsampleFast = WebRtcSpl_DownsampleFastC;
+  WebRtcSpl_ScaleAndAddVectorsWithRound =
+      WebRtcSpl_ScaleAndAddVectorsWithRoundC;
+}
+#endif
+
+#if defined(WEBRTC_DETECT_NEON) || defined(WEBRTC_HAS_NEON)
+/* Initialize function pointers to the Neon version.
+ * Declared with (void) so the definition is a prototype, matching
+ * InitFunctionPointers(void) in this file.
+ */
+static void InitPointersToNeon(void) {
+  WebRtcSpl_MaxAbsValueW16 = WebRtcSpl_MaxAbsValueW16Neon;
+  WebRtcSpl_MaxAbsValueW32 = WebRtcSpl_MaxAbsValueW32Neon;
+  WebRtcSpl_MaxValueW16 = WebRtcSpl_MaxValueW16Neon;
+  WebRtcSpl_MaxValueW32 = WebRtcSpl_MaxValueW32Neon;
+  WebRtcSpl_MinValueW16 = WebRtcSpl_MinValueW16Neon;
+  WebRtcSpl_MinValueW32 = WebRtcSpl_MinValueW32Neon;
+  WebRtcSpl_CrossCorrelation = WebRtcSpl_CrossCorrelationNeon;
+  WebRtcSpl_DownsampleFast = WebRtcSpl_DownsampleFastNeon;
+  /* No Neon variant is assigned for this pointer; it keeps the generic C
+   * implementation. */
+  WebRtcSpl_ScaleAndAddVectorsWithRound =
+      WebRtcSpl_ScaleAndAddVectorsWithRoundC;
+}
+#endif
+
+#if defined(MIPS32_LE)
+/* Initialize function pointers to the MIPS version.
+ * Declared with (void) so the definition is a prototype, matching
+ * InitFunctionPointers(void) in this file.
+ */
+static void InitPointersToMIPS(void) {
+  WebRtcSpl_MaxAbsValueW16 = WebRtcSpl_MaxAbsValueW16_mips;
+  WebRtcSpl_MaxValueW16 = WebRtcSpl_MaxValueW16_mips;
+  WebRtcSpl_MaxValueW32 = WebRtcSpl_MaxValueW32_mips;
+  WebRtcSpl_MinValueW16 = WebRtcSpl_MinValueW16_mips;
+  WebRtcSpl_MinValueW32 = WebRtcSpl_MinValueW32_mips;
+  WebRtcSpl_CrossCorrelation = WebRtcSpl_CrossCorrelation_mips;
+  WebRtcSpl_DownsampleFast = WebRtcSpl_DownsampleFast_mips;
+  /* These two pointers get a MIPS variant only when MIPS_DSP_R1_LE is
+   * defined; otherwise they fall back to the generic C implementation. */
+#if defined(MIPS_DSP_R1_LE)
+  WebRtcSpl_MaxAbsValueW32 = WebRtcSpl_MaxAbsValueW32_mips;
+  WebRtcSpl_ScaleAndAddVectorsWithRound =
+      WebRtcSpl_ScaleAndAddVectorsWithRound_mips;
+#else
+  WebRtcSpl_MaxAbsValueW32 = WebRtcSpl_MaxAbsValueW32C;
+  WebRtcSpl_ScaleAndAddVectorsWithRound =
+      WebRtcSpl_ScaleAndAddVectorsWithRoundC;
+#endif
+}
+#endif
+
+/* Assigns every SPL function pointer by calling one InitPointersTo*()
+ * function. The choice is made at build time, except when
+ * WEBRTC_DETECT_NEON is defined: then the Neon set is used only if
+ * WebRtc_GetCPUFeaturesARM() reports kCPUFeatureNEON, and the generic C set
+ * otherwise. Passed to once() by WebRtcSpl_Init().
+ */
+static void InitFunctionPointers(void) {
+#if defined(WEBRTC_DETECT_NEON)
+ if ((WebRtc_GetCPUFeaturesARM() & kCPUFeatureNEON) != 0) {
+ InitPointersToNeon();
+ } else {
+ InitPointersToC();
+ }
+#elif defined(WEBRTC_HAS_NEON)
+ InitPointersToNeon();
+#elif defined(MIPS32_LE)
+ InitPointersToMIPS();
+#else
+ InitPointersToC();
+#endif /* WEBRTC_DETECT_NEON */
+}
+
+#if defined(WEBRTC_POSIX)
+#include <pthread.h>
+
+/* Hands |func| to pthread_once(). The control variable is a single
+ * function-local static, so it is shared by every call to once() regardless
+ * of which |func| is passed.
+ */
+static void once(void (*func)(void)) {
+  static pthread_once_t once_control = PTHREAD_ONCE_INIT;
+
+  pthread_once(&once_control, func);
+}
+
+#elif defined(_WIN32)
+#include <windows.h>
+
+/* Calls |func| the first time once() is entered and never again; the call
+ * and the |done| flag update happen while holding a critical section.
+ */
+static void once(void (*func)(void)) {
+ /* Didn't use InitializeCriticalSection() since there's no race-free context
+ * in which to execute it.
+ *
+ * TODO(kma): Change to different implementation (e.g.
+ * InterlockedCompareExchangePointer) to avoid issues similar to
+ * http://code.google.com/p/webm/issues/detail?id=467.
+ */
+ /* The CRITICAL_SECTION members are filled in by hand with a static
+ * initializer instead of being set up by InitializeCriticalSection().
+ * NOTE(review): this relies on the member layout of CRITICAL_SECTION -
+ * confirm against the Windows SDK in use.
+ */
+ static CRITICAL_SECTION lock = {(void *)((size_t)-1), -1, 0, 0, 0, 0};
+ /* Function-local static, so it is shared by every call regardless of |func|. */
+ static int done = 0;
+
+ EnterCriticalSection(&lock);
+ if (!done) {
+ func();
+ done = 1;
+ }
+ LeaveCriticalSection(&lock);
+}
+
+/* There's no fallback version as an #else block here to ensure thread safety.
+ * In case of neither pthread for WEBRTC_POSIX nor _WIN32 is present, build
+ * system should pick it up.
+ */
+#endif /* WEBRTC_POSIX */
+
+/* Public entry point: assigns the SPL function pointers by running
+ * InitFunctionPointers() through once(). Declared with (void) so that the
+ * definition is a prototype, consistent with InitFunctionPointers(void).
+ */
+void WebRtcSpl_Init(void) {
+  once(InitFunctionPointers);
+}
diff --git a/third_party/webrtc/src/webrtc/common_audio/signal_processing/spl_sqrt.c b/third_party/webrtc/src/webrtc/common_audio/signal_processing/spl_sqrt.c
new file mode 100644
index 00000000..24db4f82
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/spl_sqrt.c
@@ -0,0 +1,184 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+/*
+ * This file contains the function WebRtcSpl_Sqrt().
+ * The description header can be found in signal_processing_library.h
+ *
+ */
+
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+
+#include <assert.h>
+
+int32_t WebRtcSpl_SqrtLocal(int32_t in);
+
+// Evaluates the fixed-point polynomial described in the block comment below
+// on |in| and returns the result with a rounding offset of 32768 added.
+// WebRtcSpl_Sqrt() calls this on its normalized, non-negative input.
+int32_t WebRtcSpl_SqrtLocal(int32_t in)
+{
+
+ int16_t x_half, t16;
+ int32_t A, B, x2;
+
+ /* The following block performs:
+ y=in/2
+ x=y-2^30
+ x_half=x/2^31
+ t = 1 + (x_half) - 0.5*((x_half)^2) + 0.5*((x_half)^3) - 0.625*((x_half)^4)
+ + 0.875*((x_half)^5)
+ */
+
+ B = in / 2;
+
+ B = B - ((int32_t)0x40000000); // B = in/2 - 2^30, i.e. x/2 in Q31
+ x_half = (int16_t)(B >> 16); // x_half = upper 16 bits of B, i.e. x/2 in Q15
+ B = B + ((int32_t)0x40000000); // Add 0.5 (2^30 in Q31): B = in/2 again
+ B = B + ((int32_t)0x40000000); // Add 0.5 twice (since 1.0 does not exist in Q31): B = 1 + x/2
+
+ x2 = ((int32_t)x_half) * ((int32_t)x_half) * 2; // x2 = (x/2)^2
+ A = -x2; // A = -(x/2)^2
+ B = B + (A >> 1); // B = 1 + x/2 - 0.5*(x/2)^2
+
+ A >>= 16;
+ A = A * A * 2; // A = (x/2)^4
+ t16 = (int16_t)(A >> 16);
+ B += -20480 * t16 * 2; // B = B - 0.625*A (20480 = 0.625 * 2^15)
+ // After this, B = 1 + x/2 - 0.5*(x/2)^2 - 0.625*(x/2)^4
+
+ A = x_half * t16 * 2; // A = (x/2)^5
+ t16 = (int16_t)(A >> 16);
+ B += 28672 * t16 * 2; // B = B + 0.875*A (28672 = 0.875 * 2^15)
+ // After this, B = 1 + x/2 - 0.5*(x/2)^2 - 0.625*(x/2)^4 + 0.875*(x/2)^5
+
+ t16 = (int16_t)(x2 >> 16);
+ A = x_half * t16 * 2; // A = (x/2)^3
+
+ B = B + (A >> 1); // B = B + 0.5*A
+ // After this, B = 1 + x/2 - 0.5*(x/2)^2 + 0.5*(x/2)^3 - 0.625*(x/2)^4 + 0.875*(x/2)^5
+
+ B = B + ((int32_t)32768); // Round off bit
+
+ return B;
+}
+
+// Fixed-point square root of |value|. Returns 0 for an input of 0. The input
+// is normalized, passed through WebRtcSpl_SqrtLocal(), and the result is
+// shifted back down by half the normalization shift.
+int32_t WebRtcSpl_Sqrt(int32_t value)
+{
+ /*
+ Algorithm:
+
+ Six term Taylor Series is used here to compute the square root of a number
+ y^0.5 = (1+x)^0.5 where x = y-1
+ = 1+(x/2)-0.5*((x/2)^2)+0.5*((x/2)^3)-0.625*((x/2)^4)+0.875*((x/2)^5)
+ 0.5 <= y < 1
+
+ Example of how the algorithm works, with ut=sqrt(in), and
+ with in=73632 and ut=271 (even shift value case):
+
+ in=73632
+ y= in/131072
+ x=y-1
+ t = 1 + (x/2) - 0.5*((x/2)^2) + 0.5*((x/2)^3) - 0.625*((x/2)^4) + 0.875*((x/2)^5)
+ ut=t*(1/sqrt(2))*512
+
+ or:
+
+ in=73632
+ in2=73632*2^14
+ y= in2/2^31
+ x=y-1
+ t = 1 + (x/2) - 0.5*((x/2)^2) + 0.5*((x/2)^3) - 0.625*((x/2)^4) + 0.875*((x/2)^5)
+ ut=t*(1/sqrt(2))
+ ut2=ut*2^9
+
+ which gives:
+
+ in = 73632
+ in2 = 1206386688
+ y = 0.56176757812500
+ x = -0.43823242187500
+ t = 0.74973506527313
+ ut = 0.53014274874797
+ ut2 = 2.714330873589594e+002
+
+ or:
+
+ in=73632
+ in2=73632*2^14
+ y=in2/2
+ x=y-2^30
+ x_half=x/2^31
+ t = 1 + (x_half) - 0.5*((x_half)^2) + 0.5*((x_half)^3) - 0.625*((x_half)^4)
+ + 0.875*((x_half)^5)
+ ut=t*(1/sqrt(2))
+ ut2=ut*2^9
+
+ which gives:
+
+ in = 73632
+ in2 = 1206386688
+ y = 603193344
+ x = -470548480
+ x_half = -0.21911621093750
+ t = 0.74973506527313
+ ut = 0.53014274874797
+ ut2 = 2.714330873589594e+002
+
+ */
+
+ int16_t x_norm, nshift, t16, sh;
+ int32_t A;
+
+ int16_t k_sqrt_2 = 23170; // 1/sqrt2 (==5a82)
+
+ A = value;
+
+ if (A == 0)
+ return (int32_t)0; // sqrt(0) = 0
+
+ sh = WebRtcSpl_NormW32(A); // # shifts to normalize A
+ A = WEBRTC_SPL_LSHIFT_W32(A, sh); // Normalize A
+ // Add the rounding offset unless that would exceed WEBRTC_SPL_WORD32_MAX,
+ // in which case A is set to the maximum instead.
+ if (A < (WEBRTC_SPL_WORD32_MAX - 32767))
+ {
+ A = A + ((int32_t)32768); // Round off bit
+ } else
+ {
+ A = WEBRTC_SPL_WORD32_MAX;
+ }
+
+ x_norm = (int16_t)(A >> 16); // x_norm = AH
+
+ nshift = (sh / 2);
+ assert(nshift >= 0);
+
+ A = (int32_t)WEBRTC_SPL_LSHIFT_W32((int32_t)x_norm, 16);
+ A = WEBRTC_SPL_ABS_W32(A); // A = abs(x_norm<<16)
+ A = WebRtcSpl_SqrtLocal(A); // A = sqrt(A)
+
+ if (2 * nshift == sh) {
+ // Even shift value case
+
+ t16 = (int16_t)(A >> 16); // t16 = AH
+
+ A = k_sqrt_2 * t16 * 2; // A = 1/sqrt(2)*t16
+ A = A + ((int32_t)32768); // Round off
+ A = A & ((int32_t)0x7fff0000); // Round off
+
+ A >>= 15; // A = A>>15
+
+ } else
+ {
+ A >>= 16; // A = A>>16
+ }
+
+ A = A & ((int32_t)0x0000ffff);
+ A >>= nshift; // De-normalize the result.
+
+ return A;
+}
diff --git a/third_party/webrtc/src/webrtc/common_audio/signal_processing/spl_sqrt_floor.c b/third_party/webrtc/src/webrtc/common_audio/signal_processing/spl_sqrt_floor.c
new file mode 100644
index 00000000..370307a0
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/spl_sqrt_floor.c
@@ -0,0 +1,77 @@
+/*
+ * Written by Wilco Dijkstra, 1996. The following email exchange establishes the
+ * license.
+ *
+ * From: Wilco Dijkstra <Wilco.Dijkstra@ntlworld.com>
+ * Date: Fri, Jun 24, 2011 at 3:20 AM
+ * Subject: Re: sqrt routine
+ * To: Kevin Ma <kma@google.com>
+ * Hi Kevin,
+ * Thanks for asking. Those routines are public domain (originally posted to
+ * comp.sys.arm a long time ago), so you can use them freely for any purpose.
+ * Cheers,
+ * Wilco
+ *
+ * ----- Original Message -----
+ * From: "Kevin Ma" <kma@google.com>
+ * To: <Wilco.Dijkstra@ntlworld.com>
+ * Sent: Thursday, June 23, 2011 11:44 PM
+ * Subject: Fwd: sqrt routine
+ * Hi Wilco,
+ * I saw your sqrt routine from several web sites, including
+ * http://www.finesse.demon.co.uk/steven/sqrt.html.
+ * Just wonder if there's any copyright information with your Successive
+ * approximation routines, or if I can freely use it for any purpose.
+ * Thanks.
+ * Kevin
+ */
+
+// Minor modifications in code style for WebRTC, 2012.
+
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+
+/*
+ * Algorithm:
+ * Successive approximation of the equation (root + delta) ^ 2 = N
+ * until delta < 1. If delta < 1 we have the integer part of SQRT (N).
+ * Use delta = 2^i for i = 15 .. 0.
+ *
+ * Output precision is 16 bits. Note for large input values (close to
+ * 0x7FFFFFFF), bit 15 (the highest bit of the low 16-bit half word)
+ * contains the MSB information (a non-sign value). Take care
+ * if you need to cast the output to int16_t type.
+ *
+ * If the input value is negative, it returns 0.
+ */
+
+/* One successive-approximation step for bit position N. Expects int32_t
+ * locals |root|, |value| and |try1| in the expanding scope. Wrapped in
+ * do { } while (0) so that the expansion behaves as a single statement
+ * wherever it is used.
+ */
+#define WEBRTC_SPL_SQRT_ITER(N) \
+  do { \
+    try1 = root + (1 << (N)); \
+    if (value >= try1 << (N)) \
+    { \
+      value -= try1 << (N); \
+      root |= 2 << (N); \
+    } \
+  } while (0)
+
+/* Returns the integer part of the square root of |value| by running the
+ * successive-approximation step for bit positions 15 down to 0. |root| is
+ * accumulated one bit position higher than the result, hence the final
+ * shift.
+ */
+int32_t WebRtcSpl_SqrtFloor(int32_t value)
+{
+    int32_t root = 0, try1;
+    int bit;
+
+    for (bit = 15; bit >= 0; bit--)
+    {
+        WEBRTC_SPL_SQRT_ITER(bit);
+    }
+
+    return root >> 1;
+}
diff --git a/third_party/webrtc/src/webrtc/common_audio/signal_processing/spl_sqrt_floor_arm.S b/third_party/webrtc/src/webrtc/common_audio/signal_processing/spl_sqrt_floor_arm.S
new file mode 100644
index 00000000..f44ddd46
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/spl_sqrt_floor_arm.S
@@ -0,0 +1,110 @@
+@
+@ Written by Wilco Dijkstra, 1996. The following email exchange establishes the
+@ license.
+@
+@ From: Wilco Dijkstra <Wilco.Dijkstra@ntlworld.com>
+@ Date: Fri, Jun 24, 2011 at 3:20 AM
+@ Subject: Re: sqrt routine
+@ To: Kevin Ma <kma@google.com>
+@ Hi Kevin,
+@ Thanks for asking. Those routines are public domain (originally posted to
+@ comp.sys.arm a long time ago), so you can use them freely for any purpose.
+@ Cheers,
+@ Wilco
+@
+@ ----- Original Message -----
+@ From: "Kevin Ma" <kma@google.com>
+@ To: <Wilco.Dijkstra@ntlworld.com>
+@ Sent: Thursday, June 23, 2011 11:44 PM
+@ Subject: Fwd: sqrt routine
+@ Hi Wilco,
+@ I saw your sqrt routine from several web sites, including
+@ http://www.finesse.demon.co.uk/steven/sqrt.html.
+@ Just wonder if there's any copyright information with your Successive
+@ approximation routines, or if I can freely use it for any purpose.
+@ Thanks.
+@ Kevin
+
+@ Minor modifications in code style for WebRTC, 2012.
+@ Output is bit-exact with the reference C code in spl_sqrt_floor.c.
+
+@ Input : r0 32 bit unsigned integer
+@ Output: r0 = INT (SQRT (r0)), precision is 16 bits
+@ Registers touched: r1, r2
+
+#include "webrtc/system_wrappers/interface/asm_defines.h"
+
+GLOBAL_FUNCTION WebRtcSpl_SqrtFloor
+.align 2
+DEFINE_FUNCTION WebRtcSpl_SqrtFloor
+ @ r1 = 0xC0000000 (constant added in every step), r2 = 0x40000000 (first
+ @ trial value).
+ mov r1, #3 << 30
+ mov r2, #1 << 30
+
+ @ unroll for i = 0 .. 15
+ @ Each step compares r0 with r2 rotated right by 2*i, subtracts that operand
+ @ from r0 when r0 is higher or same (unsigned), and then forms
+ @ r2 = r1 + (r2 << 1) + carry, where the carry is the one left by the cmp.
+
+ cmp r0, r2, ror #2 * 0
+ subhs r0, r0, r2, ror #2 * 0
+ adc r2, r1, r2, lsl #1
+
+ cmp r0, r2, ror #2 * 1
+ subhs r0, r0, r2, ror #2 * 1
+ adc r2, r1, r2, lsl #1
+
+ cmp r0, r2, ror #2 * 2
+ subhs r0, r0, r2, ror #2 * 2
+ adc r2, r1, r2, lsl #1
+
+ cmp r0, r2, ror #2 * 3
+ subhs r0, r0, r2, ror #2 * 3
+ adc r2, r1, r2, lsl #1
+
+ cmp r0, r2, ror #2 * 4
+ subhs r0, r0, r2, ror #2 * 4
+ adc r2, r1, r2, lsl #1
+
+ cmp r0, r2, ror #2 * 5
+ subhs r0, r0, r2, ror #2 * 5
+ adc r2, r1, r2, lsl #1
+
+ cmp r0, r2, ror #2 * 6
+ subhs r0, r0, r2, ror #2 * 6
+ adc r2, r1, r2, lsl #1
+
+ cmp r0, r2, ror #2 * 7
+ subhs r0, r0, r2, ror #2 * 7
+ adc r2, r1, r2, lsl #1
+
+ cmp r0, r2, ror #2 * 8
+ subhs r0, r0, r2, ror #2 * 8
+ adc r2, r1, r2, lsl #1
+
+ cmp r0, r2, ror #2 * 9
+ subhs r0, r0, r2, ror #2 * 9
+ adc r2, r1, r2, lsl #1
+
+ cmp r0, r2, ror #2 * 10
+ subhs r0, r0, r2, ror #2 * 10
+ adc r2, r1, r2, lsl #1
+
+ cmp r0, r2, ror #2 * 11
+ subhs r0, r0, r2, ror #2 * 11
+ adc r2, r1, r2, lsl #1
+
+ cmp r0, r2, ror #2 * 12
+ subhs r0, r0, r2, ror #2 * 12
+ adc r2, r1, r2, lsl #1
+
+ cmp r0, r2, ror #2 * 13
+ subhs r0, r0, r2, ror #2 * 13
+ adc r2, r1, r2, lsl #1
+
+ cmp r0, r2, ror #2 * 14
+ subhs r0, r0, r2, ror #2 * 14
+ adc r2, r1, r2, lsl #1
+
+ cmp r0, r2, ror #2 * 15
+ subhs r0, r0, r2, ror #2 * 15
+ adc r2, r1, r2, lsl #1
+
+ @ The result is r2 with its two top bits cleared.
+ bic r0, r2, #3 << 30 @ for rounding add: cmp r0, r2 adc r2, #1
+ bx lr
diff --git a/third_party/webrtc/src/webrtc/common_audio/signal_processing/spl_sqrt_floor_mips.c b/third_party/webrtc/src/webrtc/common_audio/signal_processing/spl_sqrt_floor_mips.c
new file mode 100644
index 00000000..8716459b
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/spl_sqrt_floor_mips.c
@@ -0,0 +1,207 @@
+/*
+ * Written by Wilco Dijkstra, 1996. The following email exchange establishes the
+ * license.
+ *
+ * From: Wilco Dijkstra <Wilco.Dijkstra@ntlworld.com>
+ * Date: Fri, Jun 24, 2011 at 3:20 AM
+ * Subject: Re: sqrt routine
+ * To: Kevin Ma <kma@google.com>
+ * Hi Kevin,
+ * Thanks for asking. Those routines are public domain (originally posted to
+ * comp.sys.arm a long time ago), so you can use them freely for any purpose.
+ * Cheers,
+ * Wilco
+ *
+ * ----- Original Message -----
+ * From: "Kevin Ma" <kma@google.com>
+ * To: <Wilco.Dijkstra@ntlworld.com>
+ * Sent: Thursday, June 23, 2011 11:44 PM
+ * Subject: Fwd: sqrt routine
+ * Hi Wilco,
+ * I saw your sqrt routine from several web sites, including
+ * http://www.finesse.demon.co.uk/steven/sqrt.html.
+ * Just wonder if there's any copyright information with your Successive
+ * approximation routines, or if I can freely use it for any purpose.
+ * Thanks.
+ * Kevin
+ */
+
+// Minor modifications in code style for WebRTC, 2012.
+// Code optimizations for MIPS, 2013.
+
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+
+/*
+ * Algorithm:
+ * Successive approximation of the equation (root + delta) ^ 2 = N
+ * until delta < 1. If delta < 1 we have the integer part of SQRT (N).
+ * Use delta = 2^i for i = 15 .. 0.
+ *
+ * Output precision is 16 bits. Note for large input values (close to
+ * 0x7FFFFFFF), bit 15 (the highest bit of the low 16-bit half word)
+ * contains the MSB information (a non-sign value). Take care
+ * if you need to cast the output to int16_t type.
+ *
+ * If the input value is negative, it returns 0.
+ */
+
+
+/* MIPS inline-assembly version of the 16-step successive approximation.
+ *
+ * Every step builds a trial value in tmp1, sets tmp2 with slt when
+ * value < tmp1, and then uses a pair of movz instructions so that
+ * value -= tmp1 and the new bit in root are committed only when tmp2 is
+ * zero. root is accumulated one bit position higher than the result, hence
+ * the final shift by one.
+ */
+int32_t WebRtcSpl_SqrtFloor(int32_t value)
+{
+ int32_t root = 0, tmp1, tmp2, tmp3, tmp4;
+
+ __asm __volatile(
+ ".set push \n\t"
+ ".set noreorder \n\t"
+
+ // First step. subu is used rather than sub: sub raises an overflow
+ // exception, and value - 0x40000000 overflows for inputs below
+ // -0x40000000 even though the difference is discarded in that case.
+ // All later steps already use subu.
+ "lui %[tmp1], 0x4000 \n\t"
+ "slt %[tmp2], %[value], %[tmp1] \n\t"
+ "subu %[tmp3], %[value], %[tmp1] \n\t"
+ "lui %[tmp1], 0x1 \n\t"
+ "or %[tmp4], %[root], %[tmp1] \n\t"
+ "movz %[value], %[tmp3], %[tmp2] \n\t"
+ "movz %[root], %[tmp4], %[tmp2] \n\t"
+
+ "addiu %[tmp1], $0, 0x4000 \n\t"
+ "addu %[tmp1], %[tmp1], %[root] \n\t"
+ "sll %[tmp1], 14 \n\t"
+ "slt %[tmp2], %[value], %[tmp1] \n\t"
+ "subu %[tmp3], %[value], %[tmp1] \n\t"
+ "ori %[tmp4], %[root], 0x8000 \n\t"
+ "movz %[value], %[tmp3], %[tmp2] \n\t"
+ "movz %[root], %[tmp4], %[tmp2] \n\t"
+
+ "addiu %[tmp1], $0, 0x2000 \n\t"
+ "addu %[tmp1], %[tmp1], %[root] \n\t"
+ "sll %[tmp1], 13 \n\t"
+ "slt %[tmp2], %[value], %[tmp1] \n\t"
+ "subu %[tmp3], %[value], %[tmp1] \n\t"
+ "ori %[tmp4], %[root], 0x4000 \n\t"
+ "movz %[value], %[tmp3], %[tmp2] \n\t"
+ "movz %[root], %[tmp4], %[tmp2] \n\t"
+
+ "addiu %[tmp1], $0, 0x1000 \n\t"
+ "addu %[tmp1], %[tmp1], %[root] \n\t"
+ "sll %[tmp1], 12 \n\t"
+ "slt %[tmp2], %[value], %[tmp1] \n\t"
+ "subu %[tmp3], %[value], %[tmp1] \n\t"
+ "ori %[tmp4], %[root], 0x2000 \n\t"
+ "movz %[value], %[tmp3], %[tmp2] \n\t"
+ "movz %[root], %[tmp4], %[tmp2] \n\t"
+
+ "addiu %[tmp1], $0, 0x800 \n\t"
+ "addu %[tmp1], %[tmp1], %[root] \n\t"
+ "sll %[tmp1], 11 \n\t"
+ "slt %[tmp2], %[value], %[tmp1] \n\t"
+ "subu %[tmp3], %[value], %[tmp1] \n\t"
+ "ori %[tmp4], %[root], 0x1000 \n\t"
+ "movz %[value], %[tmp3], %[tmp2] \n\t"
+ "movz %[root], %[tmp4], %[tmp2] \n\t"
+
+ "addiu %[tmp1], $0, 0x400 \n\t"
+ "addu %[tmp1], %[tmp1], %[root] \n\t"
+ "sll %[tmp1], 10 \n\t"
+ "slt %[tmp2], %[value], %[tmp1] \n\t"
+ "subu %[tmp3], %[value], %[tmp1] \n\t"
+ "ori %[tmp4], %[root], 0x800 \n\t"
+ "movz %[value], %[tmp3], %[tmp2] \n\t"
+ "movz %[root], %[tmp4], %[tmp2] \n\t"
+
+ "addiu %[tmp1], $0, 0x200 \n\t"
+ "addu %[tmp1], %[tmp1], %[root] \n\t"
+ "sll %[tmp1], 9 \n\t"
+ "slt %[tmp2], %[value], %[tmp1] \n\t"
+ "subu %[tmp3], %[value], %[tmp1] \n\t"
+ "ori %[tmp4], %[root], 0x400 \n\t"
+ "movz %[value], %[tmp3], %[tmp2] \n\t"
+ "movz %[root], %[tmp4], %[tmp2] \n\t"
+
+ "addiu %[tmp1], $0, 0x100 \n\t"
+ "addu %[tmp1], %[tmp1], %[root] \n\t"
+ "sll %[tmp1], 8 \n\t"
+ "slt %[tmp2], %[value], %[tmp1] \n\t"
+ "subu %[tmp3], %[value], %[tmp1] \n\t"
+ "ori %[tmp4], %[root], 0x200 \n\t"
+ "movz %[value], %[tmp3], %[tmp2] \n\t"
+ "movz %[root], %[tmp4], %[tmp2] \n\t"
+
+ "addiu %[tmp1], $0, 0x80 \n\t"
+ "addu %[tmp1], %[tmp1], %[root] \n\t"
+ "sll %[tmp1], 7 \n\t"
+ "slt %[tmp2], %[value], %[tmp1] \n\t"
+ "subu %[tmp3], %[value], %[tmp1] \n\t"
+ "ori %[tmp4], %[root], 0x100 \n\t"
+ "movz %[value], %[tmp3], %[tmp2] \n\t"
+ "movz %[root], %[tmp4], %[tmp2] \n\t"
+
+ "addiu %[tmp1], $0, 0x40 \n\t"
+ "addu %[tmp1], %[tmp1], %[root] \n\t"
+ "sll %[tmp1], 6 \n\t"
+ "slt %[tmp2], %[value], %[tmp1] \n\t"
+ "subu %[tmp3], %[value], %[tmp1] \n\t"
+ "ori %[tmp4], %[root], 0x80 \n\t"
+ "movz %[value], %[tmp3], %[tmp2] \n\t"
+ "movz %[root], %[tmp4], %[tmp2] \n\t"
+
+ "addiu %[tmp1], $0, 0x20 \n\t"
+ "addu %[tmp1], %[tmp1], %[root] \n\t"
+ "sll %[tmp1], 5 \n\t"
+ "slt %[tmp2], %[value], %[tmp1] \n\t"
+ "subu %[tmp3], %[value], %[tmp1] \n\t"
+ "ori %[tmp4], %[root], 0x40 \n\t"
+ "movz %[value], %[tmp3], %[tmp2] \n\t"
+ "movz %[root], %[tmp4], %[tmp2] \n\t"
+
+ "addiu %[tmp1], $0, 0x10 \n\t"
+ "addu %[tmp1], %[tmp1], %[root] \n\t"
+ "sll %[tmp1], 4 \n\t"
+ "slt %[tmp2], %[value], %[tmp1] \n\t"
+ "subu %[tmp3], %[value], %[tmp1] \n\t"
+ "ori %[tmp4], %[root], 0x20 \n\t"
+ "movz %[value], %[tmp3], %[tmp2] \n\t"
+ "movz %[root], %[tmp4], %[tmp2] \n\t"
+
+ "addiu %[tmp1], $0, 0x8 \n\t"
+ "addu %[tmp1], %[tmp1], %[root] \n\t"
+ "sll %[tmp1], 3 \n\t"
+ "slt %[tmp2], %[value], %[tmp1] \n\t"
+ "subu %[tmp3], %[value], %[tmp1] \n\t"
+ "ori %[tmp4], %[root], 0x10 \n\t"
+ "movz %[value], %[tmp3], %[tmp2] \n\t"
+ "movz %[root], %[tmp4], %[tmp2] \n\t"
+
+ "addiu %[tmp1], $0, 0x4 \n\t"
+ "addu %[tmp1], %[tmp1], %[root] \n\t"
+ "sll %[tmp1], 2 \n\t"
+ "slt %[tmp2], %[value], %[tmp1] \n\t"
+ "subu %[tmp3], %[value], %[tmp1] \n\t"
+ "ori %[tmp4], %[root], 0x8 \n\t"
+ "movz %[value], %[tmp3], %[tmp2] \n\t"
+ "movz %[root], %[tmp4], %[tmp2] \n\t"
+
+ "addiu %[tmp1], $0, 0x2 \n\t"
+ "addu %[tmp1], %[tmp1], %[root] \n\t"
+ "sll %[tmp1], 1 \n\t"
+ "slt %[tmp2], %[value], %[tmp1] \n\t"
+ "subu %[tmp3], %[value], %[tmp1] \n\t"
+ "ori %[tmp4], %[root], 0x4 \n\t"
+ "movz %[value], %[tmp3], %[tmp2] \n\t"
+ "movz %[root], %[tmp4], %[tmp2] \n\t"
+
+ // Last step: no shift is applied and value is not updated, because
+ // value is not read again after this point.
+ "addiu %[tmp1], $0, 0x1 \n\t"
+ "addu %[tmp1], %[tmp1], %[root] \n\t"
+ "slt %[tmp2], %[value], %[tmp1] \n\t"
+ "ori %[tmp4], %[root], 0x2 \n\t"
+ "movz %[root], %[tmp4], %[tmp2] \n\t"
+
+ ".set pop \n\t"
+
+ : [root] "+r" (root), [value] "+r" (value),
+ [tmp1] "=&r" (tmp1), [tmp2] "=&r" (tmp2),
+ [tmp3] "=&r" (tmp3), [tmp4] "=&r" (tmp4)
+ :
+ );
+
+ return root >> 1;
+}
+
diff --git a/third_party/webrtc/src/webrtc/common_audio/signal_processing/splitting_filter.c b/third_party/webrtc/src/webrtc/common_audio/signal_processing/splitting_filter.c
new file mode 100644
index 00000000..36fcf355
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/splitting_filter.c
@@ -0,0 +1,208 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+/*
+ * This file contains the splitting filter functions.
+ *
+ */
+
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+
+#include <assert.h>
+
+// Maximum number of samples in a low/high-band frame.
+// Sizes the local work buffers in WebRtcSpl_AnalysisQMF() and
+// WebRtcSpl_SynthesisQMF(), which both assert that the band length does not
+// exceed it.
+enum
+{
+ kMaxBandFrameLength = 320 // 10 ms at 64 kHz.
+};
+
+// QMF filter coefficients in Q16.
+// Each table holds the three coefficients that WebRtcSpl_AllPassQMF() uses
+// for its three cascaded sections.
+static const uint16_t WebRtcSpl_kAllPassFilter1[3] = {6418, 36982, 57261};
+static const uint16_t WebRtcSpl_kAllPassFilter2[3] = {21333, 49062, 63010};
+
+// Runs one first-order all-pass section over |length| samples:
+//
+//   out[n] = in[n-1] + coefficient * (in[n] - out[n-1])
+//
+// On entry |state[0]| holds in[-1] and |state[1]| holds out[-1]. On exit they
+// hold the last input and the last output sample, ready for the next call.
+static void WebRtcSpl_AllPassSection(const int32_t* in, size_t length,
+                                     int32_t* out, uint16_t coefficient,
+                                     int32_t* state)
+{
+    size_t k;
+    int32_t diff;
+
+    // First sample: use the states stored in memory.
+    // "diff" should be safe from wrap around since max values are 2^25
+    diff = WebRtcSpl_SubSatW32(in[0], state[1]);
+    out[0] = WEBRTC_SPL_SCALEDIFF32(coefficient, diff, state[0]);
+
+    // Remaining samples: use the previous input and output values.
+    for (k = 1; k < length; k++)
+    {
+        diff = WebRtcSpl_SubSatW32(in[k], out[k - 1]);
+        out[k] = WEBRTC_SPL_SCALEDIFF32(coefficient, diff, in[k - 1]);
+    }
+
+    // Update states.
+    state[0] = in[length - 1];
+    state[1] = out[length - 1];
+}
+
+///////////////////////////////////////////////////////////////////////////////////////////////
+// WebRtcSpl_AllPassQMF(...)
+//
+// Allpass filter used by the analysis and synthesis parts of the QMF filter.
+//
+// Input:
+// - in_data : Input data sequence (Q10)
+// - data_length : Length of data sequence (>2)
+// - filter_coefficients : Filter coefficients (length 3, Q16)
+//
+// Input & Output:
+// - filter_state : Filter state (length 6, Q10).
+//
+// Output:
+// - out_data : Output data sequence (Q10), length equal to
+// |data_length|
+//
+
+void WebRtcSpl_AllPassQMF(int32_t* in_data, size_t data_length,
+                          int32_t* out_data, const uint16_t* filter_coefficients,
+                          int32_t* filter_state)
+{
+    // The procedure is to filter the input with three first order all pass
+    // filters (cascade operations).
+    //
+    //         a_3 + q^-1    a_2 + q^-1    a_1 + q^-1
+    // y[n] =  -----------   -----------   -----------   x[n]
+    //         1 + a_3q^-1   1 + a_2q^-1   1 + a_1q^-1
+    //
+    // The input vector |filter_coefficients| includes these three filter
+    // coefficients. The filter state contains the in_data state, in_data[-1],
+    // followed by the out_data state, out_data[-1]. This is repeated for each
+    // cascade, so section i uses filter_state[2 * i] and
+    // filter_state[2 * i + 1].
+    //
+    // To save memory the sections ping-pong between the two buffers, which
+    // means the values in |in_data| are changed during the process.
+
+    // First all-pass cascade; filter from in_data to out_data.
+    WebRtcSpl_AllPassSection(in_data, data_length, out_data,
+                             filter_coefficients[0], filter_state);
+
+    // Second all-pass cascade; filter from out_data to in_data.
+    WebRtcSpl_AllPassSection(out_data, data_length, in_data,
+                             filter_coefficients[1], filter_state + 2);
+
+    // Third all-pass cascade; filter from in_data to out_data.
+    WebRtcSpl_AllPassSection(in_data, data_length, out_data,
+                             filter_coefficients[2], filter_state + 4);
+}
+
+// Splits |in_data| (|in_data_length| samples, which must be even) into
+// |low_band| and |high_band|, each in_data_length / 2 samples long.
+// |filter_state1| and |filter_state2| are the WebRtcSpl_AllPassQMF() states
+// of the two branches and are updated by the call. The band length must not
+// exceed kMaxBandFrameLength.
+void WebRtcSpl_AnalysisQMF(const int16_t* in_data, size_t in_data_length,
+                           int16_t* low_band, int16_t* high_band,
+                           int32_t* filter_state1, int32_t* filter_state2)
+{
+    size_t i;
+    int32_t tmp;
+    int32_t half_in1[kMaxBandFrameLength];
+    int32_t half_in2[kMaxBandFrameLength];
+    int32_t filter1[kMaxBandFrameLength];
+    int32_t filter2[kMaxBandFrameLength];
+    const size_t band_length = in_data_length / 2;
+    assert(in_data_length % 2 == 0);
+    assert(band_length <= kMaxBandFrameLength);
+
+    // Split even and odd samples. Also scale them to Q10. A multiplication
+    // is used instead of a left shift because left-shifting a negative
+    // sample is undefined behavior in C.
+    for (i = 0; i < band_length; i++)
+    {
+        half_in2[i] = (int32_t)in_data[2 * i] * (1 << 10);
+        half_in1[i] = (int32_t)in_data[2 * i + 1] * (1 << 10);
+    }
+
+    // All pass filter even and odd samples, independently.
+    WebRtcSpl_AllPassQMF(half_in1, band_length, filter1,
+                         WebRtcSpl_kAllPassFilter1, filter_state1);
+    WebRtcSpl_AllPassQMF(half_in2, band_length, filter2,
+                         WebRtcSpl_kAllPassFilter2, filter_state2);
+
+    // Take the sum and difference of filtered version of odd and even
+    // branches to get upper & lower band.
+    for (i = 0; i < band_length; i++)
+    {
+        tmp = (filter1[i] + filter2[i] + 1024) >> 11;
+        low_band[i] = WebRtcSpl_SatW32ToW16(tmp);
+
+        tmp = (filter1[i] - filter2[i] + 1024) >> 11;
+        high_band[i] = WebRtcSpl_SatW32ToW16(tmp);
+    }
+}
+
+// Combines |low_band| and |high_band| (|band_length| samples each) into
+// |out_data|, which receives 2 * band_length samples. |filter_state1| and
+// |filter_state2| are the WebRtcSpl_AllPassQMF() states of the two branches
+// and are updated by the call. |band_length| must not exceed
+// kMaxBandFrameLength.
+void WebRtcSpl_SynthesisQMF(const int16_t* low_band, const int16_t* high_band,
+                            size_t band_length, int16_t* out_data,
+                            int32_t* filter_state1, int32_t* filter_state2)
+{
+    int32_t tmp;
+    int32_t half_in1[kMaxBandFrameLength];
+    int32_t half_in2[kMaxBandFrameLength];
+    int32_t filter1[kMaxBandFrameLength];
+    int32_t filter2[kMaxBandFrameLength];
+    size_t i;
+    assert(band_length <= kMaxBandFrameLength);
+
+    // Obtain the sum and difference channels out of upper and lower-band
+    // channels. Also scale to the Q10 domain. A multiplication is used
+    // instead of a left shift because left-shifting a negative value is
+    // undefined behavior in C.
+    for (i = 0; i < band_length; i++)
+    {
+        tmp = (int32_t)low_band[i] + (int32_t)high_band[i];
+        half_in1[i] = tmp * (1 << 10);
+        tmp = (int32_t)low_band[i] - (int32_t)high_band[i];
+        half_in2[i] = tmp * (1 << 10);
+    }
+
+    // all-pass filter the sum and difference channels
+    WebRtcSpl_AllPassQMF(half_in1, band_length, filter1,
+                         WebRtcSpl_kAllPassFilter2, filter_state1);
+    WebRtcSpl_AllPassQMF(half_in2, band_length, filter2,
+                         WebRtcSpl_kAllPassFilter1, filter_state2);
+
+    // The filtered signals are the even and odd samples of the output.
+    // Combine them. The signals are in Q10, so shift them back to Q0 and take
+    // care of saturation.
+    for (i = 0; i < band_length; i++)
+    {
+        tmp = (filter2[i] + 512) >> 10;
+        out_data[2 * i] = WebRtcSpl_SatW32ToW16(tmp);
+
+        tmp = (filter1[i] + 512) >> 10;
+        out_data[2 * i + 1] = WebRtcSpl_SatW32ToW16(tmp);
+    }
+}
diff --git a/third_party/webrtc/src/webrtc/common_audio/signal_processing/sqrt_of_one_minus_x_squared.c b/third_party/webrtc/src/webrtc/common_audio/signal_processing/sqrt_of_one_minus_x_squared.c
new file mode 100644
index 00000000..ff78b522
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/sqrt_of_one_minus_x_squared.c
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+/*
+ * This file contains the function WebRtcSpl_SqrtOfOneMinusXSquared().
+ * The description header can be found in signal_processing_library.h
+ *
+ */
+
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+
+void WebRtcSpl_SqrtOfOneMinusXSquared(int16_t *xQ15, size_t vector_length,
+                                      int16_t *yQ15)
+{
+    // yQ15[k] = sqrt(1 - xQ15[k]^2); 1 ~= 0.99999999906 is 1073741823 in Q30.
+    const int32_t one_q30 = 1073741823;
+    size_t k = 0;
+
+    while (k < vector_length)
+    {
+        const int16_t x = xQ15[k];
+        const int32_t x_sq_q30 = x * x; // x^2 in Q30
+        const int32_t root_q15 = WebRtcSpl_Sqrt(one_q30 - x_sq_q30);
+        yQ15[k] = (int16_t)root_q15; // sqrt(1-x^2) in Q15
+        k++;
+    }
+}
diff --git a/third_party/webrtc/src/webrtc/common_audio/signal_processing/vector_scaling_operations.c b/third_party/webrtc/src/webrtc/common_audio/signal_processing/vector_scaling_operations.c
new file mode 100644
index 00000000..fdefd067
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/vector_scaling_operations.c
@@ -0,0 +1,165 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+/*
+ * This file contains implementations of the functions
+ * WebRtcSpl_VectorBitShiftW16()
+ * WebRtcSpl_VectorBitShiftW32()
+ * WebRtcSpl_VectorBitShiftW32ToW16()
+ * WebRtcSpl_ScaleVector()
+ * WebRtcSpl_ScaleVectorWithSat()
+ * WebRtcSpl_ScaleAndAddVectors()
+ * WebRtcSpl_ScaleAndAddVectorsWithRoundC()
+ */
+
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+
+void WebRtcSpl_VectorBitShiftW16(int16_t *res, size_t length,
+                                 const int16_t *in, int16_t right_shifts)
+{
+    // Right-shifts each element when right_shifts > 0, else left-shifts by -right_shifts.
+    size_t idx;
+    if (right_shifts <= 0)
+    {
+        const int left_shifts = -right_shifts;
+        for (idx = 0; idx < length; idx++)
+        {
+            res[idx] = in[idx] << left_shifts;
+        }
+        return;
+    }
+    for (idx = 0; idx < length; idx++)
+    {
+        res[idx] = in[idx] >> right_shifts;
+    }
+}
+
+void WebRtcSpl_VectorBitShiftW32(int32_t *out_vector,
+                                 size_t vector_length,
+                                 const int32_t *in_vector,
+                                 int16_t right_shifts)
+{
+    // 32-bit element-wise shift: right when right_shifts > 0, else left by -right_shifts.
+    size_t n;
+
+    if (right_shifts > 0)
+    {
+        for (n = 0; n < vector_length; n++)
+        {
+            out_vector[n] = in_vector[n] >> right_shifts;
+        }
+        return;
+    }
+    for (n = 0; n < vector_length; n++)
+    {
+        out_vector[n] = in_vector[n] << (-right_shifts);
+    }
+}
+
+void WebRtcSpl_VectorBitShiftW32ToW16(int16_t* out, size_t length,
+                                      const int32_t* in, int right_shifts) {
+  // Shifts each 32-bit input right (right_shifts >= 0) or left (negative
+  // right_shifts), then narrows it through WebRtcSpl_SatW32ToW16().
+  const int shift_left = right_shifts < 0;
+  const int amount = shift_left ? -right_shifts : right_shifts;
+  size_t n;
+
+  for (n = 0; n < length; n++) {
+    int32_t shifted;
+    if (shift_left) {
+      shifted = in[n] << amount;
+    } else {
+      shifted = in[n] >> amount;
+    }
+    out[n] = WebRtcSpl_SatW32ToW16(shifted);
+  }
+}
+
+void WebRtcSpl_ScaleVector(const int16_t *in_vector, int16_t *out_vector,
+                           int16_t gain, size_t in_vector_length,
+                           int16_t right_shifts)
+{
+    // Performs vector operation: out_vector = (gain*in_vector)>>right_shifts,
+    // with the shifted product truncated to 16 bits.
+    size_t k;
+
+    for (k = 0; k < in_vector_length; k++)
+    {
+        const int32_t product = in_vector[k] * gain;
+        const int32_t scaled = product >> right_shifts;
+
+        // Plain narrowing cast: no saturation is applied here.
+        out_vector[k] = (int16_t)scaled;
+    }
+}
+
+void WebRtcSpl_ScaleVectorWithSat(const int16_t *in_vector, int16_t *out_vector,
+                                  int16_t gain, size_t in_vector_length,
+                                  int16_t right_shifts)
+{
+    // Performs vector operation: out_vector = (gain*in_vector)>>right_shifts,
+    // narrowing every result through WebRtcSpl_SatW32ToW16().
+    size_t k;
+
+    for (k = 0; k < in_vector_length; k++) {
+        const int32_t product = in_vector[k] * gain;
+        const int32_t scaled = product >> right_shifts;
+
+        // 32-bit to 16-bit conversion is delegated to the helper.
+        out_vector[k] = WebRtcSpl_SatW32ToW16(scaled);
+    }
+}
+
+void WebRtcSpl_ScaleAndAddVectors(const int16_t *in1, int16_t gain1, int shift1,
+                                  const int16_t *in2, int16_t gain2, int shift2,
+                                  int16_t *out, size_t vector_length)
+{
+    // Performs vector operation: out = (gain1*in1)>>shift1 + (gain2*in2)>>shift2.
+    // Each scaled term is truncated to 16 bits before the two are added.
+    // No saturation is applied at any step.
+    size_t k;
+
+    for (k = 0; k < vector_length; k++)
+    {
+        const int32_t product1 = gain1 * in1[k];
+        const int32_t product2 = gain2 * in2[k];
+        const int16_t term1 = (int16_t)(product1 >> shift1);
+        const int16_t term2 = (int16_t)(product2 >> shift2);
+
+        // The sum is narrowed to 16 bits when stored.
+        out[k] = (int16_t)(term1 + term2);
+    }
+}
+
+// C version of WebRtcSpl_ScaleAndAddVectorsWithRound() for generic platforms. Returns -1 on invalid arguments, else 0.
+int WebRtcSpl_ScaleAndAddVectorsWithRoundC(const int16_t* in_vector1,
+                                           int16_t in_vector1_scale,
+                                           const int16_t* in_vector2,
+                                           int16_t in_vector2_scale,
+                                           int right_shifts,
+                                           int16_t* out_vector,
+                                           size_t length) {
+  size_t i = 0;
+  int round_value = 0;
+
+  if (in_vector1 == NULL || in_vector2 == NULL || out_vector == NULL ||
+      length == 0 || right_shifts < 0) {
+    return -1;
+  }
+  // Compute the rounding term only after validation: a negative shift count is undefined behaviour.
+  round_value = (1 << right_shifts) >> 1;
+  for (i = 0; i < length; i++) {
+    out_vector[i] = (int16_t)((
+        in_vector1[i] * in_vector1_scale + in_vector2[i] * in_vector2_scale +
+        round_value) >> right_shifts);
+  }
+  return 0;
+}
diff --git a/third_party/webrtc/src/webrtc/common_audio/signal_processing/vector_scaling_operations_mips.c b/third_party/webrtc/src/webrtc/common_audio/signal_processing/vector_scaling_operations_mips.c
new file mode 100644
index 00000000..dd73eeae
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/vector_scaling_operations_mips.c
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+/*
+ * This file contains implementations of the functions
+ * WebRtcSpl_ScaleAndAddVectorsWithRound_mips()
+ */
+
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+
+int WebRtcSpl_ScaleAndAddVectorsWithRound_mips(const int16_t* in_vector1,
+ int16_t in_vector1_scale,
+ const int16_t* in_vector2,
+ int16_t in_vector2_scale,
+ int right_shifts,
+ int16_t* out_vector,
+ size_t length) { // MIPS inline-asm variant: one output sample per loop iteration
+ int16_t r0 = 0, r1 = 0; // scratch for the two loaded input samples
+ int16_t *in1 = (int16_t*)in_vector1; // const cast away: the asm advances this cursor and only loads through it
+ int16_t *in2 = (int16_t*)in_vector2; // same for the second input
+ int16_t *out = out_vector; // write cursor advanced by the asm
+ size_t i = 0;
+ int value32 = 0; // receives the shifted accumulator value
+ // Returns -1 for a NULL pointer, zero length or negative shift; 0 otherwise.
+ if (in_vector1 == NULL || in_vector2 == NULL || out_vector == NULL ||
+ length == 0 || right_shifts < 0) {
+ return -1;
+ }
+ for (i = 0; i < length; i++) {
+ __asm __volatile (
+ "lh %[r0], 0(%[in1]) \n\t" // load halfword from in1
+ "lh %[r1], 0(%[in2]) \n\t" // load halfword from in2
+ "mult %[r0], %[in_vector1_scale] \n\t" // hi/lo = r0 * scale1
+ "madd %[r1], %[in_vector2_scale] \n\t" // hi/lo += r1 * scale2
+ "extrv_r.w %[value32], $ac0, %[right_shifts] \n\t" // DSP extract: accumulator >> right_shifts with rounding
+ "addiu %[in1], %[in1], 2 \n\t" // advance in1 by one int16_t
+ "addiu %[in2], %[in2], 2 \n\t" // advance in2 by one int16_t
+ "sh %[value32], 0(%[out]) \n\t" // store the low halfword of the result
+ "addiu %[out], %[out], 2 \n\t" // advance out by one int16_t
+ : [value32] "=&r" (value32), [out] "+r" (out), [in1] "+r" (in1),
+ [in2] "+r" (in2), [r0] "=&r" (r0), [r1] "=&r" (r1)
+ : [in_vector1_scale] "r" (in_vector1_scale),
+ [in_vector2_scale] "r" (in_vector2_scale),
+ [right_shifts] "r" (right_shifts)
+ : "hi", "lo", "memory" // accumulator registers are clobbered; memory is written via out
+ );
+ }
+ return 0;
+}
diff --git a/third_party/webrtc/src/webrtc/common_audio/wav_file.h b/third_party/webrtc/src/webrtc/common_audio/wav_file.h
new file mode 100644
index 00000000..2eadd3f7
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/common_audio/wav_file.h
@@ -0,0 +1,115 @@
+/*
+ * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_COMMON_AUDIO_WAV_FILE_H_
+#define WEBRTC_COMMON_AUDIO_WAV_FILE_H_
+
+#include <stddef.h>  // size_t, needed by the C wrapper prototypes as well
+#include <stdint.h>  // uint32_t, int16_t
+#include <stdio.h>   // FILE
+#ifdef __cplusplus
+#include <cstddef>
+#include <string>
+#include "webrtc/base/constructormagic.h"
+
+namespace webrtc {
+
+// Interface to provide access to WAV file parameters (abstract: all accessors are pure virtual).
+class WavFile {
+ public:
+ virtual ~WavFile() {} // virtual so implementations can be destroyed through a WavFile pointer
+ // Accessors every implementation must provide.
+ virtual int sample_rate() const = 0;
+ virtual int num_channels() const = 0;
+ virtual uint32_t num_samples() const = 0;
+};
+
+// Simple C++ class for writing 16-bit PCM WAV files. All error handling is
+// by calls to RTC_CHECK(), making it unsuitable for anything but debug code.
+class WavWriter final : public WavFile {
+ public:
+ // Open a new WAV file for writing.
+ WavWriter(const std::string& filename, int sample_rate, int num_channels);
+
+ // Close the WAV file, after writing its header.
+ ~WavWriter();
+
+ // Write additional samples to the file. Each sample is in the range
+ // [-32768,32767], and there must be the previously specified number of
+ // interleaved channels.
+ void WriteSamples(const float* samples, size_t num_samples);
+ void WriteSamples(const int16_t* samples, size_t num_samples);
+ // WavFile accessors; each returns the matching private member.
+ int sample_rate() const override { return sample_rate_; }
+ int num_channels() const override { return num_channels_; }
+ uint32_t num_samples() const override { return num_samples_; }
+
+ private:
+ void Close(); // declared here only; the definition is not part of this header
+ const int sample_rate_; // const member: cannot change after construction
+ const int num_channels_; // const member: cannot change after construction
+ uint32_t num_samples_; // Total number of samples written to file.
+ FILE* file_handle_; // Output file, owned by this class
+ // Macro from constructormagic.h; presumably disables copying (verify there).
+ RTC_DISALLOW_COPY_AND_ASSIGN(WavWriter);
+};
+
+// Follows the conventions of WavWriter.
+class WavReader final : public WavFile {
+ public:
+ // Opens an existing WAV file for reading.
+ explicit WavReader(const std::string& filename);
+
+ // Close the WAV file.
+ ~WavReader();
+
+ // Returns the number of samples read. If this is less than requested,
+ // verifies that the end of the file was reached.
+ size_t ReadSamples(size_t num_samples, float* samples);
+ size_t ReadSamples(size_t num_samples, int16_t* samples);
+ // WavFile accessors; each returns the matching private member.
+ int sample_rate() const override { return sample_rate_; }
+ int num_channels() const override { return num_channels_; }
+ uint32_t num_samples() const override { return num_samples_; }
+
+ private:
+ void Close(); // declared here only; the definition is not part of this header
+ int sample_rate_; // non-const, unlike WavWriter's counterpart
+ int num_channels_; // non-const, unlike WavWriter's counterpart
+ uint32_t num_samples_; // Total number of samples in the file.
+ uint32_t num_samples_remaining_; // presumably the samples not yet returned by ReadSamples(); confirm in the .cc
+ FILE* file_handle_; // Input file, owned by this class.
+ // Macro from constructormagic.h; presumably disables copying (verify there).
+ RTC_DISALLOW_COPY_AND_ASSIGN(WavReader);
+};
+
+} // namespace webrtc
+
+extern "C" {
+#endif // __cplusplus
+
+// C wrappers for the WavWriter class. rtc_WavWriter is an incomplete (opaque) type in this header.
+typedef struct rtc_WavWriter rtc_WavWriter;
+rtc_WavWriter* rtc_WavOpen(const char* filename,
+ int sample_rate,
+ int num_channels); // parameters mirror the WavWriter constructor
+void rtc_WavClose(rtc_WavWriter* wf);
+void rtc_WavWriteSamples(rtc_WavWriter* wf,
+ const float* samples,
+ size_t num_samples); // only a float overload is exposed to C
+int rtc_WavSampleRate(const rtc_WavWriter* wf);
+int rtc_WavNumChannels(const rtc_WavWriter* wf);
+uint32_t rtc_WavNumSamples(const rtc_WavWriter* wf);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // WEBRTC_COMMON_AUDIO_WAV_FILE_H_
diff --git a/third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_common.h b/third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_common.h
new file mode 100644
index 00000000..1e24ca99
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_common.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_COMMON_H_
+#define WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_COMMON_H_
+
+#include "webrtc/typedefs.h"
+
+#ifdef _MSC_VER /* visual c++ */
+#define ALIGN16_BEG __declspec(align(16)) /* MSVC: alignment specifier goes before the type */
+#define ALIGN16_END /* empty on MSVC */
+#else /* gcc or icc */
+#define ALIGN16_BEG /* empty on gcc/icc */
+#define ALIGN16_END __attribute__((aligned(16))) /* gcc/icc: attribute goes after the type */
+#endif
+/* Shared AEC tables and constants; the definitions visible in aec_core.c have 65 entries per 16-byte-aligned table. */
+extern ALIGN16_BEG const float ALIGN16_END WebRtcAec_sqrtHanning[65];
+extern ALIGN16_BEG const float ALIGN16_END WebRtcAec_weightCurve[65];
+extern ALIGN16_BEG const float ALIGN16_END WebRtcAec_overDriveCurve[65];
+extern const float WebRtcAec_kExtendedSmoothingCoefficients[2][2];
+extern const float WebRtcAec_kNormalSmoothingCoefficients[2][2];
+extern const float WebRtcAec_kMinFarendPSD;
+
+#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_COMMON_H_
+
diff --git a/third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_core.c b/third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_core.c
new file mode 100644
index 00000000..b2162ac0
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_core.c
@@ -0,0 +1,1929 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+/*
+ * The core AEC algorithm, which is presented with time-aligned signals.
+ */
+
+#include "webrtc/modules/audio_processing/aec/aec_core.h"
+
+#ifdef WEBRTC_AEC_DEBUG_DUMP
+#include <stdio.h>
+#endif
+
+#include <assert.h>
+#include <math.h>
+#include <stddef.h> // size_t
+#include <stdlib.h>
+#include <string.h>
+
+#include "webrtc/common_audio/ring_buffer.h"
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+#include "webrtc/modules/audio_processing/aec/aec_common.h"
+#include "webrtc/modules/audio_processing/aec/aec_core_internal.h"
+#include "webrtc/modules/audio_processing/aec/aec_rdft.h"
+#include "webrtc/modules/audio_processing/logging/aec_logging.h"
+#include "webrtc/modules/audio_processing/utility/delay_estimator_wrapper.h"
+#include "webrtc/system_wrappers/interface/cpu_features_wrapper.h"
+#include "webrtc/typedefs.h"
+
+
+// Buffer size (partitions)
+static const size_t kBufSizePartitions = 250; // 1 second of audio in 16 kHz.
+
+// Metrics
+static const int subCountLen = 4;
+static const int countLen = 50;
+static const int kDelayMetricsAggregationWindow = 1250; // 5 seconds at 16 kHz.
+
+// Quantities to control H band scaling for SWB input
+static const int flagHbandCn = 1; // flag for adding comfort noise in H band
+static const float cnScaleHband =
+ (float)0.4; // scale for comfort noise in H band
+// Initial bin for averaging nlp gain in low band
+static const int freqAvgIc = PART_LEN / 2;
+
+// Matlab code to produce table:
+// win = sqrt(hanning(127)); win = [0 ; win(1:64)];
+// fprintf(1, '\t%.14f, %.14f, %.14f,\n', win);
+ALIGN16_BEG const float ALIGN16_END WebRtcAec_sqrtHanning[65] = {
+ 0.00000000000000f, 0.02454122852291f, 0.04906767432742f, 0.07356456359967f,
+ 0.09801714032956f, 0.12241067519922f, 0.14673047445536f, 0.17096188876030f,
+ 0.19509032201613f, 0.21910124015687f, 0.24298017990326f, 0.26671275747490f,
+ 0.29028467725446f, 0.31368174039889f, 0.33688985339222f, 0.35989503653499f,
+ 0.38268343236509f, 0.40524131400499f, 0.42755509343028f, 0.44961132965461f,
+ 0.47139673682600f, 0.49289819222978f, 0.51410274419322f, 0.53499761988710f,
+ 0.55557023301960f, 0.57580819141785f, 0.59569930449243f, 0.61523159058063f,
+ 0.63439328416365f, 0.65317284295378f, 0.67155895484702f, 0.68954054473707f,
+ 0.70710678118655f, 0.72424708295147f, 0.74095112535496f, 0.75720884650648f,
+ 0.77301045336274f, 0.78834642762661f, 0.80320753148064f, 0.81758481315158f,
+ 0.83146961230255f, 0.84485356524971f, 0.85772861000027f, 0.87008699110871f,
+ 0.88192126434835f, 0.89322430119552f, 0.90398929312344f, 0.91420975570353f,
+ 0.92387953251129f, 0.93299279883474f, 0.94154406518302f, 0.94952818059304f,
+ 0.95694033573221f, 0.96377606579544f, 0.97003125319454f, 0.97570213003853f,
+ 0.98078528040323f, 0.98527764238894f, 0.98917650996478f, 0.99247953459871f,
+ 0.99518472667220f, 0.99729045667869f, 0.99879545620517f, 0.99969881869620f,
+ 1.00000000000000f};
+
+// Matlab code to produce table:
+// weightCurve = [0 ; 0.3 * sqrt(linspace(0,1,64))' + 0.1];
+// fprintf(1, '\t%.4f, %.4f, %.4f, %.4f, %.4f, %.4f,\n', weightCurve);
+ALIGN16_BEG const float ALIGN16_END WebRtcAec_weightCurve[65] = {
+ 0.0000f, 0.1000f, 0.1378f, 0.1535f, 0.1655f, 0.1756f, 0.1845f, 0.1926f,
+ 0.2000f, 0.2069f, 0.2134f, 0.2195f, 0.2254f, 0.2309f, 0.2363f, 0.2414f,
+ 0.2464f, 0.2512f, 0.2558f, 0.2604f, 0.2648f, 0.2690f, 0.2732f, 0.2773f,
+ 0.2813f, 0.2852f, 0.2890f, 0.2927f, 0.2964f, 0.3000f, 0.3035f, 0.3070f,
+ 0.3104f, 0.3138f, 0.3171f, 0.3204f, 0.3236f, 0.3268f, 0.3299f, 0.3330f,
+ 0.3360f, 0.3390f, 0.3420f, 0.3449f, 0.3478f, 0.3507f, 0.3535f, 0.3563f,
+ 0.3591f, 0.3619f, 0.3646f, 0.3673f, 0.3699f, 0.3726f, 0.3752f, 0.3777f,
+ 0.3803f, 0.3828f, 0.3854f, 0.3878f, 0.3903f, 0.3928f, 0.3952f, 0.3976f,
+ 0.4000f};
+
+// Matlab code to produce table:
+// overDriveCurve = [sqrt(linspace(0,1,65))' + 1];
+// fprintf(1, '\t%.4f, %.4f, %.4f, %.4f, %.4f, %.4f,\n', overDriveCurve);
+ALIGN16_BEG const float ALIGN16_END WebRtcAec_overDriveCurve[65] = {
+ 1.0000f, 1.1250f, 1.1768f, 1.2165f, 1.2500f, 1.2795f, 1.3062f, 1.3307f,
+ 1.3536f, 1.3750f, 1.3953f, 1.4146f, 1.4330f, 1.4507f, 1.4677f, 1.4841f,
+ 1.5000f, 1.5154f, 1.5303f, 1.5449f, 1.5590f, 1.5728f, 1.5863f, 1.5995f,
+ 1.6124f, 1.6250f, 1.6374f, 1.6495f, 1.6614f, 1.6731f, 1.6847f, 1.6960f,
+ 1.7071f, 1.7181f, 1.7289f, 1.7395f, 1.7500f, 1.7603f, 1.7706f, 1.7806f,
+ 1.7906f, 1.8004f, 1.8101f, 1.8197f, 1.8292f, 1.8385f, 1.8478f, 1.8570f,
+ 1.8660f, 1.8750f, 1.8839f, 1.8927f, 1.9014f, 1.9100f, 1.9186f, 1.9270f,
+ 1.9354f, 1.9437f, 1.9520f, 1.9601f, 1.9682f, 1.9763f, 1.9843f, 1.9922f,
+ 2.0000f};
+
+// Delay Agnostic AEC parameters, still under development and may change.
+static const float kDelayQualityThresholdMax = 0.07f;
+static const float kDelayQualityThresholdMin = 0.01f;
+static const int kInitialShiftOffset = 5;
+#if !defined(WEBRTC_ANDROID)
+static const int kDelayCorrectionStart = 1500; // 10 ms chunks
+#endif
+
+// Target suppression levels for nlp modes.
+// log{0.001, 0.00001, 0.00000001}
+static const float kTargetSupp[3] = {-6.9f, -11.5f, -18.4f};
+
+// Two sets of parameters, one for the extended filter mode.
+static const float kExtendedMinOverDrive[3] = {3.0f, 6.0f, 15.0f};
+static const float kNormalMinOverDrive[3] = {1.0f, 2.0f, 5.0f};
+const float WebRtcAec_kExtendedSmoothingCoefficients[2][2] = {{0.9f, 0.1f},
+ {0.92f, 0.08f}};
+const float WebRtcAec_kNormalSmoothingCoefficients[2][2] = {{0.9f, 0.1f},
+ {0.93f, 0.07f}};
+
+// Number of partitions forming the NLP's "preferred" bands.
+enum {
+ kPrefBandSize = 24
+};
+
+#ifdef WEBRTC_AEC_DEBUG_DUMP
+extern int webrtc_aec_instance_count;
+#endif
+
+WebRtcAecFilterFar WebRtcAec_FilterFar;
+WebRtcAecScaleErrorSignal WebRtcAec_ScaleErrorSignal;
+WebRtcAecFilterAdaptation WebRtcAec_FilterAdaptation;
+WebRtcAecOverdriveAndSuppress WebRtcAec_OverdriveAndSuppress;
+WebRtcAecComfortNoise WebRtcAec_ComfortNoise;
+WebRtcAecSubBandCoherence WebRtcAec_SubbandCoherence;
+
+__inline static float MulRe(float aRe, float aIm, float bRe, float bIm) {
+ return aRe * bRe - aIm * bIm; // real part of (aRe + j*aIm) * (bRe + j*bIm)
+}
+
+__inline static float MulIm(float aRe, float aIm, float bRe, float bIm) {
+ return aRe * bIm + aIm * bRe; // imaginary part of (aRe + j*aIm) * (bRe + j*bIm)
+}
+
+static int CmpFloat(const void* a, const void* b) {
+  // Three-way float comparison: -1, 0 or 1 for *a <, == or > *b.
+  const float lhs = *(const float*)a;
+  const float rhs = *(const float*)b;
+  return (lhs > rhs) - (lhs < rhs);
+}
+
+static void FilterFar(AecCore* aec, float yf[2][PART_LEN1]) {
+  // Adds to |yf| the complex products of far-end spectra (xfBuf) and filter
+  // weights (wfBuf), accumulated over every partition.
+  int part;
+  for (part = 0; part < aec->num_partitions; part++) {
+    const int wf_base = part * PART_LEN1;
+    int xf_block = part + aec->xfBufBlockPos;
+    int xf_base;
+    int bin;
+    if (xf_block >= aec->num_partitions) {
+      xf_block -= aec->num_partitions;  // Check for wrap
+    }
+    xf_base = xf_block * PART_LEN1;
+    for (bin = 0; bin < PART_LEN1; bin++) {
+      const float xRe = aec->xfBuf[0][xf_base + bin];
+      const float xIm = aec->xfBuf[1][xf_base + bin];
+      const float wRe = aec->wfBuf[0][wf_base + bin];
+      const float wIm = aec->wfBuf[1][wf_base + bin];
+      yf[0][bin] += MulRe(xRe, xIm, wRe, wIm);
+      yf[1][bin] += MulIm(xRe, xIm, wRe, wIm);
+    }
+  }
+}
+
+static void ScaleErrorSignal(AecCore* aec, float ef[2][PART_LEN1]) { // normalizes, magnitude-limits and step-scales |ef| in place
+ const float mu = aec->extended_filter_enabled ? kExtendedMu : aec->normal_mu;
+ const float error_threshold = aec->extended_filter_enabled
+ ? kExtendedErrorThreshold
+ : aec->normal_error_threshold;
+ int i;
+ float abs_ef; // magnitude first, then reused as the limiting gain
+ for (i = 0; i < (PART_LEN1); i++) {
+ ef[0][i] /= (aec->xPow[i] + 1e-10f); // divide by xPow; 1e-10f keeps the divisor non-zero when xPow is 0
+ ef[1][i] /= (aec->xPow[i] + 1e-10f);
+ abs_ef = sqrtf(ef[0][i] * ef[0][i] + ef[1][i] * ef[1][i]);
+ // Scale the bin down so its magnitude is roughly error_threshold.
+ if (abs_ef > error_threshold) {
+ abs_ef = error_threshold / (abs_ef + 1e-10f);
+ ef[0][i] *= abs_ef;
+ ef[1][i] *= abs_ef;
+ }
+
+ // Stepsize factor
+ ef[0][i] *= mu;
+ ef[1][i] *= mu;
+ }
+}
+
+// Time-unconstrained filter adaptation.
+// TODO(andrew): consider for a low-complexity mode.
+// static void FilterAdaptationUnconstrained(AecCore* aec, float *fft,
+// float ef[2][PART_LEN1]) {
+// int i, j;
+// for (i = 0; i < aec->num_partitions; i++) {
+// int xPos = (i + aec->xfBufBlockPos)*(PART_LEN1);
+// int pos;
+// // Check for wrap
+// if (i + aec->xfBufBlockPos >= aec->num_partitions) {
+// xPos -= aec->num_partitions * PART_LEN1;
+// }
+//
+// pos = i * PART_LEN1;
+//
+// for (j = 0; j < PART_LEN1; j++) {
+// aec->wfBuf[0][pos + j] += MulRe(aec->xfBuf[0][xPos + j],
+// -aec->xfBuf[1][xPos + j],
+// ef[0][j], ef[1][j]);
+// aec->wfBuf[1][pos + j] += MulIm(aec->xfBuf[0][xPos + j],
+// -aec->xfBuf[1][xPos + j],
+// ef[0][j], ef[1][j]);
+// }
+// }
+//}
+
+static void FilterAdaptation(AecCore* aec, float* fft, float ef[2][PART_LEN1]) { // updates wfBuf per partition; |fft| is scratch
+ int i, j;
+ for (i = 0; i < aec->num_partitions; i++) {
+ int xPos = (i + aec->xfBufBlockPos) * (PART_LEN1);
+ int pos;
+ // Check for wrap
+ if (i + aec->xfBufBlockPos >= aec->num_partitions) {
+ xPos -= aec->num_partitions * PART_LEN1;
+ }
+
+ pos = i * PART_LEN1;
+ // Pack conj(xfBuf) * ef into |fft| as interleaved (re, im) pairs.
+ for (j = 0; j < PART_LEN; j++) {
+
+ fft[2 * j] = MulRe(aec->xfBuf[0][xPos + j],
+ -aec->xfBuf[1][xPos + j],
+ ef[0][j],
+ ef[1][j]);
+ fft[2 * j + 1] = MulIm(aec->xfBuf[0][xPos + j],
+ -aec->xfBuf[1][xPos + j],
+ ef[0][j],
+ ef[1][j]);
+ }
+ fft[1] = MulRe(aec->xfBuf[0][xPos + PART_LEN], // overwrites the j == 0 imaginary slot with the real part of bin PART_LEN
+ -aec->xfBuf[1][xPos + PART_LEN],
+ ef[0][PART_LEN],
+ ef[1][PART_LEN]);
+ // Inverse transform, zero the last PART_LEN values, scale, transform back.
+ aec_rdft_inverse_128(fft);
+ memset(fft + PART_LEN, 0, sizeof(float) * PART_LEN);
+
+ // fft scaling
+ {
+ float scale = 2.0f / PART_LEN2;
+ for (j = 0; j < PART_LEN; j++) { // only the first PART_LEN values are non-zero at this point
+ fft[j] *= scale;
+ }
+ }
+ aec_rdft_forward_128(fft);
+ // Bins 0 and PART_LEN: only the real parts (fft[0], fft[1]) are accumulated.
+ aec->wfBuf[0][pos] += fft[0];
+ aec->wfBuf[0][pos + PART_LEN] += fft[1];
+
+ for (j = 1; j < PART_LEN; j++) {
+ aec->wfBuf[0][pos + j] += fft[2 * j];
+ aec->wfBuf[1][pos + j] += fft[2 * j + 1];
+ }
+ }
+}
+
+static void OverdriveAndSuppress(AecCore* aec, // modifies |hNl| and |efw| in place
+ float hNl[PART_LEN1],
+ const float hNlFb,
+ float efw[2][PART_LEN1]) {
+ int i;
+ for (i = 0; i < PART_LEN1; i++) {
+ // Weight subbands
+ if (hNl[i] > hNlFb) { // blend towards hNlFb only for gains above it
+ hNl[i] = WebRtcAec_weightCurve[i] * hNlFb +
+ (1 - WebRtcAec_weightCurve[i]) * hNl[i];
+ }
+ hNl[i] = powf(hNl[i], aec->overDriveSm * WebRtcAec_overDriveCurve[i]); // per-bin exponent: overDriveSm times the curve value
+
+ // Suppress error signal
+ efw[0][i] *= hNl[i];
+ efw[1][i] *= hNl[i];
+
+ // Ooura fft returns incorrect sign on imaginary component. It matters here
+ // because we are making an additive change with comfort noise.
+ efw[1][i] *= -1;
+ }
+}
+
+static int PartitionDelay(const AecCore* aec) {
+  // Returns the index of the filter partition whose weights (wfBuf) carry
+  // the most energy; ties and the all-zero case resolve to the lowest index.
+  // TODO(bjornv): Spread computational cost by computing one partition per
+  // block?
+  int best_partition = 0;
+  float best_energy = 0;
+  int part;
+
+  for (part = 0; part < aec->num_partitions; part++) {
+    const int base = part * PART_LEN1;
+    float energy = 0;
+    int bin;
+    for (bin = 0; bin < PART_LEN1; bin++) {
+      const float re = aec->wfBuf[0][base + bin];
+      const float im = aec->wfBuf[1][base + bin];
+      energy += re * re + im * im;
+    }
+    if (energy > best_energy) {
+      best_energy = energy;
+      best_partition = part;
+    }
+  }
+  return best_partition;
+}
+
+// Threshold to protect against the ill-effects of a zero far-end.
+const float WebRtcAec_kMinFarendPSD = 15;
+
+// Updates the following smoothed Power Spectral Densities (PSD):
+// - sd : near-end
+// - se : residual echo
+// - sx : far-end
+// - sde : cross-PSD of near-end and residual echo
+// - sxd : cross-PSD of near-end and far-end
+//
+// In addition to updating the PSDs, the filter diverge state is determined,
+// upon which actions are taken.
+static void SmoothedPSD(AecCore* aec,
+ float efw[2][PART_LEN1],
+ float dfw[2][PART_LEN1],
+ float xfw[2][PART_LEN1]) {
+ // Power estimate smoothing coefficients.
+ const float* ptrGCoh = aec->extended_filter_enabled // [0] weights the previous estimate, [1] the new sample
+ ? WebRtcAec_kExtendedSmoothingCoefficients[aec->mult - 1]
+ : WebRtcAec_kNormalSmoothingCoefficients[aec->mult - 1]; // tables have two rows, so aec->mult must be 1 or 2
+ int i;
+ float sdSum = 0, seSum = 0; // totals of the smoothed sd / se over all bins
+
+ for (i = 0; i < PART_LEN1; i++) {
+ aec->sd[i] = ptrGCoh[0] * aec->sd[i] +
+ ptrGCoh[1] * (dfw[0][i] * dfw[0][i] + dfw[1][i] * dfw[1][i]);
+ aec->se[i] = ptrGCoh[0] * aec->se[i] +
+ ptrGCoh[1] * (efw[0][i] * efw[0][i] + efw[1][i] * efw[1][i]);
+ // We threshold here to protect against the ill-effects of a zero farend.
+ // The threshold is not arbitrarily chosen, but balances protection and
+ // adverse interaction with the algorithm's tuning.
+ // TODO(bjornv): investigate further why this is so sensitive.
+ aec->sx[i] =
+ ptrGCoh[0] * aec->sx[i] +
+ ptrGCoh[1] * WEBRTC_SPL_MAX(
+ xfw[0][i] * xfw[0][i] + xfw[1][i] * xfw[1][i],
+ WebRtcAec_kMinFarendPSD);
+ // Cross-PSD of near-end and error: [0] real part, [1] imaginary part.
+ aec->sde[i][0] =
+ ptrGCoh[0] * aec->sde[i][0] +
+ ptrGCoh[1] * (dfw[0][i] * efw[0][i] + dfw[1][i] * efw[1][i]);
+ aec->sde[i][1] =
+ ptrGCoh[0] * aec->sde[i][1] +
+ ptrGCoh[1] * (dfw[0][i] * efw[1][i] - dfw[1][i] * efw[0][i]);
+ // Cross-PSD of near-end and far-end, same layout.
+ aec->sxd[i][0] =
+ ptrGCoh[0] * aec->sxd[i][0] +
+ ptrGCoh[1] * (dfw[0][i] * xfw[0][i] + dfw[1][i] * xfw[1][i]);
+ aec->sxd[i][1] =
+ ptrGCoh[0] * aec->sxd[i][1] +
+ ptrGCoh[1] * (dfw[0][i] * xfw[1][i] - dfw[1][i] * xfw[0][i]);
+
+ sdSum += aec->sd[i];
+ seSum += aec->se[i];
+ }
+
+ // Divergent filter safeguard.
+ aec->divergeState = (aec->divergeState ? 1.05f : 1.0f) * seSum > sdSum; // hysteresis: se is weighted by 1.05 while already diverged
+ // While diverged, the error spectrum is replaced by the near-end spectrum.
+ if (aec->divergeState)
+ memcpy(efw, dfw, sizeof(efw[0][0]) * 2 * PART_LEN1);
+
+ // Reset if error is significantly larger than nearend (13 dB).
+ if (!aec->extended_filter_enabled && seSum > (19.95f * sdSum))
+ memset(aec->wfBuf, 0, sizeof(aec->wfBuf)); // NOTE(review): relies on wfBuf being an array member, not a pointer; confirm in aec_core_internal.h
+}
+
+// Applies the sqrt-Hanning window to the 2 * PART_LEN samples of |x|.
+__inline static void WindowData(float* x_windowed, const float* x) {
+  int n;
+  for (n = 0; n < PART_LEN; n++) {
+    const int m = PART_LEN + n;  // matching index in the second half
+    x_windowed[n] = x[n] * WebRtcAec_sqrtHanning[n];
+    x_windowed[m] = x[m] * WebRtcAec_sqrtHanning[PART_LEN - n];
+  }
+}
+
+// Unpacks fft output |data| into row [0] (real) and row [1] (imaginary).
+__inline static void StoreAsComplex(const float* data,
+                                    float data_complex[2][PART_LEN1]) {
+  int bin;
+  data_complex[0][0] = data[0];  // first bin: real value only
+  data_complex[1][0] = 0;
+  for (bin = 1; bin < PART_LEN; ++bin) {
+    data_complex[0][bin] = data[2 * bin];
+    data_complex[1][bin] = data[2 * bin + 1];
+  }
+  data_complex[0][PART_LEN] = data[1];  // last bin's real value is kept in data[1]
+  data_complex[1][PART_LEN] = 0;
+}
+
+static void SubbandCoherence(AecCore* aec, // fills efw, xfw, cohde and cohxd; |fft| is scratch
+ float efw[2][PART_LEN1],
+ float xfw[2][PART_LEN1],
+ float* fft,
+ float* cohde,
+ float* cohxd) {
+ float dfw[2][PART_LEN1]; // near-end spectrum, local to this call
+ int i;
+ // The partition delay is re-estimated only when delayEstCtr is zero.
+ if (aec->delayEstCtr == 0)
+ aec->delayIdx = PartitionDelay(aec);
+
+ // Use delayed far.
+ memcpy(xfw,
+ aec->xfwBuf + aec->delayIdx * PART_LEN1,
+ sizeof(xfw[0][0]) * 2 * PART_LEN1);
+
+ // Windowed near fft
+ WindowData(fft, aec->dBuf);
+ aec_rdft_forward_128(fft);
+ StoreAsComplex(fft, dfw);
+
+ // Windowed error fft
+ WindowData(fft, aec->eBuf);
+ aec_rdft_forward_128(fft);
+ StoreAsComplex(fft, efw);
+ // Updates the smoothed PSDs in |aec|; may also overwrite efw with dfw.
+ SmoothedPSD(aec, efw, dfw, xfw);
+
+ // Subband coherence
+ for (i = 0; i < PART_LEN1; i++) {
+ cohde[i] = // |sde|^2 / (sd * se), with 1e-10f guarding the division
+ (aec->sde[i][0] * aec->sde[i][0] + aec->sde[i][1] * aec->sde[i][1]) /
+ (aec->sd[i] * aec->se[i] + 1e-10f);
+ cohxd[i] = // |sxd|^2 / (sx * sd), same guard
+ (aec->sxd[i][0] * aec->sxd[i][0] + aec->sxd[i][1] * aec->sxd[i][1]) /
+ (aec->sx[i] * aec->sd[i] + 1e-10f);
+ }
+}
+
+static void GetHighbandGain(const float* lambda, float* nlpGainHband) { // writes the mean of lambda[freqAvgIc .. PART_LEN1 - 2] to nlpGainHband[0]
+ int i;
+
+ nlpGainHband[0] = (float)0.0;
+ for (i = freqAvgIc; i < PART_LEN1 - 1; i++) { // the last bin (PART_LEN1 - 1) is excluded
+ nlpGainHband[0] += lambda[i];
+ }
+ nlpGainHband[0] /= (float)(PART_LEN1 - 1 - freqAvgIc); // divide by the number of summed bins
+}
+
+static void ComfortNoise(AecCore* aec, // adds noise to |efw|; fills |comfortNoiseHband| when num_bands > 1
+ float efw[2][PART_LEN1],
+ complex_t* comfortNoiseHband,
+ const float* noisePow,
+ const float* lambda) {
+ int i, num;
+ float rand[PART_LEN]; // NOTE(review): local name shadows rand() from <stdlib.h> within this function
+ float noise, noiseAvg, tmp, tmpAvg;
+ int16_t randW16[PART_LEN];
+ complex_t u[PART_LEN1]; // noise spectrum; indexed as [bin][0] and [bin][1]
+
+ const float pi2 = 6.28318530717959f;
+
+ // Generate a uniform random array on [0 1]
+ WebRtcSpl_RandUArray(randW16, PART_LEN, &aec->seed);
+ for (i = 0; i < PART_LEN; i++) {
+ rand[i] = ((float)randW16[i]) / 32768;
+ }
+
+ // Reject LF noise
+ u[0][0] = 0;
+ u[0][1] = 0;
+ for (i = 1; i < PART_LEN1; i++) {
+ tmp = pi2 * rand[i - 1]; // angle derived from the random value
+
+ noise = sqrtf(noisePow[i]);
+ u[i][0] = noise * cosf(tmp);
+ u[i][1] = -noise * sinf(tmp);
+ }
+ u[PART_LEN][1] = 0; // second component of the last bin is forced to zero
+
+ for (i = 0; i < PART_LEN1; i++) {
+ // This is the proper weighting to match the background noise power
+ tmp = sqrtf(WEBRTC_SPL_MAX(1 - lambda[i] * lambda[i], 0));
+ // tmp = 1 - lambda[i];
+ efw[0][i] += tmp * u[i][0];
+ efw[1][i] += tmp * u[i][1];
+ }
+
+ // For H band comfort noise
+ // TODO: don't compute noise and "tmp" twice. Use the previous results.
+ noiseAvg = 0.0;
+ tmpAvg = 0.0;
+ num = 0;
+ if (aec->num_bands > 1 && flagHbandCn == 1) {
+
+ // average noise scale
+ // average over second half of freq spectrum (i.e., 4->8khz)
+ // TODO: we shouldn't need num. We know how many elements we're summing.
+ for (i = PART_LEN1 >> 1; i < PART_LEN1; i++) {
+ num++;
+ noiseAvg += sqrtf(noisePow[i]);
+ }
+ noiseAvg /= (float)num;
+
+ // average nlp scale
+ // average over second half of freq spectrum (i.e., 4->8khz)
+ // TODO: we shouldn't need num. We know how many elements we're summing.
+ num = 0;
+ for (i = PART_LEN1 >> 1; i < PART_LEN1; i++) {
+ num++;
+ tmpAvg += sqrtf(WEBRTC_SPL_MAX(1 - lambda[i] * lambda[i], 0));
+ }
+ tmpAvg /= (float)num;
+
+ // Use average noise for H band
+ // TODO: we should probably have a new random vector here.
+ // Reject LF noise
+ u[0][0] = 0;
+ u[0][1] = 0;
+ for (i = 1; i < PART_LEN1; i++) {
+ tmp = pi2 * rand[i - 1]; // same random values as the first pass
+
+ // Use average noise for H band
+ u[i][0] = noiseAvg * (float)cos(tmp); // double-precision cos/sin here, unlike cosf/sinf above
+ u[i][1] = -noiseAvg * (float)sin(tmp);
+ }
+ u[PART_LEN][1] = 0;
+
+ for (i = 0; i < PART_LEN1; i++) {
+ // Use average NLP weight for H band
+ comfortNoiseHband[i][0] = tmpAvg * u[i][0];
+ comfortNoiseHband[i][1] = tmpAvg * u[i][1];
+ }
+ }
+}
+
+static void InitLevel(PowerLevel* level) {
+  // Resets a PowerLevel: counters, sums and levels go to zero, while the
+  // tracked minimum starts from a very large value.
+  level->frcounter = 0;
+  level->sfrcounter = 0;
+  level->frsum = 0;
+  level->sfrsum = 0;
+  level->framelevel = 0;
+  level->averagelevel = 0;
+  level->minlevel = 1E17f;
+}
+
+static void InitStats(Stats* stats) {
+  stats->counter = 0;  // counters and sums start at zero
+  stats->hicounter = 0;
+  stats->sum = 0;
+  stats->hisum = 0;
+  stats->instant = kOffsetLevel;  // level fields start at kOffsetLevel
+  stats->average = kOffsetLevel;
+  stats->himean = kOffsetLevel;
+  stats->max = kOffsetLevel;
+  stats->min = kOffsetLevel * (-1);  // min starts at the negated offset
+}
+
+static void InitMetrics(AecCore* self) {
+  // Resets the metrics state: four statistics, four power levels, one counter.
+  InitStats(&self->erl);
+  InitStats(&self->erle);
+  InitStats(&self->aNlp);
+  InitStats(&self->rerl);
+  InitLevel(&self->farlevel);
+  InitLevel(&self->nearlevel);
+  InitLevel(&self->linoutlevel);
+  InitLevel(&self->nlpoutlevel);
+  self->stateCounter = 0;
+}
+
+// Adds the energy of one frequency-domain block to the running level
+// statistics in |level|.
+//
+// |in| holds the real parts in in[0] and the imaginary parts in in[1] for the
+// bins [0, PART_LEN]. The FFT covers PART_LEN2 samples because of the overlap,
+// but only the energy of the last PART_LEN samples is wanted. Parseval's
+// relation gives
+//
+//   \sum_{n=0}^{N-1} |x(n)|^2 = 1/N * \sum_{n=0}^{N-1} |X(n)|^2 = ENERGY,
+//
+// with N = PART_LEN2, and the wanted half is approximated by ENERGY / 2.
+// The bins [PART_LEN+1, PART_LEN2-1] are not stored; up to a phase shift they
+// equal the bins [1, PART_LEN-1], so those bins count twice. That factor of
+// two cancels against the division by two, which leaves only the two end-point
+// bins to be halved explicitly.
+//
+// Block energies are summed into |sfrsum|. Once |sfrcounter| exceeds
+// |subCountLen| the sum is turned into |framelevel|, which updates |minlevel|
+// and is added to |frsum|. Once |frcounter| exceeds |countLen|,
+// |averagelevel| is refreshed from |frsum|.
+static void UpdateLevel(PowerLevel* level, float in[2][PART_LEN1]) {
+  // TODO(bjornv): Investigate reusing energy calculations performed at other
+  // places in the code.
+  const float* re = in[0];
+  const float* im = in[1];
+  float energy;
+  int bin;
+
+  // End points: the imaginary parts are zero there and are left out.
+  energy = (re[0] * re[0]) / 2;
+  energy += (re[PART_LEN] * re[PART_LEN]) / 2;
+
+  // Interior bins.
+  for (bin = 1; bin < PART_LEN; ++bin) {
+    energy += (re[bin] * re[bin] + im[bin] * im[bin]);
+  }
+  energy /= PART_LEN2;
+
+  level->sfrsum += energy;
+  level->sfrcounter++;
+
+  // Nothing more to do until a full run of sub-frames has been collected.
+  if (!(level->sfrcounter > subCountLen)) {
+    return;
+  }
+
+  level->framelevel = level->sfrsum / (subCountLen * PART_LEN);
+  level->sfrsum = 0;
+  level->sfrcounter = 0;
+
+  if (level->framelevel > 0) {
+    if (level->framelevel < level->minlevel) {
+      // New minimum.
+      level->minlevel = level->framelevel;
+    } else {
+      // Small increase, so the minimum can follow a rising floor.
+      level->minlevel *= (1 + 0.001f);
+    }
+  }
+
+  level->frcounter++;
+  level->frsum += level->framelevel;
+  if (level->frcounter > countLen) {
+    level->averagelevel = level->frsum / countLen;
+    level->frsum = 0;
+    level->frcounter = 0;
+  }
+}
+
+// Folds one new sample into a Stats accumulator. |instant| is stored as the
+// latest value. |value| drives the max/min tracking, the running average and
+// the "upper mean", i.e. the mean of the samples that were above the running
+// average at the time they were added.
+static void AccumulateStats(Stats* stats, float instant, float value) {
+  stats->instant = instant;
+  if (value > stats->max) {
+    stats->max = value;
+  }
+  if (value < stats->min) {
+    stats->min = value;
+  }
+
+  stats->counter++;
+  stats->sum += value;
+  stats->average = stats->sum / stats->counter;
+
+  // Upper mean
+  if (value > stats->average) {
+    stats->hicounter++;
+    stats->hisum += value;
+    stats->himean = stats->hisum / stats->hicounter;
+  }
+}
+
+// Updates the ERL, A_NLP and ERLE statistics of |aec| from the current power
+// levels. |stateCounter| is incremented whenever |echoState| is set. The
+// statistics are only evaluated when the far-end frame counter is zero, and
+// then only if enough echo blocks were counted, the far-end sub-frame counter
+// is zero and the far-end average level is sufficiently above its minimum.
+// |stateCounter| is cleared each time the far-end frame counter is zero.
+static void UpdateMetrics(AecCore* aec) {
+  const float actThresholdNoisy = 8.0f;
+  const float actThresholdClean = 40.0f;
+  const float safety = 0.99995f;
+  const float noisyPower = 300000.0f;
+
+  const PowerLevel* far_lvl = &aec->farlevel;
+  const PowerLevel* near_lvl = &aec->nearlevel;
+  const PowerLevel* linout_lvl = &aec->linoutlevel;
+  const PowerLevel* nlpout_lvl = &aec->nlpoutlevel;
+
+  float actThreshold;
+  float echo, suppressedEcho;
+  float dtmp, dtmp2;
+  int estimate_now;
+
+  // Check if echo is likely present
+  if (aec->echoState) {
+    aec->stateCounter++;
+  }
+
+  if (far_lvl->frcounter != 0) {
+    return;
+  }
+
+  actThreshold =
+      (far_lvl->minlevel < noisyPower) ? actThresholdClean : actThresholdNoisy;
+
+  // Estimate in active far-end segments only.
+  estimate_now =
+      (aec->stateCounter > (0.5f * countLen * subCountLen)) &&
+      (far_lvl->sfrcounter == 0) &&
+      (far_lvl->averagelevel > (actThreshold * far_lvl->minlevel));
+
+  if (estimate_now) {
+    // Subtract noise power
+    echo = near_lvl->averagelevel - safety * near_lvl->minlevel;
+
+    // ERL
+    dtmp = 10 * (float)log10(far_lvl->averagelevel / near_lvl->averagelevel +
+                             1e-10f);
+    // NOTE(review): this noise-compensated value is computed but never used
+    // for ERL; kept so the function behaves exactly as before.
+    dtmp2 = 10 * (float)log10(far_lvl->averagelevel / echo + 1e-10f);
+    AccumulateStats(&aec->erl, dtmp, dtmp);
+
+    // A_NLP
+    dtmp = 10 * (float)log10(near_lvl->averagelevel /
+                                 (2 * linout_lvl->averagelevel) +
+                             1e-10f);
+
+    // subtract noise power
+    suppressedEcho =
+        2 * (linout_lvl->averagelevel - safety * linout_lvl->minlevel);
+
+    dtmp2 = 10 * (float)log10(echo / suppressedEcho + 1e-10f);
+
+    // NOTE(review): |instant| takes the noise-compensated value while the
+    // remaining statistics use the uncompensated one. Confirm whether this
+    // mismatch is intended.
+    AccumulateStats(&aec->aNlp, dtmp2, dtmp);
+
+    // ERLE
+
+    // subtract noise power
+    suppressedEcho =
+        2 * (nlpout_lvl->averagelevel - safety * nlpout_lvl->minlevel);
+
+    // The uncompensated value is computed and then replaced by the
+    // noise-compensated one.
+    dtmp = 10 * (float)log10(near_lvl->averagelevel /
+                                 (2 * nlpout_lvl->averagelevel) +
+                             1e-10f);
+    dtmp2 = 10 * (float)log10(echo / suppressedEcho + 1e-10f);
+
+    dtmp = dtmp2;
+    AccumulateStats(&aec->erle, dtmp, dtmp);
+  }
+
+  aec->stateCounter = 0;
+}
+
+// Derives the delay metrics of |self| from the delay histogram collected since
+// the previous update, then clears the histogram.
+//
+//   delay_median          median histogram bin, compensated for lookahead and
+//                         scaled by the per-block duration.
+//   delay_std             rounded mean absolute deviation from the median,
+//                         scaled by the per-block duration.
+//   fraction_poor_delays  share of the delay values that fall outside the
+//                         bins [lookahead, lookahead + num_partitions).
+//
+// When no delay values have been collected, all three metrics are set to -1
+// and the histogram is left untouched.
+static void UpdateDelayMetrics(AecCore* self) {
+  const int lookahead = WebRtc_lookahead(self->delay_estimator);
+  const int kMsPerBlock = PART_LEN / (self->mult * 8);
+  const int histogram_length =
+      sizeof(self->delay_histogram) / sizeof(self->delay_histogram[0]);
+  int64_t abs_deviation_sum = 0;
+  int median = 0;
+  int countdown;
+  int poor_delays;
+  int bin;
+
+  if (self->num_delay_values == 0) {
+    // We have no new delay value data. Even though -1 is a valid |median| in
+    // the sense that we allow negative values, it will practically never be
+    // used since multiples of |kMsPerBlock| will always be returned.
+    // We therefore use -1 to indicate in the logs that the delay estimator was
+    // not able to estimate the delay.
+    self->delay_median = -1;
+    self->delay_std = -1;
+    self->fraction_poor_delays = -1;
+    return;
+  }
+
+  // Median: count down from half the number of values until the histogram
+  // has consumed the budget.
+  countdown = self->num_delay_values >> 1;
+  for (bin = 0; bin < kHistorySizeBlocks; ++bin) {
+    countdown -= self->delay_histogram[bin];
+    if (countdown < 0) {
+      median = bin;
+      break;
+    }
+  }
+  // Account for lookahead.
+  self->delay_median = (median - lookahead) * kMsPerBlock;
+
+  // L1 norm with the median as central moment.
+  for (bin = 0; bin < kHistorySizeBlocks; ++bin) {
+    abs_deviation_sum += abs(bin - median) * self->delay_histogram[bin];
+  }
+  self->delay_std =
+      (int)((abs_deviation_sum + self->num_delay_values / 2) /
+            self->num_delay_values) * kMsPerBlock;
+
+  // Fraction of delays that are out of bounds, that is, either negative
+  // (anti-causal system) or larger than the AEC filter length. Start from all
+  // values and remove the ones that fall inside the filter span.
+  poor_delays = self->num_delay_values;
+  for (bin = lookahead; bin < lookahead + self->num_partitions; ++bin) {
+    if (bin < histogram_length) {
+      poor_delays -= self->delay_histogram[bin];
+    }
+  }
+  self->fraction_poor_delays = (float)poor_delays / self->num_delay_values;
+
+  // Reset histogram.
+  memset(self->delay_histogram, 0, sizeof(self->delay_histogram));
+  self->num_delay_values = 0;
+}
+
+// Transforms the PART_LEN2 samples in |time_data| to the frequency domain and
+// unpacks the result into |freq_data|, real parts in freq_data[0] and
+// imaginary parts in freq_data[1] for the bins [0, PART_LEN].
+//
+// |time_data| is modified: it is multiplied by the sqrt-Hanning window when
+// |window| is non-zero, and it is then overwritten by the in-place transform.
+static void TimeToFrequency(float time_data[PART_LEN2],
+                            float freq_data[2][PART_LEN1],
+                            int window) {
+  float* const re = freq_data[0];
+  float* const im = freq_data[1];
+  int k;
+
+  // TODO(bjornv): Should we have a different function/wrapper for windowed FFT?
+  if (window) {
+    float* const first_half = time_data;
+    float* const second_half = time_data + PART_LEN;
+    for (k = 0; k < PART_LEN; ++k) {
+      first_half[k] *= WebRtcAec_sqrtHanning[k];
+      second_half[k] *= WebRtcAec_sqrtHanning[PART_LEN - k];
+    }
+  }
+
+  aec_rdft_forward_128(time_data);
+
+  // Reorder. The first two output values go to the real parts of bin 0 and
+  // bin PART_LEN, whose imaginary parts are set to zero. The remaining values
+  // are interleaved (real, imaginary) pairs.
+  im[0] = 0;
+  im[PART_LEN] = 0;
+  re[0] = time_data[0];
+  re[PART_LEN] = time_data[1];
+  for (k = 1; k < PART_LEN; ++k) {
+    re[k] = time_data[2 * k];
+    im[k] = time_data[2 * k + 1];
+  }
+}
+
+// Moves the read pointers of all far-end buffers by |elements| so that they
+// stay aligned with each other, and returns the value reported by
+// WebRtc_MoveReadPtr() for |far_buf|. As the name says, the system delay is
+// not touched here.
+static int MoveFarReadPtrWithoutSystemDelayUpdate(AecCore* self, int elements) {
+  int moved;
+
+  WebRtc_MoveReadPtr(self->far_buf_windowed, elements);
+#ifdef WEBRTC_AEC_DEBUG_DUMP
+  WebRtc_MoveReadPtr(self->far_time_buf, elements);
+#endif
+  moved = WebRtc_MoveReadPtr(self->far_buf, elements);
+
+  return moved;
+}
+
+static int SignalBasedDelayCorrection(AecCore* self) {  // Returns a proposed delay correction (0 = none); updates the delay-correction state kept on |self|.
+ int delay_correction = 0;
+ int last_delay = -2;
+ assert(self != NULL);
+#if !defined(WEBRTC_ANDROID)
+ // On non-Android builds, only start correcting once |frame_count| reaches
+ // |kDelayCorrectionStart|. This gives the delay estimation a chance to
+ // converge. Also, if the playout audio volume is low (or even muted) the delay
+ // estimation can return a very large delay, which breaks the AEC if applied.
+ if (self->frame_count < kDelayCorrectionStart) {
+ return 0;
+ }
+#endif
+
+ // 1. Check for non-negative delay estimate. Note that the estimates we get
+ // from the delay estimation are not compensated for lookahead. Hence, a
+ // negative |last_delay| is an invalid one.
+ // 2. Verify that there is a delay change. In addition, only allow a change
+ // if the delay is outside a certain region taking the AEC filter length
+ // into account.
+ // TODO(bjornv): Investigate if we can remove the non-zero delay change check.
+ // 3. Only allow delay correction if the delay estimation quality exceeds
+ // |delay_quality_threshold|.
+ // 4. Finally, verify that the proposed |delay_correction| is feasible by
+ // comparing with the size of the far-end buffer.
+ last_delay = WebRtc_last_delay(self->delay_estimator);
+ if ((last_delay >= 0) &&
+ (last_delay != self->previous_delay) &&
+ (WebRtc_last_delay_quality(self->delay_estimator) >
+ self->delay_quality_threshold)) {
+ int delay = last_delay - WebRtc_lookahead(self->delay_estimator);  // Lookahead-compensated delay; may be negative.
+ // Allow for a slack in the actual delay, defined by a |lower_bound| and an
+ // |upper_bound|. The adaptive echo cancellation filter is currently
+ // |num_partitions| (of 64 samples) long. If the delay estimate is zero or
+ // negative, or exceeds 3/4 of the filter length, we open up for correction.
+ const int lower_bound = 0;
+ const int upper_bound = self->num_partitions * 3 / 4;
+ const int do_correction = delay <= lower_bound || delay > upper_bound;
+ if (do_correction == 1) {
+ int available_read = (int)WebRtc_available_read(self->far_buf);
+ // With |shift_offset| we gradually rely on the delay estimates. For delays
+ // larger than |shift_offset| we reduce the correction by |shift_offset| to
+ // lower the risk of pushing the AEC into a non causal state. For all other
+ // delays we rely on the values up to a rounding error, hence compensate by
+ // 1 element to make sure to push the delay into the causal region.
+ delay_correction = -delay;
+ delay_correction += delay > self->shift_offset ? self->shift_offset : 1;
+ self->shift_offset--;  // Shrinks by one on every attempted correction ...
+ self->shift_offset = (self->shift_offset <= 1 ? 1 : self->shift_offset);  // ... but never drops below 1.
+ if (delay_correction > available_read - self->mult - 1) {
+ // There is not enough data in the buffer to perform this shift. Hence,
+ // we do not rely on the delay estimate and do nothing.
+ delay_correction = 0;
+ } else {
+ self->previous_delay = last_delay;  // Remembered so the same estimate does not trigger another correction.
+ ++self->delay_correction_count;
+ }
+ }
+ }
+ // Update the |delay_quality_threshold| once we have our first delay
+ // correction.
+ if (self->delay_correction_count > 0) {
+ float delay_quality = WebRtc_last_delay_quality(self->delay_estimator);
+ delay_quality = (delay_quality > kDelayQualityThresholdMax ?  // Cap the quality at |kDelayQualityThresholdMax|.
+ kDelayQualityThresholdMax : delay_quality);
+ self->delay_quality_threshold =  // The threshold only ever rises: keep the larger of old and new.
+ (delay_quality > self->delay_quality_threshold ? delay_quality :
+ self->delay_quality_threshold);
+ }
+ return delay_correction;
+}
+
+static void NonLinearProcessing(AecCore* aec,  // AEC state; read and updated in place.
+ float* output,  // Receives PART_LEN saturated low-band output samples.
+ float* const* outputH) {  // outputH[j] receives PART_LEN saturated samples for high band j.
+ float efw[2][PART_LEN1], xfw[2][PART_LEN1];
+ complex_t comfortNoiseHband[PART_LEN1];
+ float fft[PART_LEN2];
+ float scale, dtmp;
+ float nlpGainHband;
+ int i;
+ size_t j;
+
+ // Coherence and non-linear filter
+ float cohde[PART_LEN1], cohxd[PART_LEN1];
+ float hNlDeAvg, hNlXdAvg;
+ float hNl[PART_LEN1];
+ float hNlPref[kPrefBandSize];
+ float hNlFb = 0, hNlFbLow = 0;
+ const float prefBandQuant = 0.75f, prefBandQuantLow = 0.5f;
+ const int prefBandSize = kPrefBandSize / aec->mult;
+ const int minPrefBand = 4 / aec->mult;
+ // Minimum overdrive table, selected by whether the extended filter is enabled.
+ const float* min_overdrive = aec->extended_filter_enabled
+ ? kExtendedMinOverDrive
+ : kNormalMinOverDrive;
+
+ // Number of blocks after which |delayEstCtr| wraps back to zero.
+ const int delayEstInterval = 10 * aec->mult;
+
+ float* xfw_ptr = NULL;
+
+ aec->delayEstCtr++;
+ if (aec->delayEstCtr == delayEstInterval) {
+ aec->delayEstCtr = 0;
+ }
+
+ // initialize comfort noise for H band
+ memset(comfortNoiseHband, 0, sizeof(comfortNoiseHband));
+ nlpGainHband = (float)0.0;
+ dtmp = (float)0.0;
+
+ // We should always have at least one element stored in |far_buf_windowed|.
+ assert(WebRtc_available_read(aec->far_buf_windowed) > 0);
+ // NLP
+ WebRtc_ReadBuffer(aec->far_buf_windowed, (void**)&xfw_ptr, &xfw[0][0], 1);  // NOTE(review): presumably leaves |xfw_ptr| pointing at the block that was read; confirm against WebRtc_ReadBuffer().
+
+ // TODO(bjornv): Investigate if we can reuse |far_buf_windowed| instead of
+ // |xfwBuf|.
+ // Buffer far.
+ memcpy(aec->xfwBuf, xfw_ptr, sizeof(float) * 2 * PART_LEN1);
+
+ WebRtcAec_SubbandCoherence(aec, efw, xfw, fft, cohde, cohxd);  // |efw|, |cohde| and |cohxd| are not initialized above; presumably filled in by this call.
+
+ hNlXdAvg = 0;
+ for (i = minPrefBand; i < prefBandSize + minPrefBand; i++) {
+ hNlXdAvg += cohxd[i];
+ }
+ hNlXdAvg /= prefBandSize;
+ hNlXdAvg = 1 - hNlXdAvg;  // One minus the mean |cohxd| over the preferred bands.
+
+ hNlDeAvg = 0;
+ for (i = minPrefBand; i < prefBandSize + minPrefBand; i++) {
+ hNlDeAvg += cohde[i];
+ }
+ hNlDeAvg /= prefBandSize;  // Mean |cohde| over the preferred bands.
+
+ if (hNlXdAvg < 0.75f && hNlXdAvg < aec->hNlXdAvgMin) {
+ aec->hNlXdAvgMin = hNlXdAvg;
+ }
+
+ if (hNlDeAvg > 0.98f && hNlXdAvg > 0.9f) {  // The gap between the set and clear thresholds gives |stNearState| hysteresis.
+ aec->stNearState = 1;
+ } else if (hNlDeAvg < 0.95f || hNlXdAvg < 0.8f) {
+ aec->stNearState = 0;
+ }
+
+ if (aec->hNlXdAvgMin == 1) {  // The tracked minimum sits at its cap of 1: treat as no echo.
+ aec->echoState = 0;
+ aec->overDrive = min_overdrive[aec->nlp_mode];
+
+ if (aec->stNearState == 1) {
+ memcpy(hNl, cohde, sizeof(hNl));
+ hNlFb = hNlDeAvg;
+ hNlFbLow = hNlDeAvg;
+ } else {
+ for (i = 0; i < PART_LEN1; i++) {
+ hNl[i] = 1 - cohxd[i];
+ }
+ hNlFb = hNlXdAvg;
+ hNlFbLow = hNlXdAvg;
+ }
+ } else {
+
+ if (aec->stNearState == 1) {
+ aec->echoState = 0;
+ memcpy(hNl, cohde, sizeof(hNl));
+ hNlFb = hNlDeAvg;
+ hNlFbLow = hNlDeAvg;
+ } else {
+ aec->echoState = 1;
+ for (i = 0; i < PART_LEN1; i++) {
+ hNl[i] = WEBRTC_SPL_MIN(cohde[i], 1 - cohxd[i]);
+ }
+
+ // Select an order statistic from the preferred bands.
+ // TODO: Using quicksort now, but a selection algorithm may be preferred.
+ memcpy(hNlPref, &hNl[minPrefBand], sizeof(float) * prefBandSize);
+ qsort(hNlPref, prefBandSize, sizeof(float), CmpFloat);
+ hNlFb = hNlPref[(int)floor(prefBandQuant * (prefBandSize - 1))];  // Entry at the 0.75 position of the sorted preferred bands.
+ hNlFbLow = hNlPref[(int)floor(prefBandQuantLow * (prefBandSize - 1))];  // Entry at the 0.5 position of the sorted preferred bands.
+ }
+ }
+
+ // Track the local filter minimum to determine suppression overdrive.
+ if (hNlFbLow < 0.6f && hNlFbLow < aec->hNlFbLocalMin) {
+ aec->hNlFbLocalMin = hNlFbLow;
+ aec->hNlFbMin = hNlFbLow;
+ aec->hNlNewMin = 1;
+ aec->hNlMinCtr = 0;
+ }
+ aec->hNlFbLocalMin =  // Both tracked minima creep upwards every block, capped at 1.
+ WEBRTC_SPL_MIN(aec->hNlFbLocalMin + 0.0008f / aec->mult, 1);
+ aec->hNlXdAvgMin = WEBRTC_SPL_MIN(aec->hNlXdAvgMin + 0.0006f / aec->mult, 1);
+
+ if (aec->hNlNewMin == 1) {
+ aec->hNlMinCtr++;
+ }
+ if (aec->hNlMinCtr == 2) {  // Recompute the overdrive once the counter started by a new minimum reaches 2.
+ aec->hNlNewMin = 0;
+ aec->hNlMinCtr = 0;
+ aec->overDrive =
+ WEBRTC_SPL_MAX(kTargetSupp[aec->nlp_mode] /
+ ((float)log(aec->hNlFbMin + 1e-10f) + 1e-10f),
+ min_overdrive[aec->nlp_mode]);
+ }
+
+ // Smooth the overdrive. Decreases are followed slowly (0.01 weight on the new value), increases faster (0.1).
+ if (aec->overDrive < aec->overDriveSm) {
+ aec->overDriveSm = 0.99f * aec->overDriveSm + 0.01f * aec->overDrive;
+ } else {
+ aec->overDriveSm = 0.9f * aec->overDriveSm + 0.1f * aec->overDrive;
+ }
+
+ WebRtcAec_OverdriveAndSuppress(aec, hNl, hNlFb, efw);
+
+ // Add comfort noise.
+ WebRtcAec_ComfortNoise(aec, efw, comfortNoiseHband, aec->noisePow, hNl);
+
+ // TODO(bjornv): Investigate how to take the windowing below into account if
+ // needed.
+ if (aec->metricsMode == 1) {
+ // Note that we have a scaling by two in the time domain |eBuf|.
+ // In addition the time domain signal is windowed before transformation,
+ // losing half the energy on the average. We take care of the first
+ // scaling only in UpdateMetrics().
+ UpdateLevel(&aec->nlpoutlevel, efw);
+ }
+ // Inverse error fft. Pack |efw| into the interleaved layout the inverse transform is given.
+ fft[0] = efw[0][0];
+ fft[1] = efw[0][PART_LEN];
+ for (i = 1; i < PART_LEN; i++) {
+ fft[2 * i] = efw[0][i];
+ // Sign change required by Ooura fft.
+ fft[2 * i + 1] = -efw[1][i];
+ }
+ aec_rdft_inverse_128(fft);
+
+ // Overlap and add to obtain output.
+ scale = 2.0f / PART_LEN2;
+ for (i = 0; i < PART_LEN; i++) {
+ fft[i] *= scale; // fft scaling
+ fft[i] = fft[i] * WebRtcAec_sqrtHanning[i] + aec->outBuf[i];  // Windowed first half plus the tail saved from the previous block.
+
+ fft[PART_LEN + i] *= scale; // fft scaling
+ aec->outBuf[i] = fft[PART_LEN + i] * WebRtcAec_sqrtHanning[PART_LEN - i];  // Windowed second half, saved for the next block.
+
+ // Saturate output to keep it in the allowed range.
+ output[i] = WEBRTC_SPL_SAT(
+ WEBRTC_SPL_WORD16_MAX, fft[i], WEBRTC_SPL_WORD16_MIN);
+ }
+
+ // For H band
+ if (aec->num_bands > 1) {
+
+ // H band gain
+ // average nlp over low band: average over second half of freq spectrum
+ // (4->8khz)
+ GetHighbandGain(hNl, &nlpGainHband);
+
+ // Inverse comfort_noise
+ if (flagHbandCn == 1) {
+ fft[0] = comfortNoiseHband[0][0];
+ fft[1] = comfortNoiseHband[PART_LEN][0];
+ for (i = 1; i < PART_LEN; i++) {
+ fft[2 * i] = comfortNoiseHband[i][0];
+ fft[2 * i + 1] = comfortNoiseHband[i][1];
+ }
+ aec_rdft_inverse_128(fft);
+ scale = 2.0f / PART_LEN2;
+ }
+
+ // compute gain factor
+ for (j = 0; j < aec->num_bands - 1; ++j) {
+ for (i = 0; i < PART_LEN; i++) {
+ dtmp = aec->dBufH[j][i];
+ dtmp = dtmp * nlpGainHband; // for variable gain
+
+ // add some comfort noise where Hband is attenuated
+ if (flagHbandCn == 1 && j == 0) {  // Only the first high band gets comfort noise, so each |fft| sample is scaled once.
+ fft[i] *= scale; // fft scaling
+ dtmp += cnScaleHband * fft[i];
+ }
+
+ // Saturate output to keep it in the allowed range.
+ outputH[j][i] = WEBRTC_SPL_SAT(
+ WEBRTC_SPL_WORD16_MAX, dtmp, WEBRTC_SPL_WORD16_MIN);
+ }
+ }
+ }
+
+ // Copy the current block to the old position.
+ memcpy(aec->dBuf, aec->dBuf + PART_LEN, sizeof(float) * PART_LEN);
+ memcpy(aec->eBuf, aec->eBuf + PART_LEN, sizeof(float) * PART_LEN);
+
+ // Copy the current block to the old position for H band
+ for (j = 0; j < aec->num_bands - 1; ++j) {
+ memcpy(aec->dBufH[j], aec->dBufH[j] + PART_LEN, sizeof(float) * PART_LEN);
+ }
+
+ memmove(aec->xfwBuf + PART_LEN1,  // Shift |xfwBuf| up by one block of PART_LEN1 entries; the last block falls off the end.
+ aec->xfwBuf,
+ sizeof(aec->xfwBuf) - sizeof(complex_t) * PART_LEN1);
+}
+
+static void ProcessBlock(AecCore* aec) {  // Processes one PART_LEN-sample block: reads near-end and far-end data, runs the linear filter and NLP, and writes the output buffers.
+ size_t i;
+ float y[PART_LEN], e[PART_LEN];
+ float scale;
+
+ float fft[PART_LEN2];
+ float xf[2][PART_LEN1], yf[2][PART_LEN1], ef[2][PART_LEN1];
+ float df[2][PART_LEN1];
+ float far_spectrum = 0.0f;
+ float near_spectrum = 0.0f;
+ float abs_far_spectrum[PART_LEN1];
+ float abs_near_spectrum[PART_LEN1];
+
+ const float gPow[2] = {0.9f, 0.1f};  // Smoothing weights: {previous estimate, new value}.
+
+ // Noise estimate constants.
+ const int noiseInitBlocks = 500 * aec->mult;
+ const float step = 0.1f;
+ const float ramp = 1.0002f;
+ const float gInitNoise[2] = {0.999f, 0.001f};
+
+ float nearend[PART_LEN];
+ float* nearend_ptr = NULL;
+ float output[PART_LEN];
+ float outputH[NUM_HIGH_BANDS_MAX][PART_LEN];
+ float* outputH_ptr[NUM_HIGH_BANDS_MAX];
+ for (i = 0; i < NUM_HIGH_BANDS_MAX; ++i) {
+ outputH_ptr[i] = outputH[i];  // Row pointers, so |outputH| can be passed on as float* const*.
+ }
+
+ float* xf_ptr = NULL;
+
+ // Concatenate old and new nearend blocks.
+ for (i = 0; i < aec->num_bands - 1; ++i) {  // High bands first ...
+ WebRtc_ReadBuffer(aec->nearFrBufH[i],
+ (void**)&nearend_ptr,
+ nearend,
+ PART_LEN);
+ memcpy(aec->dBufH[i] + PART_LEN, nearend_ptr, sizeof(nearend));
+ }
+ WebRtc_ReadBuffer(aec->nearFrBuf, (void**)&nearend_ptr, nearend, PART_LEN);  // ... then the low band; |nearend_ptr| is used for the low band from here on.
+ memcpy(aec->dBuf + PART_LEN, nearend_ptr, sizeof(nearend));
+
+ // ---------- Ooura fft ----------
+
+#ifdef WEBRTC_AEC_DEBUG_DUMP
+ {
+ float farend[PART_LEN];
+ float* farend_ptr = NULL;
+ WebRtc_ReadBuffer(aec->far_time_buf, (void**)&farend_ptr, farend, 1);
+ RTC_AEC_DEBUG_WAV_WRITE(aec->farFile, farend_ptr, PART_LEN);
+ RTC_AEC_DEBUG_WAV_WRITE(aec->nearFile, nearend_ptr, PART_LEN);
+ }
+#endif
+
+ // We should always have at least one element stored in |far_buf|.
+ assert(WebRtc_available_read(aec->far_buf) > 0);
+ WebRtc_ReadBuffer(aec->far_buf, (void**)&xf_ptr, &xf[0][0], 1);  // NOTE(review): presumably leaves |xf_ptr| pointing at the block that was read; confirm against WebRtc_ReadBuffer().
+
+ // Near fft
+ memcpy(fft, aec->dBuf, sizeof(float) * PART_LEN2);
+ TimeToFrequency(fft, df, 0);
+
+ // Power smoothing
+ for (i = 0; i < PART_LEN1; i++) {
+ far_spectrum = (xf_ptr[i] * xf_ptr[i]) +  // Real parts at [0, PART_LEN1), imaginary parts at [PART_LEN1, 2 * PART_LEN1).
+ (xf_ptr[PART_LEN1 + i] * xf_ptr[PART_LEN1 + i]);
+ aec->xPow[i] =
+ gPow[0] * aec->xPow[i] + gPow[1] * aec->num_partitions * far_spectrum;
+ // Calculate absolute spectra
+ abs_far_spectrum[i] = sqrtf(far_spectrum);
+
+ near_spectrum = df[0][i] * df[0][i] + df[1][i] * df[1][i];
+ aec->dPow[i] = gPow[0] * aec->dPow[i] + gPow[1] * near_spectrum;
+ // Calculate absolute spectra
+ abs_near_spectrum[i] = sqrtf(near_spectrum);
+ }
+
+ // Estimate noise power. Wait until dPow is more stable.
+ if (aec->noiseEstCtr > 50) {
+ for (i = 0; i < PART_LEN1; i++) {
+ if (aec->dPow[i] < aec->dMinPow[i]) {
+ aec->dMinPow[i] =  // Move the minimum towards the lower |dPow| (keeping a |step| fraction of the gap), then apply |ramp|.
+ (aec->dPow[i] + step * (aec->dMinPow[i] - aec->dPow[i])) * ramp;
+ } else {
+ aec->dMinPow[i] *= ramp;  // Otherwise let the minimum drift upwards.
+ }
+ }
+ }
+
+ // Smooth increasing noise power from zero at the start,
+ // to avoid a sudden burst of comfort noise.
+ if (aec->noiseEstCtr < noiseInitBlocks) {
+ aec->noiseEstCtr++;
+ for (i = 0; i < PART_LEN1; i++) {
+ if (aec->dMinPow[i] > aec->dInitMinPow[i]) {
+ aec->dInitMinPow[i] = gInitNoise[0] * aec->dInitMinPow[i] +
+ gInitNoise[1] * aec->dMinPow[i];
+ } else {
+ aec->dInitMinPow[i] = aec->dMinPow[i];
+ }
+ }
+ aec->noisePow = aec->dInitMinPow;  // During start-up the smoothed estimate is used ...
+ } else {
+ aec->noisePow = aec->dMinPow;  // ... afterwards the tracked minimum is used directly.
+ }
+
+ // Block wise delay estimation used for logging
+ if (aec->delay_logging_enabled) {
+ if (WebRtc_AddFarSpectrumFloat(
+ aec->delay_estimator_farend, abs_far_spectrum, PART_LEN1) == 0) {
+ int delay_estimate = WebRtc_DelayEstimatorProcessFloat(
+ aec->delay_estimator, abs_near_spectrum, PART_LEN1);
+ if (delay_estimate >= 0) {
+ // Update delay estimate buffer.
+ aec->delay_histogram[delay_estimate]++;
+ aec->num_delay_values++;
+ }
+ if (aec->delay_metrics_delivered == 1 &&
+ aec->num_delay_values >= kDelayMetricsAggregationWindow) {
+ UpdateDelayMetrics(aec);
+ }
+ }
+ }
+
+ // Update the xfBuf block position. It counts down and wraps to the last partition.
+ aec->xfBufBlockPos--;
+ if (aec->xfBufBlockPos == -1) {
+ aec->xfBufBlockPos = aec->num_partitions - 1;
+ }
+
+ // Buffer xf
+ memcpy(aec->xfBuf[0] + aec->xfBufBlockPos * PART_LEN1,
+ xf_ptr,
+ sizeof(float) * PART_LEN1);
+ memcpy(aec->xfBuf[1] + aec->xfBufBlockPos * PART_LEN1,
+ &xf_ptr[PART_LEN1],
+ sizeof(float) * PART_LEN1);
+
+ memset(yf, 0, sizeof(yf));
+
+ // Filter far
+ WebRtcAec_FilterFar(aec, yf);
+
+ // Inverse fft to obtain echo estimate and error.
+ fft[0] = yf[0][0];
+ fft[1] = yf[0][PART_LEN];
+ for (i = 1; i < PART_LEN; i++) {
+ fft[2 * i] = yf[0][i];
+ fft[2 * i + 1] = yf[1][i];
+ }
+ aec_rdft_inverse_128(fft);
+
+ scale = 2.0f / PART_LEN2;
+ for (i = 0; i < PART_LEN; i++) {
+ y[i] = fft[PART_LEN + i] * scale; // fft scaling
+ }
+
+ for (i = 0; i < PART_LEN; i++) {
+ e[i] = nearend_ptr[i] - y[i];  // Error = near-end sample minus echo estimate.
+ }
+
+ // Error fft
+ memcpy(aec->eBuf + PART_LEN, e, sizeof(float) * PART_LEN);
+ memset(fft, 0, sizeof(float) * PART_LEN);  // First half zeroed, error block placed in the second half.
+ memcpy(fft + PART_LEN, e, sizeof(float) * PART_LEN);
+ // TODO(bjornv): Change to use TimeToFrequency().
+ aec_rdft_forward_128(fft);
+
+ ef[1][0] = 0;
+ ef[1][PART_LEN] = 0;
+ ef[0][0] = fft[0];
+ ef[0][PART_LEN] = fft[1];
+ for (i = 1; i < PART_LEN; i++) {
+ ef[0][i] = fft[2 * i];
+ ef[1][i] = fft[2 * i + 1];
+ }
+
+ RTC_AEC_DEBUG_RAW_WRITE(aec->e_fft_file,
+ &ef[0][0],
+ sizeof(ef[0][0]) * PART_LEN1 * 2);
+
+ if (aec->metricsMode == 1) {
+ // Note that the first PART_LEN samples in fft (before transformation) are
+ // zero. Hence, the scaling by two in UpdateLevel() should not be
+ // performed. That scaling is taken care of in UpdateMetrics() instead.
+ UpdateLevel(&aec->linoutlevel, ef);
+ }
+
+ // Scale error signal inversely with far power.
+ WebRtcAec_ScaleErrorSignal(aec, ef);
+ WebRtcAec_FilterAdaptation(aec, fft, ef);
+ NonLinearProcessing(aec, output, outputH_ptr);
+
+ if (aec->metricsMode == 1) {
+ // Update power levels and echo metrics
+ UpdateLevel(&aec->farlevel, (float(*)[PART_LEN1])xf_ptr);  // The cast views the flat far-end block as [2][PART_LEN1].
+ UpdateLevel(&aec->nearlevel, df);
+ UpdateMetrics(aec);
+ }
+
+ // Store the output block.
+ WebRtc_WriteBuffer(aec->outFrBuf, output, PART_LEN);
+ // For high bands
+ for (i = 0; i < aec->num_bands - 1; ++i) {
+ WebRtc_WriteBuffer(aec->outFrBufH[i], outputH[i], PART_LEN);  // Same storage that NonLinearProcessing() filled through |outputH_ptr|.
+ }
+
+ RTC_AEC_DEBUG_WAV_WRITE(aec->outLinearFile, e, PART_LEN);
+ RTC_AEC_DEBUG_WAV_WRITE(aec->outFile, output, PART_LEN);
+}
+
+// Allocates an AecCore together with its ring buffers and delay estimators,
+// and installs the processing function pointers (generic C versions first,
+// then any platform-optimized overrides selected at build or run time).
+//
+// Returns the new instance, or NULL if any allocation fails. On failure
+// everything allocated so far is released through WebRtcAec_FreeAec(). The
+// instance still has to be initialized with WebRtcAec_InitAec() before use.
+AecCore* WebRtcAec_CreateAec() {
+  int i;
+  // The struct must start out zeroed. Every failure path below hands the
+  // partially built instance to WebRtcAec_FreeAec(), which releases all
+  // members unconditionally. With plain malloc() the members that were not
+  // assigned yet would be indeterminate pointers, and freeing them is
+  // undefined behaviour. calloc() makes them null instead (all-bits-zero is a
+  // null pointer on the platforms this code targets).
+  AecCore* aec = calloc(1, sizeof(*aec));
+  if (!aec) {
+    return NULL;
+  }
+
+  aec->nearFrBuf = WebRtc_CreateBuffer(FRAME_LEN + PART_LEN, sizeof(float));
+  if (!aec->nearFrBuf) {
+    WebRtcAec_FreeAec(aec);
+    return NULL;
+  }
+
+  aec->outFrBuf = WebRtc_CreateBuffer(FRAME_LEN + PART_LEN, sizeof(float));
+  if (!aec->outFrBuf) {
+    WebRtcAec_FreeAec(aec);
+    return NULL;
+  }
+
+  for (i = 0; i < NUM_HIGH_BANDS_MAX; ++i) {
+    aec->nearFrBufH[i] = WebRtc_CreateBuffer(FRAME_LEN + PART_LEN,
+                                             sizeof(float));
+    if (!aec->nearFrBufH[i]) {
+      WebRtcAec_FreeAec(aec);
+      return NULL;
+    }
+    aec->outFrBufH[i] = WebRtc_CreateBuffer(FRAME_LEN + PART_LEN,
+                                            sizeof(float));
+    if (!aec->outFrBufH[i]) {
+      WebRtcAec_FreeAec(aec);
+      return NULL;
+    }
+  }
+
+  // Create far-end buffers.
+  aec->far_buf =
+      WebRtc_CreateBuffer(kBufSizePartitions, sizeof(float) * 2 * PART_LEN1);
+  if (!aec->far_buf) {
+    WebRtcAec_FreeAec(aec);
+    return NULL;
+  }
+  aec->far_buf_windowed =
+      WebRtc_CreateBuffer(kBufSizePartitions, sizeof(float) * 2 * PART_LEN1);
+  if (!aec->far_buf_windowed) {
+    WebRtcAec_FreeAec(aec);
+    return NULL;
+  }
+#ifdef WEBRTC_AEC_DEBUG_DUMP
+  aec->instance_index = webrtc_aec_instance_count;
+  aec->far_time_buf =
+      WebRtc_CreateBuffer(kBufSizePartitions, sizeof(float) * PART_LEN);
+  if (!aec->far_time_buf) {
+    WebRtcAec_FreeAec(aec);
+    return NULL;
+  }
+  aec->farFile = aec->nearFile = aec->outFile = aec->outLinearFile = NULL;
+  aec->debug_dump_count = 0;
+#endif
+  aec->delay_estimator_farend =
+      WebRtc_CreateDelayEstimatorFarend(PART_LEN1, kHistorySizeBlocks);
+  if (aec->delay_estimator_farend == NULL) {
+    WebRtcAec_FreeAec(aec);
+    return NULL;
+  }
+  // We create the delay_estimator with the same amount of maximum lookahead as
+  // the delay history size (kHistorySizeBlocks) for symmetry reasons.
+  aec->delay_estimator = WebRtc_CreateDelayEstimator(
+      aec->delay_estimator_farend, kHistorySizeBlocks);
+  if (aec->delay_estimator == NULL) {
+    WebRtcAec_FreeAec(aec);
+    return NULL;
+  }
+#ifdef WEBRTC_ANDROID
+  aec->delay_agnostic_enabled = 1;  // DA-AEC enabled by default.
+  // DA-AEC assumes the system is causal from the beginning and will self adjust
+  // the lookahead when shifting is required.
+  WebRtc_set_lookahead(aec->delay_estimator, 0);
+#else
+  aec->delay_agnostic_enabled = 0;
+  WebRtc_set_lookahead(aec->delay_estimator, kLookaheadBlocks);
+#endif
+  aec->extended_filter_enabled = 0;
+
+  // Assembly optimization: start from the generic implementations; the
+  // platform init calls below may replace them.
+  WebRtcAec_FilterFar = FilterFar;
+  WebRtcAec_ScaleErrorSignal = ScaleErrorSignal;
+  WebRtcAec_FilterAdaptation = FilterAdaptation;
+  WebRtcAec_OverdriveAndSuppress = OverdriveAndSuppress;
+  WebRtcAec_ComfortNoise = ComfortNoise;
+  WebRtcAec_SubbandCoherence = SubbandCoherence;
+
+#if defined(WEBRTC_ARCH_X86_FAMILY)
+  if (WebRtc_GetCPUInfo(kSSE2)) {
+    WebRtcAec_InitAec_SSE2();
+  }
+#endif
+
+#if defined(MIPS_FPU_LE)
+  WebRtcAec_InitAec_mips();
+#endif
+
+#if defined(WEBRTC_HAS_NEON)
+  WebRtcAec_InitAec_neon();
+#elif defined(WEBRTC_DETECT_NEON)
+  if ((WebRtc_GetCPUFeaturesARM() & kCPUFeatureNEON) != 0) {
+    WebRtcAec_InitAec_neon();
+  }
+#endif
+
+  aec_rdft_init();
+
+  return aec;
+}
+
+// Releases an AecCore and everything it owns: the near-end/output frame
+// buffers for all bands, the far-end buffers, the debug dump files (through
+// the RTC_AEC_DEBUG_* macros) and both delay estimators. Passing NULL is
+// allowed and does nothing.
+void WebRtcAec_FreeAec(AecCore* aec) {
+  int band;
+
+  if (!aec) {
+    return;
+  }
+
+  // Near-end and output frame buffers, low band first.
+  WebRtc_FreeBuffer(aec->nearFrBuf);
+  WebRtc_FreeBuffer(aec->outFrBuf);
+  for (band = 0; band < NUM_HIGH_BANDS_MAX; band++) {
+    WebRtc_FreeBuffer(aec->nearFrBufH[band]);
+    WebRtc_FreeBuffer(aec->outFrBufH[band]);
+  }
+
+  // Far-end buffers.
+  WebRtc_FreeBuffer(aec->far_buf);
+  WebRtc_FreeBuffer(aec->far_buf_windowed);
+#ifdef WEBRTC_AEC_DEBUG_DUMP
+  WebRtc_FreeBuffer(aec->far_time_buf);
+#endif
+
+  // Debug dump files.
+  RTC_AEC_DEBUG_WAV_CLOSE(aec->farFile);
+  RTC_AEC_DEBUG_WAV_CLOSE(aec->nearFile);
+  RTC_AEC_DEBUG_WAV_CLOSE(aec->outFile);
+  RTC_AEC_DEBUG_WAV_CLOSE(aec->outLinearFile);
+  RTC_AEC_DEBUG_RAW_CLOSE(aec->e_fft_file);
+
+  // Delay estimators.
+  WebRtc_FreeDelayEstimator(aec->delay_estimator);
+  WebRtc_FreeDelayEstimatorFarend(aec->delay_estimator_farend);
+
+  free(aec);
+}
+
+int WebRtcAec_InitAec(AecCore* aec, int sampFreq) {  // Resets |aec| for |sampFreq| Hz. Returns 0 on success, -1 if either delay estimator fails to initialize.
+ int i;
+
+ aec->sampFreq = sampFreq;
+
+ if (sampFreq == 8000) {
+ aec->normal_mu = 0.6f;
+ aec->normal_error_threshold = 2e-6f;
+ aec->num_bands = 1;
+ } else {
+ aec->normal_mu = 0.5f;
+ aec->normal_error_threshold = 1.5e-6f;
+ aec->num_bands = (size_t)(sampFreq / 16000);  // NOTE(review): yields 0 for rates below 16000 other than 8000; |sampFreq| is not validated here.
+ }
+
+ WebRtc_InitBuffer(aec->nearFrBuf);
+ WebRtc_InitBuffer(aec->outFrBuf);
+ for (i = 0; i < NUM_HIGH_BANDS_MAX; ++i) {
+ WebRtc_InitBuffer(aec->nearFrBufH[i]);
+ WebRtc_InitBuffer(aec->outFrBufH[i]);
+ }
+
+ // Initialize far-end buffers.
+ WebRtc_InitBuffer(aec->far_buf);
+ WebRtc_InitBuffer(aec->far_buf_windowed);
+#ifdef WEBRTC_AEC_DEBUG_DUMP
+ WebRtc_InitBuffer(aec->far_time_buf);
+ {
+ int process_rate = sampFreq > 16000 ? 16000 : sampFreq;  // Dump files are opened at no more than 16 kHz.
+ RTC_AEC_DEBUG_WAV_REOPEN("aec_far", aec->instance_index,
+ aec->debug_dump_count, process_rate,
+ &aec->farFile );
+ RTC_AEC_DEBUG_WAV_REOPEN("aec_near", aec->instance_index,
+ aec->debug_dump_count, process_rate,
+ &aec->nearFile);
+ RTC_AEC_DEBUG_WAV_REOPEN("aec_out", aec->instance_index,
+ aec->debug_dump_count, process_rate,
+ &aec->outFile );
+ RTC_AEC_DEBUG_WAV_REOPEN("aec_out_linear", aec->instance_index,
+ aec->debug_dump_count, process_rate,
+ &aec->outLinearFile);
+ }
+
+ RTC_AEC_DEBUG_RAW_OPEN("aec_e_fft",
+ aec->debug_dump_count,
+ &aec->e_fft_file);
+
+ ++aec->debug_dump_count;
+#endif
+ aec->system_delay = 0;
+
+ if (WebRtc_InitDelayEstimatorFarend(aec->delay_estimator_farend) != 0) {
+ return -1;  // NOTE: everything below this point is left uninitialized on this path.
+ }
+ if (WebRtc_InitDelayEstimator(aec->delay_estimator) != 0) {
+ return -1;  // NOTE: everything below this point is left uninitialized on this path.
+ }
+ aec->delay_logging_enabled = 0;
+ aec->delay_metrics_delivered = 0;
+ memset(aec->delay_histogram, 0, sizeof(aec->delay_histogram));
+ aec->num_delay_values = 0;
+ aec->delay_median = -1;
+ aec->delay_std = -1;
+ aec->fraction_poor_delays = -1.0f;
+
+ aec->signal_delay_correction = 0;
+ aec->previous_delay = -2; // (-2): Uninitialized.
+ aec->delay_correction_count = 0;
+ aec->shift_offset = kInitialShiftOffset;
+ aec->delay_quality_threshold = kDelayQualityThresholdMin;
+
+ aec->num_partitions = kNormalNumPartitions;
+
+ // Update the delay estimator with filter length. We use half the
+ // |num_partitions| to take the echo path into account. In practice we say
+ // that the echo has a duration of maximum half |num_partitions|, which is not
+ // true, but serves as a crude measure.
+ WebRtc_set_allowed_offset(aec->delay_estimator, aec->num_partitions / 2);
+ // TODO(bjornv): I currently hard coded the enable. Once we've established
+ // that AECM has no performance regression, robust_validation will be enabled
+ // all the time and the APIs to turn it on/off will be removed. Hence, remove
+ // this line then.
+ WebRtc_enable_robust_validation(aec->delay_estimator, 1);
+ aec->frame_count = 0;
+
+ // Default target suppression mode.
+ aec->nlp_mode = 1;
+
+ // Sampling frequency multiplier w.r.t. 8 kHz.
+ // In case of multiple bands we process the lower band in 16 kHz, hence the
+ // multiplier is always 2.
+ if (aec->num_bands > 1) {
+ aec->mult = 2;
+ } else {
+ aec->mult = (short)aec->sampFreq / 8000;  // The cast binds to |sampFreq| alone, before the division.
+ }
+
+ aec->farBufWritePos = 0;
+ aec->farBufReadPos = 0;
+
+ aec->inSamples = 0;
+ aec->outSamples = 0;
+ aec->knownDelay = 0;
+
+ // Initialize buffers
+ memset(aec->dBuf, 0, sizeof(aec->dBuf));
+ memset(aec->eBuf, 0, sizeof(aec->eBuf));
+ // For H bands
+ for (i = 0; i < NUM_HIGH_BANDS_MAX; ++i) {
+ memset(aec->dBufH[i], 0, sizeof(aec->dBufH[i]));
+ }
+
+ memset(aec->xPow, 0, sizeof(aec->xPow));
+ memset(aec->dPow, 0, sizeof(aec->dPow));
+ memset(aec->dInitMinPow, 0, sizeof(aec->dInitMinPow));
+ aec->noisePow = aec->dInitMinPow;  // Noise power starts out pointing at the zeroed start-up estimate.
+ aec->noiseEstCtr = 0;
+
+ // Initial comfort noise power
+ for (i = 0; i < PART_LEN1; i++) {
+ aec->dMinPow[i] = 1.0e6f;
+ }
+
+ // Holds the last block written to
+ aec->xfBufBlockPos = 0;
+ // TODO: Investigate need for these initializations. Deleting them doesn't
+ // change the output at all and yields 0.4% overall speedup.
+ memset(aec->xfBuf, 0, sizeof(complex_t) * kExtendedNumPartitions * PART_LEN1);
+ memset(aec->wfBuf, 0, sizeof(complex_t) * kExtendedNumPartitions * PART_LEN1);
+ memset(aec->sde, 0, sizeof(complex_t) * PART_LEN1);
+ memset(aec->sxd, 0, sizeof(complex_t) * PART_LEN1);
+ memset(
+ aec->xfwBuf, 0, sizeof(complex_t) * kExtendedNumPartitions * PART_LEN1);
+ memset(aec->se, 0, sizeof(float) * PART_LEN1);
+
+ // To prevent numerical instability in the first block.
+ for (i = 0; i < PART_LEN1; i++) {
+ aec->sd[i] = 1;
+ }
+ for (i = 0; i < PART_LEN1; i++) {
+ aec->sx[i] = 1;
+ }
+
+ memset(aec->hNs, 0, sizeof(aec->hNs));
+ memset(aec->outBuf, 0, sizeof(float) * PART_LEN);
+
+ aec->hNlFbMin = 1;
+ aec->hNlFbLocalMin = 1;
+ aec->hNlXdAvgMin = 1;
+ aec->hNlNewMin = 0;
+ aec->hNlMinCtr = 0;
+ aec->overDrive = 2;
+ aec->overDriveSm = 2;
+ aec->delayIdx = 0;
+ aec->stNearState = 0;
+ aec->echoState = 0;
+ aec->divergeState = 0;
+
+ aec->seed = 777;
+ aec->delayEstCtr = 0;
+
+ // Metrics disabled by default
+ aec->metricsMode = 0;
+ InitMetrics(aec);
+
+ return 0;
+}
+
+void WebRtcAec_BufferFarendPartition(AecCore* aec, const float* farend) {
+ float fft[PART_LEN2]; // Scratch copy of |farend| handed to TimeToFrequency().
+ float xf[2][PART_LEN1]; // Spectrum; written to the ring buffers as one element.
+
+ // If far_buf has no room for another element, flush the oldest one first.
+ if (WebRtc_available_write(aec->far_buf) < 1) {
+ WebRtcAec_MoveFarReadPtr(aec, 1);
+ }
+ // Convert far-end partition to the frequency domain without windowing.
+ memcpy(fft, farend, sizeof(float) * PART_LEN2);
+ TimeToFrequency(fft, xf, 0);
+ WebRtc_WriteBuffer(aec->far_buf, &xf[0][0], 1);
+
+ // Convert far-end partition to the frequency domain with windowing.
+ memcpy(fft, farend, sizeof(float) * PART_LEN2); // Re-copy; presumably |fft| was overwritten above.
+ TimeToFrequency(fft, xf, 1);
+ WebRtc_WriteBuffer(aec->far_buf_windowed, &xf[0][0], 1);
+}
+
+int WebRtcAec_MoveFarReadPtr(AecCore* aec, int elements) {
+ int elements_moved = MoveFarReadPtrWithoutSystemDelayUpdate(aec, elements);
+ aec->system_delay -= elements_moved * PART_LEN; // PART_LEN samples per element.
+ return elements_moved; // Count reported by the helper; may differ from |elements|.
+}
+
+void WebRtcAec_ProcessFrames(AecCore* aec,
+ const float* const* nearend,
+ size_t num_bands,
+ size_t num_samples,
+ int knownDelay,
+ float* const* out) {
+ size_t i, j;
+ int out_elements = 0;
+
+ aec->frame_count++;
+ // For each frame the process is as follows:
+ // 1) If the system_delay indicates that there is too little data to process
+ // a frame we stuff the buffer with enough data for 10 ms.
+ // 2 a) Adjust the buffer to the system delay, by moving the read pointer.
+ // b) Apply signal based delay correction, if we have detected poor AEC
+ // performance.
+ // 3) TODO(bjornv): Investigate if we need to add this:
+ // If we can't move read pointer due to buffer size limitations we
+ // flush/stuff the buffer.
+ // 4) Process as many partitions as possible.
+ // 5) Update the |system_delay| with respect to a full frame of FRAME_LEN
+ // samples. Even though we will have data left to process (we work with
+ // partitions) we consider updating a whole frame, since that's the
+ // amount of data we input and output in audio_processing.
+ // 6) Update the outputs.
+
+ // The AEC has two different delay estimation algorithms built in. The
+ // first relies on delay input values from the user and the amount of
+ // shifted buffer elements is controlled by |knownDelay|. This delay will
+ // give a guess on how much we need to shift far-end buffers to align with
+ // the near-end signal. The other delay estimation algorithm uses the
+ // far- and near-end signals to find the offset between them. This one
+ // (called "signal delay") is then used to fine tune the alignment, or
+ // simply compensate for errors in the system based one.
+ // Note that the two algorithms operate independently. Currently, we only
+ // allow one algorithm to be turned on.
+
+ assert(aec->num_bands == num_bands);
+
+ for (j = 0; j < num_samples; j+= FRAME_LEN) { // One FRAME_LEN-sample frame per pass.
+ // TODO(bjornv): Change the near-end buffer handling to be the same as for
+ // far-end, that is, with a near_pre_buf.
+ // Buffer the near-end frame.
+ WebRtc_WriteBuffer(aec->nearFrBuf, &nearend[0][j], FRAME_LEN);
+ // For H band
+ for (i = 1; i < num_bands; ++i) {
+ WebRtc_WriteBuffer(aec->nearFrBufH[i - 1], &nearend[i][j], FRAME_LEN);
+ }
+
+ // 1) At most we process |aec->mult|+1 partitions in 10 ms. Make sure we
+ // have enough far-end data for that by stuffing the buffer if the
+ // |system_delay| indicates otherwise.
+ if (aec->system_delay < FRAME_LEN) {
+ // We don't have enough data so we rewind 10 ms.
+ WebRtcAec_MoveFarReadPtr(aec, -(aec->mult + 1));
+ }
+
+ if (!aec->delay_agnostic_enabled) {
+ // 2 a) Compensate for a possible change in the system delay.
+
+ // TODO(bjornv): Investigate how we should round the delay difference;
+ // right now we know that incoming |knownDelay| is underestimated when
+ // it's less than |aec->knownDelay|. We therefore round (-32) in that
+ // direction. In the other direction, we don't have this situation, but
+ // might flush one partition too little. This can cause non-causality,
+ // which should be investigated. Maybe, allow for a non-symmetric
+ // rounding, like -16.
+ int move_elements = (aec->knownDelay - knownDelay - 32) / PART_LEN;
+ int moved_elements =
+ MoveFarReadPtrWithoutSystemDelayUpdate(aec, move_elements);
+ aec->knownDelay -= moved_elements * PART_LEN;
+ } else {
+ // 2 b) Apply signal based delay correction.
+ int move_elements = SignalBasedDelayCorrection(aec);
+ int moved_elements =
+ MoveFarReadPtrWithoutSystemDelayUpdate(aec, move_elements);
+ int far_near_buffer_diff = WebRtc_available_read(aec->far_buf) -
+ WebRtc_available_read(aec->nearFrBuf) / PART_LEN;
+ WebRtc_SoftResetDelayEstimator(aec->delay_estimator, moved_elements);
+ WebRtc_SoftResetDelayEstimatorFarend(aec->delay_estimator_farend,
+ moved_elements);
+ aec->signal_delay_correction += moved_elements;
+ // If we rely on reported system delay values only, a buffer underrun here
+ // can never occur since we've taken care of that in 1) above. Here, we
+ // apply signal based delay correction and can therefore end up with
+ // buffer underruns since the delay estimation can be wrong. We therefore
+ // stuff the buffer with enough elements if needed.
+ if (far_near_buffer_diff < 0) {
+ WebRtcAec_MoveFarReadPtr(aec, far_near_buffer_diff);
+ }
+ }
+
+ // 4) Process as many blocks as possible.
+ while (WebRtc_available_read(aec->nearFrBuf) >= PART_LEN) {
+ ProcessBlock(aec);
+ }
+
+ // 5) Update system delay with respect to the entire frame.
+ aec->system_delay -= FRAME_LEN;
+
+ // 6) Update output frame.
+ // Stuff the out buffer if we have less than a frame to output.
+ // This should only happen for the first frame.
+ out_elements = (int)WebRtc_available_read(aec->outFrBuf);
+ if (out_elements < FRAME_LEN) {
+ WebRtc_MoveReadPtr(aec->outFrBuf, out_elements - FRAME_LEN); // Negative count here.
+ for (i = 0; i < num_bands - 1; ++i) {
+ WebRtc_MoveReadPtr(aec->outFrBufH[i], out_elements - FRAME_LEN);
+ }
+ }
+ // Obtain an output frame.
+ WebRtc_ReadBuffer(aec->outFrBuf, NULL, &out[0][j], FRAME_LEN);
+ // For H bands.
+ for (i = 1; i < num_bands; ++i) {
+ WebRtc_ReadBuffer(aec->outFrBufH[i - 1], NULL, &out[i][j], FRAME_LEN);
+ }
+ }
+}
+
+int WebRtcAec_GetDelayMetricsCore(AecCore* self, int* median, int* std,
+ float* fraction_poor_delays) {
+ assert(self != NULL);
+ assert(median != NULL);
+ assert(std != NULL);
+ assert(fraction_poor_delays != NULL); // Written unconditionally below.
+ if (self->delay_logging_enabled == 0) {
+ // Logging disabled.
+ return -1; // Outputs are left untouched in this case.
+ }
+
+ if (self->delay_metrics_delivered == 0) { // Not yet computed since last reset.
+ UpdateDelayMetrics(self);
+ self->delay_metrics_delivered = 1;
+ }
+ *median = self->delay_median;
+ *std = self->delay_std;
+ *fraction_poor_delays = self->fraction_poor_delays;
+
+ return 0; // Success: all three outputs were written.
+}
+
+int WebRtcAec_echo_state(AecCore* self) { return self->echoState; } // 1: echo, 0: no echo.
+
+void WebRtcAec_GetEchoStats(AecCore* self,
+ Stats* erl,
+ Stats* erle,
+ Stats* a_nlp) {
+ assert(erl != NULL);
+ assert(erle != NULL);
+ assert(a_nlp != NULL); // Note: |self| itself is not checked.
+ *erl = self->erl; // Struct copies: the caller receives a snapshot by value.
+ *erle = self->erle;
+ *a_nlp = self->aNlp;
+}
+
+#ifdef WEBRTC_AEC_DEBUG_DUMP
+void* WebRtcAec_far_time_buf(AecCore* self) { return self->far_time_buf; } // Debug-dump builds only.
+#endif // WEBRTC_AEC_DEBUG_DUMP
+
+void WebRtcAec_SetConfigCore(AecCore* self,
+ int nlp_mode,
+ int metrics_mode,
+ int delay_logging) {
+ assert(nlp_mode >= 0 && nlp_mode < 3); // Valid modes are 0, 1 and 2.
+ self->nlp_mode = nlp_mode;
+ self->metricsMode = metrics_mode;
+ if (self->metricsMode) { // Metrics are (re)initialized only when switched on.
+ InitMetrics(self);
+ }
+ // Turn on delay logging if it is either set explicitly or if delay agnostic
+ // AEC is enabled (which requires delay estimates).
+ self->delay_logging_enabled = delay_logging || self->delay_agnostic_enabled;
+ if (self->delay_logging_enabled) { // Start logging from an empty histogram.
+ memset(self->delay_histogram, 0, sizeof(self->delay_histogram));
+ }
+}
+
+void WebRtcAec_enable_delay_agnostic(AecCore* self, int enable) {
+ self->delay_agnostic_enabled = enable; // Non-zero enables, zero disables.
+}
+
+int WebRtcAec_delay_agnostic_enabled(AecCore* self) {
+ return self->delay_agnostic_enabled; // The value last stored by the setter, not normalized.
+}
+
+void WebRtcAec_enable_extended_filter(AecCore* self, int enable) {
+ self->extended_filter_enabled = enable; // Stored as given; non-zero means enabled.
+ self->num_partitions = enable ? kExtendedNumPartitions : kNormalNumPartitions;
+ // Update the delay estimator with filter length. See WebRtcAec_InitAec().
+ WebRtc_set_allowed_offset(self->delay_estimator, self->num_partitions / 2);
+}
+
+int WebRtcAec_extended_filter_enabled(AecCore* self) {
+ return self->extended_filter_enabled; // Non-zero if extended filter mode is on.
+}
+
+int WebRtcAec_system_delay(AecCore* self) { return self->system_delay; } // In samples.
+
+void WebRtcAec_SetSystemDelay(AecCore* self, int delay) {
+ assert(delay >= 0); // Only enforced when asserts are compiled in.
+ self->system_delay = delay; // Overwrites the value tracked by the frame processing.
+}
diff --git a/third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_core.h b/third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_core.h
new file mode 100644
index 00000000..241f0775
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_core.h
@@ -0,0 +1,129 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+/*
+ * Specifies the interface for the AEC core.
+ */
+
+#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_CORE_H_
+#define WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_CORE_H_
+
+#include <stddef.h>
+
+#include "webrtc/typedefs.h"
+
+#define FRAME_LEN 80
+#define PART_LEN 64 // Length of partition
+#define PART_LEN1 (PART_LEN + 1) // Unique fft coefficients
+#define PART_LEN2 (PART_LEN * 2) // Length of partition * 2
+#define NUM_HIGH_BANDS_MAX 2 // Max number of high bands
+
+typedef float complex_t[2];
+// For performance reasons, some arrays of complex numbers are replaced by twice
+// as long arrays of float, all the real parts followed by all the imaginary
+// ones (complex_t[SIZE] -> float[2][SIZE]). This allows SIMD optimizations and
+// is better than two arrays (one for the real parts and one for the imaginary
+// parts) as this other way would require two pointers instead of one and cause
+// extra register spilling. This also allows the offsets to be calculated at
+// compile time.
+
+// Metrics
+enum {
+ kOffsetLevel = -100
+};
+
+typedef struct Stats {
+ float instant;
+ float average;
+ float min;
+ float max;
+ float sum;
+ float hisum;
+ float himean;
+ int counter;
+ int hicounter;
+} Stats;
+
+typedef struct AecCore AecCore;
+
+AecCore* WebRtcAec_CreateAec(); // Returns NULL on error.
+void WebRtcAec_FreeAec(AecCore* aec);
+int WebRtcAec_InitAec(AecCore* aec, int sampFreq);
+void WebRtcAec_InitAec_SSE2(void);
+#if defined(MIPS_FPU_LE)
+void WebRtcAec_InitAec_mips(void);
+#endif
+#if defined(WEBRTC_DETECT_NEON) || defined(WEBRTC_HAS_NEON)
+void WebRtcAec_InitAec_neon(void);
+#endif
+
+void WebRtcAec_BufferFarendPartition(AecCore* aec, const float* farend);
+void WebRtcAec_ProcessFrames(AecCore* aec,
+ const float* const* nearend,
+ size_t num_bands,
+ size_t num_samples,
+ int knownDelay,
+ float* const* out);
+
+// A helper function to call WebRtc_MoveReadPtr() for all far-end buffers.
+// Returns the number of elements moved, and adjusts |system_delay| by the
+// corresponding number of samples (PART_LEN per element).
+int WebRtcAec_MoveFarReadPtr(AecCore* aec, int elements);
+
+// Calculates the median, standard deviation and amount of poor values among the
+// delay estimates aggregated up to the first call to the function. After that
+// first call the metrics are aggregated and updated every second. With poor
+// values we mean values that most likely will cause the AEC to perform poorly.
+// TODO(bjornv): Consider changing tests and tools to handle a constant
+// aggregation window throughout the session instead.
+int WebRtcAec_GetDelayMetricsCore(AecCore* self, int* median, int* std,
+ float* fraction_poor_delays);
+
+// Returns the echo state (1: echo, 0: no echo).
+int WebRtcAec_echo_state(AecCore* self);
+
+// Gets statistics of the echo metrics ERL, ERLE, A_NLP.
+void WebRtcAec_GetEchoStats(AecCore* self,
+ Stats* erl,
+ Stats* erle,
+ Stats* a_nlp);
+#ifdef WEBRTC_AEC_DEBUG_DUMP
+void* WebRtcAec_far_time_buf(AecCore* self);
+#endif
+
+// Sets local configuration modes.
+void WebRtcAec_SetConfigCore(AecCore* self,
+ int nlp_mode,
+ int metrics_mode,
+ int delay_logging);
+
+// Non-zero enables, zero disables.
+void WebRtcAec_enable_delay_agnostic(AecCore* self, int enable);
+
+// Returns non-zero if delay agnostic (i.e., signal based delay estimation) is
+// enabled and zero if disabled.
+int WebRtcAec_delay_agnostic_enabled(AecCore* self);
+
+// Enables or disables extended filter mode. Non-zero enables, zero disables.
+void WebRtcAec_enable_extended_filter(AecCore* self, int enable);
+
+// Returns non-zero if extended filter mode is enabled and zero if disabled.
+int WebRtcAec_extended_filter_enabled(AecCore* self);
+
+// Returns the current |system_delay|, i.e., the buffered difference between
+// far-end and near-end.
+int WebRtcAec_system_delay(AecCore* self);
+
+// Sets the |system_delay| to |delay|. Note that if the value is changed
+// improperly, there can be a performance regression. So it should be used with
+// care.
+void WebRtcAec_SetSystemDelay(AecCore* self, int delay);
+
+#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_CORE_H_
diff --git a/third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_core_internal.h b/third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_core_internal.h
new file mode 100644
index 00000000..2de02837
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_core_internal.h
@@ -0,0 +1,202 @@
+/*
+ * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_CORE_INTERNAL_H_
+#define WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_CORE_INTERNAL_H_
+
+#include "webrtc/common_audio/ring_buffer.h"
+#include "webrtc/common_audio/wav_file.h"
+#include "webrtc/modules/audio_processing/aec/aec_common.h"
+#include "webrtc/modules/audio_processing/aec/aec_core.h"
+#include "webrtc/typedefs.h"
+
+// Number of partitions for the extended filter mode. The first one is an enum
+// to be used in array declarations, as it represents the maximum filter length.
+enum {
+ kExtendedNumPartitions = 32
+};
+static const int kNormalNumPartitions = 12;
+
+// Delay estimator constants, used for logging and delay compensation if
+// reported delays are disabled.
+enum {
+ kLookaheadBlocks = 15
+};
+enum {
+ // 500 ms for 16 kHz which is equivalent with the limit of reported delays.
+ kHistorySizeBlocks = 125
+};
+
+// Extended filter adaptation parameters.
+// TODO(ajm): No narrowband tuning yet.
+static const float kExtendedMu = 0.4f;
+static const float kExtendedErrorThreshold = 1.0e-6f;
+
+typedef struct PowerLevel {
+ float sfrsum;
+ int sfrcounter;
+ float framelevel;
+ float frsum;
+ int frcounter;
+ float minlevel;
+ float averagelevel;
+} PowerLevel;
+
+struct AecCore {
+ int farBufWritePos, farBufReadPos;
+
+ int knownDelay; // Reported delay currently compensated for; see WebRtcAec_ProcessFrames().
+ int inSamples, outSamples;
+ int delayEstCtr;
+
+ RingBuffer* nearFrBuf;
+ RingBuffer* outFrBuf;
+
+ RingBuffer* nearFrBufH[NUM_HIGH_BANDS_MAX];
+ RingBuffer* outFrBufH[NUM_HIGH_BANDS_MAX];
+
+ float dBuf[PART_LEN2]; // nearend
+ float eBuf[PART_LEN2]; // error
+
+ float dBufH[NUM_HIGH_BANDS_MAX][PART_LEN2]; // nearend
+
+ float xPow[PART_LEN1];
+ float dPow[PART_LEN1];
+ float dMinPow[PART_LEN1]; // Initialized to 1.0e6f per bin.
+ float dInitMinPow[PART_LEN1];
+ float* noisePow; // Points at dInitMinPow after initialization.
+
+ float xfBuf[2][kExtendedNumPartitions * PART_LEN1]; // farend fft buffer
+ float wfBuf[2][kExtendedNumPartitions * PART_LEN1]; // filter fft
+ complex_t sde[PART_LEN1]; // cross-psd of nearend and error
+ complex_t sxd[PART_LEN1]; // cross-psd of farend and nearend
+ // Farend windowed fft buffer.
+ complex_t xfwBuf[kExtendedNumPartitions * PART_LEN1];
+
+ float sx[PART_LEN1], sd[PART_LEN1], se[PART_LEN1]; // far, near, error psd
+ float hNs[PART_LEN1];
+ float hNlFbMin, hNlFbLocalMin;
+ float hNlXdAvgMin;
+ int hNlNewMin, hNlMinCtr;
+ float overDrive, overDriveSm;
+ int nlp_mode; // Suppression mode, 0..2 (default 1).
+ float outBuf[PART_LEN];
+ int delayIdx;
+
+ short stNearState, echoState; // echoState: 1 = echo, 0 = no echo.
+ short divergeState;
+
+ int xfBufBlockPos; // Holds the last block written to.
+
+ RingBuffer* far_buf;
+ RingBuffer* far_buf_windowed;
+ int system_delay; // Current system delay buffered in AEC (in samples).
+
+ int mult; // sampling frequency multiplier w.r.t. 8 kHz
+ int sampFreq;
+ size_t num_bands;
+ uint32_t seed; // Random generator state; initialized to 777.
+
+ float normal_mu; // stepsize
+ float normal_error_threshold; // error threshold
+
+ int noiseEstCtr;
+
+ PowerLevel farlevel;
+ PowerLevel nearlevel;
+ PowerLevel linoutlevel;
+ PowerLevel nlpoutlevel;
+
+ int metricsMode; // Non-zero enables metrics; disabled by default.
+ int stateCounter;
+ Stats erl;
+ Stats erle;
+ Stats aNlp;
+ Stats rerl;
+
+ // Quantities to control H band scaling for SWB input
+ int freq_avg_ic; // initial bin for averaging nlp gain
+ int flag_Hband_cn; // for comfort noise
+ float cn_scale_Hband; // scale for comfort noise in H band
+
+ int delay_metrics_delivered; // Set to 1 once the metrics have been computed.
+ int delay_histogram[kHistorySizeBlocks];
+ int num_delay_values;
+ int delay_median; // -1 after initialization.
+ int delay_std; // -1 after initialization.
+ float fraction_poor_delays; // -1.0f after initialization.
+ int delay_logging_enabled;
+ void* delay_estimator_farend;
+ void* delay_estimator;
+ // Variables associated with delay correction through signal based delay
+ // estimation feedback.
+ int signal_delay_correction;
+ int previous_delay; // -2 means uninitialized.
+ int delay_correction_count;
+ int shift_offset;
+ float delay_quality_threshold;
+ int frame_count;
+
+ // 0 = delay agnostic mode (signal based delay correction) disabled.
+ // Otherwise enabled.
+ int delay_agnostic_enabled;
+ // Non-zero = extended filter mode enabled, 0 = disabled.
+ int extended_filter_enabled;
+ // Runtime selection of number of filter partitions.
+ int num_partitions; // kNormalNumPartitions or kExtendedNumPartitions.
+
+#ifdef WEBRTC_AEC_DEBUG_DUMP
+ // Sequence number of this AEC instance, so that different instances can
+ // choose different dump file names.
+ int instance_index;
+
+ // Number of times we've restarted dumping; used to pick new dump file names
+ // each time.
+ int debug_dump_count;
+
+ RingBuffer* far_time_buf;
+ rtc_WavWriter* farFile;
+ rtc_WavWriter* nearFile;
+ rtc_WavWriter* outFile;
+ rtc_WavWriter* outLinearFile;
+ FILE* e_fft_file;
+#endif
+};
+
+typedef void (*WebRtcAecFilterFar)(AecCore* aec, float yf[2][PART_LEN1]);
+extern WebRtcAecFilterFar WebRtcAec_FilterFar;
+typedef void (*WebRtcAecScaleErrorSignal)(AecCore* aec, float ef[2][PART_LEN1]);
+extern WebRtcAecScaleErrorSignal WebRtcAec_ScaleErrorSignal;
+typedef void (*WebRtcAecFilterAdaptation)(AecCore* aec,
+ float* fft,
+ float ef[2][PART_LEN1]);
+extern WebRtcAecFilterAdaptation WebRtcAec_FilterAdaptation;
+typedef void (*WebRtcAecOverdriveAndSuppress)(AecCore* aec,
+ float hNl[PART_LEN1],
+ const float hNlFb,
+ float efw[2][PART_LEN1]);
+extern WebRtcAecOverdriveAndSuppress WebRtcAec_OverdriveAndSuppress;
+
+typedef void (*WebRtcAecComfortNoise)(AecCore* aec,
+ float efw[2][PART_LEN1],
+ complex_t* comfortNoiseHband,
+ const float* noisePow,
+ const float* lambda);
+extern WebRtcAecComfortNoise WebRtcAec_ComfortNoise;
+
+typedef void (*WebRtcAecSubBandCoherence)(AecCore* aec,
+ float efw[2][PART_LEN1],
+ float xfw[2][PART_LEN1],
+ float* fft,
+ float* cohde,
+ float* cohxd);
+extern WebRtcAecSubBandCoherence WebRtcAec_SubbandCoherence;
+
+#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_CORE_INTERNAL_H_
diff --git a/third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_core_mips.c b/third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_core_mips.c
new file mode 100644
index 00000000..bb33087a
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_core_mips.c
@@ -0,0 +1,774 @@
+/*
+ * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+/*
+ * The core AEC algorithm, which is presented with time-aligned signals.
+ */
+
+#include "webrtc/modules/audio_processing/aec/aec_core.h"
+
+#include <math.h>
+
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+#include "webrtc/modules/audio_processing/aec/aec_core_internal.h"
+#include "webrtc/modules/audio_processing/aec/aec_rdft.h"
+
+static const int flagHbandCn = 1; // flag for adding comfort noise in H band
+extern const float WebRtcAec_weightCurve[65];
+extern const float WebRtcAec_overDriveCurve[65];
+
+void WebRtcAec_ComfortNoise_mips(AecCore* aec,
+ float efw[2][PART_LEN1],
+ complex_t* comfortNoiseHband,
+ const float* noisePow,
+ const float* lambda) {
+ int i, num;
+ float rand[PART_LEN];
+ float noise, noiseAvg, tmp, tmpAvg;
+ int16_t randW16[PART_LEN];
+ complex_t u[PART_LEN1];
+
+ const float pi2 = 6.28318530717959f;
+ const float pi2t = pi2 / 32768;
+
+ // Fill randW16 with PART_LEN random int16 values (scaled by pi2t below).
+ WebRtcSpl_RandUArray(randW16, PART_LEN, &aec->seed);
+
+ int16_t* randWptr = randW16;
+ float randTemp, randTemp2, randTemp3, randTemp4;
+ int32_t tmp1s, tmp2s, tmp3s, tmp4s;
+
+ for (i = 0; i < PART_LEN; i+=4) { // Four phases per pass -> u[i+1..i+4].
+ __asm __volatile (
+ ".set push \n\t"
+ ".set noreorder \n\t"
+ "lh %[tmp1s], 0(%[randWptr]) \n\t"
+ "lh %[tmp2s], 2(%[randWptr]) \n\t"
+ "lh %[tmp3s], 4(%[randWptr]) \n\t"
+ "lh %[tmp4s], 6(%[randWptr]) \n\t"
+ "mtc1 %[tmp1s], %[randTemp] \n\t"
+ "mtc1 %[tmp2s], %[randTemp2] \n\t"
+ "mtc1 %[tmp3s], %[randTemp3] \n\t"
+ "mtc1 %[tmp4s], %[randTemp4] \n\t"
+ "cvt.s.w %[randTemp], %[randTemp] \n\t"
+ "cvt.s.w %[randTemp2], %[randTemp2] \n\t"
+ "cvt.s.w %[randTemp3], %[randTemp3] \n\t"
+ "cvt.s.w %[randTemp4], %[randTemp4] \n\t"
+ "addiu %[randWptr], %[randWptr], 8 \n\t"
+ "mul.s %[randTemp], %[randTemp], %[pi2t] \n\t"
+ "mul.s %[randTemp2], %[randTemp2], %[pi2t] \n\t"
+ "mul.s %[randTemp3], %[randTemp3], %[pi2t] \n\t"
+ "mul.s %[randTemp4], %[randTemp4], %[pi2t] \n\t"
+ ".set pop \n\t"
+ : [randWptr] "+r" (randWptr), [randTemp] "=&f" (randTemp),
+ [randTemp2] "=&f" (randTemp2), [randTemp3] "=&f" (randTemp3),
+ [randTemp4] "=&f" (randTemp4), [tmp1s] "=&r" (tmp1s),
+ [tmp2s] "=&r" (tmp2s), [tmp3s] "=&r" (tmp3s),
+ [tmp4s] "=&r" (tmp4s)
+ : [pi2t] "f" (pi2t)
+ : "memory"
+ ); // randTemp..randTemp4 now hold randW16[i..i+3] * pi2t.
+
+ u[i+1][0] = cosf(randTemp);
+ u[i+1][1] = sinf(randTemp);
+ u[i+2][0] = cosf(randTemp2);
+ u[i+2][1] = sinf(randTemp2);
+ u[i+3][0] = cosf(randTemp3);
+ u[i+3][1] = sinf(randTemp3);
+ u[i+4][0] = cosf(randTemp4);
+ u[i+4][1] = sinf(randTemp4);
+ }
+
+ // Reject LF noise: bin 0 is zeroed; bins 1.. are scaled by sqrt(noisePow).
+ float* u_ptr = &u[1][0];
+ float noise2, noise3, noise4;
+ float tmp1f, tmp2f, tmp3f, tmp4f, tmp5f, tmp6f, tmp7f, tmp8f;
+
+ u[0][0] = 0;
+ u[0][1] = 0;
+ for (i = 1; i < PART_LEN1; i+=4) { // Four bins per pass; imaginary parts are negated.
+ __asm __volatile (
+ ".set push \n\t"
+ ".set noreorder \n\t"
+ "lwc1 %[noise], 4(%[noisePow]) \n\t"
+ "lwc1 %[noise2], 8(%[noisePow]) \n\t"
+ "lwc1 %[noise3], 12(%[noisePow]) \n\t"
+ "lwc1 %[noise4], 16(%[noisePow]) \n\t"
+ "sqrt.s %[noise], %[noise] \n\t"
+ "sqrt.s %[noise2], %[noise2] \n\t"
+ "sqrt.s %[noise3], %[noise3] \n\t"
+ "sqrt.s %[noise4], %[noise4] \n\t"
+ "lwc1 %[tmp1f], 0(%[u_ptr]) \n\t"
+ "lwc1 %[tmp2f], 4(%[u_ptr]) \n\t"
+ "lwc1 %[tmp3f], 8(%[u_ptr]) \n\t"
+ "lwc1 %[tmp4f], 12(%[u_ptr]) \n\t"
+ "lwc1 %[tmp5f], 16(%[u_ptr]) \n\t"
+ "lwc1 %[tmp6f], 20(%[u_ptr]) \n\t"
+ "lwc1 %[tmp7f], 24(%[u_ptr]) \n\t"
+ "lwc1 %[tmp8f], 28(%[u_ptr]) \n\t"
+ "addiu %[noisePow], %[noisePow], 16 \n\t"
+ "mul.s %[tmp1f], %[tmp1f], %[noise] \n\t"
+ "mul.s %[tmp2f], %[tmp2f], %[noise] \n\t"
+ "mul.s %[tmp3f], %[tmp3f], %[noise2] \n\t"
+ "mul.s %[tmp4f], %[tmp4f], %[noise2] \n\t"
+ "mul.s %[tmp5f], %[tmp5f], %[noise3] \n\t"
+ "mul.s %[tmp6f], %[tmp6f], %[noise3] \n\t"
+ "swc1 %[tmp1f], 0(%[u_ptr]) \n\t"
+ "swc1 %[tmp3f], 8(%[u_ptr]) \n\t"
+ "mul.s %[tmp8f], %[tmp8f], %[noise4] \n\t"
+ "mul.s %[tmp7f], %[tmp7f], %[noise4] \n\t"
+ "neg.s %[tmp2f] \n\t"
+ "neg.s %[tmp4f] \n\t"
+ "neg.s %[tmp6f] \n\t"
+ "neg.s %[tmp8f] \n\t"
+ "swc1 %[tmp5f], 16(%[u_ptr]) \n\t"
+ "swc1 %[tmp7f], 24(%[u_ptr]) \n\t"
+ "swc1 %[tmp2f], 4(%[u_ptr]) \n\t"
+ "swc1 %[tmp4f], 12(%[u_ptr]) \n\t"
+ "swc1 %[tmp6f], 20(%[u_ptr]) \n\t"
+ "swc1 %[tmp8f], 28(%[u_ptr]) \n\t"
+ "addiu %[u_ptr], %[u_ptr], 32 \n\t"
+ ".set pop \n\t"
+ : [u_ptr] "+r" (u_ptr), [noisePow] "+r" (noisePow),
+ [noise] "=&f" (noise), [noise2] "=&f" (noise2),
+ [noise3] "=&f" (noise3), [noise4] "=&f" (noise4),
+ [tmp1f] "=&f" (tmp1f), [tmp2f] "=&f" (tmp2f),
+ [tmp3f] "=&f" (tmp3f), [tmp4f] "=&f" (tmp4f),
+ [tmp5f] "=&f" (tmp5f), [tmp6f] "=&f" (tmp6f),
+ [tmp7f] "=&f" (tmp7f), [tmp8f] "=&f" (tmp8f)
+ :
+ : "memory"
+ );
+ }
+ u[PART_LEN][1] = 0;
+ noisePow -= PART_LEN; // Undo the pointer advance made by the asm loop above.
+
+ u_ptr = &u[0][0];
+ float* u_ptr_end = &u[PART_LEN][0];
+ float* efw_ptr_0 = &efw[0][0];
+ float* efw_ptr_1 = &efw[1][0];
+ float tmp9f, tmp10f;
+ const float tmp1c = 1.0;
+
+ __asm __volatile ( // Two bins per pass: where lambda < 1, efw += sqrt(1 - lambda^2) * u.
+ ".set push \n\t"
+ ".set noreorder \n\t"
+ "1: \n\t"
+ "lwc1 %[tmp1f], 0(%[lambda]) \n\t"
+ "lwc1 %[tmp6f], 4(%[lambda]) \n\t"
+ "addiu %[lambda], %[lambda], 8 \n\t"
+ "c.lt.s %[tmp1f], %[tmp1c] \n\t"
+ "bc1f 4f \n\t"
+ " nop \n\t"
+ "c.lt.s %[tmp6f], %[tmp1c] \n\t"
+ "bc1f 3f \n\t"
+ " nop \n\t"
+ "2: \n\t"
+ "mul.s %[tmp1f], %[tmp1f], %[tmp1f] \n\t"
+ "mul.s %[tmp6f], %[tmp6f], %[tmp6f] \n\t"
+ "sub.s %[tmp1f], %[tmp1c], %[tmp1f] \n\t"
+ "sub.s %[tmp6f], %[tmp1c], %[tmp6f] \n\t"
+ "sqrt.s %[tmp1f], %[tmp1f] \n\t"
+ "sqrt.s %[tmp6f], %[tmp6f] \n\t"
+ "lwc1 %[tmp2f], 0(%[efw_ptr_0]) \n\t"
+ "lwc1 %[tmp3f], 0(%[u_ptr]) \n\t"
+ "lwc1 %[tmp7f], 4(%[efw_ptr_0]) \n\t"
+ "lwc1 %[tmp8f], 8(%[u_ptr]) \n\t"
+ "lwc1 %[tmp4f], 0(%[efw_ptr_1]) \n\t"
+ "lwc1 %[tmp5f], 4(%[u_ptr]) \n\t"
+ "lwc1 %[tmp9f], 4(%[efw_ptr_1]) \n\t"
+ "lwc1 %[tmp10f], 12(%[u_ptr]) \n\t"
+#if !defined(MIPS32_R2_LE)
+ "mul.s %[tmp3f], %[tmp1f], %[tmp3f] \n\t"
+ "add.s %[tmp2f], %[tmp2f], %[tmp3f] \n\t"
+ "mul.s %[tmp3f], %[tmp1f], %[tmp5f] \n\t"
+ "add.s %[tmp4f], %[tmp4f], %[tmp3f] \n\t"
+ "mul.s %[tmp3f], %[tmp6f], %[tmp8f] \n\t"
+ "add.s %[tmp7f], %[tmp7f], %[tmp3f] \n\t"
+ "mul.s %[tmp3f], %[tmp6f], %[tmp10f] \n\t"
+ "add.s %[tmp9f], %[tmp9f], %[tmp3f] \n\t"
+#else // #if !defined(MIPS32_R2_LE)
+ "madd.s %[tmp2f], %[tmp2f], %[tmp1f], %[tmp3f] \n\t"
+ "madd.s %[tmp4f], %[tmp4f], %[tmp1f], %[tmp5f] \n\t"
+ "madd.s %[tmp7f], %[tmp7f], %[tmp6f], %[tmp8f] \n\t"
+ "madd.s %[tmp9f], %[tmp9f], %[tmp6f], %[tmp10f] \n\t"
+#endif // #if !defined(MIPS32_R2_LE)
+ "swc1 %[tmp2f], 0(%[efw_ptr_0]) \n\t"
+ "swc1 %[tmp4f], 0(%[efw_ptr_1]) \n\t"
+ "swc1 %[tmp7f], 4(%[efw_ptr_0]) \n\t"
+ "b 5f \n\t"
+ " swc1 %[tmp9f], 4(%[efw_ptr_1]) \n\t"
+ "3: \n\t"
+ "mul.s %[tmp1f], %[tmp1f], %[tmp1f] \n\t"
+ "sub.s %[tmp1f], %[tmp1c], %[tmp1f] \n\t"
+ "sqrt.s %[tmp1f], %[tmp1f] \n\t"
+ "lwc1 %[tmp2f], 0(%[efw_ptr_0]) \n\t"
+ "lwc1 %[tmp3f], 0(%[u_ptr]) \n\t"
+ "lwc1 %[tmp4f], 0(%[efw_ptr_1]) \n\t"
+ "lwc1 %[tmp5f], 4(%[u_ptr]) \n\t"
+#if !defined(MIPS32_R2_LE)
+ "mul.s %[tmp3f], %[tmp1f], %[tmp3f] \n\t"
+ "add.s %[tmp2f], %[tmp2f], %[tmp3f] \n\t"
+ "mul.s %[tmp3f], %[tmp1f], %[tmp5f] \n\t"
+ "add.s %[tmp4f], %[tmp4f], %[tmp3f] \n\t"
+#else // #if !defined(MIPS32_R2_LE)
+ "madd.s %[tmp2f], %[tmp2f], %[tmp1f], %[tmp3f] \n\t"
+ "madd.s %[tmp4f], %[tmp4f], %[tmp1f], %[tmp5f] \n\t"
+#endif // #if !defined(MIPS32_R2_LE)
+ "swc1 %[tmp2f], 0(%[efw_ptr_0]) \n\t"
+ "b 5f \n\t"
+ " swc1 %[tmp4f], 0(%[efw_ptr_1]) \n\t"
+ "4: \n\t"
+ "c.lt.s %[tmp6f], %[tmp1c] \n\t"
+ "bc1f 5f \n\t"
+ " nop \n\t"
+ "mul.s %[tmp6f], %[tmp6f], %[tmp6f] \n\t"
+ "sub.s %[tmp6f], %[tmp1c], %[tmp6f] \n\t"
+ "sqrt.s %[tmp6f], %[tmp6f] \n\t"
+ "lwc1 %[tmp7f], 4(%[efw_ptr_0]) \n\t"
+ "lwc1 %[tmp8f], 8(%[u_ptr]) \n\t"
+ "lwc1 %[tmp9f], 4(%[efw_ptr_1]) \n\t"
+ "lwc1 %[tmp10f], 12(%[u_ptr]) \n\t"
+#if !defined(MIPS32_R2_LE)
+ "mul.s %[tmp3f], %[tmp6f], %[tmp8f] \n\t"
+ "add.s %[tmp7f], %[tmp7f], %[tmp3f] \n\t"
+ "mul.s %[tmp3f], %[tmp6f], %[tmp10f] \n\t"
+ "add.s %[tmp9f], %[tmp9f], %[tmp3f] \n\t"
+#else // #if !defined(MIPS32_R2_LE)
+ "madd.s %[tmp7f], %[tmp7f], %[tmp6f], %[tmp8f] \n\t"
+ "madd.s %[tmp9f], %[tmp9f], %[tmp6f], %[tmp10f] \n\t"
+#endif // #if !defined(MIPS32_R2_LE)
+ "swc1 %[tmp7f], 4(%[efw_ptr_0]) \n\t"
+ "swc1 %[tmp9f], 4(%[efw_ptr_1]) \n\t"
+ "5: \n\t"
+ "addiu %[u_ptr], %[u_ptr], 16 \n\t"
+ "addiu %[efw_ptr_0], %[efw_ptr_0], 8 \n\t"
+ "bne %[u_ptr], %[u_ptr_end], 1b \n\t"
+ " addiu %[efw_ptr_1], %[efw_ptr_1], 8 \n\t"
+ ".set pop \n\t"
+ : [lambda] "+r" (lambda), [u_ptr] "+r" (u_ptr),
+ [efw_ptr_0] "+r" (efw_ptr_0), [efw_ptr_1] "+r" (efw_ptr_1),
+ [tmp1f] "=&f" (tmp1f), [tmp2f] "=&f" (tmp2f), [tmp3f] "=&f" (tmp3f),
+ [tmp4f] "=&f" (tmp4f), [tmp5f] "=&f" (tmp5f),
+ [tmp6f] "=&f" (tmp6f), [tmp7f] "=&f" (tmp7f), [tmp8f] "=&f" (tmp8f),
+ [tmp9f] "=&f" (tmp9f), [tmp10f] "=&f" (tmp10f)
+ : [tmp1c] "f" (tmp1c), [u_ptr_end] "r" (u_ptr_end)
+ : "memory"
+ );
+
+ lambda -= PART_LEN; // Undo the pointer advance made by the asm loop above.
+ tmp = sqrtf(WEBRTC_SPL_MAX(1 - lambda[PART_LEN] * lambda[PART_LEN], 0));
+ // Bin PART_LEN is handled here: the asm loop stops at u_ptr_end, before it.
+ efw[0][PART_LEN] += tmp * u[PART_LEN][0];
+ efw[1][PART_LEN] += tmp * u[PART_LEN][1];
+
+ // For H band comfort noise
+ // TODO: don't compute noise and "tmp" twice. Use the previous results.
+ noiseAvg = 0.0;
+ tmpAvg = 0.0;
+ num = 0;
+ if ((aec->sampFreq == 32000 || aec->sampFreq == 48000) && flagHbandCn == 1) {
+ for (i = 0; i < PART_LEN; i++) {
+ rand[i] = ((float)randW16[i]) / 32768;
+ }
+
+ // average noise scale
+ // average over second half of freq spectrum (i.e., 4->8khz)
+ // TODO: we shouldn't need num. We know how many elements we're summing.
+ for (i = PART_LEN1 >> 1; i < PART_LEN1; i++) {
+ num++;
+ noiseAvg += sqrtf(noisePow[i]);
+ }
+ noiseAvg /= (float)num;
+
+ // average nlp scale
+ // average over second half of freq spectrum (i.e., 4->8khz)
+ // TODO: we shouldn't need num. We know how many elements we're summing.
+ num = 0;
+ for (i = PART_LEN1 >> 1; i < PART_LEN1; i++) {
+ num++;
+ tmpAvg += sqrtf(WEBRTC_SPL_MAX(1 - lambda[i] * lambda[i], 0));
+ }
+ tmpAvg /= (float)num;
+
+ // Use average noise for H band
+ // TODO: we should probably have a new random vector here.
+ // Reject LF noise
+ u[0][0] = 0;
+ u[0][1] = 0;
+ for (i = 1; i < PART_LEN1; i++) {
+ tmp = pi2 * rand[i - 1];
+
+ // Use average noise for H band
+ u[i][0] = noiseAvg * (float)cos(tmp);
+ u[i][1] = -noiseAvg * (float)sin(tmp);
+ }
+ u[PART_LEN][1] = 0;
+
+ for (i = 0; i < PART_LEN1; i++) {
+ // Use average NLP weight for H band
+ comfortNoiseHband[i][0] = tmpAvg * u[i][0];
+ comfortNoiseHband[i][1] = tmpAvg * u[i][1];
+ }
+ }
+}
+
+void WebRtcAec_FilterFar_mips(AecCore* aec, float yf[2][PART_LEN1]) {  // Accumulates xfBuf * wfBuf (complex product) into yf for every partition.
+ int i;
+ for (i = 0; i < aec->num_partitions; i++) {
+ int xPos = (i + aec->xfBufBlockPos) * PART_LEN1;  // Offset of this partition's block in xfBuf.
+ int pos = i * PART_LEN1;  // Offset of this partition's block in wfBuf.
+ // Check for wrap: fold xPos back into the buffer when the block index passes the last partition.
+ if (i + aec->xfBufBlockPos >= aec->num_partitions) {
+ xPos -= aec->num_partitions * (PART_LEN1);
+ }
+ float* yf0 = yf[0];  // Real part of the accumulator.
+ float* yf1 = yf[1];  // Imaginary part of the accumulator.
+ float* aRe = aec->xfBuf[0] + xPos;
+ float* aIm = aec->xfBuf[1] + xPos;
+ float* bRe = aec->wfBuf[0] + pos;
+ float* bIm = aec->wfBuf[1] + pos;
+ float f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, f10, f11, f12, f13;  // Scratch FP registers; f10 and f13 are not referenced by the asm template.
+ int len = PART_LEN1 >> 1;  // The loop handles two bins per iteration.
+
+ __asm __volatile (
+ ".set push \n\t"
+ ".set noreorder \n\t"
+ "1: \n\t"
+ "lwc1 %[f0], 0(%[aRe]) \n\t"
+ "lwc1 %[f1], 0(%[bRe]) \n\t"
+ "lwc1 %[f2], 0(%[bIm]) \n\t"
+ "lwc1 %[f3], 0(%[aIm]) \n\t"
+ "lwc1 %[f4], 4(%[aRe]) \n\t"
+ "lwc1 %[f5], 4(%[bRe]) \n\t"
+ "lwc1 %[f6], 4(%[bIm]) \n\t"
+ "mul.s %[f8], %[f0], %[f1] \n\t"  // f8 = aRe[0] * bRe[0]
+ "mul.s %[f0], %[f0], %[f2] \n\t"  // f0 = aRe[0] * bIm[0]
+ "mul.s %[f9], %[f4], %[f5] \n\t"  // f9 = aRe[1] * bRe[1]
+ "mul.s %[f4], %[f4], %[f6] \n\t"  // f4 = aRe[1] * bIm[1]
+ "lwc1 %[f7], 4(%[aIm]) \n\t"
+#if !defined(MIPS32_R2_LE)
+ "mul.s %[f12], %[f2], %[f3] \n\t"
+ "mul.s %[f1], %[f3], %[f1] \n\t"
+ "mul.s %[f11], %[f6], %[f7] \n\t"
+ "addiu %[aRe], %[aRe], 8 \n\t"
+ "addiu %[aIm], %[aIm], 8 \n\t"
+ "addiu %[len], %[len], -1 \n\t"
+ "sub.s %[f8], %[f8], %[f12] \n\t"  // re[0] = aRe*bRe - aIm*bIm
+ "mul.s %[f12], %[f7], %[f5] \n\t"
+ "lwc1 %[f2], 0(%[yf0]) \n\t"
+ "add.s %[f1], %[f0], %[f1] \n\t"  // im[0] = aRe*bIm + aIm*bRe
+ "lwc1 %[f3], 0(%[yf1]) \n\t"
+ "sub.s %[f9], %[f9], %[f11] \n\t"  // re[1]
+ "lwc1 %[f6], 4(%[yf0]) \n\t"
+ "add.s %[f4], %[f4], %[f12] \n\t"  // im[1]
+#else // #if !defined(MIPS32_R2_LE)
+ "addiu %[aRe], %[aRe], 8 \n\t"
+ "addiu %[aIm], %[aIm], 8 \n\t"
+ "addiu %[len], %[len], -1 \n\t"
+ "nmsub.s %[f8], %[f8], %[f2], %[f3] \n\t"  // Fused form of the sub.s path above: re[0] = f8 - f2*f3
+ "lwc1 %[f2], 0(%[yf0]) \n\t"
+ "madd.s %[f1], %[f0], %[f3], %[f1] \n\t"  // Fused form of the add.s path above: im[0] = f0 + f3*f1
+ "lwc1 %[f3], 0(%[yf1]) \n\t"
+ "nmsub.s %[f9], %[f9], %[f6], %[f7] \n\t"
+ "lwc1 %[f6], 4(%[yf0]) \n\t"
+ "madd.s %[f4], %[f4], %[f7], %[f5] \n\t"
+#endif // #if !defined(MIPS32_R2_LE)
+ "lwc1 %[f5], 4(%[yf1]) \n\t"
+ "add.s %[f2], %[f2], %[f8] \n\t"  // yf0[0] += re[0]
+ "addiu %[bRe], %[bRe], 8 \n\t"
+ "addiu %[bIm], %[bIm], 8 \n\t"
+ "add.s %[f3], %[f3], %[f1] \n\t"  // yf1[0] += im[0]
+ "add.s %[f6], %[f6], %[f9] \n\t"  // yf0[1] += re[1]
+ "add.s %[f5], %[f5], %[f4] \n\t"  // yf1[1] += im[1]
+ "swc1 %[f2], 0(%[yf0]) \n\t"
+ "swc1 %[f3], 0(%[yf1]) \n\t"
+ "swc1 %[f6], 4(%[yf0]) \n\t"
+ "swc1 %[f5], 4(%[yf1]) \n\t"
+ "addiu %[yf0], %[yf0], 8 \n\t"
+ "bgtz %[len], 1b \n\t"
+ " addiu %[yf1], %[yf1], 8 \n\t"  // Branch delay slot (noreorder): runs on every pass.
+ "lwc1 %[f0], 0(%[aRe]) \n\t"  // Tail: one more bin after the paired loop; covers all PART_LEN1 bins only if PART_LEN1 is odd - TODO confirm.
+ "lwc1 %[f1], 0(%[bRe]) \n\t"
+ "lwc1 %[f2], 0(%[bIm]) \n\t"
+ "lwc1 %[f3], 0(%[aIm]) \n\t"
+ "mul.s %[f8], %[f0], %[f1] \n\t"
+ "mul.s %[f0], %[f0], %[f2] \n\t"
+#if !defined(MIPS32_R2_LE)
+ "mul.s %[f12], %[f2], %[f3] \n\t"
+ "mul.s %[f1], %[f3], %[f1] \n\t"
+ "sub.s %[f8], %[f8], %[f12] \n\t"
+ "lwc1 %[f2], 0(%[yf0]) \n\t"
+ "add.s %[f1], %[f0], %[f1] \n\t"
+ "lwc1 %[f3], 0(%[yf1]) \n\t"
+#else // #if !defined(MIPS32_R2_LE)
+ "nmsub.s %[f8], %[f8], %[f2], %[f3] \n\t"
+ "lwc1 %[f2], 0(%[yf0]) \n\t"
+ "madd.s %[f1], %[f0], %[f3], %[f1] \n\t"
+ "lwc1 %[f3], 0(%[yf1]) \n\t"
+#endif // #if !defined(MIPS32_R2_LE)
+ "add.s %[f2], %[f2], %[f8] \n\t"
+ "add.s %[f3], %[f3], %[f1] \n\t"
+ "swc1 %[f2], 0(%[yf0]) \n\t"
+ "swc1 %[f3], 0(%[yf1]) \n\t"
+ ".set pop \n\t"
+ : [f0] "=&f" (f0), [f1] "=&f" (f1), [f2] "=&f" (f2),
+ [f3] "=&f" (f3), [f4] "=&f" (f4), [f5] "=&f" (f5),
+ [f6] "=&f" (f6), [f7] "=&f" (f7), [f8] "=&f" (f8),
+ [f9] "=&f" (f9), [f10] "=&f" (f10), [f11] "=&f" (f11),
+ [f12] "=&f" (f12), [f13] "=&f" (f13), [aRe] "+r" (aRe),
+ [aIm] "+r" (aIm), [bRe] "+r" (bRe), [bIm] "+r" (bIm),
+ [yf0] "+r" (yf0), [yf1] "+r" (yf1), [len] "+r" (len)
+ :
+ : "memory"  // The asm reads and writes the buffers through pointers.
+ );
+ }
+}
+
+void WebRtcAec_FilterAdaptation_mips(AecCore* aec,  // Per partition: fft = conj(xfBuf) * ef, inverse FFT, zero upper half, scale, forward FFT, add into wfBuf.
+ float* fft,  // Scratch buffer, overwritten for every partition.
+ float ef[2][PART_LEN1]) {  // Error spectrum: ef[0] is read as the real part, ef[1] as the imaginary part.
+ int i;
+ for (i = 0; i < aec->num_partitions; i++) {
+ int xPos = (i + aec->xfBufBlockPos)*(PART_LEN1);  // Offset of this partition's block in xfBuf.
+ int pos;
+ // Check for wrap: fold xPos back into the buffer when the block index passes the last partition.
+ if (i + aec->xfBufBlockPos >= aec->num_partitions) {
+ xPos -= aec->num_partitions * PART_LEN1;
+ }
+
+ pos = i * PART_LEN1;  // Offset of this partition's block in wfBuf.
+ float* aRe = aec->xfBuf[0] + xPos;
+ float* aIm = aec->xfBuf[1] + xPos;
+ float* bRe = ef[0];
+ float* bIm = ef[1];
+ float* fft_tmp;  // Write/read cursor into fft, initialised inside each asm section.
+
+ float f0, f1, f2, f3, f4, f5, f6 ,f7, f8, f9, f10, f11, f12;  // Scratch FP registers; f12 is not referenced by the asm templates.
+ int len = PART_LEN >> 1;  // The first loop handles two bins per iteration.
+
+ __asm __volatile (
+ ".set push \n\t"
+ ".set noreorder \n\t"
+ "addiu %[fft_tmp], %[fft], 0 \n\t"  // fft_tmp = fft
+ "1: \n\t"
+ "lwc1 %[f0], 0(%[aRe]) \n\t"
+ "lwc1 %[f1], 0(%[bRe]) \n\t"
+ "lwc1 %[f2], 0(%[bIm]) \n\t"
+ "lwc1 %[f4], 4(%[aRe]) \n\t"
+ "lwc1 %[f5], 4(%[bRe]) \n\t"
+ "lwc1 %[f6], 4(%[bIm]) \n\t"
+ "addiu %[aRe], %[aRe], 8 \n\t"
+ "addiu %[bRe], %[bRe], 8 \n\t"
+ "mul.s %[f8], %[f0], %[f1] \n\t"  // f8 = aRe[0] * bRe[0]
+ "mul.s %[f0], %[f0], %[f2] \n\t"  // f0 = aRe[0] * bIm[0]
+ "lwc1 %[f3], 0(%[aIm]) \n\t"
+ "mul.s %[f9], %[f4], %[f5] \n\t"  // f9 = aRe[1] * bRe[1]
+ "lwc1 %[f7], 4(%[aIm]) \n\t"
+ "mul.s %[f4], %[f4], %[f6] \n\t"  // f4 = aRe[1] * bIm[1]
+#if !defined(MIPS32_R2_LE)
+ "mul.s %[f10], %[f3], %[f2] \n\t"
+ "mul.s %[f1], %[f3], %[f1] \n\t"
+ "mul.s %[f11], %[f7], %[f6] \n\t"
+ "mul.s %[f5], %[f7], %[f5] \n\t"
+ "addiu %[aIm], %[aIm], 8 \n\t"
+ "addiu %[bIm], %[bIm], 8 \n\t"
+ "addiu %[len], %[len], -1 \n\t"
+ "add.s %[f8], %[f8], %[f10] \n\t"  // re[0] = aRe*bRe + aIm*bIm
+ "sub.s %[f1], %[f0], %[f1] \n\t"  // im[0] = aRe*bIm - aIm*bRe
+ "add.s %[f9], %[f9], %[f11] \n\t"  // re[1]
+ "sub.s %[f5], %[f4], %[f5] \n\t"  // im[1]
+#else // #if !defined(MIPS32_R2_LE)
+ "addiu %[aIm], %[aIm], 8 \n\t"
+ "addiu %[bIm], %[bIm], 8 \n\t"
+ "addiu %[len], %[len], -1 \n\t"
+ "madd.s %[f8], %[f8], %[f3], %[f2] \n\t"  // Fused form of the add.s path above.
+ "nmsub.s %[f1], %[f0], %[f3], %[f1] \n\t"  // Fused form of the sub.s path above.
+ "madd.s %[f9], %[f9], %[f7], %[f6] \n\t"
+ "nmsub.s %[f5], %[f4], %[f7], %[f5] \n\t"
+#endif // #if !defined(MIPS32_R2_LE)
+ "swc1 %[f8], 0(%[fft_tmp]) \n\t"  // Results are stored interleaved: re, im, re, im.
+ "swc1 %[f1], 4(%[fft_tmp]) \n\t"
+ "swc1 %[f9], 8(%[fft_tmp]) \n\t"
+ "swc1 %[f5], 12(%[fft_tmp]) \n\t"
+ "bgtz %[len], 1b \n\t"
+ " addiu %[fft_tmp], %[fft_tmp], 16 \n\t"  // Branch delay slot (noreorder): runs on every pass.
+ "lwc1 %[f0], 0(%[aRe]) \n\t"  // Tail: the bin that follows the looped ones.
+ "lwc1 %[f1], 0(%[bRe]) \n\t"
+ "lwc1 %[f2], 0(%[bIm]) \n\t"
+ "lwc1 %[f3], 0(%[aIm]) \n\t"
+ "mul.s %[f8], %[f0], %[f1] \n\t"
+#if !defined(MIPS32_R2_LE)
+ "mul.s %[f10], %[f3], %[f2] \n\t"
+ "add.s %[f8], %[f8], %[f10] \n\t"
+#else // #if !defined(MIPS32_R2_LE)
+ "madd.s %[f8], %[f8], %[f3], %[f2] \n\t"
+#endif // #if !defined(MIPS32_R2_LE)
+ "swc1 %[f8], 4(%[fft]) \n\t"  // Only the real part of the tail bin is kept; it overwrites fft[1].
+ ".set pop \n\t"
+ : [f0] "=&f" (f0), [f1] "=&f" (f1), [f2] "=&f" (f2),
+ [f3] "=&f" (f3), [f4] "=&f" (f4), [f5] "=&f" (f5),
+ [f6] "=&f" (f6), [f7] "=&f" (f7), [f8] "=&f" (f8),
+ [f9] "=&f" (f9), [f10] "=&f" (f10), [f11] "=&f" (f11),
+ [f12] "=&f" (f12), [aRe] "+r" (aRe), [aIm] "+r" (aIm),
+ [bRe] "+r" (bRe), [bIm] "+r" (bIm), [fft_tmp] "=&r" (fft_tmp),
+ [len] "+r" (len)
+ : [fft] "r" (fft)
+ : "memory"
+ );
+
+ aec_rdft_inverse_128(fft);
+ memset(fft + PART_LEN, 0, sizeof(float) * PART_LEN);  // Zero PART_LEN floats starting at fft[PART_LEN].
+
+ // fft scaling
+ {
+ float scale = 2.0f / PART_LEN2;
+ __asm __volatile (
+ ".set push \n\t"
+ ".set noreorder \n\t"
+ "addiu %[fft_tmp], %[fft], 0 \n\t"
+ "addiu %[len], $zero, 8 \n\t"  // 8 iterations x 8 floats = 64 floats scaled; hard-coded, presumably PART_LEN == 64 - TODO confirm.
+ "1: \n\t"
+ "addiu %[len], %[len], -1 \n\t"
+ "lwc1 %[f0], 0(%[fft_tmp]) \n\t"
+ "lwc1 %[f1], 4(%[fft_tmp]) \n\t"
+ "lwc1 %[f2], 8(%[fft_tmp]) \n\t"
+ "lwc1 %[f3], 12(%[fft_tmp]) \n\t"
+ "mul.s %[f0], %[f0], %[scale] \n\t"
+ "mul.s %[f1], %[f1], %[scale] \n\t"
+ "mul.s %[f2], %[f2], %[scale] \n\t"
+ "mul.s %[f3], %[f3], %[scale] \n\t"
+ "lwc1 %[f4], 16(%[fft_tmp]) \n\t"
+ "lwc1 %[f5], 20(%[fft_tmp]) \n\t"
+ "lwc1 %[f6], 24(%[fft_tmp]) \n\t"
+ "lwc1 %[f7], 28(%[fft_tmp]) \n\t"
+ "mul.s %[f4], %[f4], %[scale] \n\t"
+ "mul.s %[f5], %[f5], %[scale] \n\t"
+ "mul.s %[f6], %[f6], %[scale] \n\t"
+ "mul.s %[f7], %[f7], %[scale] \n\t"
+ "swc1 %[f0], 0(%[fft_tmp]) \n\t"
+ "swc1 %[f1], 4(%[fft_tmp]) \n\t"
+ "swc1 %[f2], 8(%[fft_tmp]) \n\t"
+ "swc1 %[f3], 12(%[fft_tmp]) \n\t"
+ "swc1 %[f4], 16(%[fft_tmp]) \n\t"
+ "swc1 %[f5], 20(%[fft_tmp]) \n\t"
+ "swc1 %[f6], 24(%[fft_tmp]) \n\t"
+ "swc1 %[f7], 28(%[fft_tmp]) \n\t"
+ "bgtz %[len], 1b \n\t"
+ " addiu %[fft_tmp], %[fft_tmp], 32 \n\t"  // Branch delay slot: advance by the 8 floats just scaled.
+ ".set pop \n\t"
+ : [f0] "=&f" (f0), [f1] "=&f" (f1), [f2] "=&f" (f2),
+ [f3] "=&f" (f3), [f4] "=&f" (f4), [f5] "=&f" (f5),
+ [f6] "=&f" (f6), [f7] "=&f" (f7), [len] "=&r" (len),
+ [fft_tmp] "=&r" (fft_tmp)
+ : [scale] "f" (scale), [fft] "r" (fft)
+ : "memory"
+ );
+ }
+ aec_rdft_forward_128(fft);
+ aRe = aec->wfBuf[0] + pos;  // From here on aRe/aIm point at the filter weights, not at xfBuf.
+ aIm = aec->wfBuf[1] + pos;
+ __asm __volatile (
+ ".set push \n\t"
+ ".set noreorder \n\t"
+ "addiu %[fft_tmp], %[fft], 0 \n\t"
+ "addiu %[len], $zero, 31 \n\t"  // 31 iterations of two bins each follow the two-bin prologue; hard-coded count.
+ "lwc1 %[f0], 0(%[aRe]) \n\t"
+ "lwc1 %[f1], 0(%[fft_tmp]) \n\t"
+ "lwc1 %[f2], 256(%[aRe]) \n\t"  // aRe[64] (byte offset 256); presumably wfBuf[0][pos + PART_LEN] - TODO confirm PART_LEN == 64.
+ "lwc1 %[f3], 4(%[fft_tmp]) \n\t"  // fft[1] is added to aRe[64], not to aIm[0]; aIm[0] is left untouched.
+ "lwc1 %[f4], 4(%[aRe]) \n\t"
+ "lwc1 %[f5], 8(%[fft_tmp]) \n\t"
+ "lwc1 %[f6], 4(%[aIm]) \n\t"
+ "lwc1 %[f7], 12(%[fft_tmp]) \n\t"
+ "add.s %[f0], %[f0], %[f1] \n\t"
+ "add.s %[f2], %[f2], %[f3] \n\t"
+ "add.s %[f4], %[f4], %[f5] \n\t"
+ "add.s %[f6], %[f6], %[f7] \n\t"
+ "addiu %[fft_tmp], %[fft_tmp], 16 \n\t"
+ "swc1 %[f0], 0(%[aRe]) \n\t"
+ "swc1 %[f2], 256(%[aRe]) \n\t"
+ "swc1 %[f4], 4(%[aRe]) \n\t"
+ "addiu %[aRe], %[aRe], 8 \n\t"
+ "swc1 %[f6], 4(%[aIm]) \n\t"
+ "addiu %[aIm], %[aIm], 8 \n\t"
+ "1: \n\t"  // Main loop: aRe[k] += fft[2k], aIm[k] += fft[2k+1], two bins per pass.
+ "lwc1 %[f0], 0(%[aRe]) \n\t"
+ "lwc1 %[f1], 0(%[fft_tmp]) \n\t"
+ "lwc1 %[f2], 0(%[aIm]) \n\t"
+ "lwc1 %[f3], 4(%[fft_tmp]) \n\t"
+ "lwc1 %[f4], 4(%[aRe]) \n\t"
+ "lwc1 %[f5], 8(%[fft_tmp]) \n\t"
+ "lwc1 %[f6], 4(%[aIm]) \n\t"
+ "lwc1 %[f7], 12(%[fft_tmp]) \n\t"
+ "add.s %[f0], %[f0], %[f1] \n\t"
+ "add.s %[f2], %[f2], %[f3] \n\t"
+ "add.s %[f4], %[f4], %[f5] \n\t"
+ "add.s %[f6], %[f6], %[f7] \n\t"
+ "addiu %[len], %[len], -1 \n\t"
+ "addiu %[fft_tmp], %[fft_tmp], 16 \n\t"
+ "swc1 %[f0], 0(%[aRe]) \n\t"
+ "swc1 %[f2], 0(%[aIm]) \n\t"
+ "swc1 %[f4], 4(%[aRe]) \n\t"
+ "addiu %[aRe], %[aRe], 8 \n\t"
+ "swc1 %[f6], 4(%[aIm]) \n\t"
+ "bgtz %[len], 1b \n\t"
+ " addiu %[aIm], %[aIm], 8 \n\t"  // Branch delay slot (noreorder): runs on every pass.
+ ".set pop \n\t"
+ : [f0] "=&f" (f0), [f1] "=&f" (f1), [f2] "=&f" (f2),
+ [f3] "=&f" (f3), [f4] "=&f" (f4), [f5] "=&f" (f5),
+ [f6] "=&f" (f6), [f7] "=&f" (f7), [len] "=&r" (len),
+ [fft_tmp] "=&r" (fft_tmp), [aRe] "+r" (aRe), [aIm] "+r" (aIm)
+ : [fft] "r" (fft)
+ : "memory"
+ );
+ }
+}
+
+void WebRtcAec_OverdriveAndSuppress_mips(AecCore* aec,  // Per bin: weight hNl toward hNlFb, raise it to the overdrive power, then scale efw by it.
+ float hNl[PART_LEN1],  // Updated in place.
+ const float hNlFb,
+ float efw[2][PART_LEN1]) {  // Updated in place; efw[1] is also negated.
+ int i;
+ const float one = 1.0;
+ float* p_hNl;
+ float* p_efw0;
+ float* p_efw1;
+ float* p_WebRtcAec_wC;
+ float temp1, temp2, temp3, temp4;
+
+ p_hNl = &hNl[0];
+ p_efw0 = &efw[0][0];
+ p_efw1 = &efw[1][0];
+ p_WebRtcAec_wC = (float*)&WebRtcAec_weightCurve[0];
+
+ for (i = 0; i < PART_LEN1; i++) {
+ // Weight subbands: if hNl[i] > hNlFb, hNl[i] = wC[i] * hNlFb + (1 - wC[i]) * hNl[i].
+ __asm __volatile (
+ ".set push \n\t"
+ ".set noreorder \n\t"
+ "lwc1 %[temp1], 0(%[p_hNl]) \n\t"
+ "lwc1 %[temp2], 0(%[p_wC]) \n\t"
+ "c.lt.s %[hNlFb], %[temp1] \n\t"  // Condition: hNlFb < hNl[i]
+ "bc1f 1f \n\t"  // Skip the update when the condition is false.
+ " mul.s %[temp3], %[temp2], %[hNlFb] \n\t"  // Branch delay slot: temp3 = wC[i] * hNlFb (computed either way).
+ "sub.s %[temp4], %[one], %[temp2] \n\t"  // temp4 = 1 - wC[i]
+#if !defined(MIPS32_R2_LE)
+ "mul.s %[temp1], %[temp1], %[temp4] \n\t"
+ "add.s %[temp1], %[temp3], %[temp1] \n\t"
+#else // #if !defined(MIPS32_R2_LE)
+ "madd.s %[temp1], %[temp3], %[temp1], %[temp4] \n\t"
+#endif // #if !defined(MIPS32_R2_LE)
+ "swc1 %[temp1], 0(%[p_hNl]) \n\t"
+ "1: \n\t"
+ "addiu %[p_wC], %[p_wC], 4 \n\t"  // Advance the weight-curve pointer; p_hNl is advanced by the second asm block.
+ ".set pop \n\t"
+ : [temp1] "=&f" (temp1), [temp2] "=&f" (temp2), [temp3] "=&f" (temp3),
+ [temp4] "=&f" (temp4), [p_wC] "+r" (p_WebRtcAec_wC)
+ : [hNlFb] "f" (hNlFb), [one] "f" (one), [p_hNl] "r" (p_hNl)
+ : "memory"
+ );
+
+ hNl[i] = powf(hNl[i], aec->overDriveSm * WebRtcAec_overDriveCurve[i]);
+
+ __asm __volatile (  // Suppress error signal: efw[0][i] *= hNl[i]; efw[1][i] = -(efw[1][i] * hNl[i]).
+ "lwc1 %[temp1], 0(%[p_hNl]) \n\t"
+ "lwc1 %[temp3], 0(%[p_efw1]) \n\t"
+ "lwc1 %[temp2], 0(%[p_efw0]) \n\t"
+ "addiu %[p_hNl], %[p_hNl], 4 \n\t"
+ "mul.s %[temp3], %[temp3], %[temp1] \n\t"
+ "mul.s %[temp2], %[temp2], %[temp1] \n\t"
+ "addiu %[p_efw0], %[p_efw0], 4 \n\t"
+ "addiu %[p_efw1], %[p_efw1], 4 \n\t"
+ "neg.s %[temp4], %[temp3] \n\t"
+ "swc1 %[temp2], -4(%[p_efw0]) \n\t"  // Offset -4 because the pointers were already advanced.
+ "swc1 %[temp4], -4(%[p_efw1]) \n\t"
+ : [temp1] "=&f" (temp1), [temp2] "=&f" (temp2), [temp3] "=&f" (temp3),
+ [temp4] "=&f" (temp4), [p_efw0] "+r" (p_efw0), [p_efw1] "+r" (p_efw1),
+ [p_hNl] "+r" (p_hNl)
+ :
+ : "memory"
+ );
+ }
+}
+
+void WebRtcAec_ScaleErrorSignal_mips(AecCore* aec, float ef[2][PART_LEN1]) {  // In place: divide ef by (xPow + 1e-10), limit its magnitude to the error threshold, multiply by mu.
+ const float mu = aec->extended_filter_enabled ? kExtendedMu : aec->normal_mu;
+ const float error_threshold = aec->extended_filter_enabled
+ ? kExtendedErrorThreshold
+ : aec->normal_error_threshold;
+ int len = (PART_LEN1);  // One bin per loop iteration.
+ float* ef0 = ef[0];
+ float* ef1 = ef[1];
+ float* xPow = aec->xPow;
+ float fac1 = 1e-10f;  // Small constant added to denominators.
+ float err_th2 = error_threshold * error_threshold;  // Squared threshold, compared against the squared magnitude.
+ float f0, f1, f2;
+#if !defined(MIPS32_R2_LE)
+ float f3;  // Extra scratch register needed only by the non-fused multiply/add path.
+#endif
+
+ __asm __volatile (
+ ".set push \n\t"
+ ".set noreorder \n\t"
+ "1: \n\t"
+ "lwc1 %[f0], 0(%[xPow]) \n\t"
+ "lwc1 %[f1], 0(%[ef0]) \n\t"
+ "lwc1 %[f2], 0(%[ef1]) \n\t"
+ "add.s %[f0], %[f0], %[fac1] \n\t"  // f0 = xPow + 1e-10
+ "div.s %[f1], %[f1], %[f0] \n\t"  // f1 = ef0 / f0
+ "div.s %[f2], %[f2], %[f0] \n\t"  // f2 = ef1 / f0
+ "mul.s %[f0], %[f1], %[f1] \n\t"
+#if defined(MIPS32_R2_LE)
+ "madd.s %[f0], %[f0], %[f2], %[f2] \n\t"  // f0 = f1*f1 + f2*f2
+#else
+ "mul.s %[f3], %[f2], %[f2] \n\t"
+ "add.s %[f0], %[f0], %[f3] \n\t"  // f0 = f1*f1 + f2*f2
+#endif
+ "c.le.s %[f0], %[err_th2] \n\t"  // Condition: squared magnitude <= squared threshold
+ "nop \n\t"
+ "bc1t 2f \n\t"  // Within the threshold: skip the limiting step.
+ " nop \n\t"
+ "sqrt.s %[f0], %[f0] \n\t"
+ "add.s %[f0], %[f0], %[fac1] \n\t"
+ "div.s %[f0], %[err_th], %[f0] \n\t"  // f0 = threshold / (magnitude + 1e-10)
+ "mul.s %[f1], %[f1], %[f0] \n\t"
+ "mul.s %[f2], %[f2], %[f0] \n\t"
+ "2: \n\t"
+ "mul.s %[f1], %[f1], %[mu] \n\t"  // Step-size factor.
+ "mul.s %[f2], %[f2], %[mu] \n\t"
+ "swc1 %[f1], 0(%[ef0]) \n\t"
+ "swc1 %[f2], 0(%[ef1]) \n\t"
+ "addiu %[len], %[len], -1 \n\t"
+ "addiu %[xPow], %[xPow], 4 \n\t"
+ "addiu %[ef0], %[ef0], 4 \n\t"
+ "bgtz %[len], 1b \n\t"
+ " addiu %[ef1], %[ef1], 4 \n\t"  // Branch delay slot (noreorder): runs on every pass.
+ ".set pop \n\t"
+ : [f0] "=&f" (f0), [f1] "=&f" (f1), [f2] "=&f" (f2),
+#if !defined(MIPS32_R2_LE)
+ [f3] "=&f" (f3),
+#endif
+ [xPow] "+r" (xPow), [ef0] "+r" (ef0), [ef1] "+r" (ef1),
+ [len] "+r" (len)
+ : [fac1] "f" (fac1), [err_th2] "f" (err_th2), [mu] "f" (mu),
+ [err_th] "f" (error_threshold)
+ : "memory"
+ );
+}
+
+void WebRtcAec_InitAec_mips(void) {  // Points the AEC function pointers at the MIPS implementations in this file.
+ WebRtcAec_FilterFar = WebRtcAec_FilterFar_mips;
+ WebRtcAec_FilterAdaptation = WebRtcAec_FilterAdaptation_mips;
+ WebRtcAec_ScaleErrorSignal = WebRtcAec_ScaleErrorSignal_mips;
+ WebRtcAec_ComfortNoise = WebRtcAec_ComfortNoise_mips;
+ WebRtcAec_OverdriveAndSuppress = WebRtcAec_OverdriveAndSuppress_mips;
+}
+
diff --git a/third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_core_neon.c b/third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_core_neon.c
new file mode 100644
index 00000000..9a677aaa
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_core_neon.c
@@ -0,0 +1,736 @@
+/*
+ * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+/*
+ * The core AEC algorithm, neon version of speed-critical functions.
+ *
+ * Based on aec_core_sse2.c.
+ */
+
+#include <arm_neon.h>
+#include <math.h>
+#include <string.h> // memset
+
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+#include "webrtc/modules/audio_processing/aec/aec_common.h"
+#include "webrtc/modules/audio_processing/aec/aec_core_internal.h"
+#include "webrtc/modules/audio_processing/aec/aec_rdft.h"
+
+enum { kShiftExponentIntoTopMantissa = 8 };  // Right-shift applied to the masked exponent bits in vpowq_f32().
+enum { kFloatExponentShift = 23 };  // Left-shift that moves an integer exponent into the float exponent field in vpowq_f32().
+
+__inline static float MulRe(float aRe, float aIm, float bRe, float bIm) {  // Real part of (aRe + i*aIm) * (bRe + i*bIm).
+ return aRe * bRe - aIm * bIm;
+}
+
+__inline static float MulIm(float aRe, float aIm, float bRe, float bIm) {  // Imaginary part of (aRe + i*aIm) * (bRe + i*bIm).
+ return aRe * bIm + aIm * bRe;
+}
+
+static void FilterFarNEON(AecCore* aec, float yf[2][PART_LEN1]) {  // Accumulates xfBuf * wfBuf (complex product) into yf for every partition.
+ int i;
+ const int num_partitions = aec->num_partitions;
+ for (i = 0; i < num_partitions; i++) {
+ int j;
+ int xPos = (i + aec->xfBufBlockPos) * PART_LEN1;  // Offset of this partition's block in xfBuf.
+ int pos = i * PART_LEN1;  // Offset of this partition's block in wfBuf.
+ // Check for wrap: fold xPos back into the buffer when the block index passes the last partition.
+ if (i + aec->xfBufBlockPos >= num_partitions) {
+ xPos -= num_partitions * PART_LEN1;
+ }
+
+ // vectorized code (four at once)
+ for (j = 0; j + 3 < PART_LEN1; j += 4) {
+ const float32x4_t xfBuf_re = vld1q_f32(&aec->xfBuf[0][xPos + j]);
+ const float32x4_t xfBuf_im = vld1q_f32(&aec->xfBuf[1][xPos + j]);
+ const float32x4_t wfBuf_re = vld1q_f32(&aec->wfBuf[0][pos + j]);
+ const float32x4_t wfBuf_im = vld1q_f32(&aec->wfBuf[1][pos + j]);
+ const float32x4_t yf_re = vld1q_f32(&yf[0][j]);
+ const float32x4_t yf_im = vld1q_f32(&yf[1][j]);
+ const float32x4_t a = vmulq_f32(xfBuf_re, wfBuf_re);
+ const float32x4_t e = vmlsq_f32(a, xfBuf_im, wfBuf_im);  // e = xRe*wRe - xIm*wIm (real part)
+ const float32x4_t c = vmulq_f32(xfBuf_re, wfBuf_im);
+ const float32x4_t f = vmlaq_f32(c, xfBuf_im, wfBuf_re);  // f = xRe*wIm + xIm*wRe (imaginary part)
+ const float32x4_t g = vaddq_f32(yf_re, e);
+ const float32x4_t h = vaddq_f32(yf_im, f);
+ vst1q_f32(&yf[0][j], g);
+ vst1q_f32(&yf[1][j], h);
+ }
+ // scalar code for the remaining items (same product, one bin at a time).
+ for (; j < PART_LEN1; j++) {
+ yf[0][j] += MulRe(aec->xfBuf[0][xPos + j],
+ aec->xfBuf[1][xPos + j],
+ aec->wfBuf[0][pos + j],
+ aec->wfBuf[1][pos + j]);
+ yf[1][j] += MulIm(aec->xfBuf[0][xPos + j],
+ aec->xfBuf[1][xPos + j],
+ aec->wfBuf[0][pos + j],
+ aec->wfBuf[1][pos + j]);
+ }
+ }
+}
+
+// ARM64's arm_neon.h has already defined vdivq_f32 vsqrtq_f32.
+#if !defined (WEBRTC_ARCH_ARM64)
+static float32x4_t vdivq_f32(float32x4_t a, float32x4_t b) {  // Approximates a / b per lane as a * (1/b), with 1/b refined from a reciprocal estimate.
+ int i;
+ float32x4_t x = vrecpeq_f32(b);  // Initial estimate of 1/b.
+ // from arm documentation
+ // The Newton-Raphson iteration:
+ // x[n+1] = x[n] * (2 - d * x[n])
+ // converges to (1/d) if x0 is the result of VRECPE applied to d.
+ //
+ // Note: The precision did not improve after 2 iterations.
+ for (i = 0; i < 2; i++) {
+ x = vmulq_f32(vrecpsq_f32(b, x), x);  // vrecpsq_f32(b, x) supplies the (2 - b * x) factor.
+ }
+ // a/b = a*(1/b)
+ return vmulq_f32(a, x);
+}
+
+static float32x4_t vsqrtq_f32(float32x4_t s) {  // Approximates sqrt(s) per lane as s * (1/sqrt(s)); lanes where s is zero yield zero.
+ int i;
+ float32x4_t x = vrsqrteq_f32(s);  // Initial estimate of 1/sqrt(s).
+
+ // Code to handle sqrt(0).
+ // If the input to sqrtf() is zero, a zero will be returned.
+ // If the input to vrsqrteq_f32() is zero, positive infinity is returned.
+ const uint32x4_t vec_p_inf = vdupq_n_u32(0x7F800000);  // Bit pattern of +infinity.
+ // check for divide by zero
+ const uint32x4_t div_by_zero = vceqq_u32(vec_p_inf, vreinterpretq_u32_f32(x));
+ // zero out the positive infinity results
+ x = vreinterpretq_f32_u32(vandq_u32(vmvnq_u32(div_by_zero),
+ vreinterpretq_u32_f32(x)));
+ // from arm documentation
+ // The Newton-Raphson iteration:
+ // x[n+1] = x[n] * (3 - d * (x[n] * x[n])) / 2
+ // converges to (1/sqrt(d)) if x0 is the result of VRSQRTE applied to d.
+ //
+ // Note: The precision did not improve after 2 iterations.
+ for (i = 0; i < 2; i++) {
+ x = vmulq_f32(vrsqrtsq_f32(vmulq_f32(x, x), s), x);  // vrsqrtsq_f32(x*x, s) supplies the (3 - s*x*x) / 2 factor.
+ }
+ // sqrt(s) = s * 1/sqrt(s)
+ return vmulq_f32(s, x);
+}
+#endif // WEBRTC_ARCH_ARM64
+
+static void ScaleErrorSignalNEON(AecCore* aec, float ef[2][PART_LEN1]) {  // In place: divide ef by (xPow + 1e-10), limit its magnitude to the error threshold, multiply by mu.
+ const float mu = aec->extended_filter_enabled ? kExtendedMu : aec->normal_mu;
+ const float error_threshold = aec->extended_filter_enabled ?
+ kExtendedErrorThreshold : aec->normal_error_threshold;
+ const float32x4_t k1e_10f = vdupq_n_f32(1e-10f);
+ const float32x4_t kMu = vmovq_n_f32(mu);
+ const float32x4_t kThresh = vmovq_n_f32(error_threshold);
+ int i;
+ // vectorized code (four at once)
+ for (i = 0; i + 3 < PART_LEN1; i += 4) {
+ const float32x4_t xPow = vld1q_f32(&aec->xPow[i]);
+ const float32x4_t ef_re_base = vld1q_f32(&ef[0][i]);
+ const float32x4_t ef_im_base = vld1q_f32(&ef[1][i]);
+ const float32x4_t xPowPlus = vaddq_f32(xPow, k1e_10f);
+ float32x4_t ef_re = vdivq_f32(ef_re_base, xPowPlus);
+ float32x4_t ef_im = vdivq_f32(ef_im_base, xPowPlus);
+ const float32x4_t ef_re2 = vmulq_f32(ef_re, ef_re);
+ const float32x4_t ef_sum2 = vmlaq_f32(ef_re2, ef_im, ef_im);  // re^2 + im^2
+ const float32x4_t absEf = vsqrtq_f32(ef_sum2);
+ const uint32x4_t bigger = vcgtq_f32(absEf, kThresh);  // Lane mask: magnitude exceeds the threshold.
+ const float32x4_t absEfPlus = vaddq_f32(absEf, k1e_10f);
+ const float32x4_t absEfInv = vdivq_f32(kThresh, absEfPlus);  // threshold / (magnitude + 1e-10)
+ uint32x4_t ef_re_if = vreinterpretq_u32_f32(vmulq_f32(ef_re, absEfInv));
+ uint32x4_t ef_im_if = vreinterpretq_u32_f32(vmulq_f32(ef_im, absEfInv));
+ uint32x4_t ef_re_u32 = vandq_u32(vmvnq_u32(bigger),
+ vreinterpretq_u32_f32(ef_re));  // Keep the unlimited value where the mask is clear.
+ uint32x4_t ef_im_u32 = vandq_u32(vmvnq_u32(bigger),
+ vreinterpretq_u32_f32(ef_im));
+ ef_re_if = vandq_u32(bigger, ef_re_if);  // Keep the limited value where the mask is set.
+ ef_im_if = vandq_u32(bigger, ef_im_if);
+ ef_re_u32 = vorrq_u32(ef_re_u32, ef_re_if);  // Merge both halves: a branch-free per-lane select.
+ ef_im_u32 = vorrq_u32(ef_im_u32, ef_im_if);
+ ef_re = vmulq_f32(vreinterpretq_f32_u32(ef_re_u32), kMu);
+ ef_im = vmulq_f32(vreinterpretq_f32_u32(ef_im_u32), kMu);
+ vst1q_f32(&ef[0][i], ef_re);
+ vst1q_f32(&ef[1][i], ef_im);
+ }
+ // scalar code for the remaining items.
+ for (; i < PART_LEN1; i++) {
+ float abs_ef;
+ ef[0][i] /= (aec->xPow[i] + 1e-10f);
+ ef[1][i] /= (aec->xPow[i] + 1e-10f);
+ abs_ef = sqrtf(ef[0][i] * ef[0][i] + ef[1][i] * ef[1][i]);
+
+ if (abs_ef > error_threshold) {
+ abs_ef = error_threshold / (abs_ef + 1e-10f);  // abs_ef is reused as the scale factor.
+ ef[0][i] *= abs_ef;
+ ef[1][i] *= abs_ef;
+ }
+
+ // Stepsize factor
+ ef[0][i] *= mu;
+ ef[1][i] *= mu;
+ }
+}
+
+static void FilterAdaptationNEON(AecCore* aec,  // Per partition: fft = conj(xfBuf) * ef, inverse FFT, zero upper half, scale, forward FFT, add into wfBuf.
+ float* fft,  // Scratch buffer, overwritten for every partition.
+ float ef[2][PART_LEN1]) {
+ int i;
+ const int num_partitions = aec->num_partitions;
+ for (i = 0; i < num_partitions; i++) {
+ int xPos = (i + aec->xfBufBlockPos) * PART_LEN1;  // Offset of this partition's block in xfBuf.
+ int pos = i * PART_LEN1;  // Offset of this partition's block in wfBuf.
+ int j;
+ // Check for wrap: fold xPos back into the buffer when the block index passes the last partition.
+ if (i + aec->xfBufBlockPos >= num_partitions) {
+ xPos -= num_partitions * PART_LEN1;
+ }
+
+ // Process the whole array...
+ for (j = 0; j < PART_LEN; j += 4) {
+ // Load xfBuf and ef.
+ const float32x4_t xfBuf_re = vld1q_f32(&aec->xfBuf[0][xPos + j]);
+ const float32x4_t xfBuf_im = vld1q_f32(&aec->xfBuf[1][xPos + j]);
+ const float32x4_t ef_re = vld1q_f32(&ef[0][j]);
+ const float32x4_t ef_im = vld1q_f32(&ef[1][j]);
+ // Calculate the product of conjugate(xfBuf) by ef.
+ // re(conjugate(a) * b) = aRe * bRe + aIm * bIm
+ // im(conjugate(a) * b)= aRe * bIm - aIm * bRe
+ const float32x4_t a = vmulq_f32(xfBuf_re, ef_re);
+ const float32x4_t e = vmlaq_f32(a, xfBuf_im, ef_im);
+ const float32x4_t c = vmulq_f32(xfBuf_re, ef_im);
+ const float32x4_t f = vmlsq_f32(c, xfBuf_im, ef_re);
+ // Interleave real and imaginary parts.
+ const float32x4x2_t g_n_h = vzipq_f32(e, f);
+ // Store
+ vst1q_f32(&fft[2 * j + 0], g_n_h.val[0]);
+ vst1q_f32(&fft[2 * j + 4], g_n_h.val[1]);
+ }
+ // ... and fixup the first imaginary entry: fft[1] gets the real part of the product at bin PART_LEN.
+ fft[1] = MulRe(aec->xfBuf[0][xPos + PART_LEN],
+ -aec->xfBuf[1][xPos + PART_LEN],
+ ef[0][PART_LEN],
+ ef[1][PART_LEN]);
+
+ aec_rdft_inverse_128(fft);
+ memset(fft + PART_LEN, 0, sizeof(float) * PART_LEN);  // Zero PART_LEN floats starting at fft[PART_LEN].
+
+ // fft scaling
+ {
+ const float scale = 2.0f / PART_LEN2;
+ const float32x4_t scale_ps = vmovq_n_f32(scale);
+ for (j = 0; j < PART_LEN; j += 4) {
+ const float32x4_t fft_ps = vld1q_f32(&fft[j]);
+ const float32x4_t fft_scale = vmulq_f32(fft_ps, scale_ps);
+ vst1q_f32(&fft[j], fft_scale);
+ }
+ }
+ aec_rdft_forward_128(fft);
+
+ {
+ const float wt1 = aec->wfBuf[1][pos];  // Saved because the vector loop below adds fft[1] into this slot.
+ aec->wfBuf[0][pos + PART_LEN] += fft[1];  // fft[1] belongs to the real part at bin PART_LEN.
+ for (j = 0; j < PART_LEN; j += 4) {
+ float32x4_t wtBuf_re = vld1q_f32(&aec->wfBuf[0][pos + j]);
+ float32x4_t wtBuf_im = vld1q_f32(&aec->wfBuf[1][pos + j]);
+ const float32x4_t fft0 = vld1q_f32(&fft[2 * j + 0]);
+ const float32x4_t fft4 = vld1q_f32(&fft[2 * j + 4]);
+ const float32x4x2_t fft_re_im = vuzpq_f32(fft0, fft4);  // De-interleave: val[0] = even elements, val[1] = odd elements.
+ wtBuf_re = vaddq_f32(wtBuf_re, fft_re_im.val[0]);
+ wtBuf_im = vaddq_f32(wtBuf_im, fft_re_im.val[1]);
+
+ vst1q_f32(&aec->wfBuf[0][pos + j], wtBuf_re);
+ vst1q_f32(&aec->wfBuf[1][pos + j], wtBuf_im);
+ }
+ aec->wfBuf[1][pos] = wt1;  // Restore the saved value, so this slot ends up unchanged.
+ }
+ }
+}
+
+static float32x4_t vpowq_f32(float32x4_t a, float32x4_t b) {  // Approximates a^b per lane.
+ // a^b = exp2(b * log2(a))
+ // exp2(x) and log2(x) are calculated using polynomial approximations.
+ float32x4_t log2_a, b_log2_a, a_exp_b;
+
+ // Calculate log2(x), x = a.
+ {
+ // To calculate log2(x), we decompose x like this:
+ // x = y * 2^n
+ // n is an integer
+ // y is in the [1.0, 2.0) range
+ //
+ // log2(x) = log2(y) + n
+ // n can be evaluated by playing with float representation.
+ // log2(y) in a small range can be approximated, this code uses an order
+ // five polynomial approximation. The coefficients have been
+ // estimated with the Remez algorithm and the resulting
+ // polynomial has a maximum relative error of 0.00086%.
+
+ // Compute n.
+ // This is done by masking the exponent, shifting it into the top bits of
+ // the mantissa, putting eight into the biased exponent (to compensate for
+ // the fact that the exponent has been shifted into the top/fractional
+ // part) and finally getting rid of the implicit leading one from the
+ // mantissa by subtracting it out.
+ const uint32x4_t vec_float_exponent_mask = vdupq_n_u32(0x7F800000);
+ const uint32x4_t vec_eight_biased_exponent = vdupq_n_u32(0x43800000);
+ const uint32x4_t vec_implicit_leading_one = vdupq_n_u32(0x43BF8000);
+ const uint32x4_t two_n = vandq_u32(vreinterpretq_u32_f32(a),
+ vec_float_exponent_mask);
+ const uint32x4_t n_1 = vshrq_n_u32(two_n, kShiftExponentIntoTopMantissa);
+ const uint32x4_t n_0 = vorrq_u32(n_1, vec_eight_biased_exponent);
+ const float32x4_t n =
+ vsubq_f32(vreinterpretq_f32_u32(n_0),
+ vreinterpretq_f32_u32(vec_implicit_leading_one));
+ // Compute y: keep the mantissa of a and force the biased exponent of 1.0.
+ const uint32x4_t vec_mantissa_mask = vdupq_n_u32(0x007FFFFF);
+ const uint32x4_t vec_zero_biased_exponent_is_one = vdupq_n_u32(0x3F800000);
+ const uint32x4_t mantissa = vandq_u32(vreinterpretq_u32_f32(a),
+ vec_mantissa_mask);
+ const float32x4_t y =
+ vreinterpretq_f32_u32(vorrq_u32(mantissa,
+ vec_zero_biased_exponent_is_one));
+ // Approximate log2(y) ~= (y - 1) * pol5(y).
+ // pol5(y) = C5 * y^5 + C4 * y^4 + C3 * y^3 + C2 * y^2 + C1 * y + C0
+ const float32x4_t C5 = vdupq_n_f32(-3.4436006e-2f);
+ const float32x4_t C4 = vdupq_n_f32(3.1821337e-1f);
+ const float32x4_t C3 = vdupq_n_f32(-1.2315303f);
+ const float32x4_t C2 = vdupq_n_f32(2.5988452f);
+ const float32x4_t C1 = vdupq_n_f32(-3.3241990f);
+ const float32x4_t C0 = vdupq_n_f32(3.1157899f);
+ float32x4_t pol5_y = C5;  // Horner evaluation: each step computes C_k + y * pol5_y.
+ pol5_y = vmlaq_f32(C4, y, pol5_y);
+ pol5_y = vmlaq_f32(C3, y, pol5_y);
+ pol5_y = vmlaq_f32(C2, y, pol5_y);
+ pol5_y = vmlaq_f32(C1, y, pol5_y);
+ pol5_y = vmlaq_f32(C0, y, pol5_y);
+ const float32x4_t y_minus_one =
+ vsubq_f32(y, vreinterpretq_f32_u32(vec_zero_biased_exponent_is_one));
+ const float32x4_t log2_y = vmulq_f32(y_minus_one, pol5_y);
+
+ // Combine parts.
+ log2_a = vaddq_f32(n, log2_y);
+ }
+
+ // b * log2(a)
+ b_log2_a = vmulq_f32(b, log2_a);
+
+ // Calculate exp2(x), x = b * log2(a).
+ {
+ // To calculate 2^x, we decompose x like this:
+ // x = n + y
+ // n is an integer, the value of x - 0.5 rounded down, therefore
+ // y is in the [0.5, 1.5) range
+ //
+ // 2^x = 2^n * 2^y
+ // 2^n can be evaluated by playing with float representation.
+ // 2^y in a small range can be approximated, this code uses an order two
+ // polynomial approximation. The coefficients have been estimated
+ // with the Remez algorithm and the resulting polynomial has a
+ // maximum relative error of 0.17%.
+ // To avoid over/underflow, we reduce the range of input to ]-127, 129].
+ const float32x4_t max_input = vdupq_n_f32(129.f);
+ const float32x4_t min_input = vdupq_n_f32(-126.99999f);
+ const float32x4_t x_min = vminq_f32(b_log2_a, max_input);  // Clamp from above.
+ const float32x4_t x_max = vmaxq_f32(x_min, min_input);  // Clamp from below; x_max is the fully clamped input.
+ // Compute n.
+ const float32x4_t half = vdupq_n_f32(0.5f);
+ const float32x4_t x_minus_half = vsubq_f32(x_max, half);
+ const int32x4_t x_minus_half_floor = vcvtq_s32_f32(x_minus_half);  // NOTE(review): this conversion truncates toward zero, which is not a floor for negative inputs - confirm intended.
+
+ // Compute 2^n by placing (n + 127) in the float exponent field.
+ const int32x4_t float_exponent_bias = vdupq_n_s32(127);
+ const int32x4_t two_n_exponent =
+ vaddq_s32(x_minus_half_floor, float_exponent_bias);
+ const float32x4_t two_n =
+ vreinterpretq_f32_s32(vshlq_n_s32(two_n_exponent, kFloatExponentShift));
+ // Compute y.
+ const float32x4_t y = vsubq_f32(x_max, vcvtq_f32_s32(x_minus_half_floor));
+
+ // Approximate 2^y ~= C2 * y^2 + C1 * y + C0.
+ const float32x4_t C2 = vdupq_n_f32(3.3718944e-1f);
+ const float32x4_t C1 = vdupq_n_f32(6.5763628e-1f);
+ const float32x4_t C0 = vdupq_n_f32(1.0017247f);
+ float32x4_t exp2_y = C2;  // Horner evaluation, as for pol5_y above.
+ exp2_y = vmlaq_f32(C1, y, exp2_y);
+ exp2_y = vmlaq_f32(C0, y, exp2_y);
+
+ // Combine parts.
+ a_exp_b = vmulq_f32(exp2_y, two_n);
+ }
+
+ return a_exp_b;
+}
+
+static void OverdriveAndSuppressNEON(AecCore* aec,  // Per bin: weight hNl toward hNlFb, raise it to the overdrive power, then scale efw by it.
+ float hNl[PART_LEN1],  // Updated in place.
+ const float hNlFb,
+ float efw[2][PART_LEN1]) {  // Updated in place; efw[1] is also negated.
+ int i;
+ const float32x4_t vec_hNlFb = vmovq_n_f32(hNlFb);
+ const float32x4_t vec_one = vdupq_n_f32(1.0f);
+ const float32x4_t vec_minus_one = vdupq_n_f32(-1.0f);
+ const float32x4_t vec_overDriveSm = vmovq_n_f32(aec->overDriveSm);
+
+ // vectorized code (four at once)
+ for (i = 0; i + 3 < PART_LEN1; i += 4) {
+ // Weight subbands
+ float32x4_t vec_hNl = vld1q_f32(&hNl[i]);
+ const float32x4_t vec_weightCurve = vld1q_f32(&WebRtcAec_weightCurve[i]);
+ const uint32x4_t bigger = vcgtq_f32(vec_hNl, vec_hNlFb);  // Lane mask: hNl > hNlFb.
+ const float32x4_t vec_weightCurve_hNlFb = vmulq_f32(vec_weightCurve,
+ vec_hNlFb);
+ const float32x4_t vec_one_weightCurve = vsubq_f32(vec_one, vec_weightCurve);
+ const float32x4_t vec_one_weightCurve_hNl = vmulq_f32(vec_one_weightCurve,
+ vec_hNl);
+ const uint32x4_t vec_if0 = vandq_u32(vmvnq_u32(bigger),
+ vreinterpretq_u32_f32(vec_hNl));  // Original hNl where the mask is clear.
+ const float32x4_t vec_one_weightCurve_add =
+ vaddq_f32(vec_weightCurve_hNlFb, vec_one_weightCurve_hNl);
+ const uint32x4_t vec_if1 =
+ vandq_u32(bigger, vreinterpretq_u32_f32(vec_one_weightCurve_add));  // Weighted value where the mask is set.
+
+ vec_hNl = vreinterpretq_f32_u32(vorrq_u32(vec_if0, vec_if1));  // Branch-free per-lane select.
+
+ {
+ const float32x4_t vec_overDriveCurve =
+ vld1q_f32(&WebRtcAec_overDriveCurve[i]);
+ const float32x4_t vec_overDriveSm_overDriveCurve =
+ vmulq_f32(vec_overDriveSm, vec_overDriveCurve);
+ vec_hNl = vpowq_f32(vec_hNl, vec_overDriveSm_overDriveCurve);  // Polynomial approximation; the scalar tail below uses powf().
+ vst1q_f32(&hNl[i], vec_hNl);
+ }
+
+ // Suppress error signal
+ {
+ float32x4_t vec_efw_re = vld1q_f32(&efw[0][i]);
+ float32x4_t vec_efw_im = vld1q_f32(&efw[1][i]);
+ vec_efw_re = vmulq_f32(vec_efw_re, vec_hNl);
+ vec_efw_im = vmulq_f32(vec_efw_im, vec_hNl);
+
+ // Ooura fft returns incorrect sign on imaginary component. It matters
+ // here because we are making an additive change with comfort noise.
+ vec_efw_im = vmulq_f32(vec_efw_im, vec_minus_one);
+ vst1q_f32(&efw[0][i], vec_efw_re);
+ vst1q_f32(&efw[1][i], vec_efw_im);
+ }
+ }
+
+ // scalar code for the remaining items.
+ for (; i < PART_LEN1; i++) {
+ // Weight subbands
+ if (hNl[i] > hNlFb) {
+ hNl[i] = WebRtcAec_weightCurve[i] * hNlFb +
+ (1 - WebRtcAec_weightCurve[i]) * hNl[i];
+ }
+
+ hNl[i] = powf(hNl[i], aec->overDriveSm * WebRtcAec_overDriveCurve[i]);
+
+ // Suppress error signal
+ efw[0][i] *= hNl[i];
+ efw[1][i] *= hNl[i];
+
+ // Ooura fft returns incorrect sign on imaginary component. It matters
+ // here because we are making an additive change with comfort noise.
+ efw[1][i] *= -1;
+ }
+}
+
+static int PartitionDelay(const AecCore* aec) {  // Returns the index of the partition with the highest energy (0 if no energy exceeds 0).
+ // Measures the energy in each filter partition and returns the partition with
+ // highest energy.
+ // TODO(bjornv): Spread computational cost by computing one partition per
+ // block?
+ float wfEnMax = 0;
+ int i;
+ int delay = 0;
+
+ for (i = 0; i < aec->num_partitions; i++) {
+ int j;
+ int pos = i * PART_LEN1;  // Offset of this partition's block in wfBuf.
+ float wfEn = 0;
+ float32x4_t vec_wfEn = vdupq_n_f32(0.0f);  // Four running partial sums of re^2 + im^2.
+ // vectorized code (four at once)
+ for (j = 0; j + 3 < PART_LEN1; j += 4) {
+ const float32x4_t vec_wfBuf0 = vld1q_f32(&aec->wfBuf[0][pos + j]);
+ const float32x4_t vec_wfBuf1 = vld1q_f32(&aec->wfBuf[1][pos + j]);
+ vec_wfEn = vmlaq_f32(vec_wfEn, vec_wfBuf0, vec_wfBuf0);
+ vec_wfEn = vmlaq_f32(vec_wfEn, vec_wfBuf1, vec_wfBuf1);
+ }
+ {
+ float32x2_t vec_total;  // Horizontal sum of the four lanes of vec_wfEn.
+ // A B C D
+ vec_total = vpadd_f32(vget_low_f32(vec_wfEn), vget_high_f32(vec_wfEn));
+ // A+B C+D
+ vec_total = vpadd_f32(vec_total, vec_total);
+ // A+B+C+D A+B+C+D
+ wfEn = vget_lane_f32(vec_total, 0);
+ }
+
+ // scalar code for the remaining items.
+ for (; j < PART_LEN1; j++) {
+ wfEn += aec->wfBuf[0][pos + j] * aec->wfBuf[0][pos + j] +
+ aec->wfBuf[1][pos + j] * aec->wfBuf[1][pos + j];
+ }
+
+ if (wfEn > wfEnMax) {  // Strict comparison: on ties the earliest partition wins.
+ wfEnMax = wfEn;
+ delay = i;
+ }
+ }
+ return delay;
+}
+
+// Updates the following smoothed Power Spectral Densities (PSD):
+// - sd : near-end
+// - se : residual echo
+// - sx : far-end
+// - sde : cross-PSD of near-end and residual echo
+// - sxd : cross-PSD of near-end and far-end
+// Every bin is smoothed as ptrGCoh[0] * previous + ptrGCoh[1] * current.
+// The filter diverge state is also determined here: when it is set, efw is
+// overwritten with dfw; in non-extended mode a far larger error zeroes wfBuf.
+static void SmoothedPSD(AecCore* aec,
+ float efw[2][PART_LEN1],  // feeds se / sde; replaced by dfw on divergence
+ float dfw[2][PART_LEN1],  // feeds sd / sde / sxd; only read here
+ float xfw[2][PART_LEN1]) {  // feeds sx / sxd; only read here
+ // Power estimate smoothing coefficients: [0] weights the old value, [1] the new.
+ const float* ptrGCoh = aec->extended_filter_enabled
+ ? WebRtcAec_kExtendedSmoothingCoefficients[aec->mult - 1]
+ : WebRtcAec_kNormalSmoothingCoefficients[aec->mult - 1];
+ int i;
+ float sdSum = 0, seSum = 0;  // totals of the updated sd / se over all bins
+ const float32x4_t vec_15 = vdupq_n_f32(WebRtcAec_kMinFarendPSD);  // far-end power floor
+ float32x4_t vec_sdSum = vdupq_n_f32(0.0f);
+ float32x4_t vec_seSum = vdupq_n_f32(0.0f);
+
+ for (i = 0; i + 3 < PART_LEN1; i += 4) {  // vectorized code (four at once)
+ const float32x4_t vec_dfw0 = vld1q_f32(&dfw[0][i]);
+ const float32x4_t vec_dfw1 = vld1q_f32(&dfw[1][i]);
+ const float32x4_t vec_efw0 = vld1q_f32(&efw[0][i]);
+ const float32x4_t vec_efw1 = vld1q_f32(&efw[1][i]);
+ const float32x4_t vec_xfw0 = vld1q_f32(&xfw[0][i]);
+ const float32x4_t vec_xfw1 = vld1q_f32(&xfw[1][i]);
+ float32x4_t vec_sd = vmulq_n_f32(vld1q_f32(&aec->sd[i]), ptrGCoh[0]);
+ float32x4_t vec_se = vmulq_n_f32(vld1q_f32(&aec->se[i]), ptrGCoh[0]);
+ float32x4_t vec_sx = vmulq_n_f32(vld1q_f32(&aec->sx[i]), ptrGCoh[0]);
+ float32x4_t vec_dfw_sumsq = vmulq_f32(vec_dfw0, vec_dfw0);
+ float32x4_t vec_efw_sumsq = vmulq_f32(vec_efw0, vec_efw0);
+ float32x4_t vec_xfw_sumsq = vmulq_f32(vec_xfw0, vec_xfw0);
+
+ vec_dfw_sumsq = vmlaq_f32(vec_dfw_sumsq, vec_dfw1, vec_dfw1);
+ vec_efw_sumsq = vmlaq_f32(vec_efw_sumsq, vec_efw1, vec_efw1);
+ vec_xfw_sumsq = vmlaq_f32(vec_xfw_sumsq, vec_xfw1, vec_xfw1);
+ vec_xfw_sumsq = vmaxq_f32(vec_xfw_sumsq, vec_15);  // same floor as the scalar loop
+ vec_sd = vmlaq_n_f32(vec_sd, vec_dfw_sumsq, ptrGCoh[1]);
+ vec_se = vmlaq_n_f32(vec_se, vec_efw_sumsq, ptrGCoh[1]);
+ vec_sx = vmlaq_n_f32(vec_sx, vec_xfw_sumsq, ptrGCoh[1]);
+
+ vst1q_f32(&aec->sd[i], vec_sd);
+ vst1q_f32(&aec->se[i], vec_se);
+ vst1q_f32(&aec->sx[i], vec_sx);
+
+ {
+ float32x4x2_t vec_sde = vld2q_f32(&aec->sde[i][0]);  // val[0] <- sde[.][0], val[1] <- sde[.][1]
+ float32x4_t vec_dfwefw0011 = vmulq_f32(vec_dfw0, vec_efw0);
+ float32x4_t vec_dfwefw0110 = vmulq_f32(vec_dfw0, vec_efw1);
+ vec_sde.val[0] = vmulq_n_f32(vec_sde.val[0], ptrGCoh[0]);
+ vec_sde.val[1] = vmulq_n_f32(vec_sde.val[1], ptrGCoh[0]);
+ vec_dfwefw0011 = vmlaq_f32(vec_dfwefw0011, vec_dfw1, vec_efw1);  // dfw0*efw0 + dfw1*efw1
+ vec_dfwefw0110 = vmlsq_f32(vec_dfwefw0110, vec_dfw1, vec_efw0);  // dfw0*efw1 - dfw1*efw0
+ vec_sde.val[0] = vmlaq_n_f32(vec_sde.val[0], vec_dfwefw0011, ptrGCoh[1]);
+ vec_sde.val[1] = vmlaq_n_f32(vec_sde.val[1], vec_dfwefw0110, ptrGCoh[1]);
+ vst2q_f32(&aec->sde[i][0], vec_sde);  // interleaves the two lanes sets again on store
+ }
+
+ {
+ float32x4x2_t vec_sxd = vld2q_f32(&aec->sxd[i][0]);  // val[0] <- sxd[.][0], val[1] <- sxd[.][1]
+ float32x4_t vec_dfwxfw0011 = vmulq_f32(vec_dfw0, vec_xfw0);
+ float32x4_t vec_dfwxfw0110 = vmulq_f32(vec_dfw0, vec_xfw1);
+ vec_sxd.val[0] = vmulq_n_f32(vec_sxd.val[0], ptrGCoh[0]);
+ vec_sxd.val[1] = vmulq_n_f32(vec_sxd.val[1], ptrGCoh[0]);
+ vec_dfwxfw0011 = vmlaq_f32(vec_dfwxfw0011, vec_dfw1, vec_xfw1);  // dfw0*xfw0 + dfw1*xfw1
+ vec_dfwxfw0110 = vmlsq_f32(vec_dfwxfw0110, vec_dfw1, vec_xfw0);  // dfw0*xfw1 - dfw1*xfw0
+ vec_sxd.val[0] = vmlaq_n_f32(vec_sxd.val[0], vec_dfwxfw0011, ptrGCoh[1]);
+ vec_sxd.val[1] = vmlaq_n_f32(vec_sxd.val[1], vec_dfwxfw0110, ptrGCoh[1]);
+ vst2q_f32(&aec->sxd[i][0], vec_sxd);
+ }
+
+ vec_sdSum = vaddq_f32(vec_sdSum, vec_sd);
+ vec_seSum = vaddq_f32(vec_seSum, vec_se);
+ }
+ {
+ float32x2_t vec_sdSum_total;
+ float32x2_t vec_seSum_total;
+ // Horizontal add of the four lanes A B C D of each accumulator.
+ vec_sdSum_total = vpadd_f32(vget_low_f32(vec_sdSum),
+ vget_high_f32(vec_sdSum));
+ vec_seSum_total = vpadd_f32(vget_low_f32(vec_seSum),
+ vget_high_f32(vec_seSum));
+ // Now holding A+B C+D
+ vec_sdSum_total = vpadd_f32(vec_sdSum_total, vec_sdSum_total);
+ vec_seSum_total = vpadd_f32(vec_seSum_total, vec_seSum_total);
+ // Now holding A+B+C+D A+B+C+D
+ sdSum = vget_lane_f32(vec_sdSum_total, 0);
+ seSum = vget_lane_f32(vec_seSum_total, 0);
+ }
+
+ // scalar code for the remaining items (same update as the vector loop).
+ for (; i < PART_LEN1; i++) {
+ aec->sd[i] = ptrGCoh[0] * aec->sd[i] +
+ ptrGCoh[1] * (dfw[0][i] * dfw[0][i] + dfw[1][i] * dfw[1][i]);
+ aec->se[i] = ptrGCoh[0] * aec->se[i] +
+ ptrGCoh[1] * (efw[0][i] * efw[0][i] + efw[1][i] * efw[1][i]);
+ // We threshold here to protect against the ill-effects of a zero farend.
+ // The threshold is not arbitrarily chosen, but balances protection and
+ // adverse interaction with the algorithm's tuning.
+ // TODO(bjornv): investigate further why this is so sensitive.
+ aec->sx[i] =
+ ptrGCoh[0] * aec->sx[i] +
+ ptrGCoh[1] * WEBRTC_SPL_MAX(
+ xfw[0][i] * xfw[0][i] + xfw[1][i] * xfw[1][i],
+ WebRtcAec_kMinFarendPSD);
+
+ aec->sde[i][0] =
+ ptrGCoh[0] * aec->sde[i][0] +
+ ptrGCoh[1] * (dfw[0][i] * efw[0][i] + dfw[1][i] * efw[1][i]);
+ aec->sde[i][1] =
+ ptrGCoh[0] * aec->sde[i][1] +
+ ptrGCoh[1] * (dfw[0][i] * efw[1][i] - dfw[1][i] * efw[0][i]);
+
+ aec->sxd[i][0] =
+ ptrGCoh[0] * aec->sxd[i][0] +
+ ptrGCoh[1] * (dfw[0][i] * xfw[0][i] + dfw[1][i] * xfw[1][i]);
+ aec->sxd[i][1] =
+ ptrGCoh[0] * aec->sxd[i][1] +
+ ptrGCoh[1] * (dfw[0][i] * xfw[1][i] - dfw[1][i] * xfw[0][i]);
+
+ sdSum += aec->sd[i];
+ seSum += aec->se[i];
+ }
+
+ // Divergent filter safeguard (the 1.05 factor applies once already diverged).
+ aec->divergeState = (aec->divergeState ? 1.05f : 1.0f) * seSum > sdSum;
+
+ if (aec->divergeState)
+ memcpy(efw, dfw, sizeof(efw[0][0]) * 2 * PART_LEN1);  // efw <- dfw, both rows
+
+ // Reset if error is significantly larger than nearend (13 dB).
+ if (!aec->extended_filter_enabled && seSum > (19.95f * sdSum))
+ memset(aec->wfBuf, 0, sizeof(aec->wfBuf));  // zeroes the whole wfBuf member
+}
+
+// Windows x with the WebRtcAec_sqrtHanning table ahead of the fft (x_windowed).
+__inline static void WindowData(float* x_windowed, const float* x) {
+  int n = 0;
+  while (n < PART_LEN) {
+    const float32x4_t first_half = vld1q_f32(&x[n]);
+    const float32x4_t second_half = vld1q_f32(&x[PART_LEN + n]);
+    const float32x4_t win_fwd = vld1q_f32(&WebRtcAec_sqrtHanning[n]);
+    // Four window taps ending at index PART_LEN - n, in table order.
+    const float32x4_t win_tail =
+        vld1q_f32(&WebRtcAec_sqrtHanning[PART_LEN - n - 3]);
+    // Reverse the lane order in two steps: swap within each 64-bit half
+    // (A B C D -> B A D C), then swap the halves (-> D C B A).
+    const float32x4_t win_pairs = vrev64q_f32(win_tail);
+    const float32x4_t win_rev =
+        vcombine_f32(vget_high_f32(win_pairs), vget_low_f32(win_pairs));
+    vst1q_f32(&x_windowed[n], vmulq_f32(first_half, win_fwd));
+    vst1q_f32(&x_windowed[PART_LEN + n], vmulq_f32(second_half, win_rev));
+    n += 4;
+  }
+}
+
+// Unpacks interleaved fft output into a complex array: row 0 real, row 1 imag.
+__inline static void StoreAsComplex(const float* data,
+                                    float data_complex[2][PART_LEN1]) {
+  int bin = 0;
+  while (bin < PART_LEN) {
+    const float32x4x2_t pairs = vld2q_f32(&data[2 * bin]);
+    vst1q_f32(&data_complex[0][bin], pairs.val[0]);
+    vst1q_f32(&data_complex[1][bin], pairs.val[1]);
+    bin += 4;
+  }
+  // Bins 0 and PART_LEN: row 1 is forced to 0, row 0 takes data[0] / data[1].
+  data_complex[1][0] = data_complex[1][PART_LEN] = 0;
+  data_complex[0][0] = data[0];
+  data_complex[0][PART_LEN] = data[1];
+}
+
+static void SubbandCoherenceNEON(AecCore* aec,
+ float efw[2][PART_LEN1],  // out: error spectrum (SmoothedPSD may replace it with dfw)
+ float xfw[2][PART_LEN1],  // out: far-end spectrum copied from aec->xfwBuf
+ float* fft,  // scratch buffer reused for both forward transforms
+ float* cohde,  // out: per bin (sde[0]^2 + sde[1]^2) / (sd * se + 1e-10)
+ float* cohxd) {  // out: per bin (sxd[0]^2 + sxd[1]^2) / (sx * sd + 1e-10)
+ float dfw[2][PART_LEN1];
+ int i;
+
+ if (aec->delayEstCtr == 0)
+ aec->delayIdx = PartitionDelay(aec);  // refreshed only when the counter is 0
+
+ // Use delayed far: copy both rows starting at partition aec->delayIdx.
+ memcpy(xfw,
+ aec->xfwBuf + aec->delayIdx * PART_LEN1,
+ sizeof(xfw[0][0]) * 2 * PART_LEN1);
+
+ // Windowed near fft
+ WindowData(fft, aec->dBuf);
+ aec_rdft_forward_128(fft);
+ StoreAsComplex(fft, dfw);
+
+ // Windowed error fft
+ WindowData(fft, aec->eBuf);
+ aec_rdft_forward_128(fft);
+ StoreAsComplex(fft, efw);
+
+ SmoothedPSD(aec, efw, dfw, xfw);  // updates aec->sd/se/sx/sde/sxd used below
+
+ {
+ const float32x4_t vec_1eminus10 = vdupq_n_f32(1e-10f);  // keeps the divisors non-zero
+
+ // Subband coherence, four bins per iteration.
+ for (i = 0; i + 3 < PART_LEN1; i += 4) {
+ const float32x4_t vec_sd = vld1q_f32(&aec->sd[i]);
+ const float32x4_t vec_se = vld1q_f32(&aec->se[i]);
+ const float32x4_t vec_sx = vld1q_f32(&aec->sx[i]);
+ const float32x4_t vec_sdse = vmlaq_f32(vec_1eminus10, vec_sd, vec_se);  // 1e-10 + sd*se
+ const float32x4_t vec_sdsx = vmlaq_f32(vec_1eminus10, vec_sd, vec_sx);  // 1e-10 + sd*sx
+ float32x4x2_t vec_sde = vld2q_f32(&aec->sde[i][0]);  // de-interleaving load
+ float32x4x2_t vec_sxd = vld2q_f32(&aec->sxd[i][0]);  // de-interleaving load
+ float32x4_t vec_cohde = vmulq_f32(vec_sde.val[0], vec_sde.val[0]);
+ float32x4_t vec_cohxd = vmulq_f32(vec_sxd.val[0], vec_sxd.val[0]);
+ vec_cohde = vmlaq_f32(vec_cohde, vec_sde.val[1], vec_sde.val[1]);
+ vec_cohde = vdivq_f32(vec_cohde, vec_sdse);
+ vec_cohxd = vmlaq_f32(vec_cohxd, vec_sxd.val[1], vec_sxd.val[1]);
+ vec_cohxd = vdivq_f32(vec_cohxd, vec_sdsx);
+
+ vst1q_f32(&cohde[i], vec_cohde);
+ vst1q_f32(&cohxd[i], vec_cohxd);
+ }
+ }
+ // scalar code for the remaining items.
+ for (; i < PART_LEN1; i++) {
+ cohde[i] =
+ (aec->sde[i][0] * aec->sde[i][0] + aec->sde[i][1] * aec->sde[i][1]) /
+ (aec->sd[i] * aec->se[i] + 1e-10f);
+ cohxd[i] =
+ (aec->sxd[i][0] * aec->sxd[i][0] + aec->sxd[i][1] * aec->sxd[i][1]) /
+ (aec->sx[i] * aec->sd[i] + 1e-10f);
+ }
+}
+
+void WebRtcAec_InitAec_neon(void) {  // Point the AEC hooks at the NEON kernels.
+  WebRtcAec_SubbandCoherence = SubbandCoherenceNEON;
+  WebRtcAec_OverdriveAndSuppress = OverdriveAndSuppressNEON;
+  WebRtcAec_FilterAdaptation = FilterAdaptationNEON;
+  WebRtcAec_ScaleErrorSignal = ScaleErrorSignalNEON;
+  WebRtcAec_FilterFar = FilterFarNEON;
+}
+
diff --git a/third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_core_sse2.c b/third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_core_sse2.c
new file mode 100644
index 00000000..b1bffcbb
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_core_sse2.c
@@ -0,0 +1,731 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+/*
+ * The core AEC algorithm, SSE2 version of speed-critical functions.
+ */
+
+#include <emmintrin.h>
+#include <math.h>
+#include <string.h> // memset
+
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+#include "webrtc/modules/audio_processing/aec/aec_common.h"
+#include "webrtc/modules/audio_processing/aec/aec_core_internal.h"
+#include "webrtc/modules/audio_processing/aec/aec_rdft.h"
+
+__inline static float MulRe(float aRe, float aIm, float bRe, float bIm) {
+ return aRe * bRe - aIm * bIm;  // real part of (aRe + i*aIm) * (bRe + i*bIm)
+}
+
+__inline static float MulIm(float aRe, float aIm, float bRe, float bIm) {
+ return aRe * bIm + aIm * bRe;  // imaginary part of (aRe + i*aIm) * (bRe + i*bIm)
+}
+
+static void FilterFarSSE2(AecCore* aec, float yf[2][PART_LEN1]) {
+  // Adds, for every partition, the complex product xfBuf * wfBuf to yf.
+  const int num_partitions = aec->num_partitions;
+  int part;
+  for (part = 0; part < num_partitions; part++) {
+    const int w_base = part * PART_LEN1;
+    int block = part + aec->xfBufBlockPos;
+    int x_base;
+    int k = 0;
+    // Wrap the block index once it reaches num_partitions.
+    if (block >= num_partitions) {
+      block -= num_partitions;
+    }
+    x_base = block * PART_LEN1;
+
+    // SSE2 path: four bins per iteration.
+    while (k + 3 < PART_LEN1) {
+      const __m128 x_re = _mm_loadu_ps(&aec->xfBuf[0][x_base + k]);
+      const __m128 x_im = _mm_loadu_ps(&aec->xfBuf[1][x_base + k]);
+      const __m128 w_re = _mm_loadu_ps(&aec->wfBuf[0][w_base + k]);
+      const __m128 w_im = _mm_loadu_ps(&aec->wfBuf[1][w_base + k]);
+      // (x_re + i x_im) * (w_re + i w_im)
+      const __m128 prod_re =
+          _mm_sub_ps(_mm_mul_ps(x_re, w_re), _mm_mul_ps(x_im, w_im));
+      const __m128 prod_im =
+          _mm_add_ps(_mm_mul_ps(x_re, w_im), _mm_mul_ps(x_im, w_re));
+      const __m128 acc_re = _mm_add_ps(_mm_loadu_ps(&yf[0][k]), prod_re);
+      const __m128 acc_im = _mm_add_ps(_mm_loadu_ps(&yf[1][k]), prod_im);
+      _mm_storeu_ps(&yf[0][k], acc_re);
+      _mm_storeu_ps(&yf[1][k], acc_im);
+      k += 4;
+    }
+
+    // Scalar path for the bins left over after the vector loop.
+    for (; k < PART_LEN1; k++) {
+      const float x_re = aec->xfBuf[0][x_base + k];
+      const float x_im = aec->xfBuf[1][x_base + k];
+      const float w_re = aec->wfBuf[0][w_base + k];
+      const float w_im = aec->wfBuf[1][w_base + k];
+      yf[0][k] += MulRe(x_re, x_im, w_re, w_im);
+      yf[1][k] += MulIm(x_re, x_im, w_re, w_im);
+    }
+  }
+}
+
+static void ScaleErrorSignalSSE2(AecCore* aec, float ef[2][PART_LEN1]) {  // scales ef in place
+ const __m128 k1e_10f = _mm_set1_ps(1e-10f);  // added to both divisors below
+ const __m128 kMu = aec->extended_filter_enabled ? _mm_set1_ps(kExtendedMu)
+ : _mm_set1_ps(aec->normal_mu);
+ const __m128 kThresh = aec->extended_filter_enabled
+ ? _mm_set1_ps(kExtendedErrorThreshold)
+ : _mm_set1_ps(aec->normal_error_threshold);
+
+ int i;
+ // vectorized code (four at once); mirrors the scalar loop further down.
+ for (i = 0; i + 3 < PART_LEN1; i += 4) {
+ const __m128 xPow = _mm_loadu_ps(&aec->xPow[i]);
+ const __m128 ef_re_base = _mm_loadu_ps(&ef[0][i]);
+ const __m128 ef_im_base = _mm_loadu_ps(&ef[1][i]);
+
+ const __m128 xPowPlus = _mm_add_ps(xPow, k1e_10f);
+ __m128 ef_re = _mm_div_ps(ef_re_base, xPowPlus);  // ef / (xPow + 1e-10)
+ __m128 ef_im = _mm_div_ps(ef_im_base, xPowPlus);
+ const __m128 ef_re2 = _mm_mul_ps(ef_re, ef_re);
+ const __m128 ef_im2 = _mm_mul_ps(ef_im, ef_im);
+ const __m128 ef_sum2 = _mm_add_ps(ef_re2, ef_im2);
+ const __m128 absEf = _mm_sqrt_ps(ef_sum2);  // magnitude of the normalized error
+ const __m128 bigger = _mm_cmpgt_ps(absEf, kThresh);  // all-ones lanes where |ef| > threshold
+ __m128 absEfPlus = _mm_add_ps(absEf, k1e_10f);
+ const __m128 absEfInv = _mm_div_ps(kThresh, absEfPlus);  // threshold / (|ef| + 1e-10)
+ __m128 ef_re_if = _mm_mul_ps(ef_re, absEfInv);
+ __m128 ef_im_if = _mm_mul_ps(ef_im, absEfInv);
+ ef_re_if = _mm_and_ps(bigger, ef_re_if);  // keep the rescaled value where the mask is set
+ ef_im_if = _mm_and_ps(bigger, ef_im_if);
+ ef_re = _mm_andnot_ps(bigger, ef_re);  // keep the plain value where it is clear
+ ef_im = _mm_andnot_ps(bigger, ef_im);
+ ef_re = _mm_or_ps(ef_re, ef_re_if);  // merge the two disjoint selections
+ ef_im = _mm_or_ps(ef_im, ef_im_if);
+ ef_re = _mm_mul_ps(ef_re, kMu);  // stepsize factor
+ ef_im = _mm_mul_ps(ef_im, kMu);
+
+ _mm_storeu_ps(&ef[0][i], ef_re);
+ _mm_storeu_ps(&ef[1][i], ef_im);
+ }
+ // scalar code for the remaining items.
+ {
+ const float mu =
+ aec->extended_filter_enabled ? kExtendedMu : aec->normal_mu;
+ const float error_threshold = aec->extended_filter_enabled
+ ? kExtendedErrorThreshold
+ : aec->normal_error_threshold;
+ for (; i < (PART_LEN1); i++) {
+ float abs_ef;
+ ef[0][i] /= (aec->xPow[i] + 1e-10f);
+ ef[1][i] /= (aec->xPow[i] + 1e-10f);
+ abs_ef = sqrtf(ef[0][i] * ef[0][i] + ef[1][i] * ef[1][i]);
+
+ if (abs_ef > error_threshold) {
+ abs_ef = error_threshold / (abs_ef + 1e-10f);  // abs_ef now holds the scale factor
+ ef[0][i] *= abs_ef;
+ ef[1][i] *= abs_ef;
+ }
+
+ // Stepsize factor
+ ef[0][i] *= mu;
+ ef[1][i] *= mu;
+ }
+ }
+}
+
+static void FilterAdaptationSSE2(AecCore* aec,  // aec->wfBuf is updated in place
+ float* fft,  // scratch buffer, overwritten for every partition
+ float ef[2][PART_LEN1]) {  // only read here
+ int i, j;
+ const int num_partitions = aec->num_partitions;
+ for (i = 0; i < num_partitions; i++) {
+ int xPos = (i + aec->xfBufBlockPos) * (PART_LEN1);
+ int pos = i * PART_LEN1;
+ // Check for wrap
+ if (i + aec->xfBufBlockPos >= num_partitions) {
+ xPos -= num_partitions * PART_LEN1;
+ }
+
+ // Process the whole array...
+ for (j = 0; j < PART_LEN; j += 4) {
+ // Load xfBuf and ef.
+ const __m128 xfBuf_re = _mm_loadu_ps(&aec->xfBuf[0][xPos + j]);
+ const __m128 xfBuf_im = _mm_loadu_ps(&aec->xfBuf[1][xPos + j]);
+ const __m128 ef_re = _mm_loadu_ps(&ef[0][j]);
+ const __m128 ef_im = _mm_loadu_ps(&ef[1][j]);
+ // Calculate the product of conjugate(xfBuf) by ef.
+ // re(conjugate(a) * b) = aRe * bRe + aIm * bIm
+ // im(conjugate(a) * b)= aRe * bIm - aIm * bRe
+ const __m128 a = _mm_mul_ps(xfBuf_re, ef_re);
+ const __m128 b = _mm_mul_ps(xfBuf_im, ef_im);
+ const __m128 c = _mm_mul_ps(xfBuf_re, ef_im);
+ const __m128 d = _mm_mul_ps(xfBuf_im, ef_re);
+ const __m128 e = _mm_add_ps(a, b);
+ const __m128 f = _mm_sub_ps(c, d);
+ // Interleave real and imaginary parts.
+ const __m128 g = _mm_unpacklo_ps(e, f);  // e0 f0 e1 f1
+ const __m128 h = _mm_unpackhi_ps(e, f);  // e2 f2 e3 f3
+ // Store
+ _mm_storeu_ps(&fft[2 * j + 0], g);
+ _mm_storeu_ps(&fft[2 * j + 4], h);
+ }
+ // ... and fixup the first imaginary entry: fft[1] gets the bin PART_LEN term.
+ fft[1] = MulRe(aec->xfBuf[0][xPos + PART_LEN],
+ -aec->xfBuf[1][xPos + PART_LEN],
+ ef[0][PART_LEN],
+ ef[1][PART_LEN]);
+
+ aec_rdft_inverse_128(fft);
+ memset(fft + PART_LEN, 0, sizeof(float) * PART_LEN);  // zero PART_LEN floats from fft[PART_LEN]
+
+ // fft scaling
+ {
+ float scale = 2.0f / PART_LEN2;
+ const __m128 scale_ps = _mm_load_ps1(&scale);
+ for (j = 0; j < PART_LEN; j += 4) {
+ const __m128 fft_ps = _mm_loadu_ps(&fft[j]);
+ const __m128 fft_scale = _mm_mul_ps(fft_ps, scale_ps);
+ _mm_storeu_ps(&fft[j], fft_scale);
+ }
+ }
+ aec_rdft_forward_128(fft);
+
+ {
+ float wt1 = aec->wfBuf[1][pos];  // saved: the loop below also adds fft[1] to it
+ aec->wfBuf[0][pos + PART_LEN] += fft[1];
+ for (j = 0; j < PART_LEN; j += 4) {
+ __m128 wtBuf_re = _mm_loadu_ps(&aec->wfBuf[0][pos + j]);
+ __m128 wtBuf_im = _mm_loadu_ps(&aec->wfBuf[1][pos + j]);
+ const __m128 fft0 = _mm_loadu_ps(&fft[2 * j + 0]);
+ const __m128 fft4 = _mm_loadu_ps(&fft[2 * j + 4]);
+ const __m128 fft_re =
+ _mm_shuffle_ps(fft0, fft4, _MM_SHUFFLE(2, 0, 2, 0));  // even-indexed floats
+ const __m128 fft_im =
+ _mm_shuffle_ps(fft0, fft4, _MM_SHUFFLE(3, 1, 3, 1));  // odd-indexed floats
+ wtBuf_re = _mm_add_ps(wtBuf_re, fft_re);
+ wtBuf_im = _mm_add_ps(wtBuf_im, fft_im);
+ _mm_storeu_ps(&aec->wfBuf[0][pos + j], wtBuf_re);
+ _mm_storeu_ps(&aec->wfBuf[1][pos + j], wtBuf_im);
+ }
+ aec->wfBuf[1][pos] = wt1;  // restored: undoes the fft[1] addition from the loop
+ }
+ }
+}
+
+static __m128 mm_pow_ps(__m128 a, __m128 b) {  // per-lane approximation of a^b
+ // a^b = exp2(b * log2(a))
+ // exp2(x) and log2(x) are calculated using polynomial approximations.
+ __m128 log2_a, b_log2_a, a_exp_b;
+
+ // Calculate log2(x), x = a.
+ {
+ // To calculate log2(x), we decompose x like this:
+ // x = y * 2^n
+ // n is an integer
+ // y is in the [1.0, 2.0) range
+ //
+ // log2(x) = log2(y) + n
+ // n can be evaluated by playing with float representation.
+ // log2(y) in a small range can be approximated, this code uses an order
+ // five polynomial approximation. The coefficients have been
+ // estimated with the Remez algorithm and the resulting
+ // polynomial has a maximum relative error of 0.00086%.
+
+ // Compute n.
+ // This is done by masking the exponent, shifting it into the top bits of
+ // the mantissa, putting eight into the biased exponent (to compensate for
+ // the exponent having been shifted into the fractional part), and finally
+ // getting rid of the implicit leading one from the mantissa by
+ // subtracting it out.
+ static const ALIGN16_BEG int float_exponent_mask[4] ALIGN16_END = {
+ 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000};  // exponent bits only; sign bit excluded
+ static const ALIGN16_BEG int eight_biased_exponent[4] ALIGN16_END = {
+ 0x43800000, 0x43800000, 0x43800000, 0x43800000};  // bit pattern of 256.0f
+ static const ALIGN16_BEG int implicit_leading_one[4] ALIGN16_END = {
+ 0x43BF8000, 0x43BF8000, 0x43BF8000, 0x43BF8000};  // bit pattern of 383.0f (256 + 127)
+ static const int shift_exponent_into_top_mantissa = 8;
+ const __m128 two_n = _mm_and_ps(a, *((__m128*)float_exponent_mask));
+ const __m128 n_1 = _mm_castsi128_ps(_mm_srli_epi32(
+ _mm_castps_si128(two_n), shift_exponent_into_top_mantissa));
+ const __m128 n_0 = _mm_or_ps(n_1, *((__m128*)eight_biased_exponent));  // 256 + biased exponent
+ const __m128 n = _mm_sub_ps(n_0, *((__m128*)implicit_leading_one));  // biased exponent - 127
+
+ // Compute y.
+ static const ALIGN16_BEG int mantissa_mask[4] ALIGN16_END = {
+ 0x007FFFFF, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF};  // mantissa bits only; sign bit excluded
+ static const ALIGN16_BEG int zero_biased_exponent_is_one[4] ALIGN16_END = {
+ 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000};  // bit pattern of 1.0f
+ const __m128 mantissa = _mm_and_ps(a, *((__m128*)mantissa_mask));
+ const __m128 y =
+ _mm_or_ps(mantissa, *((__m128*)zero_biased_exponent_is_one));
+
+ // Approximate log2(y) ~= (y - 1) * pol5(y).
+ // pol5(y) = C5 * y^5 + C4 * y^4 + C3 * y^3 + C2 * y^2 + C1 * y + C0
+ static const ALIGN16_BEG float ALIGN16_END C5[4] = {
+ -3.4436006e-2f, -3.4436006e-2f, -3.4436006e-2f, -3.4436006e-2f};
+ static const ALIGN16_BEG float ALIGN16_END
+ C4[4] = {3.1821337e-1f, 3.1821337e-1f, 3.1821337e-1f, 3.1821337e-1f};
+ static const ALIGN16_BEG float ALIGN16_END
+ C3[4] = {-1.2315303f, -1.2315303f, -1.2315303f, -1.2315303f};
+ static const ALIGN16_BEG float ALIGN16_END
+ C2[4] = {2.5988452f, 2.5988452f, 2.5988452f, 2.5988452f};
+ static const ALIGN16_BEG float ALIGN16_END
+ C1[4] = {-3.3241990f, -3.3241990f, -3.3241990f, -3.3241990f};
+ static const ALIGN16_BEG float ALIGN16_END
+ C0[4] = {3.1157899f, 3.1157899f, 3.1157899f, 3.1157899f};
+ const __m128 pol5_y_0 = _mm_mul_ps(y, *((__m128*)C5));  // nested multiply-add chain from here on
+ const __m128 pol5_y_1 = _mm_add_ps(pol5_y_0, *((__m128*)C4));
+ const __m128 pol5_y_2 = _mm_mul_ps(pol5_y_1, y);
+ const __m128 pol5_y_3 = _mm_add_ps(pol5_y_2, *((__m128*)C3));
+ const __m128 pol5_y_4 = _mm_mul_ps(pol5_y_3, y);
+ const __m128 pol5_y_5 = _mm_add_ps(pol5_y_4, *((__m128*)C2));
+ const __m128 pol5_y_6 = _mm_mul_ps(pol5_y_5, y);
+ const __m128 pol5_y_7 = _mm_add_ps(pol5_y_6, *((__m128*)C1));
+ const __m128 pol5_y_8 = _mm_mul_ps(pol5_y_7, y);
+ const __m128 pol5_y = _mm_add_ps(pol5_y_8, *((__m128*)C0));
+ const __m128 y_minus_one =
+ _mm_sub_ps(y, *((__m128*)zero_biased_exponent_is_one));
+ const __m128 log2_y = _mm_mul_ps(y_minus_one, pol5_y);
+
+ // Combine parts.
+ log2_a = _mm_add_ps(n, log2_y);
+ }
+
+ // b * log2(a)
+ b_log2_a = _mm_mul_ps(b, log2_a);
+
+ // Calculate exp2(x), x = b * log2(a).
+ {
+ // To calculate 2^x, we decompose x like this:
+ // x = n + y
+ // n is an integer, the value of x - 0.5 rounded down, therefore
+ // y is in the [0.5, 1.5) range
+ //
+ // 2^x = 2^n * 2^y
+ // 2^n can be evaluated by playing with float representation.
+ // 2^y in a small range can be approximated, this code uses an order two
+ // polynomial approximation. The coefficients have been estimated
+ // with the Remez algorithm and the resulting polynomial has a
+ // maximum relative error of 0.17%.
+
+ // To avoid over/underflow, we reduce the range of input to ]-127, 129].
+ static const ALIGN16_BEG float max_input[4] ALIGN16_END = {129.f, 129.f,
+ 129.f, 129.f};
+ static const ALIGN16_BEG float min_input[4] ALIGN16_END = {
+ -126.99999f, -126.99999f, -126.99999f, -126.99999f};
+ const __m128 x_min = _mm_min_ps(b_log2_a, *((__m128*)max_input));  // clamp from above
+ const __m128 x_max = _mm_max_ps(x_min, *((__m128*)min_input));  // then clamp from below
+ // Compute n.
+ static const ALIGN16_BEG float half[4] ALIGN16_END = {0.5f, 0.5f,
+ 0.5f, 0.5f};
+ const __m128 x_minus_half = _mm_sub_ps(x_max, *((__m128*)half));
+ const __m128i x_minus_half_floor = _mm_cvtps_epi32(x_minus_half);  // NOTE(review): rounds per MXCSR mode, not an explicit floor - confirm
+ // Compute 2^n.
+ static const ALIGN16_BEG int float_exponent_bias[4] ALIGN16_END = {
+ 127, 127, 127, 127};
+ static const int float_exponent_shift = 23;
+ const __m128i two_n_exponent =
+ _mm_add_epi32(x_minus_half_floor, *((__m128i*)float_exponent_bias));
+ const __m128 two_n =
+ _mm_castsi128_ps(_mm_slli_epi32(two_n_exponent, float_exponent_shift));  // n + 127 moved into the exponent field
+ // Compute y.
+ const __m128 y = _mm_sub_ps(x_max, _mm_cvtepi32_ps(x_minus_half_floor));
+ // Approximate 2^y ~= C2 * y^2 + C1 * y + C0.
+ static const ALIGN16_BEG float C2[4] ALIGN16_END = {
+ 3.3718944e-1f, 3.3718944e-1f, 3.3718944e-1f, 3.3718944e-1f};
+ static const ALIGN16_BEG float C1[4] ALIGN16_END = {
+ 6.5763628e-1f, 6.5763628e-1f, 6.5763628e-1f, 6.5763628e-1f};
+ static const ALIGN16_BEG float C0[4] ALIGN16_END = {1.0017247f, 1.0017247f,
+ 1.0017247f, 1.0017247f};
+ const __m128 exp2_y_0 = _mm_mul_ps(y, *((__m128*)C2));
+ const __m128 exp2_y_1 = _mm_add_ps(exp2_y_0, *((__m128*)C1));
+ const __m128 exp2_y_2 = _mm_mul_ps(exp2_y_1, y);
+ const __m128 exp2_y = _mm_add_ps(exp2_y_2, *((__m128*)C0));
+
+ // Combine parts.
+ a_exp_b = _mm_mul_ps(exp2_y, two_n);
+ }
+ return a_exp_b;
+}
+
+static void OverdriveAndSuppressSSE2(AecCore* aec,  // only aec->overDriveSm is read here
+ float hNl[PART_LEN1],  // in/out: weighted, raised to a power, stored back
+ const float hNlFb,  // level that hNl[i] is compared against and blended with
+ float efw[2][PART_LEN1]) {  // in/out: scaled by hNl; row 1 also negated
+ int i;
+ const __m128 vec_hNlFb = _mm_set1_ps(hNlFb);
+ const __m128 vec_one = _mm_set1_ps(1.0f);
+ const __m128 vec_minus_one = _mm_set1_ps(-1.0f);
+ const __m128 vec_overDriveSm = _mm_set1_ps(aec->overDriveSm);
+ // vectorized code (four at once)
+ for (i = 0; i + 3 < PART_LEN1; i += 4) {
+ // Weight subbands
+ __m128 vec_hNl = _mm_loadu_ps(&hNl[i]);
+ const __m128 vec_weightCurve = _mm_loadu_ps(&WebRtcAec_weightCurve[i]);
+ const __m128 bigger = _mm_cmpgt_ps(vec_hNl, vec_hNlFb);  // all-ones lanes where hNl > hNlFb
+ const __m128 vec_weightCurve_hNlFb = _mm_mul_ps(vec_weightCurve, vec_hNlFb);
+ const __m128 vec_one_weightCurve = _mm_sub_ps(vec_one, vec_weightCurve);
+ const __m128 vec_one_weightCurve_hNl =
+ _mm_mul_ps(vec_one_weightCurve, vec_hNl);
+ const __m128 vec_if0 = _mm_andnot_ps(bigger, vec_hNl);  // unchanged hNl where the mask is clear
+ const __m128 vec_if1 = _mm_and_ps(
+ bigger, _mm_add_ps(vec_weightCurve_hNlFb, vec_one_weightCurve_hNl));  // blend where it is set
+ vec_hNl = _mm_or_ps(vec_if0, vec_if1);
+
+ {
+ const __m128 vec_overDriveCurve =
+ _mm_loadu_ps(&WebRtcAec_overDriveCurve[i]);
+ const __m128 vec_overDriveSm_overDriveCurve =
+ _mm_mul_ps(vec_overDriveSm, vec_overDriveCurve);
+ vec_hNl = mm_pow_ps(vec_hNl, vec_overDriveSm_overDriveCurve);  // the scalar loop uses powf here
+ _mm_storeu_ps(&hNl[i], vec_hNl);
+ }
+
+ // Suppress error signal
+ {
+ __m128 vec_efw_re = _mm_loadu_ps(&efw[0][i]);
+ __m128 vec_efw_im = _mm_loadu_ps(&efw[1][i]);
+ vec_efw_re = _mm_mul_ps(vec_efw_re, vec_hNl);
+ vec_efw_im = _mm_mul_ps(vec_efw_im, vec_hNl);
+
+ // Ooura fft returns incorrect sign on imaginary component. It matters
+ // here because we are making an additive change with comfort noise.
+ vec_efw_im = _mm_mul_ps(vec_efw_im, vec_minus_one);
+ _mm_storeu_ps(&efw[0][i], vec_efw_re);
+ _mm_storeu_ps(&efw[1][i], vec_efw_im);
+ }
+ }
+ // scalar code for the remaining items.
+ for (; i < PART_LEN1; i++) {
+ // Weight subbands
+ if (hNl[i] > hNlFb) {
+ hNl[i] = WebRtcAec_weightCurve[i] * hNlFb +
+ (1 - WebRtcAec_weightCurve[i]) * hNl[i];
+ }
+ hNl[i] = powf(hNl[i], aec->overDriveSm * WebRtcAec_overDriveCurve[i]);
+
+ // Suppress error signal
+ efw[0][i] *= hNl[i];
+ efw[1][i] *= hNl[i];
+
+ // Ooura fft returns incorrect sign on imaginary component. It matters
+ // here because we are making an additive change with comfort noise.
+ efw[1][i] *= -1;
+ }
+}
+
+__inline static void _mm_add_ps_4x1(__m128 sum, float *dst) {  // *dst = sum of the 4 lanes
+ // Add lanes 2,3 onto lanes 0,1: lane 0 = s0+s2, lane 1 = s1+s3.
+ sum = _mm_add_ps(sum, _mm_shuffle_ps(sum, sum, _MM_SHUFFLE(0, 0, 3, 2)));
+ // Add lane 1 onto every lane: lane 0 = (s0+s2) + (s1+s3).
+ sum = _mm_add_ps(sum, _mm_shuffle_ps(sum, sum, _MM_SHUFFLE(1, 1, 1, 1)));
+ _mm_store_ss(dst, sum);  // writes lane 0 only
+}
+static int PartitionDelay(const AecCore* aec) {
+  // Returns the index of the filter partition whose coefficients carry the
+  // largest summed squared magnitude (0 when no partition is above zero).
+  // TODO(bjornv): Spread computational cost by computing one partition per
+  // block?
+  int best_partition = 0;
+  float best_energy = 0;
+  int part;
+
+  for (part = 0; part < aec->num_partitions; part++) {
+    const int base = part * PART_LEN1;
+    __m128 acc = _mm_set1_ps(0.0f);
+    float energy = 0;
+    int k;
+    // SSE2 path: four bins per iteration.
+    for (k = 0; k + 3 < PART_LEN1; k += 4) {
+      const __m128 re = _mm_loadu_ps(&aec->wfBuf[0][base + k]);
+      const __m128 im = _mm_loadu_ps(&aec->wfBuf[1][base + k]);
+      acc = _mm_add_ps(acc, _mm_mul_ps(re, re));
+      acc = _mm_add_ps(acc, _mm_mul_ps(im, im));
+    }
+    _mm_add_ps_4x1(acc, &energy);  // fold the four lanes into energy
+
+    // Scalar path for the bins left over after the vector loop.
+    for (; k < PART_LEN1; k++) {
+      energy += aec->wfBuf[0][base + k] * aec->wfBuf[0][base + k] +
+                aec->wfBuf[1][base + k] * aec->wfBuf[1][base + k];
+    }
+
+    if (energy > best_energy) {  // strict '>' keeps the earliest partition on ties
+      best_energy = energy;
+      best_partition = part;
+    }
+  }
+  return best_partition;
+}
+
+// Updates the following smoothed Power Spectral Densities (PSD):
+// - sd : near-end
+// - se : residual echo
+// - sx : far-end
+// - sde : cross-PSD of near-end and residual echo
+// - sxd : cross-PSD of near-end and far-end
+//
+// In addition to updating the PSDs, the filter diverge state is also determined
+// here, and the resulting actions are taken.
+static void SmoothedPSD(AecCore* aec,
+ float efw[2][PART_LEN1],
+ float dfw[2][PART_LEN1],
+ float xfw[2][PART_LEN1]) {
+ // Power estimate smoothing coefficients.
+ const float* ptrGCoh = aec->extended_filter_enabled
+ ? WebRtcAec_kExtendedSmoothingCoefficients[aec->mult - 1]
+ : WebRtcAec_kNormalSmoothingCoefficients[aec->mult - 1];
+ int i;
+ float sdSum = 0, seSum = 0;
+ const __m128 vec_15 = _mm_set1_ps(WebRtcAec_kMinFarendPSD);
+ const __m128 vec_GCoh0 = _mm_set1_ps(ptrGCoh[0]);
+ const __m128 vec_GCoh1 = _mm_set1_ps(ptrGCoh[1]);
+ __m128 vec_sdSum = _mm_set1_ps(0.0f);
+ __m128 vec_seSum = _mm_set1_ps(0.0f);
+
+ for (i = 0; i + 3 < PART_LEN1; i += 4) {
+ const __m128 vec_dfw0 = _mm_loadu_ps(&dfw[0][i]);
+ const __m128 vec_dfw1 = _mm_loadu_ps(&dfw[1][i]);
+ const __m128 vec_efw0 = _mm_loadu_ps(&efw[0][i]);
+ const __m128 vec_efw1 = _mm_loadu_ps(&efw[1][i]);
+ const __m128 vec_xfw0 = _mm_loadu_ps(&xfw[0][i]);
+ const __m128 vec_xfw1 = _mm_loadu_ps(&xfw[1][i]);
+ __m128 vec_sd = _mm_mul_ps(_mm_loadu_ps(&aec->sd[i]), vec_GCoh0);
+ __m128 vec_se = _mm_mul_ps(_mm_loadu_ps(&aec->se[i]), vec_GCoh0);
+ __m128 vec_sx = _mm_mul_ps(_mm_loadu_ps(&aec->sx[i]), vec_GCoh0);
+ __m128 vec_dfw_sumsq = _mm_mul_ps(vec_dfw0, vec_dfw0);
+ __m128 vec_efw_sumsq = _mm_mul_ps(vec_efw0, vec_efw0);
+ __m128 vec_xfw_sumsq = _mm_mul_ps(vec_xfw0, vec_xfw0);
+ vec_dfw_sumsq = _mm_add_ps(vec_dfw_sumsq, _mm_mul_ps(vec_dfw1, vec_dfw1));
+ vec_efw_sumsq = _mm_add_ps(vec_efw_sumsq, _mm_mul_ps(vec_efw1, vec_efw1));
+ vec_xfw_sumsq = _mm_add_ps(vec_xfw_sumsq, _mm_mul_ps(vec_xfw1, vec_xfw1));
+ vec_xfw_sumsq = _mm_max_ps(vec_xfw_sumsq, vec_15);
+ vec_sd = _mm_add_ps(vec_sd, _mm_mul_ps(vec_dfw_sumsq, vec_GCoh1));
+ vec_se = _mm_add_ps(vec_se, _mm_mul_ps(vec_efw_sumsq, vec_GCoh1));
+ vec_sx = _mm_add_ps(vec_sx, _mm_mul_ps(vec_xfw_sumsq, vec_GCoh1));
+ _mm_storeu_ps(&aec->sd[i], vec_sd);
+ _mm_storeu_ps(&aec->se[i], vec_se);
+ _mm_storeu_ps(&aec->sx[i], vec_sx);
+
+ {
+ const __m128 vec_3210 = _mm_loadu_ps(&aec->sde[i][0]);
+ const __m128 vec_7654 = _mm_loadu_ps(&aec->sde[i + 2][0]);
+ __m128 vec_a = _mm_shuffle_ps(vec_3210, vec_7654,
+ _MM_SHUFFLE(2, 0, 2, 0));
+ __m128 vec_b = _mm_shuffle_ps(vec_3210, vec_7654,
+ _MM_SHUFFLE(3, 1, 3, 1));
+ __m128 vec_dfwefw0011 = _mm_mul_ps(vec_dfw0, vec_efw0);
+ __m128 vec_dfwefw0110 = _mm_mul_ps(vec_dfw0, vec_efw1);
+ vec_a = _mm_mul_ps(vec_a, vec_GCoh0);
+ vec_b = _mm_mul_ps(vec_b, vec_GCoh0);
+ vec_dfwefw0011 = _mm_add_ps(vec_dfwefw0011,
+ _mm_mul_ps(vec_dfw1, vec_efw1));
+ vec_dfwefw0110 = _mm_sub_ps(vec_dfwefw0110,
+ _mm_mul_ps(vec_dfw1, vec_efw0));
+ vec_a = _mm_add_ps(vec_a, _mm_mul_ps(vec_dfwefw0011, vec_GCoh1));
+ vec_b = _mm_add_ps(vec_b, _mm_mul_ps(vec_dfwefw0110, vec_GCoh1));
+ _mm_storeu_ps(&aec->sde[i][0], _mm_unpacklo_ps(vec_a, vec_b));
+ _mm_storeu_ps(&aec->sde[i + 2][0], _mm_unpackhi_ps(vec_a, vec_b));
+ }
+
+ {
+ const __m128 vec_3210 = _mm_loadu_ps(&aec->sxd[i][0]);
+ const __m128 vec_7654 = _mm_loadu_ps(&aec->sxd[i + 2][0]);
+ __m128 vec_a = _mm_shuffle_ps(vec_3210, vec_7654,
+ _MM_SHUFFLE(2, 0, 2, 0));
+ __m128 vec_b = _mm_shuffle_ps(vec_3210, vec_7654,
+ _MM_SHUFFLE(3, 1, 3, 1));
+ __m128 vec_dfwxfw0011 = _mm_mul_ps(vec_dfw0, vec_xfw0);
+ __m128 vec_dfwxfw0110 = _mm_mul_ps(vec_dfw0, vec_xfw1);
+ vec_a = _mm_mul_ps(vec_a, vec_GCoh0);
+ vec_b = _mm_mul_ps(vec_b, vec_GCoh0);
+ vec_dfwxfw0011 = _mm_add_ps(vec_dfwxfw0011,
+ _mm_mul_ps(vec_dfw1, vec_xfw1));
+ vec_dfwxfw0110 = _mm_sub_ps(vec_dfwxfw0110,
+ _mm_mul_ps(vec_dfw1, vec_xfw0));
+ vec_a = _mm_add_ps(vec_a, _mm_mul_ps(vec_dfwxfw0011, vec_GCoh1));
+ vec_b = _mm_add_ps(vec_b, _mm_mul_ps(vec_dfwxfw0110, vec_GCoh1));
+ _mm_storeu_ps(&aec->sxd[i][0], _mm_unpacklo_ps(vec_a, vec_b));
+ _mm_storeu_ps(&aec->sxd[i + 2][0], _mm_unpackhi_ps(vec_a, vec_b));
+ }
+
+ vec_sdSum = _mm_add_ps(vec_sdSum, vec_sd);
+ vec_seSum = _mm_add_ps(vec_seSum, vec_se);
+ }
+
+ _mm_add_ps_4x1(vec_sdSum, &sdSum);
+ _mm_add_ps_4x1(vec_seSum, &seSum);
+
+ for (; i < PART_LEN1; i++) {
+ aec->sd[i] = ptrGCoh[0] * aec->sd[i] +
+ ptrGCoh[1] * (dfw[0][i] * dfw[0][i] + dfw[1][i] * dfw[1][i]);
+ aec->se[i] = ptrGCoh[0] * aec->se[i] +
+ ptrGCoh[1] * (efw[0][i] * efw[0][i] + efw[1][i] * efw[1][i]);
+ // We threshold here to protect against the ill-effects of a zero farend.
+ // The threshold is not arbitrarily chosen, but balances protection and
+ // adverse interaction with the algorithm's tuning.
+ // TODO(bjornv): investigate further why this is so sensitive.
+ aec->sx[i] =
+ ptrGCoh[0] * aec->sx[i] +
+ ptrGCoh[1] * WEBRTC_SPL_MAX(
+ xfw[0][i] * xfw[0][i] + xfw[1][i] * xfw[1][i],
+ WebRtcAec_kMinFarendPSD);
+
+ aec->sde[i][0] =
+ ptrGCoh[0] * aec->sde[i][0] +
+ ptrGCoh[1] * (dfw[0][i] * efw[0][i] + dfw[1][i] * efw[1][i]);
+ aec->sde[i][1] =
+ ptrGCoh[0] * aec->sde[i][1] +
+ ptrGCoh[1] * (dfw[0][i] * efw[1][i] - dfw[1][i] * efw[0][i]);
+
+ aec->sxd[i][0] =
+ ptrGCoh[0] * aec->sxd[i][0] +
+ ptrGCoh[1] * (dfw[0][i] * xfw[0][i] + dfw[1][i] * xfw[1][i]);
+ aec->sxd[i][1] =
+ ptrGCoh[0] * aec->sxd[i][1] +
+ ptrGCoh[1] * (dfw[0][i] * xfw[1][i] - dfw[1][i] * xfw[0][i]);
+
+ sdSum += aec->sd[i];
+ seSum += aec->se[i];
+ }
+
+ // Divergent filter safeguard.
+ aec->divergeState = (aec->divergeState ? 1.05f : 1.0f) * seSum > sdSum;
+
+ if (aec->divergeState)
+ memcpy(efw, dfw, sizeof(efw[0][0]) * 2 * PART_LEN1);
+
+ // Reset if error is significantly larger than nearend (13 dB).
+ if (!aec->extended_filter_enabled && seSum > (19.95f * sdSum))
+ memset(aec->wfBuf, 0, sizeof(aec->wfBuf));
+}
+
+// Multiplies the 2 * PART_LEN time-domain samples in |x| by the
+// WebRtcAec_sqrtHanning table and writes the products to |x_windowed|, four
+// floats per iteration. The first PART_LEN samples use the table in ascending
+// order; the second PART_LEN samples use it in descending order, i.e. sample
+// PART_LEN + n is scaled by WebRtcAec_sqrtHanning[PART_LEN - n].
+__inline static void WindowData(float* x_windowed, const float* x) {
+  const float* const window = WebRtcAec_sqrtHanning;
+  int n = 0;
+  while (n < PART_LEN) {
+    const __m128 lower_in = _mm_loadu_ps(x + n);
+    const __m128 upper_in = _mm_loadu_ps(x + PART_LEN + n);
+    // _mm_load_ps is the aligned load, so the table must be 16-byte aligned.
+    const __m128 win_fwd = _mm_load_ps(window + n);
+    // Table entries PART_LEN - n - 3 .. PART_LEN - n, still in ascending order.
+    const __m128 win_tail = _mm_loadu_ps(window + (PART_LEN - n - 3));
+    // Reverse the four lanes so that lane 0 holds entry PART_LEN - n.
+    const __m128 win_rev =
+        _mm_shuffle_ps(win_tail, win_tail, _MM_SHUFFLE(0, 1, 2, 3));
+    const __m128 lower_out = _mm_mul_ps(lower_in, win_fwd);
+    const __m128 upper_out = _mm_mul_ps(upper_in, win_rev);
+    _mm_storeu_ps(x_windowed + n, lower_out);
+    _mm_storeu_ps(x_windowed + PART_LEN + n, upper_out);
+    n += 4;
+  }
+}
+
+// Splits the interleaved fft output in |data| into two rows of |data_complex|:
+// even-indexed floats go to data_complex[0] (real parts) and odd-indexed
+// floats go to data_complex[1] (imaginary parts). Afterwards the two special
+// entries are patched: data[0] and data[1] are stored as the real parts of
+// bins 0 and PART_LEN, and the imaginary parts of those two bins are zeroed.
+__inline static void StoreAsComplex(const float* data,
+                                    float data_complex[2][PART_LEN1]) {
+  float* const re = data_complex[0];
+  float* const im = data_complex[1];
+  int bin;
+
+  for (bin = 0; bin < PART_LEN; bin += 4) {
+    const float* const src = data + 2 * bin;
+    const __m128 pairs_lo = _mm_loadu_ps(src);
+    const __m128 pairs_hi = _mm_loadu_ps(src + 4);
+    // Lanes 0 and 2 of each load are gathered into one vector, lanes 1 and 3
+    // into the other.
+    const __m128 re4 =
+        _mm_shuffle_ps(pairs_lo, pairs_hi, _MM_SHUFFLE(2, 0, 2, 0));
+    const __m128 im4 =
+        _mm_shuffle_ps(pairs_lo, pairs_hi, _MM_SHUFFLE(3, 1, 3, 1));
+    _mm_storeu_ps(re + bin, re4);
+    _mm_storeu_ps(im + bin, im4);
+  }
+
+  // Fix the first and last bins, which the loop above filled from the packed
+  // data[0] / data[1] pair.
+  im[0] = 0;
+  im[PART_LEN] = 0;
+  re[0] = data[0];
+  re[PART_LEN] = data[1];
+}
+
+// SSE2 implementation installed in WebRtcAec_SubbandCoherence.
+//
+// In order, this function:
+//  1. refreshes aec->delayIdx through PartitionDelay() when aec->delayEstCtr
+//     is zero;
+//  2. copies the far-end spectrum selected by aec->delayIdx from aec->xfwBuf
+//     into |xfw|;
+//  3. windows and transforms aec->dBuf (near end) into a local spectrum and
+//     aec->eBuf (error) into |efw|, using |fft| as scratch space;
+//  4. calls SmoothedPSD() with the three spectra;
+//  5. fills, for every bin i in [0, PART_LEN1):
+//       cohde[i] = (sde[i][0]^2 + sde[i][1]^2) / (sd[i] * se[i] + 1e-10)
+//       cohxd[i] = (sxd[i][0]^2 + sxd[i][1]^2) / (sx[i] * sd[i] + 1e-10)
+static void SubbandCoherenceSSE2(AecCore* aec,
+                                 float efw[2][PART_LEN1],
+                                 float xfw[2][PART_LEN1],
+                                 float* fft,
+                                 float* cohde,
+                                 float* cohxd) {
+  const __m128 eps4 = _mm_set1_ps(1e-10f);
+  float dfw[2][PART_LEN1];
+  int bin = 0;
+
+  if (aec->delayEstCtr == 0) {
+    aec->delayIdx = PartitionDelay(aec);
+  }
+
+  // Use delayed far.
+  memcpy(xfw,
+         aec->xfwBuf + aec->delayIdx * PART_LEN1,
+         sizeof(xfw[0][0]) * 2 * PART_LEN1);
+
+  // Near end: window, transform, split into real/imaginary rows.
+  WindowData(fft, aec->dBuf);
+  aec_rdft_forward_128(fft);
+  StoreAsComplex(fft, dfw);
+
+  // Error signal: same treatment, result goes to the caller's |efw|.
+  WindowData(fft, aec->eBuf);
+  aec_rdft_forward_128(fft);
+  StoreAsComplex(fft, efw);
+
+  SmoothedPSD(aec, efw, dfw, xfw);
+
+  // Vector part: four bins per iteration.
+  for (; bin + 4 <= PART_LEN1; bin += 4) {
+    const __m128 sd4 = _mm_loadu_ps(&aec->sd[bin]);
+    const __m128 se4 = _mm_loadu_ps(&aec->se[bin]);
+    const __m128 sx4 = _mm_loadu_ps(&aec->sx[bin]);
+    const __m128 den_de = _mm_add_ps(eps4, _mm_mul_ps(sd4, se4));
+    const __m128 den_xd = _mm_add_ps(eps4, _mm_mul_ps(sd4, sx4));
+    // Each load below holds two bins as interleaved [0]/[1] components.
+    const __m128 sde_lo = _mm_loadu_ps(&aec->sde[bin][0]);
+    const __m128 sde_hi = _mm_loadu_ps(&aec->sde[bin + 2][0]);
+    const __m128 sxd_lo = _mm_loadu_ps(&aec->sxd[bin][0]);
+    const __m128 sxd_hi = _mm_loadu_ps(&aec->sxd[bin + 2][0]);
+    // De-interleave into one vector of [0] components and one of [1].
+    const __m128 sde_c0 =
+        _mm_shuffle_ps(sde_lo, sde_hi, _MM_SHUFFLE(2, 0, 2, 0));
+    const __m128 sde_c1 =
+        _mm_shuffle_ps(sde_lo, sde_hi, _MM_SHUFFLE(3, 1, 3, 1));
+    const __m128 sxd_c0 =
+        _mm_shuffle_ps(sxd_lo, sxd_hi, _MM_SHUFFLE(2, 0, 2, 0));
+    const __m128 sxd_c1 =
+        _mm_shuffle_ps(sxd_lo, sxd_hi, _MM_SHUFFLE(3, 1, 3, 1));
+    const __m128 num_de = _mm_add_ps(_mm_mul_ps(sde_c0, sde_c0),
+                                     _mm_mul_ps(sde_c1, sde_c1));
+    const __m128 num_xd = _mm_add_ps(_mm_mul_ps(sxd_c0, sxd_c0),
+                                     _mm_mul_ps(sxd_c1, sxd_c1));
+    _mm_storeu_ps(&cohde[bin], _mm_div_ps(num_de, den_de));
+    _mm_storeu_ps(&cohxd[bin], _mm_div_ps(num_xd, den_xd));
+  }
+
+  // Scalar part: whatever bins the vector loop did not cover.
+  for (; bin < PART_LEN1; bin++) {
+    cohde[bin] = (aec->sde[bin][0] * aec->sde[bin][0] +
+                  aec->sde[bin][1] * aec->sde[bin][1]) /
+                 (aec->sd[bin] * aec->se[bin] + 1e-10f);
+    cohxd[bin] = (aec->sxd[bin][0] * aec->sxd[bin][0] +
+                  aec->sxd[bin][1] * aec->sxd[bin][1]) /
+                 (aec->sx[bin] * aec->sd[bin] + 1e-10f);
+  }
+}
+
+// Points the AEC function-pointer hooks at the SSE2 implementations defined
+// in this file.
+void WebRtcAec_InitAec_SSE2(void) {
+  WebRtcAec_SubbandCoherence = SubbandCoherenceSSE2;
+  WebRtcAec_OverdriveAndSuppress = OverdriveAndSuppressSSE2;
+  WebRtcAec_FilterAdaptation = FilterAdaptationSSE2;
+  WebRtcAec_ScaleErrorSignal = ScaleErrorSignalSSE2;
+  WebRtcAec_FilterFar = FilterFarSSE2;
+}
diff --git a/third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_rdft.c b/third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_rdft.c
new file mode 100644
index 00000000..2c3cff2d
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_rdft.c
@@ -0,0 +1,589 @@
+/*
+ * http://www.kurims.kyoto-u.ac.jp/~ooura/fft.html
+ * Copyright Takuya OOURA, 1996-2001
+ *
+ * You may use, copy, modify and distribute this code for any purpose (include
+ * commercial use) and without fee. Please refer to this package when you modify
+ * this code.
+ *
+ * Changes by the WebRTC authors:
+ * - Trivial type modifications.
+ * - Minimal code subset to do rdft of length 128.
+ * - Optimizations because of known length.
+ *
+ * All changes are covered by the WebRTC license and IP grant:
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/modules/audio_processing/aec/aec_rdft.h"
+
+#include <math.h>
+
+#include "webrtc/system_wrappers/interface/cpu_features_wrapper.h"
+#include "webrtc/typedefs.h"
+
+// These tables used to be computed at run-time. For example, refer to:
+// https://code.google.com/p/webrtc/source/browse/trunk/webrtc/modules/audio_processing/aec/aec_rdft.c?r=6564
+// to see the initialization code.
+//
+// rdft_w usage within this file:
+//  - entries [0, 32) are read as consecutive (real, imaginary) factor pairs
+//    by cft1st_128_C and cftmdl_128_C (rdft_w[2] is also used on its own);
+//  - entries [32, 64) are read by rftfsub_128_C and rftbsub_128_C through the
+//    pointer `rdft_w + 32`.
+const float rdft_w[64] = {
+ 1.0000000000f, 0.0000000000f, 0.7071067691f, 0.7071067691f,
+ 0.9238795638f, 0.3826834559f, 0.3826834559f, 0.9238795638f,
+ 0.9807852507f, 0.1950903237f, 0.5555702448f, 0.8314695954f,
+ 0.8314695954f, 0.5555702448f, 0.1950903237f, 0.9807852507f,
+ 0.9951847196f, 0.0980171412f, 0.6343933344f, 0.7730104327f,
+ 0.8819212914f, 0.4713967443f, 0.2902846634f, 0.9569403529f,
+ 0.9569403529f, 0.2902846634f, 0.4713967443f, 0.8819212914f,
+ 0.7730104327f, 0.6343933344f, 0.0980171412f, 0.9951847196f,
+ 0.7071067691f, 0.4993977249f, 0.4975923598f, 0.4945882559f,
+ 0.4903926253f, 0.4850156307f, 0.4784701765f, 0.4707720280f,
+ 0.4619397819f, 0.4519946277f, 0.4409606457f, 0.4288643003f,
+ 0.4157347977f, 0.4016037583f, 0.3865052164f, 0.3704755902f,
+ 0.3535533845f, 0.3357794881f, 0.3171966672f, 0.2978496552f,
+ 0.2777851224f, 0.2570513785f, 0.2356983721f, 0.2137775421f,
+ 0.1913417280f, 0.1684449315f, 0.1451423317f, 0.1214900985f,
+ 0.0975451618f, 0.0733652338f, 0.0490085706f, 0.0245338380f,
+};
+// Third factor (wk3r, wk3i) pairs for the C butterflies. cft1st_128_C and
+// cftmdl_128_C read entries [k1] and [k1 + 1] of rdft_wk3ri_first for the
+// first group of each loop iteration and the same indices of
+// rdft_wk3ri_second for the second group.
+const float rdft_wk3ri_first[16] = {
+ 1.000000000f, 0.000000000f, 0.382683456f, 0.923879564f,
+ 0.831469536f, 0.555570245f, -0.195090353f, 0.980785251f,
+ 0.956940353f, 0.290284693f, 0.098017156f, 0.995184720f,
+ 0.634393334f, 0.773010492f, -0.471396863f, 0.881921172f,
+};
+const float rdft_wk3ri_second[16] = {
+ -0.707106769f, 0.707106769f, -0.923879564f, -0.382683456f,
+ -0.980785251f, 0.195090353f, -0.555570245f, -0.831469536f,
+ -0.881921172f, 0.471396863f, -0.773010492f, -0.634393334f,
+ -0.995184720f, -0.098017156f, -0.290284693f, -0.956940353f,
+};
+// The tables below are not referenced by any of the C routines in this file;
+// aec_rdft.h describes them as constants used by the SSE2 and NEON paths.
+// They are declared with ALIGN16_BEG/ALIGN16_END (presumably 16-byte
+// alignment for aligned SIMD loads -- confirm against the macro definition).
+// Layout visible in the data: in the *r tables every value is stored twice in
+// a row; in the *i tables every value is stored as a pair with its negation.
+ALIGN16_BEG const float ALIGN16_END rdft_wk1r[32] = {
+ 1.000000000f, 1.000000000f, 0.707106769f, 0.707106769f,
+ 0.923879564f, 0.923879564f, 0.382683456f, 0.382683456f,
+ 0.980785251f, 0.980785251f, 0.555570245f, 0.555570245f,
+ 0.831469595f, 0.831469595f, 0.195090324f, 0.195090324f,
+ 0.995184720f, 0.995184720f, 0.634393334f, 0.634393334f,
+ 0.881921291f, 0.881921291f, 0.290284663f, 0.290284663f,
+ 0.956940353f, 0.956940353f, 0.471396744f, 0.471396744f,
+ 0.773010433f, 0.773010433f, 0.098017141f, 0.098017141f,
+};
+ALIGN16_BEG const float ALIGN16_END rdft_wk2r[32] = {
+ 1.000000000f, 1.000000000f, -0.000000000f, -0.000000000f,
+ 0.707106769f, 0.707106769f, -0.707106769f, -0.707106769f,
+ 0.923879564f, 0.923879564f, -0.382683456f, -0.382683456f,
+ 0.382683456f, 0.382683456f, -0.923879564f, -0.923879564f,
+ 0.980785251f, 0.980785251f, -0.195090324f, -0.195090324f,
+ 0.555570245f, 0.555570245f, -0.831469595f, -0.831469595f,
+ 0.831469595f, 0.831469595f, -0.555570245f, -0.555570245f,
+ 0.195090324f, 0.195090324f, -0.980785251f, -0.980785251f,
+};
+ALIGN16_BEG const float ALIGN16_END rdft_wk3r[32] = {
+ 1.000000000f, 1.000000000f, -0.707106769f, -0.707106769f,
+ 0.382683456f, 0.382683456f, -0.923879564f, -0.923879564f,
+ 0.831469536f, 0.831469536f, -0.980785251f, -0.980785251f,
+ -0.195090353f, -0.195090353f, -0.555570245f, -0.555570245f,
+ 0.956940353f, 0.956940353f, -0.881921172f, -0.881921172f,
+ 0.098017156f, 0.098017156f, -0.773010492f, -0.773010492f,
+ 0.634393334f, 0.634393334f, -0.995184720f, -0.995184720f,
+ -0.471396863f, -0.471396863f, -0.290284693f, -0.290284693f,
+};
+ALIGN16_BEG const float ALIGN16_END rdft_wk1i[32] = {
+ -0.000000000f, 0.000000000f, -0.707106769f, 0.707106769f,
+ -0.382683456f, 0.382683456f, -0.923879564f, 0.923879564f,
+ -0.195090324f, 0.195090324f, -0.831469595f, 0.831469595f,
+ -0.555570245f, 0.555570245f, -0.980785251f, 0.980785251f,
+ -0.098017141f, 0.098017141f, -0.773010433f, 0.773010433f,
+ -0.471396744f, 0.471396744f, -0.956940353f, 0.956940353f,
+ -0.290284663f, 0.290284663f, -0.881921291f, 0.881921291f,
+ -0.634393334f, 0.634393334f, -0.995184720f, 0.995184720f,
+};
+ALIGN16_BEG const float ALIGN16_END rdft_wk2i[32] = {
+ -0.000000000f, 0.000000000f, -1.000000000f, 1.000000000f,
+ -0.707106769f, 0.707106769f, -0.707106769f, 0.707106769f,
+ -0.382683456f, 0.382683456f, -0.923879564f, 0.923879564f,
+ -0.923879564f, 0.923879564f, -0.382683456f, 0.382683456f,
+ -0.195090324f, 0.195090324f, -0.980785251f, 0.980785251f,
+ -0.831469595f, 0.831469595f, -0.555570245f, 0.555570245f,
+ -0.555570245f, 0.555570245f, -0.831469595f, 0.831469595f,
+ -0.980785251f, 0.980785251f, -0.195090324f, 0.195090324f,
+};
+ALIGN16_BEG const float ALIGN16_END rdft_wk3i[32] = {
+ -0.000000000f, 0.000000000f, -0.707106769f, 0.707106769f,
+ -0.923879564f, 0.923879564f, 0.382683456f, -0.382683456f,
+ -0.555570245f, 0.555570245f, -0.195090353f, 0.195090353f,
+ -0.980785251f, 0.980785251f, 0.831469536f, -0.831469536f,
+ -0.290284693f, 0.290284693f, -0.471396863f, 0.471396863f,
+ -0.995184720f, 0.995184720f, 0.634393334f, -0.634393334f,
+ -0.773010492f, 0.773010492f, 0.098017156f, -0.098017156f,
+ -0.881921172f, 0.881921172f, 0.956940353f, -0.956940353f,
+};
+// Four-entry table holding 0.707106769f three times followed by its negation.
+ALIGN16_BEG const float ALIGN16_END cftmdl_wk1r[4] = {
+ 0.707106769f, 0.707106769f, 0.707106769f, -0.707106769f,
+};
+
+// Reorders, in place, the 64 complex values stored as interleaved
+// (real, imaginary) floats in the 128-float buffer |a|. The value at complex
+// index c is exchanged with the one at the index obtained by reversing the
+// six bits of c; 28 exchanges are performed in total.
+static void bitrv2_128_C(float* a) {
+  /*
+    The following have been attempted but were no faster:
+      (a) Storing the swap indexes in a LUT (index calculations are done
+          for 'free' while waiting on memory/L1).
+      (b) Consolidating the load/store of two consecutive floats into a 64 bit
+          integer (execution is memory/L1 bound).
+      (c) Using a mix of floats and 64 bit integers to maximize register
+          utilization (execution is memory/L1 bound).
+      (d) Replacing ip[i] by ((k<<31)>>25) + ((k >> 1)<<5).
+      (e) Hard-coding the offsets to completely eliminate index
+          calculations.
+  */
+
+  static const int ip[4] = {0, 64, 32, 96};
+  // Offsets applied to the two positions for the four exchanges done per
+  // (j, k) combination.
+  static const unsigned int kFirstStep[4] = {0, 8, 16, 24};
+  static const unsigned int kSecondStep[4] = {0, 16, 8, 24};
+
+  unsigned int j, k, step, p, q;
+  float re, im;
+
+  for (k = 0; k < 4; k++) {
+    for (j = 0; j < k; j++) {
+      for (step = 0; step < 4; step++) {
+        p = 2 * j + ip[k] + kFirstStep[step];
+        q = 2 * k + ip[j] + kSecondStep[step];
+        re = a[p + 0];
+        im = a[p + 1];
+        a[p + 0] = a[q + 0];
+        a[p + 1] = a[q + 1];
+        a[q + 0] = re;
+        a[q + 1] = im;
+      }
+    }
+    // One extra exchange per k, between two positions eight floats apart.
+    p = 2 * k + 8 + ip[k];
+    q = p + 8;
+    re = a[p + 0];
+    im = a[p + 1];
+    a[p + 0] = a[q + 0];
+    a[p + 1] = a[q + 1];
+    a[q + 0] = re;
+    a[q + 1] = im;
+  }
+}
+
+// C implementation of the pass reached through the cft1st_128 pointer (the
+// first pass invoked by cftfsub_128_C / cftbsub_128_C).
+// Works in place on the 128-float buffer |a| (interleaved real/imaginary
+// values). Each 16-float block is handled as two groups of four consecutive
+// complex values; every group is combined with sums/differences and then
+// scaled by factors read from rdft_w, rdft_wk3ri_first and rdft_wk3ri_second.
+static void cft1st_128_C(float* a) {
+ const int n = 128;
+ int j, k1, k2;
+ float wk1r, wk1i, wk2r, wk2i, wk3r, wk3i;
+ float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
+
+ // The processing of the first set of elements was simplified in C to avoid
+ // some operations (multiplication by zero or one, addition of two elements
+ // multiplied by the same weight, ...).
+ // Group a[0..7]: no multiplications are needed.
+ x0r = a[0] + a[2];
+ x0i = a[1] + a[3];
+ x1r = a[0] - a[2];
+ x1i = a[1] - a[3];
+ x2r = a[4] + a[6];
+ x2i = a[5] + a[7];
+ x3r = a[4] - a[6];
+ x3i = a[5] - a[7];
+ a[0] = x0r + x2r;
+ a[1] = x0i + x2i;
+ a[4] = x0r - x2r;
+ a[5] = x0i - x2i;
+ a[2] = x1r - x3i;
+ a[3] = x1i + x3r;
+ a[6] = x1r + x3i;
+ a[7] = x1i - x3r;
+ // Group a[8..15]: only the single factor rdft_w[2] is needed.
+ wk1r = rdft_w[2];
+ x0r = a[8] + a[10];
+ x0i = a[9] + a[11];
+ x1r = a[8] - a[10];
+ x1i = a[9] - a[11];
+ x2r = a[12] + a[14];
+ x2i = a[13] + a[15];
+ x3r = a[12] - a[14];
+ x3i = a[13] - a[15];
+ a[8] = x0r + x2r;
+ a[9] = x0i + x2i;
+ a[12] = x2i - x0i;
+ a[13] = x0r - x2r;
+ x0r = x1r - x3i;
+ x0i = x1i + x3r;
+ a[10] = wk1r * (x0r - x0i);
+ a[11] = wk1r * (x0r + x0i);
+ x0r = x3i + x1r;
+ x0i = x3r - x1i;
+ a[14] = wk1r * (x0i - x0r);
+ a[15] = wk1r * (x0i + x0r);
+ // Remaining seven 16-float blocks (j = 16, 32, ..., 112); k1 advances by
+ // two table entries per block.
+ k1 = 0;
+ for (j = 16; j < n; j += 16) {
+ k1 += 2;
+ k2 = 2 * k1;
+ wk2r = rdft_w[k1 + 0];
+ wk2i = rdft_w[k1 + 1];
+ wk1r = rdft_w[k2 + 0];
+ wk1i = rdft_w[k2 + 1];
+ wk3r = rdft_wk3ri_first[k1 + 0];
+ wk3i = rdft_wk3ri_first[k1 + 1];
+ // First group of the block: a[j .. j + 7].
+ x0r = a[j + 0] + a[j + 2];
+ x0i = a[j + 1] + a[j + 3];
+ x1r = a[j + 0] - a[j + 2];
+ x1i = a[j + 1] - a[j + 3];
+ x2r = a[j + 4] + a[j + 6];
+ x2i = a[j + 5] + a[j + 7];
+ x3r = a[j + 4] - a[j + 6];
+ x3i = a[j + 5] - a[j + 7];
+ a[j + 0] = x0r + x2r;
+ a[j + 1] = x0i + x2i;
+ x0r -= x2r;
+ x0i -= x2i;
+ // Complex multiply of (x0r, x0i) by (wk2r, wk2i).
+ a[j + 4] = wk2r * x0r - wk2i * x0i;
+ a[j + 5] = wk2r * x0i + wk2i * x0r;
+ x0r = x1r - x3i;
+ x0i = x1i + x3r;
+ a[j + 2] = wk1r * x0r - wk1i * x0i;
+ a[j + 3] = wk1r * x0i + wk1i * x0r;
+ x0r = x1r + x3i;
+ x0i = x1i - x3r;
+ a[j + 6] = wk3r * x0r - wk3i * x0i;
+ a[j + 7] = wk3r * x0i + wk3i * x0r;
+ // Second group of the block: a[j + 8 .. j + 15]. It uses the next rdft_w
+ // pair for wk1 and rdft_wk3ri_second for wk3.
+ wk1r = rdft_w[k2 + 2];
+ wk1i = rdft_w[k2 + 3];
+ wk3r = rdft_wk3ri_second[k1 + 0];
+ wk3i = rdft_wk3ri_second[k1 + 1];
+ x0r = a[j + 8] + a[j + 10];
+ x0i = a[j + 9] + a[j + 11];
+ x1r = a[j + 8] - a[j + 10];
+ x1i = a[j + 9] - a[j + 11];
+ x2r = a[j + 12] + a[j + 14];
+ x2i = a[j + 13] + a[j + 15];
+ x3r = a[j + 12] - a[j + 14];
+ x3i = a[j + 13] - a[j + 15];
+ a[j + 8] = x0r + x2r;
+ a[j + 9] = x0i + x2i;
+ x0r -= x2r;
+ x0i -= x2i;
+ // Complex multiply of (x0r, x0i) by (-wk2i, wk2r).
+ a[j + 12] = -wk2i * x0r - wk2r * x0i;
+ a[j + 13] = -wk2i * x0i + wk2r * x0r;
+ x0r = x1r - x3i;
+ x0i = x1i + x3r;
+ a[j + 10] = wk1r * x0r - wk1i * x0i;
+ a[j + 11] = wk1r * x0i + wk1i * x0r;
+ x0r = x1r + x3i;
+ x0i = x1i - x3r;
+ a[j + 14] = wk3r * x0r - wk3i * x0i;
+ a[j + 15] = wk3r * x0i + wk3i * x0r;
+ }
+}
+
+// C implementation of the pass reached through the cftmdl_128 pointer (the
+// second pass invoked by cftfsub_128_C / cftbsub_128_C).
+// Works in place on the 128-float buffer |a|. The buffer is treated as four
+// 32-float blocks; inside each block, the complex values at offsets j0,
+// j0 + 8, j0 + 16 and j0 + 24 (j0 stepping over 4 complex positions) are
+// combined and scaled by factors from rdft_w, rdft_wk3ri_first and
+// rdft_wk3ri_second.
+static void cftmdl_128_C(float* a) {
+ const int l = 8;
+ const int n = 128;
+ const int m = 32;
+ int j0, j1, j2, j3, k, k1, k2, m2;
+ float wk1r, wk1i, wk2r, wk2i, wk3r, wk3i;
+ float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
+
+ // Block a[0..31]: no multiplications are needed.
+ for (j0 = 0; j0 < l; j0 += 2) {
+ j1 = j0 + 8;
+ j2 = j0 + 16;
+ j3 = j0 + 24;
+ x0r = a[j0 + 0] + a[j1 + 0];
+ x0i = a[j0 + 1] + a[j1 + 1];
+ x1r = a[j0 + 0] - a[j1 + 0];
+ x1i = a[j0 + 1] - a[j1 + 1];
+ x2r = a[j2 + 0] + a[j3 + 0];
+ x2i = a[j2 + 1] + a[j3 + 1];
+ x3r = a[j2 + 0] - a[j3 + 0];
+ x3i = a[j2 + 1] - a[j3 + 1];
+ a[j0 + 0] = x0r + x2r;
+ a[j0 + 1] = x0i + x2i;
+ a[j2 + 0] = x0r - x2r;
+ a[j2 + 1] = x0i - x2i;
+ a[j1 + 0] = x1r - x3i;
+ a[j1 + 1] = x1i + x3r;
+ a[j3 + 0] = x1r + x3i;
+ a[j3 + 1] = x1i - x3r;
+ }
+ // Block a[32..63]: only the single factor rdft_w[2] is needed.
+ wk1r = rdft_w[2];
+ for (j0 = m; j0 < l + m; j0 += 2) {
+ j1 = j0 + 8;
+ j2 = j0 + 16;
+ j3 = j0 + 24;
+ x0r = a[j0 + 0] + a[j1 + 0];
+ x0i = a[j0 + 1] + a[j1 + 1];
+ x1r = a[j0 + 0] - a[j1 + 0];
+ x1i = a[j0 + 1] - a[j1 + 1];
+ x2r = a[j2 + 0] + a[j3 + 0];
+ x2i = a[j2 + 1] + a[j3 + 1];
+ x3r = a[j2 + 0] - a[j3 + 0];
+ x3i = a[j2 + 1] - a[j3 + 1];
+ a[j0 + 0] = x0r + x2r;
+ a[j0 + 1] = x0i + x2i;
+ a[j2 + 0] = x2i - x0i;
+ a[j2 + 1] = x0r - x2r;
+ x0r = x1r - x3i;
+ x0i = x1i + x3r;
+ a[j1 + 0] = wk1r * (x0r - x0i);
+ a[j1 + 1] = wk1r * (x0r + x0i);
+ x0r = x3i + x1r;
+ x0i = x3r - x1i;
+ a[j3 + 0] = wk1r * (x0i - x0r);
+ a[j3 + 1] = wk1r * (x0i + x0r);
+ }
+ // Remaining blocks. With m2 == 64 and n == 128 the outer loop body runs
+ // exactly once (k == 64), covering a[64..95] and then a[96..127].
+ k1 = 0;
+ m2 = 2 * m;
+ for (k = m2; k < n; k += m2) {
+ k1 += 2;
+ k2 = 2 * k1;
+ wk2r = rdft_w[k1 + 0];
+ wk2i = rdft_w[k1 + 1];
+ wk1r = rdft_w[k2 + 0];
+ wk1i = rdft_w[k2 + 1];
+ wk3r = rdft_wk3ri_first[k1 + 0];
+ wk3i = rdft_wk3ri_first[k1 + 1];
+ // Block starting at k.
+ for (j0 = k; j0 < l + k; j0 += 2) {
+ j1 = j0 + 8;
+ j2 = j0 + 16;
+ j3 = j0 + 24;
+ x0r = a[j0 + 0] + a[j1 + 0];
+ x0i = a[j0 + 1] + a[j1 + 1];
+ x1r = a[j0 + 0] - a[j1 + 0];
+ x1i = a[j0 + 1] - a[j1 + 1];
+ x2r = a[j2 + 0] + a[j3 + 0];
+ x2i = a[j2 + 1] + a[j3 + 1];
+ x3r = a[j2 + 0] - a[j3 + 0];
+ x3i = a[j2 + 1] - a[j3 + 1];
+ a[j0 + 0] = x0r + x2r;
+ a[j0 + 1] = x0i + x2i;
+ x0r -= x2r;
+ x0i -= x2i;
+ // Complex multiply of (x0r, x0i) by (wk2r, wk2i).
+ a[j2 + 0] = wk2r * x0r - wk2i * x0i;
+ a[j2 + 1] = wk2r * x0i + wk2i * x0r;
+ x0r = x1r - x3i;
+ x0i = x1i + x3r;
+ a[j1 + 0] = wk1r * x0r - wk1i * x0i;
+ a[j1 + 1] = wk1r * x0i + wk1i * x0r;
+ x0r = x1r + x3i;
+ x0i = x1i - x3r;
+ a[j3 + 0] = wk3r * x0r - wk3i * x0i;
+ a[j3 + 1] = wk3r * x0i + wk3i * x0r;
+ }
+ // Block starting at k + m: next rdft_w pair for wk1, rdft_wk3ri_second for
+ // wk3, and wk2 applied as (-wk2i, wk2r).
+ wk1r = rdft_w[k2 + 2];
+ wk1i = rdft_w[k2 + 3];
+ wk3r = rdft_wk3ri_second[k1 + 0];
+ wk3i = rdft_wk3ri_second[k1 + 1];
+ for (j0 = k + m; j0 < l + (k + m); j0 += 2) {
+ j1 = j0 + 8;
+ j2 = j0 + 16;
+ j3 = j0 + 24;
+ x0r = a[j0 + 0] + a[j1 + 0];
+ x0i = a[j0 + 1] + a[j1 + 1];
+ x1r = a[j0 + 0] - a[j1 + 0];
+ x1i = a[j0 + 1] - a[j1 + 1];
+ x2r = a[j2 + 0] + a[j3 + 0];
+ x2i = a[j2 + 1] + a[j3 + 1];
+ x3r = a[j2 + 0] - a[j3 + 0];
+ x3i = a[j2 + 1] - a[j3 + 1];
+ a[j0 + 0] = x0r + x2r;
+ a[j0 + 1] = x0i + x2i;
+ x0r -= x2r;
+ x0i -= x2i;
+ a[j2 + 0] = -wk2i * x0r - wk2r * x0i;
+ a[j2 + 1] = -wk2i * x0i + wk2r * x0r;
+ x0r = x1r - x3i;
+ x0i = x1i + x3r;
+ a[j1 + 0] = wk1r * x0r - wk1i * x0i;
+ a[j1 + 1] = wk1r * x0i + wk1i * x0r;
+ x0r = x1r + x3i;
+ x0i = x1i - x3r;
+ a[j3 + 0] = wk3r * x0r - wk3i * x0i;
+ a[j3 + 1] = wk3r * x0i + wk3i * x0r;
+ }
+ }
+}
+
+// C implementation reached through the cftfsub_128 pointer; it is the complex
+// transform step of aec_rdft_forward_128(). Works in place on the 128-float
+// buffer |a|: first runs the passes behind the cft1st_128 and cftmdl_128
+// pointers, then combines, for each of the 16 complex positions in the first
+// 32 floats, the four complex values spaced 32 floats apart.
+static void cftfsub_128_C(float* a) {
+  enum { kStride = 32 };
+  float p_re, p_im, q_re, q_im, r_re, r_im, s_re, s_im;
+  int i0;
+
+  cft1st_128(a);
+  cftmdl_128(a);
+
+  for (i0 = 0; i0 < kStride; i0 += 2) {
+    const int i1 = i0 + kStride;
+    const int i2 = i1 + kStride;
+    const int i3 = i2 + kStride;
+
+    // Sum and difference of the values at i0/i1 and at i2/i3.
+    p_re = a[i0] + a[i1];
+    p_im = a[i0 + 1] + a[i1 + 1];
+    q_re = a[i0] - a[i1];
+    q_im = a[i0 + 1] - a[i1 + 1];
+    r_re = a[i2] + a[i3];
+    r_im = a[i2 + 1] + a[i3 + 1];
+    s_re = a[i2] - a[i3];
+    s_im = a[i2 + 1] - a[i3 + 1];
+
+    a[i0] = p_re + r_re;
+    a[i0 + 1] = p_im + r_im;
+    a[i1] = q_re - s_im;
+    a[i1 + 1] = q_im + s_re;
+    a[i2] = p_re - r_re;
+    a[i2 + 1] = p_im - r_im;
+    a[i3] = q_re + s_im;
+    a[i3 + 1] = q_im - s_re;
+  }
+}
+
+// C implementation reached through the cftbsub_128 pointer; it is the complex
+// transform step of aec_rdft_inverse_128(). Works in place on the 128-float
+// buffer |a|: first runs the passes behind the cft1st_128 and cftmdl_128
+// pointers, then combines the four complex values spaced 32 floats apart.
+// Compared with cftfsub_128_C, the odd-indexed (imaginary) inputs at the
+// first two positions enter with flipped sign and the signs of several
+// output terms differ accordingly.
+static void cftbsub_128_C(float* a) {
+  enum { kStride = 32 };
+  float p_re, p_im, q_re, q_im, r_re, r_im, s_re, s_im;
+  int i0;
+
+  cft1st_128(a);
+  cftmdl_128(a);
+
+  for (i0 = 0; i0 < kStride; i0 += 2) {
+    const int i1 = i0 + kStride;
+    const int i2 = i1 + kStride;
+    const int i3 = i2 + kStride;
+
+    p_re = a[i0] + a[i1];
+    p_im = -a[i0 + 1] - a[i1 + 1];
+    q_re = a[i0] - a[i1];
+    q_im = -a[i0 + 1] + a[i1 + 1];
+    r_re = a[i2] + a[i3];
+    r_im = a[i2 + 1] + a[i3 + 1];
+    s_re = a[i2] - a[i3];
+    s_im = a[i2 + 1] - a[i3 + 1];
+
+    a[i0] = p_re + r_re;
+    a[i0 + 1] = p_im - r_im;
+    a[i1] = q_re - s_im;
+    a[i1 + 1] = q_im - s_re;
+    a[i2] = p_re - r_re;
+    a[i2 + 1] = p_im + r_im;
+    a[i3] = q_re + s_im;
+    a[i3 + 1] = q_im + s_re;
+  }
+}
+
+// C implementation reached through the rftfsub_128 pointer; called by
+// aec_rdft_forward_128() after the complex transform. For t = 1 .. 31 it
+// updates, in place, the complex value at a[2t] together with its mirror at
+// a[128 - 2t], using factors from the upper half of rdft_w.
+// a[0], a[1], a[64] and a[65] are left untouched.
+static void rftfsub_128_C(float* a) {
+  const float* const table = rdft_w + 32;
+  int t;
+
+  for (t = 1; t < 32; ++t) {
+    float* const lo = a + 2 * t;
+    float* const hi = a + (128 - 2 * t);
+    const float w_re = 0.5f - table[32 - t];
+    const float w_im = table[t];
+    const float d_re = lo[0] - hi[0];
+    const float s_im = lo[1] + hi[1];
+    const float corr_re = w_re * d_re - w_im * s_im;
+    const float corr_im = w_re * s_im + w_im * d_re;
+
+    lo[0] -= corr_re;
+    lo[1] -= corr_im;
+    hi[0] += corr_re;
+    hi[1] -= corr_im;
+  }
+}
+
+// C implementation reached through the rftbsub_128 pointer; called by
+// aec_rdft_inverse_128() before the complex transform. Negates a[1] and
+// a[65], and for t = 1 .. 31 updates, in place, the complex value at a[2t]
+// together with its mirror at a[128 - 2t], using factors from the upper half
+// of rdft_w.
+static void rftbsub_128_C(float* a) {
+  const float* const table = rdft_w + 32;
+  int t;
+
+  a[1] = -a[1];
+
+  for (t = 1; t < 32; ++t) {
+    float* const lo = a + 2 * t;
+    float* const hi = a + (128 - 2 * t);
+    const float w_re = 0.5f - table[32 - t];
+    const float w_im = table[t];
+    const float d_re = lo[0] - hi[0];
+    const float s_im = lo[1] + hi[1];
+    const float corr_re = w_re * d_re + w_im * s_im;
+    const float corr_im = w_re * s_im - w_im * d_re;
+
+    lo[0] = lo[0] - corr_re;
+    lo[1] = corr_im - lo[1];
+    hi[0] = corr_re + hi[0];
+    hi[1] = corr_im - hi[1];
+  }
+
+  a[65] = -a[65];
+}
+
+// Forward transform of the 128-float buffer |a|, computed in place.
+// Runs the reordering, complex-transform and real-transform steps through the
+// bitrv2_128, cftfsub_128 and rftfsub_128 pointers, then replaces a[0] and
+// a[1] with their sum and difference respectively.
+// The pointers are assigned by aec_rdft_init(), which therefore has to be
+// called before this function.
+void aec_rdft_forward_128(float* a) {
+  float diff;
+
+  bitrv2_128(a);
+  cftfsub_128(a);
+  rftfsub_128(a);
+
+  diff = a[0] - a[1];
+  a[0] = a[0] + a[1];
+  a[1] = diff;
+}
+
+// Inverse transform of the 128-float buffer |a|, computed in place.
+// First rewrites a[1] as half the difference of a[0] and a[1] and subtracts
+// that value from a[0]; then runs the steps behind the rftbsub_128,
+// bitrv2_128 and cftbsub_128 pointers, in that order.
+// The pointers are assigned by aec_rdft_init(), which therefore has to be
+// called before this function.
+void aec_rdft_inverse_128(float* a) {
+  a[1] = 0.5f * (a[0] - a[1]);
+  a[0] = a[0] - a[1];
+
+  rftbsub_128(a);
+  bitrv2_128(a);
+  cftbsub_128(a);
+}
+
+// Code path selection: one dispatch pointer per transform step.
+// aec_rdft_init() assigns the C implementations to all of them and then calls
+// the architecture-specific init functions enabled for the build.
+RftSub128 bitrv2_128;
+RftSub128 cft1st_128;
+RftSub128 cftmdl_128;
+RftSub128 cftfsub_128;
+RftSub128 cftbsub_128;
+RftSub128 rftfsub_128;
+RftSub128 rftbsub_128;
+
+// Assigns every transform dispatch pointer.
+// The portable C routines are installed first. Afterwards the
+// architecture-specific init functions compiled in for this target are
+// called: SSE2 when WebRtc_GetCPUInfo(kSSE2) reports support, MIPS when
+// MIPS_FPU_LE is defined, and NEON either unconditionally (WEBRTC_HAS_NEON)
+// or after a run-time feature check (WEBRTC_DETECT_NEON).
+void aec_rdft_init(void) {
+  bitrv2_128 = bitrv2_128_C;
+  cft1st_128 = cft1st_128_C;
+  cftmdl_128 = cftmdl_128_C;
+  cftfsub_128 = cftfsub_128_C;
+  cftbsub_128 = cftbsub_128_C;
+  rftfsub_128 = rftfsub_128_C;
+  rftbsub_128 = rftbsub_128_C;
+
+#ifdef WEBRTC_ARCH_X86_FAMILY
+  if (WebRtc_GetCPUInfo(kSSE2)) {
+    aec_rdft_init_sse2();
+  }
+#endif
+
+#ifdef MIPS_FPU_LE
+  aec_rdft_init_mips();
+#endif
+
+#if defined(WEBRTC_HAS_NEON)
+  aec_rdft_init_neon();
+#elif defined(WEBRTC_DETECT_NEON)
+  if (WebRtc_GetCPUFeaturesARM() & kCPUFeatureNEON) {
+    aec_rdft_init_neon();
+  }
+#endif
+}
diff --git a/third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_rdft.h b/third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_rdft.h
new file mode 100644
index 00000000..18eb7a5c
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_rdft.h
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AEC_MAIN_SOURCE_AEC_RDFT_H_
+#define WEBRTC_MODULES_AUDIO_PROCESSING_AEC_MAIN_SOURCE_AEC_RDFT_H_
+
+#include "webrtc/modules/audio_processing/aec/aec_common.h"
+
+// These intrinsics were unavailable before VS 2008.
+// TODO(andrew): move to a common file.
+// Replacement definitions, compiled only for MSVC older than version 1500:
+// each one reinterprets the 128-bit argument as the other vector type through
+// a pointer cast, without converting any values.
+#if defined(_MSC_VER) && _MSC_VER < 1500
+#include <emmintrin.h>
+static __inline __m128 _mm_castsi128_ps(__m128i a) { return *(__m128*)&a; }
+static __inline __m128i _mm_castps_si128(__m128 a) { return *(__m128i*)&a; }
+#endif
+
+// Constants shared by all paths (C, SSE2, NEON).
+extern const float rdft_w[64];
+// Constants used by the C path.
+extern const float rdft_wk3ri_first[16];
+extern const float rdft_wk3ri_second[16];
+// Constants used by SSE2 and NEON but initialized in the C path.
+extern ALIGN16_BEG const float ALIGN16_END rdft_wk1r[32];
+extern ALIGN16_BEG const float ALIGN16_END rdft_wk2r[32];
+extern ALIGN16_BEG const float ALIGN16_END rdft_wk3r[32];
+extern ALIGN16_BEG const float ALIGN16_END rdft_wk1i[32];
+extern ALIGN16_BEG const float ALIGN16_END rdft_wk2i[32];
+extern ALIGN16_BEG const float ALIGN16_END rdft_wk3i[32];
+extern ALIGN16_BEG const float ALIGN16_END cftmdl_wk1r[4];
+
+// code path selection function pointers
+// Every transform step takes the float buffer |a| and updates it in place.
+// The pointers are assigned by aec_rdft_init().
+typedef void (*RftSub128)(float* a);
+extern RftSub128 rftfsub_128;
+extern RftSub128 rftbsub_128;
+extern RftSub128 cft1st_128;
+extern RftSub128 cftmdl_128;
+extern RftSub128 cftfsub_128;
+extern RftSub128 cftbsub_128;
+extern RftSub128 bitrv2_128;
+
+// entry points
+// aec_rdft_init() assigns the function pointers above and must run before
+// aec_rdft_forward_128() / aec_rdft_inverse_128(), which call through them.
+// Both transforms operate in place on |a|.
+void aec_rdft_init(void);
+void aec_rdft_init_sse2(void);
+void aec_rdft_forward_128(float* a);
+void aec_rdft_inverse_128(float* a);
+
+// Architecture-specific init functions, declared only when the matching
+// build macro is defined.
+#if defined(MIPS_FPU_LE)
+void aec_rdft_init_mips(void);
+#endif
+#if defined(WEBRTC_DETECT_NEON) || defined(WEBRTC_HAS_NEON)
+void aec_rdft_init_neon(void);
+#endif
+
+#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AEC_MAIN_SOURCE_AEC_RDFT_H_
diff --git a/third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_rdft_mips.c b/third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_rdft_mips.c
new file mode 100644
index 00000000..7e64e657
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_rdft_mips.c
@@ -0,0 +1,1187 @@
+/*
+ * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/modules/audio_processing/aec/aec_rdft.h"
+#include "webrtc/typedefs.h"
+
+// MIPS variant of the reordering step for the 128-float buffer |a|
+// (64 complex values stored as interleaved real/imaginary floats).
+// The 28 exchanges are written out as straight-line code with constant
+// indices; each one swaps the complex value at index c with the one at the
+// index obtained by reversing the six bits of c.
+static void bitrv2_128_mips(float* a) {
+  // n is 128
+  float xr, xi, yr, yi;
+
+// Exchanges the complex values stored at a[p], a[p + 1] and a[q], a[q + 1].
+#define BITRV2_128_SWAP(p, q) \
+  do {                        \
+    xr = a[(p)];              \
+    xi = a[(p) + 1];          \
+    yr = a[(q)];              \
+    yi = a[(q) + 1];          \
+    a[(p)] = yr;              \
+    a[(p) + 1] = yi;          \
+    a[(q)] = xr;              \
+    a[(q) + 1] = xi;          \
+  } while (0)
+
+  BITRV2_128_SWAP(8, 16);
+
+  BITRV2_128_SWAP(64, 2);
+  BITRV2_128_SWAP(72, 18);
+  BITRV2_128_SWAP(80, 10);
+  BITRV2_128_SWAP(88, 26);
+  BITRV2_128_SWAP(74, 82);
+
+  BITRV2_128_SWAP(32, 4);
+  BITRV2_128_SWAP(40, 20);
+  BITRV2_128_SWAP(48, 12);
+  BITRV2_128_SWAP(56, 28);
+  BITRV2_128_SWAP(34, 68);
+  BITRV2_128_SWAP(42, 84);
+  BITRV2_128_SWAP(50, 76);
+  BITRV2_128_SWAP(58, 92);
+  BITRV2_128_SWAP(44, 52);
+
+  BITRV2_128_SWAP(96, 6);
+  BITRV2_128_SWAP(104, 22);
+  BITRV2_128_SWAP(112, 14);
+  BITRV2_128_SWAP(120, 30);
+  BITRV2_128_SWAP(98, 70);
+  BITRV2_128_SWAP(106, 86);
+  BITRV2_128_SWAP(114, 78);
+  BITRV2_128_SWAP(122, 94);
+  BITRV2_128_SWAP(100, 38);
+  BITRV2_128_SWAP(108, 54);
+  BITRV2_128_SWAP(116, 46);
+  BITRV2_128_SWAP(124, 62);
+  BITRV2_128_SWAP(110, 118);
+
+#undef BITRV2_128_SWAP
+}
+
+static void cft1st_128_mips(float* a) { // In-place add/sub butterfly pass over a[0..127] in MIPS FPU inline assembly; multipliers are read from rdft_w, rdft_wk3ri_first and rdft_wk3ri_second.
+ float f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, f10, f11, f12, f13, f14; // scratch values bound to FPU registers by the asm below
+ int a_ptr, p1_rdft, p2_rdft, count; // three address cursors held in int operands plus the loop counter; NOTE(review): int-sized addresses assume a 32-bit target - confirm
+ const float* first = rdft_wk3ri_first; // cursor advanced by the loop (read-write asm operand)
+ const float* second = rdft_wk3ri_second; // cursor advanced by the loop (read-write asm operand)
+
+ __asm __volatile (
+ ".set push \n\t"
+ ".set noreorder \n\t" // keep the instruction order as written; the instruction after bgtz sits in its delay slot
+ // first 8 floats, a[0..7]: add/sub butterflies only, no multiplies
+ "lwc1 %[f0], 0(%[a]) \n\t"
+ "lwc1 %[f1], 4(%[a]) \n\t"
+ "lwc1 %[f2], 8(%[a]) \n\t"
+ "lwc1 %[f3], 12(%[a]) \n\t"
+ "lwc1 %[f4], 16(%[a]) \n\t"
+ "lwc1 %[f5], 20(%[a]) \n\t"
+ "lwc1 %[f6], 24(%[a]) \n\t"
+ "lwc1 %[f7], 28(%[a]) \n\t"
+ "add.s %[f8], %[f0], %[f2] \n\t"
+ "sub.s %[f0], %[f0], %[f2] \n\t"
+ "add.s %[f2], %[f4], %[f6] \n\t"
+ "sub.s %[f4], %[f4], %[f6] \n\t"
+ "add.s %[f6], %[f1], %[f3] \n\t"
+ "sub.s %[f1], %[f1], %[f3] \n\t"
+ "add.s %[f3], %[f5], %[f7] \n\t"
+ "sub.s %[f5], %[f5], %[f7] \n\t"
+ "add.s %[f7], %[f8], %[f2] \n\t"
+ "sub.s %[f8], %[f8], %[f2] \n\t"
+ "sub.s %[f2], %[f1], %[f4] \n\t"
+ "add.s %[f1], %[f1], %[f4] \n\t"
+ "add.s %[f4], %[f6], %[f3] \n\t"
+ "sub.s %[f6], %[f6], %[f3] \n\t"
+ "sub.s %[f3], %[f0], %[f5] \n\t"
+ "add.s %[f0], %[f0], %[f5] \n\t"
+ "swc1 %[f7], 0(%[a]) \n\t"
+ "swc1 %[f8], 16(%[a]) \n\t"
+ "swc1 %[f2], 28(%[a]) \n\t"
+ "swc1 %[f1], 12(%[a]) \n\t"
+ "swc1 %[f4], 4(%[a]) \n\t"
+ "swc1 %[f6], 20(%[a]) \n\t"
+ "swc1 %[f3], 8(%[a]) \n\t"
+ "swc1 %[f0], 24(%[a]) \n\t"
+ // second 8 floats, a[8..15]: four of the results are scaled by f9 = rdft_w[2] before being stored
+ "lwc1 %[f0], 32(%[a]) \n\t"
+ "lwc1 %[f1], 36(%[a]) \n\t"
+ "lwc1 %[f2], 40(%[a]) \n\t"
+ "lwc1 %[f3], 44(%[a]) \n\t"
+ "lwc1 %[f4], 48(%[a]) \n\t"
+ "lwc1 %[f5], 52(%[a]) \n\t"
+ "lwc1 %[f6], 56(%[a]) \n\t"
+ "lwc1 %[f7], 60(%[a]) \n\t"
+ "add.s %[f8], %[f4], %[f6] \n\t"
+ "sub.s %[f4], %[f4], %[f6] \n\t"
+ "add.s %[f6], %[f1], %[f3] \n\t"
+ "sub.s %[f1], %[f1], %[f3] \n\t"
+ "add.s %[f3], %[f0], %[f2] \n\t"
+ "sub.s %[f0], %[f0], %[f2] \n\t"
+ "add.s %[f2], %[f5], %[f7] \n\t"
+ "sub.s %[f5], %[f5], %[f7] \n\t"
+ "add.s %[f7], %[f4], %[f1] \n\t"
+ "sub.s %[f4], %[f4], %[f1] \n\t"
+ "add.s %[f1], %[f3], %[f8] \n\t"
+ "sub.s %[f3], %[f3], %[f8] \n\t"
+ "sub.s %[f8], %[f0], %[f5] \n\t"
+ "add.s %[f0], %[f0], %[f5] \n\t"
+ "add.s %[f5], %[f6], %[f2] \n\t"
+ "sub.s %[f6], %[f2], %[f6] \n\t"
+ "lwc1 %[f9], 8(%[rdft_w]) \n\t" // f9 = rdft_w[2]; it stays live into the first loop iteration
+ "sub.s %[f2], %[f8], %[f7] \n\t"
+ "add.s %[f8], %[f8], %[f7] \n\t"
+ "sub.s %[f7], %[f4], %[f0] \n\t"
+ "add.s %[f4], %[f4], %[f0] \n\t"
+ // prepare for loop: a_ptr = &a[16], p1_rdft = &rdft_w[2], p2_rdft = &rdft_w[4], count = 7 iterations
+ "addiu %[a_ptr], %[a], 64 \n\t"
+ "addiu %[p1_rdft], %[rdft_w], 8 \n\t"
+ "addiu %[p2_rdft], %[rdft_w], 16 \n\t"
+ "addiu %[count], $zero, 7 \n\t"
+ // finish second 8: apply the f9 scale and store a[8..15]
+ "mul.s %[f2], %[f9], %[f2] \n\t"
+ "mul.s %[f8], %[f9], %[f8] \n\t"
+ "mul.s %[f7], %[f9], %[f7] \n\t"
+ "mul.s %[f4], %[f9], %[f4] \n\t"
+ "swc1 %[f1], 32(%[a]) \n\t"
+ "swc1 %[f3], 52(%[a]) \n\t"
+ "swc1 %[f5], 36(%[a]) \n\t"
+ "swc1 %[f6], 48(%[a]) \n\t"
+ "swc1 %[f2], 40(%[a]) \n\t"
+ "swc1 %[f8], 44(%[a]) \n\t"
+ "swc1 %[f7], 56(%[a]) \n\t"
+ "swc1 %[f4], 60(%[a]) \n\t"
+ // loop: each iteration processes 16 floats (64 bytes) at a_ptr, as two groups of 8
+ "1: \n\t"
+ "lwc1 %[f0], 0(%[a_ptr]) \n\t" // first group of 8: byte offsets 0..28 from a_ptr
+ "lwc1 %[f1], 4(%[a_ptr]) \n\t"
+ "lwc1 %[f2], 8(%[a_ptr]) \n\t"
+ "lwc1 %[f3], 12(%[a_ptr]) \n\t"
+ "lwc1 %[f4], 16(%[a_ptr]) \n\t"
+ "lwc1 %[f5], 20(%[a_ptr]) \n\t"
+ "lwc1 %[f6], 24(%[a_ptr]) \n\t"
+ "lwc1 %[f7], 28(%[a_ptr]) \n\t"
+ "add.s %[f8], %[f0], %[f2] \n\t"
+ "sub.s %[f0], %[f0], %[f2] \n\t"
+ "add.s %[f2], %[f4], %[f6] \n\t"
+ "sub.s %[f4], %[f4], %[f6] \n\t"
+ "add.s %[f6], %[f1], %[f3] \n\t"
+ "sub.s %[f1], %[f1], %[f3] \n\t"
+ "add.s %[f3], %[f5], %[f7] \n\t"
+ "sub.s %[f5], %[f5], %[f7] \n\t"
+ "lwc1 %[f10], 4(%[p1_rdft]) \n\t" // f10 = p1_rdft[1]; f9 already holds p1_rdft[0]
+ "lwc1 %[f11], 0(%[p2_rdft]) \n\t" // f11, f12 = p2_rdft[0], p2_rdft[1]
+ "lwc1 %[f12], 4(%[p2_rdft]) \n\t"
+ "lwc1 %[f13], 8(%[first]) \n\t" // f13, f14 = first[2], first[3]
+ "lwc1 %[f14], 12(%[first]) \n\t"
+ "add.s %[f7], %[f8], %[f2] \n\t"
+ "sub.s %[f8], %[f8], %[f2] \n\t"
+ "add.s %[f2], %[f6], %[f3] \n\t"
+ "sub.s %[f6], %[f6], %[f3] \n\t"
+ "add.s %[f3], %[f0], %[f5] \n\t"
+ "sub.s %[f0], %[f0], %[f5] \n\t"
+ "add.s %[f5], %[f1], %[f4] \n\t"
+ "sub.s %[f1], %[f1], %[f4] \n\t"
+ "swc1 %[f7], 0(%[a_ptr]) \n\t" // the first two outputs need no multiply
+ "swc1 %[f2], 4(%[a_ptr]) \n\t"
+ "mul.s %[f4], %[f9], %[f8] \n\t"
+#if defined(MIPS32_R2_LE) // variant using the madd.s / nmsub.s multiply-accumulate instructions
+ "mul.s %[f8], %[f10], %[f8] \n\t"
+ "mul.s %[f7], %[f11], %[f0] \n\t"
+ "mul.s %[f0], %[f12], %[f0] \n\t"
+ "mul.s %[f2], %[f13], %[f3] \n\t"
+ "mul.s %[f3], %[f14], %[f3] \n\t"
+ "nmsub.s %[f4], %[f4], %[f10], %[f6] \n\t"
+ "madd.s %[f8], %[f8], %[f9], %[f6] \n\t"
+ "nmsub.s %[f7], %[f7], %[f12], %[f5] \n\t"
+ "madd.s %[f0], %[f0], %[f11], %[f5] \n\t"
+ "nmsub.s %[f2], %[f2], %[f14], %[f1] \n\t"
+ "madd.s %[f3], %[f3], %[f13], %[f1] \n\t"
+#else // variant using separate mul.s / add.s / sub.s instructions
+ "mul.s %[f7], %[f10], %[f6] \n\t"
+ "mul.s %[f6], %[f9], %[f6] \n\t"
+ "mul.s %[f8], %[f10], %[f8] \n\t"
+ "mul.s %[f2], %[f11], %[f0] \n\t"
+ "mul.s %[f11], %[f11], %[f5] \n\t"
+ "mul.s %[f5], %[f12], %[f5] \n\t"
+ "mul.s %[f0], %[f12], %[f0] \n\t"
+ "mul.s %[f12], %[f13], %[f3] \n\t"
+ "mul.s %[f13], %[f13], %[f1] \n\t"
+ "mul.s %[f1], %[f14], %[f1] \n\t"
+ "mul.s %[f3], %[f14], %[f3] \n\t"
+ "sub.s %[f4], %[f4], %[f7] \n\t"
+ "add.s %[f8], %[f6], %[f8] \n\t"
+ "sub.s %[f7], %[f2], %[f5] \n\t"
+ "add.s %[f0], %[f11], %[f0] \n\t"
+ "sub.s %[f2], %[f12], %[f1] \n\t"
+ "add.s %[f3], %[f13], %[f3] \n\t"
+#endif
+ "swc1 %[f4], 16(%[a_ptr]) \n\t"
+ "swc1 %[f8], 20(%[a_ptr]) \n\t"
+ "swc1 %[f7], 8(%[a_ptr]) \n\t"
+ "swc1 %[f0], 12(%[a_ptr]) \n\t"
+ "swc1 %[f2], 24(%[a_ptr]) \n\t"
+ "swc1 %[f3], 28(%[a_ptr]) \n\t"
+ "lwc1 %[f0], 32(%[a_ptr]) \n\t" // second group of 8: byte offsets 32..60 from a_ptr
+ "lwc1 %[f1], 36(%[a_ptr]) \n\t"
+ "lwc1 %[f2], 40(%[a_ptr]) \n\t"
+ "lwc1 %[f3], 44(%[a_ptr]) \n\t"
+ "lwc1 %[f4], 48(%[a_ptr]) \n\t"
+ "lwc1 %[f5], 52(%[a_ptr]) \n\t"
+ "lwc1 %[f6], 56(%[a_ptr]) \n\t"
+ "lwc1 %[f7], 60(%[a_ptr]) \n\t"
+ "add.s %[f8], %[f0], %[f2] \n\t"
+ "sub.s %[f0], %[f0], %[f2] \n\t"
+ "add.s %[f2], %[f4], %[f6] \n\t"
+ "sub.s %[f4], %[f4], %[f6] \n\t"
+ "add.s %[f6], %[f1], %[f3] \n\t"
+ "sub.s %[f1], %[f1], %[f3] \n\t"
+ "add.s %[f3], %[f5], %[f7] \n\t"
+ "sub.s %[f5], %[f5], %[f7] \n\t"
+ "lwc1 %[f11], 8(%[p2_rdft]) \n\t" // f11, f12 = p2_rdft[2], p2_rdft[3]
+ "lwc1 %[f12], 12(%[p2_rdft]) \n\t"
+ "lwc1 %[f13], 8(%[second]) \n\t" // f13, f14 = second[2], second[3]
+ "lwc1 %[f14], 12(%[second]) \n\t"
+ "add.s %[f7], %[f8], %[f2] \n\t"
+ "sub.s %[f8], %[f2], %[f8] \n\t" // operand order is swapped relative to the first group
+ "add.s %[f2], %[f6], %[f3] \n\t"
+ "sub.s %[f6], %[f3], %[f6] \n\t" // operand order is swapped relative to the first group
+ "add.s %[f3], %[f0], %[f5] \n\t"
+ "sub.s %[f0], %[f0], %[f5] \n\t"
+ "add.s %[f5], %[f1], %[f4] \n\t"
+ "sub.s %[f1], %[f1], %[f4] \n\t"
+ "swc1 %[f7], 32(%[a_ptr]) \n\t"
+ "swc1 %[f2], 36(%[a_ptr]) \n\t"
+ "mul.s %[f4], %[f10], %[f8] \n\t"
+#if defined(MIPS32_R2_LE) // variant using the madd.s / nmsub.s multiply-accumulate instructions
+ "mul.s %[f10], %[f10], %[f6] \n\t"
+ "mul.s %[f7], %[f11], %[f0] \n\t"
+ "mul.s %[f11], %[f11], %[f5] \n\t"
+ "mul.s %[f2], %[f13], %[f3] \n\t"
+ "mul.s %[f13], %[f13], %[f1] \n\t"
+ "madd.s %[f4], %[f4], %[f9], %[f6] \n\t"
+ "nmsub.s %[f10], %[f10], %[f9], %[f8] \n\t"
+ "nmsub.s %[f7], %[f7], %[f12], %[f5] \n\t"
+ "madd.s %[f11], %[f11], %[f12], %[f0] \n\t"
+ "nmsub.s %[f2], %[f2], %[f14], %[f1] \n\t"
+ "madd.s %[f13], %[f13], %[f14], %[f3] \n\t"
+#else // variant using separate mul.s / add.s / sub.s instructions; it overwrites f9, which is reloaded below
+ "mul.s %[f2], %[f9], %[f6] \n\t"
+ "mul.s %[f10], %[f10], %[f6] \n\t"
+ "mul.s %[f9], %[f9], %[f8] \n\t"
+ "mul.s %[f7], %[f11], %[f0] \n\t"
+ "mul.s %[f8], %[f12], %[f5] \n\t"
+ "mul.s %[f11], %[f11], %[f5] \n\t"
+ "mul.s %[f12], %[f12], %[f0] \n\t"
+ "mul.s %[f5], %[f13], %[f3] \n\t"
+ "mul.s %[f0], %[f14], %[f1] \n\t"
+ "mul.s %[f13], %[f13], %[f1] \n\t"
+ "mul.s %[f14], %[f14], %[f3] \n\t"
+ "add.s %[f4], %[f4], %[f2] \n\t"
+ "sub.s %[f10], %[f10], %[f9] \n\t"
+ "sub.s %[f7], %[f7], %[f8] \n\t"
+ "add.s %[f11], %[f11], %[f12] \n\t"
+ "sub.s %[f2], %[f5], %[f0] \n\t"
+ "add.s %[f13], %[f13], %[f14] \n\t"
+#endif
+ "swc1 %[f4], 48(%[a_ptr]) \n\t"
+ "swc1 %[f10], 52(%[a_ptr]) \n\t"
+ "swc1 %[f7], 40(%[a_ptr]) \n\t"
+ "swc1 %[f11], 44(%[a_ptr]) \n\t"
+ "swc1 %[f2], 56(%[a_ptr]) \n\t"
+ "swc1 %[f13], 60(%[a_ptr]) \n\t"
+ "addiu %[count], %[count], -1 \n\t"
+ "lwc1 %[f9], 8(%[p1_rdft]) \n\t" // reload f9 for the next iteration (p1_rdft[2], which becomes p1_rdft[0] after the advance)
+ "addiu %[a_ptr], %[a_ptr], 64 \n\t" // advance the cursors: a_ptr by 16 floats, p1_rdft by 2, p2_rdft by 4, first and second by 2 each
+ "addiu %[p1_rdft], %[p1_rdft], 8 \n\t"
+ "addiu %[p2_rdft], %[p2_rdft], 16 \n\t"
+ "addiu %[first], %[first], 8 \n\t"
+ "bgtz %[count], 1b \n\t"
+ " addiu %[second], %[second], 8 \n\t" // branch delay slot: executed on every iteration, including the last
+ ".set pop \n\t"
+ : [f0] "=&f" (f0), [f1] "=&f" (f1), [f2] "=&f" (f2), [f3] "=&f" (f3), // outputs: early-clobber FPU temporaries
+ [f4] "=&f" (f4), [f5] "=&f" (f5), [f6] "=&f" (f6), [f7] "=&f" (f7),
+ [f8] "=&f" (f8), [f9] "=&f" (f9), [f10] "=&f" (f10), [f11] "=&f" (f11),
+ [f12] "=&f" (f12), [f13] "=&f" (f13), [f14] "=&f" (f14),
+ [a_ptr] "=&r" (a_ptr), [p1_rdft] "=&r" (p1_rdft), [first] "+r" (first), // first and second are read-write because the loop advances them
+ [p2_rdft] "=&r" (p2_rdft), [count] "=&r" (count), [second] "+r" (second)
+ : [a] "r" (a), [rdft_w] "r" (rdft_w) // inputs: base addresses only
+ : "memory" // the asm loads and stores through a[], so the compiler must not cache it across this statement
+ );
+}
+
+static void cftmdl_128_mips(float* a) { // In-place pass over a[0..127] done as four inline-asm loops, one per 32-float quarter; each loop combines value pairs 8 floats apart.
+ float f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, f10, f11, f12, f13, f14; // f0..f8 are asm scratch; f9..f14 carry multipliers loaded in C between the asm statements
+ int tmp_a, count; // address cursor held in an int operand, and the loop counter; NOTE(review): assumes 32-bit addresses - confirm
+ __asm __volatile ( // quarter 1: a[0..31], add/sub butterflies only
+ ".set push \n\t"
+ ".set noreorder \n\t" // keep the instruction order as written; the instruction after bgtz sits in its delay slot
+ "addiu %[tmp_a], %[a], 0 \n\t" // tmp_a = &a[0]
+ "addiu %[count], $zero, 4 \n\t" // 4 iterations, one (even, odd) float pair per iteration
+ "1: \n\t"
+ "addiu %[count], %[count], -1 \n\t"
+ "lwc1 %[f0], 0(%[tmp_a]) \n\t" // the inputs sit 32 bytes (8 floats) apart: byte offsets 0, 32, 64, 96 and the same +4
+ "lwc1 %[f2], 32(%[tmp_a]) \n\t"
+ "lwc1 %[f4], 64(%[tmp_a]) \n\t"
+ "lwc1 %[f6], 96(%[tmp_a]) \n\t"
+ "lwc1 %[f1], 4(%[tmp_a]) \n\t"
+ "lwc1 %[f3], 36(%[tmp_a]) \n\t"
+ "lwc1 %[f5], 68(%[tmp_a]) \n\t"
+ "lwc1 %[f7], 100(%[tmp_a]) \n\t"
+ "add.s %[f8], %[f0], %[f2] \n\t"
+ "sub.s %[f0], %[f0], %[f2] \n\t"
+ "add.s %[f2], %[f4], %[f6] \n\t"
+ "sub.s %[f4], %[f4], %[f6] \n\t"
+ "add.s %[f6], %[f1], %[f3] \n\t"
+ "sub.s %[f1], %[f1], %[f3] \n\t"
+ "add.s %[f3], %[f5], %[f7] \n\t"
+ "sub.s %[f5], %[f5], %[f7] \n\t"
+ "add.s %[f7], %[f8], %[f2] \n\t"
+ "sub.s %[f8], %[f8], %[f2] \n\t"
+ "add.s %[f2], %[f1], %[f4] \n\t"
+ "sub.s %[f1], %[f1], %[f4] \n\t"
+ "add.s %[f4], %[f6], %[f3] \n\t"
+ "sub.s %[f6], %[f6], %[f3] \n\t"
+ "sub.s %[f3], %[f0], %[f5] \n\t"
+ "add.s %[f0], %[f0], %[f5] \n\t"
+ "swc1 %[f7], 0(%[tmp_a]) \n\t"
+ "swc1 %[f8], 64(%[tmp_a]) \n\t"
+ "swc1 %[f2], 36(%[tmp_a]) \n\t"
+ "swc1 %[f1], 100(%[tmp_a]) \n\t"
+ "swc1 %[f4], 4(%[tmp_a]) \n\t"
+ "swc1 %[f6], 68(%[tmp_a]) \n\t"
+ "swc1 %[f3], 32(%[tmp_a]) \n\t"
+ "swc1 %[f0], 96(%[tmp_a]) \n\t"
+ "bgtz %[count], 1b \n\t"
+ " addiu %[tmp_a], %[tmp_a], 8 \n\t" // branch delay slot: step to the next float pair on every iteration
+ ".set pop \n\t"
+ : [f0] "=&f" (f0), [f1] "=&f" (f1), [f2] "=&f" (f2), [f3] "=&f" (f3),
+ [f4] "=&f" (f4), [f5] "=&f" (f5), [f6] "=&f" (f6), [f7] "=&f" (f7),
+ [f8] "=&f" (f8), [tmp_a] "=&r" (tmp_a), [count] "=&r" (count)
+ : [a] "r" (a)
+ : "memory"
+ );
+ f9 = rdft_w[2]; // multiplier for quarter 2; it is also an input to quarters 3 and 4
+ __asm __volatile ( // quarter 2: a[32..63]; four of the eight results are multiplied by f9
+ ".set push \n\t"
+ ".set noreorder \n\t"
+ "addiu %[tmp_a], %[a], 128 \n\t" // tmp_a = &a[32]
+ "addiu %[count], $zero, 4 \n\t"
+ "1: \n\t"
+ "addiu %[count], %[count], -1 \n\t"
+ "lwc1 %[f0], 0(%[tmp_a]) \n\t"
+ "lwc1 %[f2], 32(%[tmp_a]) \n\t"
+ "lwc1 %[f5], 68(%[tmp_a]) \n\t"
+ "lwc1 %[f7], 100(%[tmp_a]) \n\t"
+ "lwc1 %[f1], 4(%[tmp_a]) \n\t"
+ "lwc1 %[f3], 36(%[tmp_a]) \n\t"
+ "lwc1 %[f4], 64(%[tmp_a]) \n\t"
+ "lwc1 %[f6], 96(%[tmp_a]) \n\t"
+ "sub.s %[f8], %[f0], %[f2] \n\t"
+ "add.s %[f0], %[f0], %[f2] \n\t"
+ "sub.s %[f2], %[f5], %[f7] \n\t"
+ "add.s %[f5], %[f5], %[f7] \n\t"
+ "sub.s %[f7], %[f1], %[f3] \n\t"
+ "add.s %[f1], %[f1], %[f3] \n\t"
+ "sub.s %[f3], %[f4], %[f6] \n\t"
+ "add.s %[f4], %[f4], %[f6] \n\t"
+ "sub.s %[f6], %[f8], %[f2] \n\t"
+ "add.s %[f8], %[f8], %[f2] \n\t"
+ "add.s %[f2], %[f5], %[f1] \n\t"
+ "sub.s %[f5], %[f5], %[f1] \n\t"
+ "add.s %[f1], %[f3], %[f7] \n\t"
+ "sub.s %[f3], %[f3], %[f7] \n\t"
+ "add.s %[f7], %[f0], %[f4] \n\t"
+ "sub.s %[f0], %[f0], %[f4] \n\t"
+ "sub.s %[f4], %[f6], %[f1] \n\t"
+ "add.s %[f6], %[f6], %[f1] \n\t"
+ "sub.s %[f1], %[f3], %[f8] \n\t"
+ "add.s %[f3], %[f3], %[f8] \n\t"
+ "mul.s %[f4], %[f4], %[f9] \n\t" // scale the four values stored at byte offsets 32, 36, 96, 100
+ "mul.s %[f6], %[f6], %[f9] \n\t"
+ "mul.s %[f1], %[f1], %[f9] \n\t"
+ "mul.s %[f3], %[f3], %[f9] \n\t"
+ "swc1 %[f7], 0(%[tmp_a]) \n\t"
+ "swc1 %[f2], 4(%[tmp_a]) \n\t"
+ "swc1 %[f5], 64(%[tmp_a]) \n\t"
+ "swc1 %[f0], 68(%[tmp_a]) \n\t"
+ "swc1 %[f4], 32(%[tmp_a]) \n\t"
+ "swc1 %[f6], 36(%[tmp_a]) \n\t"
+ "swc1 %[f1], 96(%[tmp_a]) \n\t"
+ "swc1 %[f3], 100(%[tmp_a]) \n\t"
+ "bgtz %[count], 1b \n\t"
+ " addiu %[tmp_a], %[tmp_a], 8 \n\t" // branch delay slot
+ ".set pop \n\t"
+ : [f0] "=&f" (f0), [f1] "=&f" (f1), [f2] "=&f" (f2), [f3] "=&f" (f3),
+ [f4] "=&f" (f4), [f5] "=&f" (f5), [f6] "=&f" (f6), [f7] "=&f" (f7),
+ [f8] "=&f" (f8), [tmp_a] "=&r" (tmp_a), [count] "=&r" (count)
+ : [a] "r" (a), [f9] "f" (f9)
+ : "memory"
+ );
+ f10 = rdft_w[3]; // f10 is also an input to quarter 4
+ f11 = rdft_w[4];
+ f12 = rdft_w[5];
+ f13 = rdft_wk3ri_first[2];
+ f14 = rdft_wk3ri_first[3];
+
+ __asm __volatile ( // quarter 3: a[64..95]; three result pairs are combined with (f9,f10), (f11,f12), (f13,f14)
+ ".set push \n\t"
+ ".set noreorder \n\t"
+ "addiu %[tmp_a], %[a], 256 \n\t" // tmp_a = &a[64]
+ "addiu %[count], $zero, 4 \n\t"
+ "1: \n\t"
+ "addiu %[count], %[count], -1 \n\t"
+ "lwc1 %[f0], 0(%[tmp_a]) \n\t"
+ "lwc1 %[f2], 32(%[tmp_a]) \n\t"
+ "lwc1 %[f4], 64(%[tmp_a]) \n\t"
+ "lwc1 %[f6], 96(%[tmp_a]) \n\t"
+ "lwc1 %[f1], 4(%[tmp_a]) \n\t"
+ "lwc1 %[f3], 36(%[tmp_a]) \n\t"
+ "lwc1 %[f5], 68(%[tmp_a]) \n\t"
+ "lwc1 %[f7], 100(%[tmp_a]) \n\t"
+ "add.s %[f8], %[f0], %[f2] \n\t"
+ "sub.s %[f0], %[f0], %[f2] \n\t"
+ "add.s %[f2], %[f4], %[f6] \n\t"
+ "sub.s %[f4], %[f4], %[f6] \n\t"
+ "add.s %[f6], %[f1], %[f3] \n\t"
+ "sub.s %[f1], %[f1], %[f3] \n\t"
+ "add.s %[f3], %[f5], %[f7] \n\t"
+ "sub.s %[f5], %[f5], %[f7] \n\t"
+ "sub.s %[f7], %[f8], %[f2] \n\t"
+ "add.s %[f8], %[f8], %[f2] \n\t"
+ "add.s %[f2], %[f1], %[f4] \n\t"
+ "sub.s %[f1], %[f1], %[f4] \n\t"
+ "sub.s %[f4], %[f6], %[f3] \n\t"
+ "add.s %[f6], %[f6], %[f3] \n\t"
+ "sub.s %[f3], %[f0], %[f5] \n\t"
+ "add.s %[f0], %[f0], %[f5] \n\t"
+ "swc1 %[f8], 0(%[tmp_a]) \n\t" // the first output pair needs no multiply
+ "swc1 %[f6], 4(%[tmp_a]) \n\t"
+ "mul.s %[f5], %[f9], %[f7] \n\t"
+#if defined(MIPS32_R2_LE) // variant using the madd.s / nmsub.s multiply-accumulate instructions
+ "mul.s %[f7], %[f10], %[f7] \n\t"
+ "mul.s %[f8], %[f11], %[f3] \n\t"
+ "mul.s %[f3], %[f12], %[f3] \n\t"
+ "mul.s %[f6], %[f13], %[f0] \n\t"
+ "mul.s %[f0], %[f14], %[f0] \n\t"
+ "nmsub.s %[f5], %[f5], %[f10], %[f4] \n\t"
+ "madd.s %[f7], %[f7], %[f9], %[f4] \n\t"
+ "nmsub.s %[f8], %[f8], %[f12], %[f2] \n\t"
+ "madd.s %[f3], %[f3], %[f11], %[f2] \n\t"
+ "nmsub.s %[f6], %[f6], %[f14], %[f1] \n\t"
+ "madd.s %[f0], %[f0], %[f13], %[f1] \n\t"
+ "swc1 %[f5], 64(%[tmp_a]) \n\t"
+ "swc1 %[f7], 68(%[tmp_a]) \n\t"
+#else // variant using separate mul.s / add.s / sub.s instructions
+ "mul.s %[f8], %[f10], %[f4] \n\t"
+ "mul.s %[f4], %[f9], %[f4] \n\t"
+ "mul.s %[f7], %[f10], %[f7] \n\t"
+ "mul.s %[f6], %[f11], %[f3] \n\t"
+ "mul.s %[f3], %[f12], %[f3] \n\t"
+ "sub.s %[f5], %[f5], %[f8] \n\t"
+ "mul.s %[f8], %[f12], %[f2] \n\t"
+ "mul.s %[f2], %[f11], %[f2] \n\t"
+ "add.s %[f7], %[f4], %[f7] \n\t"
+ "mul.s %[f4], %[f13], %[f0] \n\t"
+ "mul.s %[f0], %[f14], %[f0] \n\t"
+ "sub.s %[f8], %[f6], %[f8] \n\t"
+ "mul.s %[f6], %[f14], %[f1] \n\t"
+ "mul.s %[f1], %[f13], %[f1] \n\t"
+ "add.s %[f3], %[f2], %[f3] \n\t"
+ "swc1 %[f5], 64(%[tmp_a]) \n\t"
+ "swc1 %[f7], 68(%[tmp_a]) \n\t"
+ "sub.s %[f6], %[f4], %[f6] \n\t"
+ "add.s %[f0], %[f1], %[f0] \n\t"
+#endif
+ "swc1 %[f8], 32(%[tmp_a]) \n\t"
+ "swc1 %[f3], 36(%[tmp_a]) \n\t"
+ "swc1 %[f6], 96(%[tmp_a]) \n\t"
+ "swc1 %[f0], 100(%[tmp_a]) \n\t"
+ "bgtz %[count], 1b \n\t"
+ " addiu %[tmp_a], %[tmp_a], 8 \n\t" // branch delay slot
+ ".set pop \n\t"
+ : [f0] "=&f" (f0), [f1] "=&f" (f1), [f2] "=&f" (f2), [f3] "=&f" (f3),
+ [f4] "=&f" (f4), [f5] "=&f" (f5), [f6] "=&f" (f6), [f7] "=&f" (f7),
+ [f8] "=&f" (f8), [tmp_a] "=&r" (tmp_a), [count] "=&r" (count)
+ : [a] "r" (a), [f9] "f" (f9), [f10] "f" (f10), [f11] "f" (f11), // multipliers are read-only inputs here
+ [f12] "f" (f12), [f13] "f" (f13), [f14] "f" (f14)
+ : "memory"
+ );
+ f11 = rdft_w[6]; // f9 and f10 keep their earlier values (rdft_w[2], rdft_w[3])
+ f12 = rdft_w[7];
+ f13 = rdft_wk3ri_second[2];
+ f14 = rdft_wk3ri_second[3];
+ __asm __volatile ( // quarter 4: a[96..127]; same layout as quarter 3, but with different signs and multiplier pairing
+ ".set push \n\t"
+ ".set noreorder \n\t"
+ "addiu %[tmp_a], %[a], 384 \n\t" // tmp_a = &a[96]
+ "addiu %[count], $zero, 4 \n\t"
+ "1: \n\t"
+ "addiu %[count], %[count], -1 \n\t"
+ "lwc1 %[f0], 0(%[tmp_a]) \n\t"
+ "lwc1 %[f1], 4(%[tmp_a]) \n\t"
+ "lwc1 %[f2], 32(%[tmp_a]) \n\t"
+ "lwc1 %[f3], 36(%[tmp_a]) \n\t"
+ "lwc1 %[f4], 64(%[tmp_a]) \n\t"
+ "lwc1 %[f5], 68(%[tmp_a]) \n\t"
+ "lwc1 %[f6], 96(%[tmp_a]) \n\t"
+ "lwc1 %[f7], 100(%[tmp_a]) \n\t"
+ "add.s %[f8], %[f0], %[f2] \n\t"
+ "sub.s %[f0], %[f0], %[f2] \n\t"
+ "add.s %[f2], %[f4], %[f6] \n\t"
+ "sub.s %[f4], %[f4], %[f6] \n\t"
+ "add.s %[f6], %[f1], %[f3] \n\t"
+ "sub.s %[f1], %[f1], %[f3] \n\t"
+ "add.s %[f3], %[f5], %[f7] \n\t"
+ "sub.s %[f5], %[f5], %[f7] \n\t"
+ "sub.s %[f7], %[f2], %[f8] \n\t"
+ "add.s %[f2], %[f2], %[f8] \n\t"
+ "add.s %[f8], %[f1], %[f4] \n\t"
+ "sub.s %[f1], %[f1], %[f4] \n\t"
+ "sub.s %[f4], %[f3], %[f6] \n\t"
+ "add.s %[f3], %[f3], %[f6] \n\t"
+ "sub.s %[f6], %[f0], %[f5] \n\t"
+ "add.s %[f0], %[f0], %[f5] \n\t"
+ "swc1 %[f2], 0(%[tmp_a]) \n\t" // the first output pair needs no multiply
+ "swc1 %[f3], 4(%[tmp_a]) \n\t"
+ "mul.s %[f5], %[f10], %[f7] \n\t"
+#if defined(MIPS32_R2_LE) // variant using the madd.s / msub.s multiply-accumulate instructions
+ "mul.s %[f7], %[f9], %[f7] \n\t"
+ "mul.s %[f2], %[f12], %[f8] \n\t"
+ "mul.s %[f8], %[f11], %[f8] \n\t"
+ "mul.s %[f3], %[f14], %[f1] \n\t"
+ "mul.s %[f1], %[f13], %[f1] \n\t"
+ "madd.s %[f5], %[f5], %[f9], %[f4] \n\t"
+ "msub.s %[f7], %[f7], %[f10], %[f4] \n\t"
+ "msub.s %[f2], %[f2], %[f11], %[f6] \n\t"
+ "madd.s %[f8], %[f8], %[f12], %[f6] \n\t"
+ "msub.s %[f3], %[f3], %[f13], %[f0] \n\t"
+ "madd.s %[f1], %[f1], %[f14], %[f0] \n\t"
+ "swc1 %[f5], 64(%[tmp_a]) \n\t"
+ "swc1 %[f7], 68(%[tmp_a]) \n\t"
+#else // variant using separate mul.s / add.s / sub.s instructions
+ "mul.s %[f2], %[f9], %[f4] \n\t"
+ "mul.s %[f4], %[f10], %[f4] \n\t"
+ "mul.s %[f7], %[f9], %[f7] \n\t"
+ "mul.s %[f3], %[f11], %[f6] \n\t"
+ "mul.s %[f6], %[f12], %[f6] \n\t"
+ "add.s %[f5], %[f5], %[f2] \n\t"
+ "sub.s %[f7], %[f4], %[f7] \n\t"
+ "mul.s %[f2], %[f12], %[f8] \n\t"
+ "mul.s %[f8], %[f11], %[f8] \n\t"
+ "mul.s %[f4], %[f14], %[f1] \n\t"
+ "mul.s %[f1], %[f13], %[f1] \n\t"
+ "sub.s %[f2], %[f3], %[f2] \n\t"
+ "mul.s %[f3], %[f13], %[f0] \n\t"
+ "mul.s %[f0], %[f14], %[f0] \n\t"
+ "add.s %[f8], %[f8], %[f6] \n\t"
+ "swc1 %[f5], 64(%[tmp_a]) \n\t"
+ "swc1 %[f7], 68(%[tmp_a]) \n\t"
+ "sub.s %[f3], %[f3], %[f4] \n\t"
+ "add.s %[f1], %[f1], %[f0] \n\t"
+#endif
+ "swc1 %[f2], 32(%[tmp_a]) \n\t"
+ "swc1 %[f8], 36(%[tmp_a]) \n\t"
+ "swc1 %[f3], 96(%[tmp_a]) \n\t"
+ "swc1 %[f1], 100(%[tmp_a]) \n\t"
+ "bgtz %[count], 1b \n\t"
+ " addiu %[tmp_a], %[tmp_a], 8 \n\t" // branch delay slot
+ ".set pop \n\t"
+ : [f0] "=&f" (f0), [f1] "=&f" (f1), [f2] "=&f" (f2), [f3] "=&f" (f3),
+ [f4] "=&f" (f4), [f5] "=&f" (f5), [f6] "=&f" (f6), [f7] "=&f" (f7),
+ [f8] "=&f" (f8), [tmp_a] "=&r" (tmp_a), [count] "=&r" (count)
+ : [a] "r" (a), [f9] "f" (f9), [f10] "f" (f10), [f11] "f" (f11),
+ [f12] "f" (f12), [f13] "f" (f13), [f14] "f" (f14)
+ : "memory"
+ );
+}
+
+static void cftfsub_128_mips(float* a) { // Runs cft1st_128 and cftmdl_128 on a, then one more in-place add/sub pass over a[0..127] combining values 32 floats apart.
+ float f0, f1, f2, f3, f4, f5, f6, f7, f8; // scratch values bound to FPU registers by the asm below
+ int tmp_a, count; // address cursor held in an int operand, and the loop counter; NOTE(review): assumes 32-bit addresses - confirm
+
+ cft1st_128(a); // called through the cft1st_128 / cftmdl_128 names, which aec_rdft_init_mips assigns
+ cftmdl_128(a);
+
+ __asm __volatile (
+ ".set push \n\t"
+ ".set noreorder \n\t" // keep the instruction order as written; the instruction after bgtz sits in its delay slot
+ "addiu %[tmp_a], %[a], 0 \n\t" // tmp_a = &a[0]
+ "addiu %[count], $zero, 16 \n\t" // 16 iterations, one (even, odd) float pair per iteration
+ "1: \n\t"
+ "addiu %[count], %[count], -1 \n\t"
+ "lwc1 %[f0], 0(%[tmp_a]) \n\t" // the inputs sit 128 bytes (32 floats) apart: byte offsets 0, 128, 256, 384 and the same +4
+ "lwc1 %[f2], 128(%[tmp_a]) \n\t"
+ "lwc1 %[f4], 256(%[tmp_a]) \n\t"
+ "lwc1 %[f6], 384(%[tmp_a]) \n\t"
+ "lwc1 %[f1], 4(%[tmp_a]) \n\t"
+ "lwc1 %[f3], 132(%[tmp_a]) \n\t"
+ "lwc1 %[f5], 260(%[tmp_a]) \n\t"
+ "lwc1 %[f7], 388(%[tmp_a]) \n\t"
+ "add.s %[f8], %[f0], %[f2] \n\t" // first stage: sums and differences of the (0,128) and (256,384) pairs
+ "sub.s %[f0], %[f0], %[f2] \n\t"
+ "add.s %[f2], %[f4], %[f6] \n\t"
+ "sub.s %[f4], %[f4], %[f6] \n\t"
+ "add.s %[f6], %[f1], %[f3] \n\t"
+ "sub.s %[f1], %[f1], %[f3] \n\t"
+ "add.s %[f3], %[f5], %[f7] \n\t"
+ "sub.s %[f5], %[f5], %[f7] \n\t"
+ "add.s %[f7], %[f8], %[f2] \n\t" // second stage: combine the first-stage results
+ "sub.s %[f8], %[f8], %[f2] \n\t"
+ "add.s %[f2], %[f1], %[f4] \n\t"
+ "sub.s %[f1], %[f1], %[f4] \n\t"
+ "add.s %[f4], %[f6], %[f3] \n\t"
+ "sub.s %[f6], %[f6], %[f3] \n\t"
+ "sub.s %[f3], %[f0], %[f5] \n\t"
+ "add.s %[f0], %[f0], %[f5] \n\t"
+ "swc1 %[f7], 0(%[tmp_a]) \n\t" // results overwrite the same eight locations that were loaded
+ "swc1 %[f8], 256(%[tmp_a]) \n\t"
+ "swc1 %[f2], 132(%[tmp_a]) \n\t"
+ "swc1 %[f1], 388(%[tmp_a]) \n\t"
+ "swc1 %[f4], 4(%[tmp_a]) \n\t"
+ "swc1 %[f6], 260(%[tmp_a]) \n\t"
+ "swc1 %[f3], 128(%[tmp_a]) \n\t"
+ "swc1 %[f0], 384(%[tmp_a]) \n\t"
+ "bgtz %[count], 1b \n\t"
+ " addiu %[tmp_a], %[tmp_a], 8 \n\t" // branch delay slot: step to the next float pair on every iteration
+ ".set pop \n\t"
+ : [f0] "=&f" (f0), [f1] "=&f" (f1), [f2] "=&f" (f2), [f3] "=&f" (f3), // outputs: early-clobber temporaries
+ [f4] "=&f" (f4), [f5] "=&f" (f5), [f6] "=&f" (f6), [f7] "=&f" (f7),
+ [f8] "=&f" (f8), [tmp_a] "=&r" (tmp_a),
+ [count] "=&r" (count)
+ : [a] "r" (a)
+ : "memory" // the asm loads and stores through a[]
+ );
+}
+
+static void cftbsub_128_mips(float* a) { // Runs cft1st_128 and cftmdl_128 on a, then an in-place add/sub pass over a[0..127] with the same layout as cftfsub_128_mips but different signs on the odd-index values.
+ float f0, f1, f2, f3, f4, f5, f6, f7, f8; // scratch values bound to FPU registers by the asm below
+ int tmp_a, count; // address cursor held in an int operand, and the loop counter; NOTE(review): assumes 32-bit addresses - confirm
+
+ cft1st_128(a); // called through the cft1st_128 / cftmdl_128 names, which aec_rdft_init_mips assigns
+ cftmdl_128(a);
+
+ __asm __volatile (
+ ".set push \n\t"
+ ".set noreorder \n\t" // keep the instruction order as written; the instruction after bgtz sits in its delay slot
+ "addiu %[tmp_a], %[a], 0 \n\t" // tmp_a = &a[0]
+ "addiu %[count], $zero, 16 \n\t" // 16 iterations, one (even, odd) float pair per iteration
+ "1: \n\t"
+ "addiu %[count], %[count], -1 \n\t"
+ "lwc1 %[f0], 0(%[tmp_a]) \n\t" // the inputs sit 128 bytes (32 floats) apart: byte offsets 0, 128, 256, 384 and the same +4
+ "lwc1 %[f2], 128(%[tmp_a]) \n\t"
+ "lwc1 %[f4], 256(%[tmp_a]) \n\t"
+ "lwc1 %[f6], 384(%[tmp_a]) \n\t"
+ "lwc1 %[f1], 4(%[tmp_a]) \n\t"
+ "lwc1 %[f3], 132(%[tmp_a]) \n\t"
+ "lwc1 %[f5], 260(%[tmp_a]) \n\t"
+ "lwc1 %[f7], 388(%[tmp_a]) \n\t"
+ "add.s %[f8], %[f0], %[f2] \n\t"
+ "sub.s %[f0], %[f0], %[f2] \n\t"
+ "add.s %[f2], %[f4], %[f6] \n\t"
+ "sub.s %[f4], %[f4], %[f6] \n\t"
+ "add.s %[f6], %[f1], %[f3] \n\t"
+ "sub.s %[f1], %[f3], %[f1] \n\t" // operands are reversed here: (offset 132) - (offset 4)
+ "add.s %[f3], %[f5], %[f7] \n\t"
+ "sub.s %[f5], %[f5], %[f7] \n\t"
+ "add.s %[f7], %[f8], %[f2] \n\t"
+ "sub.s %[f8], %[f8], %[f2] \n\t"
+ "sub.s %[f2], %[f1], %[f4] \n\t"
+ "add.s %[f1], %[f1], %[f4] \n\t"
+ "add.s %[f4], %[f3], %[f6] \n\t"
+ "sub.s %[f6], %[f3], %[f6] \n\t"
+ "sub.s %[f3], %[f0], %[f5] \n\t"
+ "add.s %[f0], %[f0], %[f5] \n\t"
+ "neg.s %[f4], %[f4] \n\t" // the value stored at offset 4 is the negated sum of the four odd-index inputs
+ "swc1 %[f7], 0(%[tmp_a]) \n\t" // results overwrite the same eight locations that were loaded
+ "swc1 %[f8], 256(%[tmp_a]) \n\t"
+ "swc1 %[f2], 132(%[tmp_a]) \n\t"
+ "swc1 %[f1], 388(%[tmp_a]) \n\t"
+ "swc1 %[f6], 260(%[tmp_a]) \n\t"
+ "swc1 %[f3], 128(%[tmp_a]) \n\t"
+ "swc1 %[f0], 384(%[tmp_a]) \n\t"
+ "swc1 %[f4], 4(%[tmp_a]) \n\t"
+ "bgtz %[count], 1b \n\t"
+ " addiu %[tmp_a], %[tmp_a], 8 \n\t" // branch delay slot: step to the next float pair on every iteration
+ ".set pop \n\t"
+ : [f0] "=&f" (f0), [f1] "=&f" (f1), [f2] "=&f" (f2), [f3] "=&f" (f3), // outputs: early-clobber temporaries
+ [f4] "=&f" (f4), [f5] "=&f" (f5), [f6] "=&f" (f6), [f7] "=&f" (f7),
+ [f8] "=&f" (f8), [tmp_a] "=&r" (tmp_a), [count] "=&r" (count)
+ : [a] "r" (a)
+ : "memory" // the asm loads and stores through a[]
+ );
+}
+
+static void rftfsub_128_mips(float* a) { // In-place update of a[2..63] and a[66..127]: a1 walks up from a[2], a2 walks down from a[126], and each mirrored pair is adjusted using coefficients from rdft_w + 32.
+ const float* c = rdft_w + 32; // coefficient table used by this pass
+ const float f0 = 0.5f; // constant for the (0.5 - *c2) factor computed in the asm
+ float* a1 = &a[2]; // ascending cursor into a
+ float* a2 = &a[126]; // descending cursor into a
+ const float* c1 = &c[1]; // ascending coefficient cursor
+ const float* c2 = &c[31]; // descending coefficient cursor
+ float f1, f2, f3 ,f4, f5, f6, f7, f8, f9, f10, f11, f12, f13, f14, f15; // scratch values bound to FPU registers by the asm below
+ int count; // loop counter
+
+ __asm __volatile (
+ ".set push \n\t"
+ ".set noreorder \n\t" // keep the instruction order as written; the instruction after bgtz sits in its delay slot
+ "lwc1 %[f6], 0(%[c2]) \n\t" // peeled first pair (a1 = &a[2], a2 = &a[126]); the loop below then does two pairs per iteration
+ "lwc1 %[f1], 0(%[a1]) \n\t"
+ "lwc1 %[f2], 0(%[a2]) \n\t"
+ "lwc1 %[f3], 4(%[a1]) \n\t"
+ "lwc1 %[f4], 4(%[a2]) \n\t"
+ "lwc1 %[f5], 0(%[c1]) \n\t"
+ "sub.s %[f6], %[f0], %[f6] \n\t" // f6 = 0.5 - *c2
+ "sub.s %[f7], %[f1], %[f2] \n\t" // f7 = a1[0] - a2[0]
+ "add.s %[f8], %[f3], %[f4] \n\t" // f8 = a1[1] + a2[1]
+ "addiu %[count], $zero, 15 \n\t" // 15 loop iterations x 2 pairs + this peeled pair = 31 pairs
+ "mul.s %[f9], %[f6], %[f7] \n\t"
+ "mul.s %[f6], %[f6], %[f8] \n\t"
+#if !defined(MIPS32_R2_LE) // separate mul/add/sub: f9 = f6*f7 - f5*f8, f6 = f6*f8 + f5*f7 (using the f6 computed above)
+ "mul.s %[f8], %[f5], %[f8] \n\t"
+ "mul.s %[f5], %[f5], %[f7] \n\t"
+ "sub.s %[f9], %[f9], %[f8] \n\t"
+ "add.s %[f6], %[f6], %[f5] \n\t"
+#else // variant using the nmsub.s / madd.s multiply-accumulate instructions
+ "nmsub.s %[f9], %[f9], %[f5], %[f8] \n\t"
+ "madd.s %[f6], %[f6], %[f5], %[f7] \n\t"
+#endif
+ "sub.s %[f1], %[f1], %[f9] \n\t" // a1[0] -= f9
+ "add.s %[f2], %[f2], %[f9] \n\t" // a2[0] += f9
+ "sub.s %[f3], %[f3], %[f6] \n\t" // a1[1] -= f6
+ "sub.s %[f4], %[f4], %[f6] \n\t" // a2[1] -= f6
+ "swc1 %[f1], 0(%[a1]) \n\t"
+ "swc1 %[f2], 0(%[a2]) \n\t"
+ "swc1 %[f3], 4(%[a1]) \n\t"
+ "swc1 %[f4], 4(%[a2]) \n\t"
+ "addiu %[a1], %[a1], 8 \n\t" // step one pair: a1 += 2 floats, a2 -= 2 floats, c1 += 1, c2 -= 1
+ "addiu %[a2], %[a2], -8 \n\t"
+ "addiu %[c1], %[c1], 4 \n\t"
+ "addiu %[c2], %[c2], -4 \n\t"
+ "1: \n\t" // loop body: the same update for two consecutive pairs, with their instructions interleaved
+ "lwc1 %[f6], 0(%[c2]) \n\t"
+ "lwc1 %[f1], 0(%[a1]) \n\t"
+ "lwc1 %[f2], 0(%[a2]) \n\t"
+ "lwc1 %[f3], 4(%[a1]) \n\t"
+ "lwc1 %[f4], 4(%[a2]) \n\t"
+ "lwc1 %[f5], 0(%[c1]) \n\t"
+ "sub.s %[f6], %[f0], %[f6] \n\t"
+ "sub.s %[f7], %[f1], %[f2] \n\t"
+ "add.s %[f8], %[f3], %[f4] \n\t"
+ "lwc1 %[f10], -4(%[c2]) \n\t" // loads for the second pair start here (c2[-1], a1[2], a2[-2])
+ "lwc1 %[f11], 8(%[a1]) \n\t"
+ "lwc1 %[f12], -8(%[a2]) \n\t"
+ "mul.s %[f9], %[f6], %[f7] \n\t"
+ "mul.s %[f6], %[f6], %[f8] \n\t"
+#if !defined(MIPS32_R2_LE) // variant using separate mul.s / add.s / sub.s instructions
+ "mul.s %[f8], %[f5], %[f8] \n\t"
+ "mul.s %[f5], %[f5], %[f7] \n\t"
+ "lwc1 %[f13], 12(%[a1]) \n\t"
+ "lwc1 %[f14], -4(%[a2]) \n\t"
+ "lwc1 %[f15], 4(%[c1]) \n\t"
+ "sub.s %[f9], %[f9], %[f8] \n\t"
+ "add.s %[f6], %[f6], %[f5] \n\t"
+#else // variant using the nmsub.s / madd.s multiply-accumulate instructions
+ "lwc1 %[f13], 12(%[a1]) \n\t"
+ "lwc1 %[f14], -4(%[a2]) \n\t"
+ "lwc1 %[f15], 4(%[c1]) \n\t"
+ "nmsub.s %[f9], %[f9], %[f5], %[f8] \n\t"
+ "madd.s %[f6], %[f6], %[f5], %[f7] \n\t"
+#endif
+ "sub.s %[f10], %[f0], %[f10] \n\t" // second pair: f10 = 0.5 - c2[-1]
+ "sub.s %[f5], %[f11], %[f12] \n\t" // f5 = a1[2] - a2[-2]
+ "add.s %[f7], %[f13], %[f14] \n\t" // f7 = a1[3] + a2[-1]
+ "sub.s %[f1], %[f1], %[f9] \n\t"
+ "add.s %[f2], %[f2], %[f9] \n\t"
+ "sub.s %[f3], %[f3], %[f6] \n\t"
+ "mul.s %[f8], %[f10], %[f5] \n\t"
+ "mul.s %[f10], %[f10], %[f7] \n\t"
+#if !defined(MIPS32_R2_LE) // variant using separate mul.s / add.s / sub.s instructions
+ "mul.s %[f9], %[f15], %[f7] \n\t"
+ "mul.s %[f15], %[f15], %[f5] \n\t"
+ "sub.s %[f4], %[f4], %[f6] \n\t"
+ "swc1 %[f1], 0(%[a1]) \n\t"
+ "swc1 %[f2], 0(%[a2]) \n\t"
+ "sub.s %[f8], %[f8], %[f9] \n\t"
+ "add.s %[f10], %[f10], %[f15] \n\t"
+#else // variant using the nmsub.s / madd.s multiply-accumulate instructions
+ "swc1 %[f1], 0(%[a1]) \n\t"
+ "swc1 %[f2], 0(%[a2]) \n\t"
+ "sub.s %[f4], %[f4], %[f6] \n\t"
+ "nmsub.s %[f8], %[f8], %[f15], %[f7] \n\t"
+ "madd.s %[f10], %[f10], %[f15], %[f5] \n\t"
+#endif
+ "swc1 %[f3], 4(%[a1]) \n\t"
+ "swc1 %[f4], 4(%[a2]) \n\t"
+ "sub.s %[f11], %[f11], %[f8] \n\t"
+ "add.s %[f12], %[f12], %[f8] \n\t"
+ "sub.s %[f13], %[f13], %[f10] \n\t"
+ "sub.s %[f14], %[f14], %[f10] \n\t"
+ "addiu %[c2], %[c2], -8 \n\t" // step two pairs: c2 -= 2, c1 += 2
+ "addiu %[c1], %[c1], 8 \n\t"
+ "swc1 %[f11], 8(%[a1]) \n\t"
+ "swc1 %[f12], -8(%[a2]) \n\t"
+ "swc1 %[f13], 12(%[a1]) \n\t"
+ "swc1 %[f14], -4(%[a2]) \n\t"
+ "addiu %[a1], %[a1], 16 \n\t" // a1 += 4 floats
+ "addiu %[count], %[count], -1 \n\t"
+ "bgtz %[count], 1b \n\t"
+ " addiu %[a2], %[a2], -16 \n\t" // branch delay slot: a2 -= 4 floats on every iteration
+ ".set pop \n\t"
+ : [a1] "+r" (a1), [a2] "+r" (a2), [c1] "+r" (c1), [c2] "+r" (c2), // the cursors are read-write operands
+ [f1] "=&f" (f1), [f2] "=&f" (f2), [f3] "=&f" (f3), [f4] "=&f" (f4),
+ [f5] "=&f" (f5), [f6] "=&f" (f6), [f7] "=&f" (f7), [f8] "=&f" (f8),
+ [f9] "=&f" (f9), [f10] "=&f" (f10), [f11] "=&f" (f11), [f12] "=&f" (f12),
+ [f13] "=&f" (f13), [f14] "=&f" (f14), [f15] "=&f" (f15),
+ [count] "=&r" (count)
+ : [f0] "f" (f0) // the 0.5 constant is a read-only input
+ : "memory" // the asm loads and stores through a[]
+ );
+}
+
+static void rftbsub_128_mips(float* a) { // Negates a[1] and a[65], then updates a[2..63] and a[66..127] in place with the same pair walk as rftfsub_128_mips but different signs.
+ const float *c = rdft_w + 32; // coefficient table used by this pass
+ const float f0 = 0.5f; // constant for the (0.5 - *c2) factor computed in the asm
+ float* a1 = &a[2]; // ascending cursor into a
+ float* a2 = &a[126]; // descending cursor into a
+ const float* c1 = &c[1]; // ascending coefficient cursor
+ const float* c2 = &c[31]; // descending coefficient cursor
+ float f1, f2, f3 ,f4, f5, f6, f7, f8, f9, f10, f11, f12, f13, f14, f15; // scratch values bound to FPU registers by the asm below
+ int count; // loop counter
+
+ a[1] = -a[1]; // the two elements outside the a1/a2 walk are only sign-flipped
+ a[65] = -a[65];
+
+ __asm __volatile (
+ ".set push \n\t"
+ ".set noreorder \n\t" // keep the instruction order as written; the instruction after bgtz sits in its delay slot
+ "lwc1 %[f6], 0(%[c2]) \n\t" // peeled first pair (a1 = &a[2], a2 = &a[126]); the loop below then does two pairs per iteration
+ "lwc1 %[f1], 0(%[a1]) \n\t"
+ "lwc1 %[f2], 0(%[a2]) \n\t"
+ "lwc1 %[f3], 4(%[a1]) \n\t"
+ "lwc1 %[f4], 4(%[a2]) \n\t"
+ "lwc1 %[f5], 0(%[c1]) \n\t"
+ "sub.s %[f6], %[f0], %[f6] \n\t" // f6 = 0.5 - *c2
+ "sub.s %[f7], %[f1], %[f2] \n\t" // f7 = a1[0] - a2[0]
+ "add.s %[f8], %[f3], %[f4] \n\t" // f8 = a1[1] + a2[1]
+ "addiu %[count], $zero, 15 \n\t" // 15 loop iterations x 2 pairs + this peeled pair = 31 pairs
+ "mul.s %[f9], %[f6], %[f7] \n\t"
+ "mul.s %[f6], %[f6], %[f8] \n\t"
+#if !defined(MIPS32_R2_LE) // separate mul/add/sub: f9 = f6*f7 + f5*f8, f6 = f6*f8 - f5*f7 (using the f6 computed above)
+ "mul.s %[f8], %[f5], %[f8] \n\t"
+ "mul.s %[f5], %[f5], %[f7] \n\t"
+ "add.s %[f9], %[f9], %[f8] \n\t"
+ "sub.s %[f6], %[f6], %[f5] \n\t"
+#else // variant using the madd.s / nmsub.s multiply-accumulate instructions
+ "madd.s %[f9], %[f9], %[f5], %[f8] \n\t"
+ "nmsub.s %[f6], %[f6], %[f5], %[f7] \n\t"
+#endif
+ "sub.s %[f1], %[f1], %[f9] \n\t" // a1[0] -= f9
+ "add.s %[f2], %[f2], %[f9] \n\t" // a2[0] += f9
+ "sub.s %[f3], %[f6], %[f3] \n\t" // a1[1] = f6 - a1[1]
+ "sub.s %[f4], %[f6], %[f4] \n\t" // a2[1] = f6 - a2[1]
+ "swc1 %[f1], 0(%[a1]) \n\t"
+ "swc1 %[f2], 0(%[a2]) \n\t"
+ "swc1 %[f3], 4(%[a1]) \n\t"
+ "swc1 %[f4], 4(%[a2]) \n\t"
+ "addiu %[a1], %[a1], 8 \n\t" // step one pair: a1 += 2 floats, a2 -= 2 floats, c1 += 1, c2 -= 1
+ "addiu %[a2], %[a2], -8 \n\t"
+ "addiu %[c1], %[c1], 4 \n\t"
+ "addiu %[c2], %[c2], -4 \n\t"
+ "1: \n\t" // loop body: the same update for two consecutive pairs, with their instructions interleaved
+ "lwc1 %[f6], 0(%[c2]) \n\t"
+ "lwc1 %[f1], 0(%[a1]) \n\t"
+ "lwc1 %[f2], 0(%[a2]) \n\t"
+ "lwc1 %[f3], 4(%[a1]) \n\t"
+ "lwc1 %[f4], 4(%[a2]) \n\t"
+ "lwc1 %[f5], 0(%[c1]) \n\t"
+ "sub.s %[f6], %[f0], %[f6] \n\t"
+ "sub.s %[f7], %[f1], %[f2] \n\t"
+ "add.s %[f8], %[f3], %[f4] \n\t"
+ "lwc1 %[f10], -4(%[c2]) \n\t" // loads for the second pair start here (c2[-1], a1[2], a2[-2])
+ "lwc1 %[f11], 8(%[a1]) \n\t"
+ "lwc1 %[f12], -8(%[a2]) \n\t"
+ "mul.s %[f9], %[f6], %[f7] \n\t"
+ "mul.s %[f6], %[f6], %[f8] \n\t"
+#if !defined(MIPS32_R2_LE) // variant using separate mul.s / add.s / sub.s instructions
+ "mul.s %[f8], %[f5], %[f8] \n\t"
+ "mul.s %[f5], %[f5], %[f7] \n\t"
+ "lwc1 %[f13], 12(%[a1]) \n\t"
+ "lwc1 %[f14], -4(%[a2]) \n\t"
+ "lwc1 %[f15], 4(%[c1]) \n\t"
+ "add.s %[f9], %[f9], %[f8] \n\t"
+ "sub.s %[f6], %[f6], %[f5] \n\t"
+#else // variant using the madd.s / nmsub.s multiply-accumulate instructions
+ "lwc1 %[f13], 12(%[a1]) \n\t"
+ "lwc1 %[f14], -4(%[a2]) \n\t"
+ "lwc1 %[f15], 4(%[c1]) \n\t"
+ "madd.s %[f9], %[f9], %[f5], %[f8] \n\t"
+ "nmsub.s %[f6], %[f6], %[f5], %[f7] \n\t"
+#endif
+ "sub.s %[f10], %[f0], %[f10] \n\t" // second pair: f10 = 0.5 - c2[-1]
+ "sub.s %[f5], %[f11], %[f12] \n\t" // f5 = a1[2] - a2[-2]
+ "add.s %[f7], %[f13], %[f14] \n\t" // f7 = a1[3] + a2[-1]
+ "sub.s %[f1], %[f1], %[f9] \n\t"
+ "add.s %[f2], %[f2], %[f9] \n\t"
+ "sub.s %[f3], %[f6], %[f3] \n\t"
+ "mul.s %[f8], %[f10], %[f5] \n\t"
+ "mul.s %[f10], %[f10], %[f7] \n\t"
+#if !defined(MIPS32_R2_LE) // variant using separate mul.s / add.s / sub.s instructions
+ "mul.s %[f9], %[f15], %[f7] \n\t"
+ "mul.s %[f15], %[f15], %[f5] \n\t"
+ "sub.s %[f4], %[f6], %[f4] \n\t"
+ "swc1 %[f1], 0(%[a1]) \n\t"
+ "swc1 %[f2], 0(%[a2]) \n\t"
+ "add.s %[f8], %[f8], %[f9] \n\t"
+ "sub.s %[f10], %[f10], %[f15] \n\t"
+#else // variant using the madd.s / nmsub.s multiply-accumulate instructions
+ "swc1 %[f1], 0(%[a1]) \n\t"
+ "swc1 %[f2], 0(%[a2]) \n\t"
+ "sub.s %[f4], %[f6], %[f4] \n\t"
+ "madd.s %[f8], %[f8], %[f15], %[f7] \n\t"
+ "nmsub.s %[f10], %[f10], %[f15], %[f5] \n\t"
+#endif
+ "swc1 %[f3], 4(%[a1]) \n\t"
+ "swc1 %[f4], 4(%[a2]) \n\t"
+ "sub.s %[f11], %[f11], %[f8] \n\t"
+ "add.s %[f12], %[f12], %[f8] \n\t"
+ "sub.s %[f13], %[f10], %[f13] \n\t"
+ "sub.s %[f14], %[f10], %[f14] \n\t"
+ "addiu %[c2], %[c2], -8 \n\t" // step two pairs: c2 -= 2, c1 += 2
+ "addiu %[c1], %[c1], 8 \n\t"
+ "swc1 %[f11], 8(%[a1]) \n\t"
+ "swc1 %[f12], -8(%[a2]) \n\t"
+ "swc1 %[f13], 12(%[a1]) \n\t"
+ "swc1 %[f14], -4(%[a2]) \n\t"
+ "addiu %[a1], %[a1], 16 \n\t" // a1 += 4 floats
+ "addiu %[count], %[count], -1 \n\t"
+ "bgtz %[count], 1b \n\t"
+ " addiu %[a2], %[a2], -16 \n\t" // branch delay slot: a2 -= 4 floats on every iteration
+ ".set pop \n\t"
+ : [a1] "+r" (a1), [a2] "+r" (a2), [c1] "+r" (c1), [c2] "+r" (c2), // the cursors are read-write operands
+ [f1] "=&f" (f1), [f2] "=&f" (f2), [f3] "=&f" (f3), [f4] "=&f" (f4),
+ [f5] "=&f" (f5), [f6] "=&f" (f6), [f7] "=&f" (f7), [f8] "=&f" (f8),
+ [f9] "=&f" (f9), [f10] "=&f" (f10), [f11] "=&f" (f11), [f12] "=&f" (f12),
+ [f13] "=&f" (f13), [f14] "=&f" (f14), [f15] "=&f" (f15),
+ [count] "=&r" (count)
+ : [f0] "f" (f0) // the 0.5 constant is a read-only input
+ : "memory" // the asm loads and stores through a[]
+ );
+}
+
+void aec_rdft_init_mips(void) { // Points the seven rdft hooks at the MIPS implementations in this file.
+ bitrv2_128 = bitrv2_128_mips; // reordering step
+ cft1st_128 = cft1st_128_mips; // building-block passes, also called by the two cft*sub hooks
+ cftmdl_128 = cftmdl_128_mips;
+ cftfsub_128 = cftfsub_128_mips; // cft f/b pair
+ cftbsub_128 = cftbsub_128_mips;
+ rftfsub_128 = rftfsub_128_mips; // rft f/b pair
+ rftbsub_128 = rftbsub_128_mips;
+}
diff --git a/third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_rdft_neon.c b/third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_rdft_neon.c
new file mode 100644
index 00000000..43b6a68c
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_rdft_neon.c
@@ -0,0 +1,355 @@
+/*
+ * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+/*
+ * The rdft AEC algorithm, neon version of speed-critical functions.
+ *
+ * Based on the sse2 version.
+ */
+
+
+#include "webrtc/modules/audio_processing/aec/aec_rdft.h"
+
+#include <arm_neon.h>
+
+static const ALIGN16_BEG float ALIGN16_END
+ k_swap_sign[4] = {-1.f, 1.f, -1.f, 1.f}; // Lane signs: negate lanes 0 and 2, keep lanes 1 and 3.
+
+static void cft1st_128_neon(float* a) { // In place over a[0..127], 16 floats per iteration.
+ const float32x4_t vec_swap_sign = vld1q_f32((float32_t*)k_swap_sign);
+ int j, k2;
+
+ for (k2 = 0, j = 0; j < 128; j += 16, k2 += 4) { // k2 indexes the rdft_wk* tables.
+ float32x4_t a00v = vld1q_f32(&a[j + 0]);
+ float32x4_t a04v = vld1q_f32(&a[j + 4]);
+ float32x4_t a08v = vld1q_f32(&a[j + 8]);
+ float32x4_t a12v = vld1q_f32(&a[j + 12]);
+ float32x4_t a01v = vcombine_f32(vget_low_f32(a00v), vget_low_f32(a08v)); // a[j+0], a[j+1], a[j+8], a[j+9]
+ float32x4_t a23v = vcombine_f32(vget_high_f32(a00v), vget_high_f32(a08v)); // a[j+2], a[j+3], a[j+10], a[j+11]
+ float32x4_t a45v = vcombine_f32(vget_low_f32(a04v), vget_low_f32(a12v)); // a[j+4], a[j+5], a[j+12], a[j+13]
+ float32x4_t a67v = vcombine_f32(vget_high_f32(a04v), vget_high_f32(a12v)); // a[j+6], a[j+7], a[j+14], a[j+15]
+ const float32x4_t wk1rv = vld1q_f32(&rdft_wk1r[k2]);
+ const float32x4_t wk1iv = vld1q_f32(&rdft_wk1i[k2]);
+ const float32x4_t wk2rv = vld1q_f32(&rdft_wk2r[k2]);
+ const float32x4_t wk2iv = vld1q_f32(&rdft_wk2i[k2]);
+ const float32x4_t wk3rv = vld1q_f32(&rdft_wk3r[k2]);
+ const float32x4_t wk3iv = vld1q_f32(&rdft_wk3i[k2]);
+ float32x4_t x0v = vaddq_f32(a01v, a23v);
+ const float32x4_t x1v = vsubq_f32(a01v, a23v);
+ const float32x4_t x2v = vaddq_f32(a45v, a67v);
+ const float32x4_t x3v = vsubq_f32(a45v, a67v);
+ const float32x4_t x3w = vrev64q_f32(x3v); // x3v with each adjacent lane pair swapped.
+ float32x4_t x0w;
+ a01v = vaddq_f32(x0v, x2v);
+ x0v = vsubq_f32(x0v, x2v);
+ x0w = vrev64q_f32(x0v);
+ a45v = vmulq_f32(wk2rv, x0v);
+ a45v = vmlaq_f32(a45v, wk2iv, x0w); // a45v = wk2rv * x0v + wk2iv * x0w
+ x0v = vmlaq_f32(x1v, x3w, vec_swap_sign); // x0v = x1v + x3w * swap_sign
+ x0w = vrev64q_f32(x0v);
+ a23v = vmulq_f32(wk1rv, x0v);
+ a23v = vmlaq_f32(a23v, wk1iv, x0w); // a23v = wk1rv * x0v + wk1iv * x0w
+ x0v = vmlsq_f32(x1v, x3w, vec_swap_sign); // x0v = x1v - x3w * swap_sign
+ x0w = vrev64q_f32(x0v);
+ a67v = vmulq_f32(wk3rv, x0v);
+ a67v = vmlaq_f32(a67v, wk3iv, x0w); // a67v = wk3rv * x0v + wk3iv * x0w
+ a00v = vcombine_f32(vget_low_f32(a01v), vget_low_f32(a23v)); // Regroup lanes back into memory order.
+ a04v = vcombine_f32(vget_low_f32(a45v), vget_low_f32(a67v));
+ a08v = vcombine_f32(vget_high_f32(a01v), vget_high_f32(a23v));
+ a12v = vcombine_f32(vget_high_f32(a45v), vget_high_f32(a67v));
+ vst1q_f32(&a[j + 0], a00v);
+ vst1q_f32(&a[j + 4], a04v);
+ vst1q_f32(&a[j + 8], a08v);
+ vst1q_f32(&a[j + 12], a12v);
+ }
+}
+
+static void cftmdl_128_neon(float* a) { // In place: first loop covers a[0..63], second a[64..127].
+ int j;
+ const int l = 8;
+ const float32x4_t vec_swap_sign = vld1q_f32((float32_t*)k_swap_sign);
+ float32x4_t wk1rv = vld1q_f32(cftmdl_wk1r);
+
+ for (j = 0; j < l; j += 2) { // j = 0, 2, 4, 6
+ const float32x2_t a_00 = vld1_f32(&a[j + 0]);
+ const float32x2_t a_08 = vld1_f32(&a[j + 8]);
+ const float32x2_t a_32 = vld1_f32(&a[j + 32]);
+ const float32x2_t a_40 = vld1_f32(&a[j + 40]);
+ const float32x4_t a_00_32 = vcombine_f32(a_00, a_32);
+ const float32x4_t a_08_40 = vcombine_f32(a_08, a_40);
+ const float32x4_t x0r0_0i0_0r1_x0i1 = vaddq_f32(a_00_32, a_08_40);
+ const float32x4_t x1r0_1i0_1r1_x1i1 = vsubq_f32(a_00_32, a_08_40);
+ const float32x2_t a_16 = vld1_f32(&a[j + 16]);
+ const float32x2_t a_24 = vld1_f32(&a[j + 24]);
+ const float32x2_t a_48 = vld1_f32(&a[j + 48]);
+ const float32x2_t a_56 = vld1_f32(&a[j + 56]);
+ const float32x4_t a_16_48 = vcombine_f32(a_16, a_48);
+ const float32x4_t a_24_56 = vcombine_f32(a_24, a_56);
+ const float32x4_t x2r0_2i0_2r1_x2i1 = vaddq_f32(a_16_48, a_24_56);
+ const float32x4_t x3r0_3i0_3r1_x3i1 = vsubq_f32(a_16_48, a_24_56);
+ const float32x4_t xx0 = vaddq_f32(x0r0_0i0_0r1_x0i1, x2r0_2i0_2r1_x2i1);
+ const float32x4_t xx1 = vsubq_f32(x0r0_0i0_0r1_x0i1, x2r0_2i0_2r1_x2i1);
+ const float32x4_t x3i0_3r0_3i1_x3r1 = vrev64q_f32(x3r0_3i0_3r1_x3i1); // Adjacent lane pairs swapped.
+ const float32x4_t x1_x3_add =
+ vmlaq_f32(x1r0_1i0_1r1_x1i1, vec_swap_sign, x3i0_3r0_3i1_x3r1); // x1 + swap_sign * swapped x3
+ const float32x4_t x1_x3_sub =
+ vmlsq_f32(x1r0_1i0_1r1_x1i1, vec_swap_sign, x3i0_3r0_3i1_x3r1); // x1 - swap_sign * swapped x3
+ const float32x2_t yy0_a = vdup_lane_f32(vget_high_f32(x1_x3_add), 0); // Lane 2 of x1_x3_add, duplicated.
+ const float32x2_t yy0_s = vdup_lane_f32(vget_high_f32(x1_x3_sub), 0); // Lane 2 of x1_x3_sub, duplicated.
+ const float32x4_t yy0_as = vcombine_f32(yy0_a, yy0_s);
+ const float32x2_t yy1_a = vdup_lane_f32(vget_high_f32(x1_x3_add), 1); // Lane 3 of x1_x3_add, duplicated.
+ const float32x2_t yy1_s = vdup_lane_f32(vget_high_f32(x1_x3_sub), 1); // Lane 3 of x1_x3_sub, duplicated.
+ const float32x4_t yy1_as = vcombine_f32(yy1_a, yy1_s);
+ const float32x4_t yy0 = vmlaq_f32(yy0_as, vec_swap_sign, yy1_as); // yy0_as + swap_sign * yy1_as
+ const float32x4_t yy4 = vmulq_f32(wk1rv, yy0);
+ const float32x4_t xx1_rev = vrev64q_f32(xx1);
+ const float32x4_t yy4_rev = vrev64q_f32(yy4);
+
+ vst1_f32(&a[j + 0], vget_low_f32(xx0));
+ vst1_f32(&a[j + 32], vget_high_f32(xx0));
+ vst1_f32(&a[j + 16], vget_low_f32(xx1));
+ vst1_f32(&a[j + 48], vget_high_f32(xx1_rev)); // Stores lanes 3, 2 of xx1 (swapped order).
+
+ a[j + 48] = -a[j + 48]; // Negate the first of the two values just stored.
+
+ vst1_f32(&a[j + 8], vget_low_f32(x1_x3_add));
+ vst1_f32(&a[j + 24], vget_low_f32(x1_x3_sub));
+ vst1_f32(&a[j + 40], vget_low_f32(yy4));
+ vst1_f32(&a[j + 56], vget_high_f32(yy4_rev)); // Stores lanes 3, 2 of yy4 (swapped order).
+ }
+
+ { // Second half: same butterflies, then multiplied by the rdft_wk* entries at index 4.
+ const int k = 64;
+ const int k1 = 2;
+ const int k2 = 2 * k1;
+ const float32x4_t wk2rv = vld1q_f32(&rdft_wk2r[k2 + 0]);
+ const float32x4_t wk2iv = vld1q_f32(&rdft_wk2i[k2 + 0]);
+ const float32x4_t wk1iv = vld1q_f32(&rdft_wk1i[k2 + 0]);
+ const float32x4_t wk3rv = vld1q_f32(&rdft_wk3r[k2 + 0]);
+ const float32x4_t wk3iv = vld1q_f32(&rdft_wk3i[k2 + 0]);
+ wk1rv = vld1q_f32(&rdft_wk1r[k2 + 0]);
+ for (j = k; j < l + k; j += 2) { // j = 64, 66, 68, 70
+ const float32x2_t a_00 = vld1_f32(&a[j + 0]);
+ const float32x2_t a_08 = vld1_f32(&a[j + 8]);
+ const float32x2_t a_32 = vld1_f32(&a[j + 32]);
+ const float32x2_t a_40 = vld1_f32(&a[j + 40]);
+ const float32x4_t a_00_32 = vcombine_f32(a_00, a_32);
+ const float32x4_t a_08_40 = vcombine_f32(a_08, a_40);
+ const float32x4_t x0r0_0i0_0r1_x0i1 = vaddq_f32(a_00_32, a_08_40);
+ const float32x4_t x1r0_1i0_1r1_x1i1 = vsubq_f32(a_00_32, a_08_40);
+ const float32x2_t a_16 = vld1_f32(&a[j + 16]);
+ const float32x2_t a_24 = vld1_f32(&a[j + 24]);
+ const float32x2_t a_48 = vld1_f32(&a[j + 48]);
+ const float32x2_t a_56 = vld1_f32(&a[j + 56]);
+ const float32x4_t a_16_48 = vcombine_f32(a_16, a_48);
+ const float32x4_t a_24_56 = vcombine_f32(a_24, a_56);
+ const float32x4_t x2r0_2i0_2r1_x2i1 = vaddq_f32(a_16_48, a_24_56);
+ const float32x4_t x3r0_3i0_3r1_x3i1 = vsubq_f32(a_16_48, a_24_56);
+ const float32x4_t xx = vaddq_f32(x0r0_0i0_0r1_x0i1, x2r0_2i0_2r1_x2i1);
+ const float32x4_t xx1 = vsubq_f32(x0r0_0i0_0r1_x0i1, x2r0_2i0_2r1_x2i1);
+ const float32x4_t x3i0_3r0_3i1_x3r1 = vrev64q_f32(x3r0_3i0_3r1_x3i1);
+ const float32x4_t x1_x3_add =
+ vmlaq_f32(x1r0_1i0_1r1_x1i1, vec_swap_sign, x3i0_3r0_3i1_x3r1);
+ const float32x4_t x1_x3_sub =
+ vmlsq_f32(x1r0_1i0_1r1_x1i1, vec_swap_sign, x3i0_3r0_3i1_x3r1);
+ float32x4_t xx4 = vmulq_f32(wk2rv, xx1);
+ float32x4_t xx12 = vmulq_f32(wk1rv, x1_x3_add);
+ float32x4_t xx22 = vmulq_f32(wk3rv, x1_x3_sub);
+ xx4 = vmlaq_f32(xx4, wk2iv, vrev64q_f32(xx1)); // wk2rv * xx1 + wk2iv * pair-swapped xx1
+ xx12 = vmlaq_f32(xx12, wk1iv, vrev64q_f32(x1_x3_add)); // Same pattern with wk1.
+ xx22 = vmlaq_f32(xx22, wk3iv, vrev64q_f32(x1_x3_sub)); // Same pattern with wk3.
+
+ vst1_f32(&a[j + 0], vget_low_f32(xx));
+ vst1_f32(&a[j + 32], vget_high_f32(xx));
+ vst1_f32(&a[j + 16], vget_low_f32(xx4));
+ vst1_f32(&a[j + 48], vget_high_f32(xx4));
+ vst1_f32(&a[j + 8], vget_low_f32(xx12));
+ vst1_f32(&a[j + 40], vget_high_f32(xx12));
+ vst1_f32(&a[j + 24], vget_low_f32(xx22));
+ vst1_f32(&a[j + 56], vget_high_f32(xx22));
+ }
+ }
+}
+
+__inline static float32x4_t reverse_order_f32x4(float32x4_t in) { // Returns the four lanes of 'in' reversed.
+ // A B C D -> C D A B
+ const float32x4_t rev = vcombine_f32(vget_high_f32(in), vget_low_f32(in));
+ // C D A B -> D C B A
+ return vrev64q_f32(rev);
+}
+
+static void rftfsub_128_neon(float* a) { // In place: pairs a[j2], a[j2 + 1] with a[128 - j2], a[129 - j2].
+ const float* c = rdft_w + 32;
+ int j1, j2;
+ const float32x4_t mm_half = vdupq_n_f32(0.5f);
+
+ // Vectorized code (four at once).
+ // Note: the commented numbers are the indices used in the first iteration.
+ for (j1 = 1, j2 = 2; j2 + 7 < 64; j1 += 4, j2 += 8) { // j2 = 2, 10, ..., 50
+ // Load 'wk'.
+ const float32x4_t c_j1 = vld1q_f32(&c[j1]); // 1, 2, 3, 4,
+ const float32x4_t c_k1 = vld1q_f32(&c[29 - j1]); // 28, 29, 30, 31,
+ const float32x4_t wkrt = vsubq_f32(mm_half, c_k1); // 28, 29, 30, 31,
+ const float32x4_t wkr_ = reverse_order_f32x4(wkrt); // 31, 30, 29, 28,
+ const float32x4_t wki_ = c_j1; // 1, 2, 3, 4,
+ // Load and shuffle 'a'.
+ // 2, 4, 6, 8, 3, 5, 7, 9
+ float32x4x2_t a_j2_p = vld2q_f32(&a[0 + j2]);
+ // 120, 122, 124, 126, 121, 123, 125, 127,
+ const float32x4x2_t k2_0_4 = vld2q_f32(&a[122 - j2]);
+ // 126, 124, 122, 120
+ const float32x4_t a_k2_p0 = reverse_order_f32x4(k2_0_4.val[0]);
+ // 127, 125, 123, 121
+ const float32x4_t a_k2_p1 = reverse_order_f32x4(k2_0_4.val[1]);
+ // Calculate 'x'.
+ const float32x4_t xr_ = vsubq_f32(a_j2_p.val[0], a_k2_p0);
+ // 2-126, 4-124, 6-122, 8-120,
+ const float32x4_t xi_ = vaddq_f32(a_j2_p.val[1], a_k2_p1);
+ // 3+127, 5+125, 7+123, 9+121,
+ // Calculate product into 'y'.
+ // yr = wkr * xr - wki * xi;
+ // yi = wkr * xi + wki * xr;
+ const float32x4_t a_ = vmulq_f32(wkr_, xr_);
+ const float32x4_t b_ = vmulq_f32(wki_, xi_);
+ const float32x4_t c_ = vmulq_f32(wkr_, xi_);
+ const float32x4_t d_ = vmulq_f32(wki_, xr_);
+ const float32x4_t yr_ = vsubq_f32(a_, b_); // 2-126, 4-124, 6-122, 8-120,
+ const float32x4_t yi_ = vaddq_f32(c_, d_); // 3+127, 5+125, 7+123, 9+121,
+ // Update 'a'.
+ // a[j2 + 0] -= yr;
+ // a[j2 + 1] -= yi;
+ // a[k2 + 0] += yr;
+ // a[k2 + 1] -= yi;
+ // 126, 124, 122, 120,
+ const float32x4_t a_k2_p0n = vaddq_f32(a_k2_p0, yr_);
+ // 127, 125, 123, 121,
+ const float32x4_t a_k2_p1n = vsubq_f32(a_k2_p1, yi_);
+ // Shuffle in right order and store.
+ const float32x4_t a_k2_p0nr = vrev64q_f32(a_k2_p0n); // 124, 126, 120, 122
+ const float32x4_t a_k2_p1nr = vrev64q_f32(a_k2_p1n); // 125, 127, 121, 123
+ // 124, 125, 126, 127, 120, 121, 122, 123
+ const float32x4x2_t a_k2_n = vzipq_f32(a_k2_p0nr, a_k2_p1nr);
+ // 2, 4, 6, 8,
+ a_j2_p.val[0] = vsubq_f32(a_j2_p.val[0], yr_);
+ // 3, 5, 7, 9,
+ a_j2_p.val[1] = vsubq_f32(a_j2_p.val[1], yi_);
+ // 2, 3, 4, 5, 6, 7, 8, 9,
+ vst2q_f32(&a[0 + j2], a_j2_p);
+
+ vst1q_f32(&a[122 - j2], a_k2_n.val[1]);
+ vst1q_f32(&a[126 - j2], a_k2_n.val[0]);
+ }
+
+ // Scalar code for the remaining items.
+ for (; j2 < 64; j1 += 1, j2 += 2) { // j2 = 58, 60, 62
+ const int k2 = 128 - j2;
+ const int k1 = 32 - j1;
+ const float wkr = 0.5f - c[k1];
+ const float wki = c[j1];
+ const float xr = a[j2 + 0] - a[k2 + 0];
+ const float xi = a[j2 + 1] + a[k2 + 1];
+ const float yr = wkr * xr - wki * xi;
+ const float yi = wkr * xi + wki * xr;
+ a[j2 + 0] -= yr;
+ a[j2 + 1] -= yi;
+ a[k2 + 0] += yr;
+ a[k2 + 1] -= yi;
+ }
+}
+
+static void rftbsub_128_neon(float* a) { // In place; also negates a[1] on entry and a[65] on exit.
+ const float* c = rdft_w + 32;
+ int j1, j2;
+ const float32x4_t mm_half = vdupq_n_f32(0.5f);
+
+ a[1] = -a[1];
+ // Vectorized code (four at once).
+ // Note: the commented numbers are the indices used in the first iteration.
+ for (j1 = 1, j2 = 2; j2 + 7 < 64; j1 += 4, j2 += 8) { // j2 = 2, 10, ..., 50
+ // Load 'wk'.
+ const float32x4_t c_j1 = vld1q_f32(&c[j1]); // 1, 2, 3, 4,
+ const float32x4_t c_k1 = vld1q_f32(&c[29 - j1]); // 28, 29, 30, 31,
+ const float32x4_t wkrt = vsubq_f32(mm_half, c_k1); // 28, 29, 30, 31,
+ const float32x4_t wkr_ = reverse_order_f32x4(wkrt); // 31, 30, 29, 28,
+ const float32x4_t wki_ = c_j1; // 1, 2, 3, 4,
+ // Load and shuffle 'a'.
+ // 2, 4, 6, 8, 3, 5, 7, 9
+ float32x4x2_t a_j2_p = vld2q_f32(&a[0 + j2]);
+ // 120, 122, 124, 126, 121, 123, 125, 127,
+ const float32x4x2_t k2_0_4 = vld2q_f32(&a[122 - j2]);
+ // 126, 124, 122, 120
+ const float32x4_t a_k2_p0 = reverse_order_f32x4(k2_0_4.val[0]);
+ // 127, 125, 123, 121
+ const float32x4_t a_k2_p1 = reverse_order_f32x4(k2_0_4.val[1]);
+ // Calculate 'x'.
+ const float32x4_t xr_ = vsubq_f32(a_j2_p.val[0], a_k2_p0);
+ // 2-126, 4-124, 6-122, 8-120,
+ const float32x4_t xi_ = vaddq_f32(a_j2_p.val[1], a_k2_p1);
+ // 3+127, 5+125, 7+123, 9+121,
+ // Calculate product into 'y'.
+ // yr = wkr * xr + wki * xi;
+ // yi = wkr * xi - wki * xr;
+ const float32x4_t a_ = vmulq_f32(wkr_, xr_);
+ const float32x4_t b_ = vmulq_f32(wki_, xi_);
+ const float32x4_t c_ = vmulq_f32(wkr_, xi_);
+ const float32x4_t d_ = vmulq_f32(wki_, xr_);
+ const float32x4_t yr_ = vaddq_f32(a_, b_); // 2-126, 4-124, 6-122, 8-120,
+ const float32x4_t yi_ = vsubq_f32(c_, d_); // 3+127, 5+125, 7+123, 9+121,
+ // Update 'a'.
+ // a[j2 + 0] = a[j2 + 0] - yr;
+ // a[j2 + 1] = yi - a[j2 + 1];
+ // a[k2 + 0] = yr + a[k2 + 0];
+ // a[k2 + 1] = yi - a[k2 + 1];
+ // 126, 124, 122, 120,
+ const float32x4_t a_k2_p0n = vaddq_f32(a_k2_p0, yr_);
+ // 127, 125, 123, 121,
+ const float32x4_t a_k2_p1n = vsubq_f32(yi_, a_k2_p1);
+ // Shuffle in right order and store.
+ // Pair swap gives 124, 126, 120, 122 and 125, 127, 121, 123.
+ const float32x4_t a_k2_p0nr = vrev64q_f32(a_k2_p0n);
+ const float32x4_t a_k2_p1nr = vrev64q_f32(a_k2_p1n);
+ // 124, 125, 126, 127, 120, 121, 122, 123
+ const float32x4x2_t a_k2_n = vzipq_f32(a_k2_p0nr, a_k2_p1nr);
+ // 2, 4, 6, 8,
+ a_j2_p.val[0] = vsubq_f32(a_j2_p.val[0], yr_);
+ // 3, 5, 7, 9,
+ a_j2_p.val[1] = vsubq_f32(yi_, a_j2_p.val[1]);
+ // 2, 3, 4, 5, 6, 7, 8, 9,
+ vst2q_f32(&a[0 + j2], a_j2_p);
+
+ vst1q_f32(&a[122 - j2], a_k2_n.val[1]);
+ vst1q_f32(&a[126 - j2], a_k2_n.val[0]);
+ }
+
+ // Scalar code for the remaining items.
+ for (; j2 < 64; j1 += 1, j2 += 2) { // j2 = 58, 60, 62
+ const int k2 = 128 - j2;
+ const int k1 = 32 - j1;
+ const float wkr = 0.5f - c[k1];
+ const float wki = c[j1];
+ const float xr = a[j2 + 0] - a[k2 + 0];
+ const float xi = a[j2 + 1] + a[k2 + 1];
+ const float yr = wkr * xr + wki * xi;
+ const float yi = wkr * xi - wki * xr;
+ a[j2 + 0] = a[j2 + 0] - yr;
+ a[j2 + 1] = yi - a[j2 + 1];
+ a[k2 + 0] = yr + a[k2 + 0];
+ a[k2 + 1] = yi - a[k2 + 1];
+ }
+ a[65] = -a[65];
+}
+
+void aec_rdft_init_neon(void) { // Selects the NEON routines for four of the rdft kernels.
+ cft1st_128 = cft1st_128_neon;
+ cftmdl_128 = cftmdl_128_neon;
+ rftfsub_128 = rftfsub_128_neon;
+ rftbsub_128 = rftbsub_128_neon;
+}
+
diff --git a/third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_rdft_sse2.c b/third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_rdft_sse2.c
new file mode 100644
index 00000000..b4e453ff
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_rdft_sse2.c
@@ -0,0 +1,427 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/modules/audio_processing/aec/aec_rdft.h"
+
+#include <emmintrin.h>
+
+static const ALIGN16_BEG float ALIGN16_END
+ k_swap_sign[4] = {-1.f, 1.f, -1.f, 1.f}; // Lane signs: negate lanes 0 and 2, keep lanes 1 and 3.
+
+static void cft1st_128_SSE2(float* a) { // In place over a[0..127], 16 floats per iteration.
+ const __m128 mm_swap_sign = _mm_load_ps(k_swap_sign);
+ int j, k2;
+
+ for (k2 = 0, j = 0; j < 128; j += 16, k2 += 4) { // k2 indexes the rdft_wk* tables.
+ __m128 a00v = _mm_loadu_ps(&a[j + 0]);
+ __m128 a04v = _mm_loadu_ps(&a[j + 4]);
+ __m128 a08v = _mm_loadu_ps(&a[j + 8]);
+ __m128 a12v = _mm_loadu_ps(&a[j + 12]);
+ __m128 a01v = _mm_shuffle_ps(a00v, a08v, _MM_SHUFFLE(1, 0, 1, 0)); // a[j+0], a[j+1], a[j+8], a[j+9]
+ __m128 a23v = _mm_shuffle_ps(a00v, a08v, _MM_SHUFFLE(3, 2, 3, 2)); // a[j+2], a[j+3], a[j+10], a[j+11]
+ __m128 a45v = _mm_shuffle_ps(a04v, a12v, _MM_SHUFFLE(1, 0, 1, 0)); // a[j+4], a[j+5], a[j+12], a[j+13]
+ __m128 a67v = _mm_shuffle_ps(a04v, a12v, _MM_SHUFFLE(3, 2, 3, 2)); // a[j+6], a[j+7], a[j+14], a[j+15]
+
+ const __m128 wk1rv = _mm_load_ps(&rdft_wk1r[k2]);
+ const __m128 wk1iv = _mm_load_ps(&rdft_wk1i[k2]);
+ const __m128 wk2rv = _mm_load_ps(&rdft_wk2r[k2]);
+ const __m128 wk2iv = _mm_load_ps(&rdft_wk2i[k2]);
+ const __m128 wk3rv = _mm_load_ps(&rdft_wk3r[k2]);
+ const __m128 wk3iv = _mm_load_ps(&rdft_wk3i[k2]);
+ __m128 x0v = _mm_add_ps(a01v, a23v);
+ const __m128 x1v = _mm_sub_ps(a01v, a23v);
+ const __m128 x2v = _mm_add_ps(a45v, a67v);
+ const __m128 x3v = _mm_sub_ps(a45v, a67v);
+ __m128 x0w;
+ a01v = _mm_add_ps(x0v, x2v);
+ x0v = _mm_sub_ps(x0v, x2v);
+ x0w = _mm_shuffle_ps(x0v, x0v, _MM_SHUFFLE(2, 3, 0, 1)); // x0v with adjacent lane pairs swapped.
+ {
+ const __m128 a45_0v = _mm_mul_ps(wk2rv, x0v);
+ const __m128 a45_1v = _mm_mul_ps(wk2iv, x0w);
+ a45v = _mm_add_ps(a45_0v, a45_1v); // wk2rv * x0v + wk2iv * x0w
+ }
+ {
+ __m128 a23_0v, a23_1v;
+ const __m128 x3w = _mm_shuffle_ps(x3v, x3v, _MM_SHUFFLE(2, 3, 0, 1));
+ const __m128 x3s = _mm_mul_ps(mm_swap_sign, x3w); // Pair-swapped x3v with lanes 0 and 2 negated.
+ x0v = _mm_add_ps(x1v, x3s);
+ x0w = _mm_shuffle_ps(x0v, x0v, _MM_SHUFFLE(2, 3, 0, 1));
+ a23_0v = _mm_mul_ps(wk1rv, x0v);
+ a23_1v = _mm_mul_ps(wk1iv, x0w);
+ a23v = _mm_add_ps(a23_0v, a23_1v); // wk1rv * x0v + wk1iv * x0w
+
+ x0v = _mm_sub_ps(x1v, x3s); // x0v and x0w are consumed by the next block.
+ x0w = _mm_shuffle_ps(x0v, x0v, _MM_SHUFFLE(2, 3, 0, 1));
+ }
+ {
+ const __m128 a67_0v = _mm_mul_ps(wk3rv, x0v);
+ const __m128 a67_1v = _mm_mul_ps(wk3iv, x0w);
+ a67v = _mm_add_ps(a67_0v, a67_1v); // wk3rv * x0v + wk3iv * x0w
+ }
+
+ a00v = _mm_shuffle_ps(a01v, a23v, _MM_SHUFFLE(1, 0, 1, 0)); // Regroup lanes back into memory order.
+ a04v = _mm_shuffle_ps(a45v, a67v, _MM_SHUFFLE(1, 0, 1, 0));
+ a08v = _mm_shuffle_ps(a01v, a23v, _MM_SHUFFLE(3, 2, 3, 2));
+ a12v = _mm_shuffle_ps(a45v, a67v, _MM_SHUFFLE(3, 2, 3, 2));
+ _mm_storeu_ps(&a[j + 0], a00v);
+ _mm_storeu_ps(&a[j + 4], a04v);
+ _mm_storeu_ps(&a[j + 8], a08v);
+ _mm_storeu_ps(&a[j + 12], a12v);
+ }
+}
+
+static void cftmdl_128_SSE2(float* a) { // In place: first loop covers a[0..63], second a[64..127].
+ const int l = 8;
+ const __m128 mm_swap_sign = _mm_load_ps(k_swap_sign);
+ int j0;
+
+ __m128 wk1rv = _mm_load_ps(cftmdl_wk1r);
+ for (j0 = 0; j0 < l; j0 += 2) { // j0 = 0, 2, 4, 6
+ const __m128i a_00 = _mm_loadl_epi64((__m128i*)&a[j0 + 0]); // 64-bit loads: two floats each.
+ const __m128i a_08 = _mm_loadl_epi64((__m128i*)&a[j0 + 8]);
+ const __m128i a_32 = _mm_loadl_epi64((__m128i*)&a[j0 + 32]);
+ const __m128i a_40 = _mm_loadl_epi64((__m128i*)&a[j0 + 40]);
+ const __m128 a_00_32 = _mm_shuffle_ps(_mm_castsi128_ps(a_00),
+ _mm_castsi128_ps(a_32),
+ _MM_SHUFFLE(1, 0, 1, 0));
+ const __m128 a_08_40 = _mm_shuffle_ps(_mm_castsi128_ps(a_08),
+ _mm_castsi128_ps(a_40),
+ _MM_SHUFFLE(1, 0, 1, 0));
+ __m128 x0r0_0i0_0r1_x0i1 = _mm_add_ps(a_00_32, a_08_40);
+ const __m128 x1r0_1i0_1r1_x1i1 = _mm_sub_ps(a_00_32, a_08_40);
+
+ const __m128i a_16 = _mm_loadl_epi64((__m128i*)&a[j0 + 16]);
+ const __m128i a_24 = _mm_loadl_epi64((__m128i*)&a[j0 + 24]);
+ const __m128i a_48 = _mm_loadl_epi64((__m128i*)&a[j0 + 48]);
+ const __m128i a_56 = _mm_loadl_epi64((__m128i*)&a[j0 + 56]);
+ const __m128 a_16_48 = _mm_shuffle_ps(_mm_castsi128_ps(a_16),
+ _mm_castsi128_ps(a_48),
+ _MM_SHUFFLE(1, 0, 1, 0));
+ const __m128 a_24_56 = _mm_shuffle_ps(_mm_castsi128_ps(a_24),
+ _mm_castsi128_ps(a_56),
+ _MM_SHUFFLE(1, 0, 1, 0));
+ const __m128 x2r0_2i0_2r1_x2i1 = _mm_add_ps(a_16_48, a_24_56);
+ const __m128 x3r0_3i0_3r1_x3i1 = _mm_sub_ps(a_16_48, a_24_56);
+
+ const __m128 xx0 = _mm_add_ps(x0r0_0i0_0r1_x0i1, x2r0_2i0_2r1_x2i1);
+ const __m128 xx1 = _mm_sub_ps(x0r0_0i0_0r1_x0i1, x2r0_2i0_2r1_x2i1);
+
+ const __m128 x3i0_3r0_3i1_x3r1 = _mm_castsi128_ps(_mm_shuffle_epi32(
+ _mm_castps_si128(x3r0_3i0_3r1_x3i1), _MM_SHUFFLE(2, 3, 0, 1))); // Adjacent lane pairs swapped.
+ const __m128 x3_swapped = _mm_mul_ps(mm_swap_sign, x3i0_3r0_3i1_x3r1);
+ const __m128 x1_x3_add = _mm_add_ps(x1r0_1i0_1r1_x1i1, x3_swapped);
+ const __m128 x1_x3_sub = _mm_sub_ps(x1r0_1i0_1r1_x1i1, x3_swapped);
+
+ const __m128 yy0 =
+ _mm_shuffle_ps(x1_x3_add, x1_x3_sub, _MM_SHUFFLE(2, 2, 2, 2)); // add[2], add[2], sub[2], sub[2]
+ const __m128 yy1 =
+ _mm_shuffle_ps(x1_x3_add, x1_x3_sub, _MM_SHUFFLE(3, 3, 3, 3)); // add[3], add[3], sub[3], sub[3]
+ const __m128 yy2 = _mm_mul_ps(mm_swap_sign, yy1);
+ const __m128 yy3 = _mm_add_ps(yy0, yy2);
+ const __m128 yy4 = _mm_mul_ps(wk1rv, yy3);
+
+ _mm_storel_epi64((__m128i*)&a[j0 + 0], _mm_castps_si128(xx0));
+ _mm_storel_epi64(
+ (__m128i*)&a[j0 + 32],
+ _mm_shuffle_epi32(_mm_castps_si128(xx0), _MM_SHUFFLE(3, 2, 3, 2))); // Stores lanes 2, 3 of xx0.
+
+ _mm_storel_epi64((__m128i*)&a[j0 + 16], _mm_castps_si128(xx1));
+ _mm_storel_epi64(
+ (__m128i*)&a[j0 + 48],
+ _mm_shuffle_epi32(_mm_castps_si128(xx1), _MM_SHUFFLE(2, 3, 2, 3))); // Stores lanes 3, 2 of xx1.
+ a[j0 + 48] = -a[j0 + 48]; // Negate the first of the two values just stored.
+
+ _mm_storel_epi64((__m128i*)&a[j0 + 8], _mm_castps_si128(x1_x3_add));
+ _mm_storel_epi64((__m128i*)&a[j0 + 24], _mm_castps_si128(x1_x3_sub));
+
+ _mm_storel_epi64((__m128i*)&a[j0 + 40], _mm_castps_si128(yy4));
+ _mm_storel_epi64(
+ (__m128i*)&a[j0 + 56],
+ _mm_shuffle_epi32(_mm_castps_si128(yy4), _MM_SHUFFLE(2, 3, 2, 3))); // Stores lanes 3, 2 of yy4.
+ }
+
+ { // Second half: same butterflies, then multiplied by the rdft_wk* entries at index 4.
+ int k = 64;
+ int k1 = 2;
+ int k2 = 2 * k1;
+ const __m128 wk2rv = _mm_load_ps(&rdft_wk2r[k2 + 0]);
+ const __m128 wk2iv = _mm_load_ps(&rdft_wk2i[k2 + 0]);
+ const __m128 wk1iv = _mm_load_ps(&rdft_wk1i[k2 + 0]);
+ const __m128 wk3rv = _mm_load_ps(&rdft_wk3r[k2 + 0]);
+ const __m128 wk3iv = _mm_load_ps(&rdft_wk3i[k2 + 0]);
+ wk1rv = _mm_load_ps(&rdft_wk1r[k2 + 0]);
+ for (j0 = k; j0 < l + k; j0 += 2) { // j0 = 64, 66, 68, 70
+ const __m128i a_00 = _mm_loadl_epi64((__m128i*)&a[j0 + 0]);
+ const __m128i a_08 = _mm_loadl_epi64((__m128i*)&a[j0 + 8]);
+ const __m128i a_32 = _mm_loadl_epi64((__m128i*)&a[j0 + 32]);
+ const __m128i a_40 = _mm_loadl_epi64((__m128i*)&a[j0 + 40]);
+ const __m128 a_00_32 = _mm_shuffle_ps(_mm_castsi128_ps(a_00),
+ _mm_castsi128_ps(a_32),
+ _MM_SHUFFLE(1, 0, 1, 0));
+ const __m128 a_08_40 = _mm_shuffle_ps(_mm_castsi128_ps(a_08),
+ _mm_castsi128_ps(a_40),
+ _MM_SHUFFLE(1, 0, 1, 0));
+ __m128 x0r0_0i0_0r1_x0i1 = _mm_add_ps(a_00_32, a_08_40);
+ const __m128 x1r0_1i0_1r1_x1i1 = _mm_sub_ps(a_00_32, a_08_40);
+
+ const __m128i a_16 = _mm_loadl_epi64((__m128i*)&a[j0 + 16]);
+ const __m128i a_24 = _mm_loadl_epi64((__m128i*)&a[j0 + 24]);
+ const __m128i a_48 = _mm_loadl_epi64((__m128i*)&a[j0 + 48]);
+ const __m128i a_56 = _mm_loadl_epi64((__m128i*)&a[j0 + 56]);
+ const __m128 a_16_48 = _mm_shuffle_ps(_mm_castsi128_ps(a_16),
+ _mm_castsi128_ps(a_48),
+ _MM_SHUFFLE(1, 0, 1, 0));
+ const __m128 a_24_56 = _mm_shuffle_ps(_mm_castsi128_ps(a_24),
+ _mm_castsi128_ps(a_56),
+ _MM_SHUFFLE(1, 0, 1, 0));
+ const __m128 x2r0_2i0_2r1_x2i1 = _mm_add_ps(a_16_48, a_24_56);
+ const __m128 x3r0_3i0_3r1_x3i1 = _mm_sub_ps(a_16_48, a_24_56);
+
+ const __m128 xx = _mm_add_ps(x0r0_0i0_0r1_x0i1, x2r0_2i0_2r1_x2i1);
+ const __m128 xx1 = _mm_sub_ps(x0r0_0i0_0r1_x0i1, x2r0_2i0_2r1_x2i1);
+ const __m128 xx2 = _mm_mul_ps(xx1, wk2rv);
+ const __m128 xx3 =
+ _mm_mul_ps(wk2iv,
+ _mm_castsi128_ps(_mm_shuffle_epi32(
+ _mm_castps_si128(xx1), _MM_SHUFFLE(2, 3, 0, 1))));
+ const __m128 xx4 = _mm_add_ps(xx2, xx3); // xx1 * wk2rv + wk2iv * pair-swapped xx1
+
+ const __m128 x3i0_3r0_3i1_x3r1 = _mm_castsi128_ps(_mm_shuffle_epi32(
+ _mm_castps_si128(x3r0_3i0_3r1_x3i1), _MM_SHUFFLE(2, 3, 0, 1)));
+ const __m128 x3_swapped = _mm_mul_ps(mm_swap_sign, x3i0_3r0_3i1_x3r1);
+ const __m128 x1_x3_add = _mm_add_ps(x1r0_1i0_1r1_x1i1, x3_swapped);
+ const __m128 x1_x3_sub = _mm_sub_ps(x1r0_1i0_1r1_x1i1, x3_swapped);
+
+ const __m128 xx10 = _mm_mul_ps(x1_x3_add, wk1rv);
+ const __m128 xx11 = _mm_mul_ps(
+ wk1iv,
+ _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(x1_x3_add),
+ _MM_SHUFFLE(2, 3, 0, 1))));
+ const __m128 xx12 = _mm_add_ps(xx10, xx11); // Same pattern with wk1.
+
+ const __m128 xx20 = _mm_mul_ps(x1_x3_sub, wk3rv);
+ const __m128 xx21 = _mm_mul_ps(
+ wk3iv,
+ _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(x1_x3_sub),
+ _MM_SHUFFLE(2, 3, 0, 1))));
+ const __m128 xx22 = _mm_add_ps(xx20, xx21); // Same pattern with wk3.
+
+ _mm_storel_epi64((__m128i*)&a[j0 + 0], _mm_castps_si128(xx));
+ _mm_storel_epi64(
+ (__m128i*)&a[j0 + 32],
+ _mm_shuffle_epi32(_mm_castps_si128(xx), _MM_SHUFFLE(3, 2, 3, 2)));
+
+ _mm_storel_epi64((__m128i*)&a[j0 + 16], _mm_castps_si128(xx4));
+ _mm_storel_epi64(
+ (__m128i*)&a[j0 + 48],
+ _mm_shuffle_epi32(_mm_castps_si128(xx4), _MM_SHUFFLE(3, 2, 3, 2)));
+
+ _mm_storel_epi64((__m128i*)&a[j0 + 8], _mm_castps_si128(xx12));
+ _mm_storel_epi64(
+ (__m128i*)&a[j0 + 40],
+ _mm_shuffle_epi32(_mm_castps_si128(xx12), _MM_SHUFFLE(3, 2, 3, 2)));
+
+ _mm_storel_epi64((__m128i*)&a[j0 + 24], _mm_castps_si128(xx22));
+ _mm_storel_epi64(
+ (__m128i*)&a[j0 + 56],
+ _mm_shuffle_epi32(_mm_castps_si128(xx22), _MM_SHUFFLE(3, 2, 3, 2)));
+ }
+ }
+}
+
+static void rftfsub_128_SSE2(float* a) { // In place: pairs a[j2], a[j2 + 1] with a[128 - j2], a[129 - j2].
+ const float* c = rdft_w + 32;
+ int j1, j2, k1, k2;
+ float wkr, wki, xr, xi, yr, yi;
+
+ static const ALIGN16_BEG float ALIGN16_END
+ k_half[4] = {0.5f, 0.5f, 0.5f, 0.5f};
+ const __m128 mm_half = _mm_load_ps(k_half);
+
+ // Vectorized code (four at once).
+ // Note: the commented numbers are the indices used in the first iteration.
+ for (j1 = 1, j2 = 2; j2 + 7 < 64; j1 += 4, j2 += 8) { // j2 = 2, 10, ..., 50
+ // Load 'wk'.
+ const __m128 c_j1 = _mm_loadu_ps(&c[j1]); // 1, 2, 3, 4,
+ const __m128 c_k1 = _mm_loadu_ps(&c[29 - j1]); // 28, 29, 30, 31,
+ const __m128 wkrt = _mm_sub_ps(mm_half, c_k1); // 28, 29, 30, 31,
+ const __m128 wkr_ =
+ _mm_shuffle_ps(wkrt, wkrt, _MM_SHUFFLE(0, 1, 2, 3)); // 31, 30, 29, 28,
+ const __m128 wki_ = c_j1; // 1, 2, 3, 4,
+ // Load and shuffle 'a'.
+ const __m128 a_j2_0 = _mm_loadu_ps(&a[0 + j2]); // 2, 3, 4, 5,
+ const __m128 a_j2_4 = _mm_loadu_ps(&a[4 + j2]); // 6, 7, 8, 9,
+ const __m128 a_k2_0 = _mm_loadu_ps(&a[122 - j2]); // 120, 121, 122, 123,
+ const __m128 a_k2_4 = _mm_loadu_ps(&a[126 - j2]); // 124, 125, 126, 127,
+ const __m128 a_j2_p0 = _mm_shuffle_ps(
+ a_j2_0, a_j2_4, _MM_SHUFFLE(2, 0, 2, 0)); // 2, 4, 6, 8,
+ const __m128 a_j2_p1 = _mm_shuffle_ps(
+ a_j2_0, a_j2_4, _MM_SHUFFLE(3, 1, 3, 1)); // 3, 5, 7, 9,
+ const __m128 a_k2_p0 = _mm_shuffle_ps(
+ a_k2_4, a_k2_0, _MM_SHUFFLE(0, 2, 0, 2)); // 126, 124, 122, 120,
+ const __m128 a_k2_p1 = _mm_shuffle_ps(
+ a_k2_4, a_k2_0, _MM_SHUFFLE(1, 3, 1, 3)); // 127, 125, 123, 121,
+ // Calculate 'x'.
+ const __m128 xr_ = _mm_sub_ps(a_j2_p0, a_k2_p0);
+ // 2-126, 4-124, 6-122, 8-120,
+ const __m128 xi_ = _mm_add_ps(a_j2_p1, a_k2_p1);
+ // 3+127, 5+125, 7+123, 9+121,
+ // Calculate product into 'y'.
+ // yr = wkr * xr - wki * xi;
+ // yi = wkr * xi + wki * xr;
+ const __m128 a_ = _mm_mul_ps(wkr_, xr_);
+ const __m128 b_ = _mm_mul_ps(wki_, xi_);
+ const __m128 c_ = _mm_mul_ps(wkr_, xi_);
+ const __m128 d_ = _mm_mul_ps(wki_, xr_);
+ const __m128 yr_ = _mm_sub_ps(a_, b_); // 2-126, 4-124, 6-122, 8-120,
+ const __m128 yi_ = _mm_add_ps(c_, d_); // 3+127, 5+125, 7+123, 9+121,
+ // Update 'a'.
+ // a[j2 + 0] -= yr;
+ // a[j2 + 1] -= yi;
+ // a[k2 + 0] += yr;
+ // a[k2 + 1] -= yi;
+ const __m128 a_j2_p0n = _mm_sub_ps(a_j2_p0, yr_); // 2, 4, 6, 8,
+ const __m128 a_j2_p1n = _mm_sub_ps(a_j2_p1, yi_); // 3, 5, 7, 9,
+ const __m128 a_k2_p0n = _mm_add_ps(a_k2_p0, yr_); // 126, 124, 122, 120,
+ const __m128 a_k2_p1n = _mm_sub_ps(a_k2_p1, yi_); // 127, 125, 123, 121,
+ // Shuffle in right order and store.
+ const __m128 a_j2_0n = _mm_unpacklo_ps(a_j2_p0n, a_j2_p1n);
+ // 2, 3, 4, 5,
+ const __m128 a_j2_4n = _mm_unpackhi_ps(a_j2_p0n, a_j2_p1n);
+ // 6, 7, 8, 9,
+ const __m128 a_k2_0nt = _mm_unpackhi_ps(a_k2_p0n, a_k2_p1n);
+ // 122, 123, 120, 121,
+ const __m128 a_k2_4nt = _mm_unpacklo_ps(a_k2_p0n, a_k2_p1n);
+ // 126, 127, 124, 125,
+ const __m128 a_k2_0n = _mm_shuffle_ps(
+ a_k2_0nt, a_k2_0nt, _MM_SHUFFLE(1, 0, 3, 2)); // 120, 121, 122, 123,
+ const __m128 a_k2_4n = _mm_shuffle_ps(
+ a_k2_4nt, a_k2_4nt, _MM_SHUFFLE(1, 0, 3, 2)); // 124, 125, 126, 127,
+ _mm_storeu_ps(&a[0 + j2], a_j2_0n);
+ _mm_storeu_ps(&a[4 + j2], a_j2_4n);
+ _mm_storeu_ps(&a[122 - j2], a_k2_0n);
+ _mm_storeu_ps(&a[126 - j2], a_k2_4n);
+ }
+ // Scalar code for the remaining items.
+ for (; j2 < 64; j1 += 1, j2 += 2) { // j2 = 58, 60, 62
+ k2 = 128 - j2;
+ k1 = 32 - j1;
+ wkr = 0.5f - c[k1];
+ wki = c[j1];
+ xr = a[j2 + 0] - a[k2 + 0];
+ xi = a[j2 + 1] + a[k2 + 1];
+ yr = wkr * xr - wki * xi;
+ yi = wkr * xi + wki * xr;
+ a[j2 + 0] -= yr;
+ a[j2 + 1] -= yi;
+ a[k2 + 0] += yr;
+ a[k2 + 1] -= yi;
+ }
+}
+
+static void rftbsub_128_SSE2(float* a) { // In place; also negates a[1] on entry and a[65] on exit.
+ const float* c = rdft_w + 32;
+ int j1, j2, k1, k2;
+ float wkr, wki, xr, xi, yr, yi;
+
+ static const ALIGN16_BEG float ALIGN16_END
+ k_half[4] = {0.5f, 0.5f, 0.5f, 0.5f};
+ const __m128 mm_half = _mm_load_ps(k_half);
+
+ a[1] = -a[1];
+ // Vectorized code (four at once).
+ // Note: the commented numbers are the indices used in the first iteration.
+ for (j1 = 1, j2 = 2; j2 + 7 < 64; j1 += 4, j2 += 8) { // j2 = 2, 10, ..., 50
+ // Load 'wk'.
+ const __m128 c_j1 = _mm_loadu_ps(&c[j1]); // 1, 2, 3, 4,
+ const __m128 c_k1 = _mm_loadu_ps(&c[29 - j1]); // 28, 29, 30, 31,
+ const __m128 wkrt = _mm_sub_ps(mm_half, c_k1); // 28, 29, 30, 31,
+ const __m128 wkr_ =
+ _mm_shuffle_ps(wkrt, wkrt, _MM_SHUFFLE(0, 1, 2, 3)); // 31, 30, 29, 28,
+ const __m128 wki_ = c_j1; // 1, 2, 3, 4,
+ // Load and shuffle 'a'.
+ const __m128 a_j2_0 = _mm_loadu_ps(&a[0 + j2]); // 2, 3, 4, 5,
+ const __m128 a_j2_4 = _mm_loadu_ps(&a[4 + j2]); // 6, 7, 8, 9,
+ const __m128 a_k2_0 = _mm_loadu_ps(&a[122 - j2]); // 120, 121, 122, 123,
+ const __m128 a_k2_4 = _mm_loadu_ps(&a[126 - j2]); // 124, 125, 126, 127,
+ const __m128 a_j2_p0 = _mm_shuffle_ps(
+ a_j2_0, a_j2_4, _MM_SHUFFLE(2, 0, 2, 0)); // 2, 4, 6, 8,
+ const __m128 a_j2_p1 = _mm_shuffle_ps(
+ a_j2_0, a_j2_4, _MM_SHUFFLE(3, 1, 3, 1)); // 3, 5, 7, 9,
+ const __m128 a_k2_p0 = _mm_shuffle_ps(
+ a_k2_4, a_k2_0, _MM_SHUFFLE(0, 2, 0, 2)); // 126, 124, 122, 120,
+ const __m128 a_k2_p1 = _mm_shuffle_ps(
+ a_k2_4, a_k2_0, _MM_SHUFFLE(1, 3, 1, 3)); // 127, 125, 123, 121,
+ // Calculate 'x'.
+ const __m128 xr_ = _mm_sub_ps(a_j2_p0, a_k2_p0);
+ // 2-126, 4-124, 6-122, 8-120,
+ const __m128 xi_ = _mm_add_ps(a_j2_p1, a_k2_p1);
+ // 3+127, 5+125, 7+123, 9+121,
+ // Calculate product into 'y'.
+ // yr = wkr * xr + wki * xi;
+ // yi = wkr * xi - wki * xr;
+ const __m128 a_ = _mm_mul_ps(wkr_, xr_);
+ const __m128 b_ = _mm_mul_ps(wki_, xi_);
+ const __m128 c_ = _mm_mul_ps(wkr_, xi_);
+ const __m128 d_ = _mm_mul_ps(wki_, xr_);
+ const __m128 yr_ = _mm_add_ps(a_, b_); // 2-126, 4-124, 6-122, 8-120,
+ const __m128 yi_ = _mm_sub_ps(c_, d_); // 3+127, 5+125, 7+123, 9+121,
+ // Update 'a'.
+ // a[j2 + 0] = a[j2 + 0] - yr;
+ // a[j2 + 1] = yi - a[j2 + 1];
+ // a[k2 + 0] = yr + a[k2 + 0];
+ // a[k2 + 1] = yi - a[k2 + 1];
+ const __m128 a_j2_p0n = _mm_sub_ps(a_j2_p0, yr_); // 2, 4, 6, 8,
+ const __m128 a_j2_p1n = _mm_sub_ps(yi_, a_j2_p1); // 3, 5, 7, 9,
+ const __m128 a_k2_p0n = _mm_add_ps(a_k2_p0, yr_); // 126, 124, 122, 120,
+ const __m128 a_k2_p1n = _mm_sub_ps(yi_, a_k2_p1); // 127, 125, 123, 121,
+ // Shuffle in right order and store.
+ const __m128 a_j2_0n = _mm_unpacklo_ps(a_j2_p0n, a_j2_p1n);
+ // 2, 3, 4, 5,
+ const __m128 a_j2_4n = _mm_unpackhi_ps(a_j2_p0n, a_j2_p1n);
+ // 6, 7, 8, 9,
+ const __m128 a_k2_0nt = _mm_unpackhi_ps(a_k2_p0n, a_k2_p1n);
+ // 122, 123, 120, 121,
+ const __m128 a_k2_4nt = _mm_unpacklo_ps(a_k2_p0n, a_k2_p1n);
+ // 126, 127, 124, 125,
+ const __m128 a_k2_0n = _mm_shuffle_ps(
+ a_k2_0nt, a_k2_0nt, _MM_SHUFFLE(1, 0, 3, 2)); // 120, 121, 122, 123,
+ const __m128 a_k2_4n = _mm_shuffle_ps(
+ a_k2_4nt, a_k2_4nt, _MM_SHUFFLE(1, 0, 3, 2)); // 124, 125, 126, 127,
+ _mm_storeu_ps(&a[0 + j2], a_j2_0n);
+ _mm_storeu_ps(&a[4 + j2], a_j2_4n);
+ _mm_storeu_ps(&a[122 - j2], a_k2_0n);
+ _mm_storeu_ps(&a[126 - j2], a_k2_4n);
+ }
+ // Scalar code for the remaining items.
+ for (; j2 < 64; j1 += 1, j2 += 2) { // j2 = 58, 60, 62
+ k2 = 128 - j2;
+ k1 = 32 - j1;
+ wkr = 0.5f - c[k1];
+ wki = c[j1];
+ xr = a[j2 + 0] - a[k2 + 0];
+ xi = a[j2 + 1] + a[k2 + 1];
+ yr = wkr * xr + wki * xi;
+ yi = wkr * xi - wki * xr;
+ a[j2 + 0] = a[j2 + 0] - yr;
+ a[j2 + 1] = yi - a[j2 + 1];
+ a[k2 + 0] = yr + a[k2 + 0];
+ a[k2 + 1] = yi - a[k2 + 1];
+ }
+ a[65] = -a[65];
+}
+
+void aec_rdft_init_sse2(void) { // Selects the SSE2 routines for four of the rdft kernels.
+ cft1st_128 = cft1st_128_SSE2;
+ cftmdl_128 = cftmdl_128_SSE2;
+ rftfsub_128 = rftfsub_128_SSE2;
+ rftbsub_128 = rftbsub_128_SSE2;
+}
diff --git a/third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_resampler.c b/third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_resampler.c
new file mode 100644
index 00000000..99c39efa
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_resampler.c
@@ -0,0 +1,209 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+/* Resamples a signal to an arbitrary rate. Used by the AEC to compensate for
+ * clock skew by resampling the farend signal.
+ */
+
+#include "webrtc/modules/audio_processing/aec/aec_resampler.h"
+
+#include <assert.h>
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "webrtc/modules/audio_processing/aec/aec_core.h"
+
+// Number of raw skew measurements WebRtcAec_GetSkew() stores in the skewData
+// array before it runs EstimateSkew() on them.
+enum {
+ kEstimateLengthFrames = 400
+};
+
+// State of one linear resampler / skew estimator instance.
+typedef struct {
+ // Sample history. WebRtcAec_ResampleLinear() copies each new input frame in
+ // at offset FRAME_LEN + kResamplingDelay and shifts the buffer down by the
+ // input size afterwards.
+ float buffer[kResamplerBufferSize];
+ // Fractional read position carried over between ResampleLinear() calls.
+ float position;
+
+ // Stored by WebRtcAec_InitResampler() and handed to EstimateSkew().
+ int deviceSampleRateHz;
+ // Raw skew values collected by WebRtcAec_GetSkew().
+ int skewData[kEstimateLengthFrames];
+ // Number of values stored so far; one past kEstimateLengthFrames once the
+ // estimate has been computed.
+ int skewDataIndex;
+ // Cached result of EstimateSkew(), returned by later GetSkew() calls.
+ float skewEstimate;
+} AecResampler;
+
+// Computes a skew estimate from |size| raw measurements in |rawSkew|.
+// |deviceSampleRateHz| scales the outlier limits applied to the raw values.
+// Writes the estimate to |*skewEst| (0 on failure) and returns 0 on success,
+// -1 if no measurement survives the outlier rejection.
+static int EstimateSkew(const int* rawSkew,
+                        int size,
+                        int deviceSampleRateHz,
+                        float* skewEst);
+
+// Allocates storage for one resampler instance. The memory is not initialized
+// here. Returns NULL if the allocation fails.
+void* WebRtcAec_CreateResampler() {
+  AecResampler* instance = malloc(sizeof(*instance));
+  return instance;
+}
+
+// Resets |resampInst| to its start state: empty sample buffer, zero read
+// position, no collected skew data. Remembers |deviceSampleRateHz| for the
+// skew estimator. Always returns 0.
+int WebRtcAec_InitResampler(void* resampInst, int deviceSampleRateHz) {
+  AecResampler* self = (AecResampler*)resampInst;
+
+  // Skew estimation state.
+  self->deviceSampleRateHz = deviceSampleRateHz;
+  self->skewDataIndex = 0;
+  self->skewEstimate = 0.0;
+  memset(self->skewData, 0, sizeof(self->skewData));
+
+  // Interpolation state.
+  self->position = 0.0;
+  memset(self->buffer, 0, sizeof(self->buffer));
+
+  return 0;
+}
+
+// Releases an instance obtained from WebRtcAec_CreateResampler(). Passing
+// NULL is harmless because free(NULL) is a no-op.
+void WebRtcAec_FreeResampler(void* resampInst) {
+  free(resampInst);
+}
+
+// Resamples |size| samples from |inspeech| by linear interpolation with a
+// rate ratio of 1 + |skew|, writing the result to |outspeech| and the number
+// of samples produced to |*size_out|.
+//
+// |size| must not exceed 2 * FRAME_LEN. The number of output samples depends
+// on |skew|, so |outspeech| must be sized by the caller for the smallest ratio
+// it intends to pass. The fractional read position is kept in the instance so
+// consecutive calls continue seamlessly.
+void WebRtcAec_ResampleLinear(void* resampInst,
+                              const float* inspeech,
+                              size_t size,
+                              float skew,
+                              float* outspeech,
+                              size_t* size_out) {
+  AecResampler* obj = (AecResampler*)resampInst;
+
+  float* y;
+  float be, tnew;
+  size_t tn, mm;
+
+  assert(size <= 2 * FRAME_LEN);
+  assert(resampInst != NULL);
+  assert(inspeech != NULL);
+  assert(outspeech != NULL);
+  assert(size_out != NULL);
+
+  // Add new frame data in lookahead
+  memcpy(&obj->buffer[FRAME_LEN + kResamplingDelay],
+         inspeech,
+         size * sizeof(inspeech[0]));
+
+  // Sample rate ratio
+  be = 1 + skew;
+
+  // Loop over input frame
+  mm = 0;
+  y = &obj->buffer[FRAME_LEN];  // Point at current frame
+
+  tnew = be * mm + obj->position;
+  tn = (size_t)tnew;
+
+  while (tn < size) {
+    // Interpolation between the two input samples surrounding |tnew|.
+    outspeech[mm] = y[tn] + (tnew - tn) * (y[tn + 1] - y[tn]);
+    mm++;
+
+    tnew = be * mm + obj->position;
+    // Truncate with the same cast as the initial computation above; |tn| is a
+    // size_t, so there is no reason to detour through a signed int.
+    tn = (size_t)tnew;
+  }
+
+  *size_out = mm;
+  // Carry the fractional overshoot past the end of this frame to the next call.
+  obj->position += (*size_out) * be - size;
+
+  // Shift buffer
+  memmove(obj->buffer,
+          &obj->buffer[size],
+          (kResamplerBufferSize - size) * sizeof(obj->buffer[0]));
+}
+
+// Feeds one raw skew measurement to the estimator and reports the estimate.
+//
+// The first kEstimateLengthFrames calls only record |rawSkew| and leave
+// |*skewEst| untouched. The next call runs EstimateSkew() on the recorded
+// values (its own |rawSkew| is not recorded), caches the result and returns
+// EstimateSkew()'s status. Every later call returns the cached estimate.
+int WebRtcAec_GetSkew(void* resampInst, int rawSkew, float* skewEst) {
+  AecResampler* self = (AecResampler*)resampInst;
+  int status;
+
+  // Still collecting measurements.
+  if (self->skewDataIndex < kEstimateLengthFrames) {
+    self->skewData[self->skewDataIndex++] = rawSkew;
+    return 0;
+  }
+
+  // Estimate already computed on an earlier call.
+  if (self->skewDataIndex > kEstimateLengthFrames) {
+    *skewEst = self->skewEstimate;
+    return 0;
+  }
+
+  // Exactly kEstimateLengthFrames values collected: estimate once.
+  status = EstimateSkew(self->skewData,
+                        kEstimateLengthFrames,
+                        self->deviceSampleRateHz,
+                        skewEst);
+  self->skewEstimate = *skewEst;
+  self->skewDataIndex++;
+  return status;
+}
+
+// Estimates the skew as the least-squares slope of the cumulative sum of the
+// accepted raw measurements against their running count.
+//
+// Outliers are rejected in two stages: values whose magnitude reaches
+// 0.04 * |deviceSampleRateHz| are ignored when computing the mean and the mean
+// absolute deviation; the regression then uses values that lie either within
+// 0.0025 * |deviceSampleRateHz| of zero or within five mean absolute
+// deviations (plus one) of the mean.
+//
+// |*skewEst| receives the slope, or 0 if no value is accepted or the
+// regression is degenerate. Returns 0 on success, -1 if no value is accepted.
+int EstimateSkew(const int* rawSkew,
+                 int size,
+                 int deviceSampleRateHz,
+                 float* skewEst) {
+  const int absLimitOuter = (int)(0.04f * deviceSampleRateHz);
+  const int absLimitInner = (int)(0.0025f * deviceSampleRateHz);
+  int k;
+  int count = 0;
+  int hi;
+  int lo;
+  float mean = 0;
+  float meanAbsDev = 0;
+  float runningSum = 0;
+  float sumX = 0;
+  float sumX2 = 0;
+  float sumY = 0;
+  float sumXY = 0;
+  float meanX;
+  float denominator;
+  float slope = 0;
+
+  *skewEst = 0;  // Reported value on every failure path.
+
+  // Pass 1: mean of the values inside the outer limit.
+  for (k = 0; k < size; k++) {
+    const int v = rawSkew[k];
+    if (v >= absLimitOuter || v <= -absLimitOuter) {
+      continue;
+    }
+    count++;
+    mean += v;
+  }
+  if (count == 0) {
+    return -1;
+  }
+  mean /= count;
+
+  // Pass 2: mean absolute deviation of the same values.
+  for (k = 0; k < size; k++) {
+    const int v = rawSkew[k];
+    float dev;
+    if (v >= absLimitOuter || v <= -absLimitOuter) {
+      continue;
+    }
+    dev = v - mean;
+    meanAbsDev += dev >= 0 ? dev : -dev;
+  }
+  meanAbsDev /= count;
+  hi = (int)(mean + 5 * meanAbsDev + 1);  // +1 for ceiling.
+  lo = (int)(mean - 5 * meanAbsDev - 1);  // -1 for floor.
+
+  // Pass 3: accumulate the regression sums over the accepted values, with
+  // x = running count and y = cumulative sum.
+  count = 0;
+  for (k = 0; k < size; k++) {
+    const int v = rawSkew[k];
+    const int withinInner = v < absLimitInner && v > -absLimitInner;
+    const int withinDeviation = v < hi && v > lo;
+    if (withinInner || withinDeviation) {
+      count++;
+      runningSum += v;
+      sumX += count;
+      sumX2 += count * count;
+      sumY += runningSum;
+      sumXY += count * runningSum;
+    }
+  }
+  if (count == 0) {
+    return -1;
+  }
+
+  meanX = sumX / count;
+  denominator = sumX2 - meanX * sumX;
+  if (denominator != 0) {
+    slope = (sumXY - meanX * sumY) / denominator;
+  }
+
+  *skewEst = slope;
+  return 0;
+}
diff --git a/third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_resampler.h b/third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_resampler.h
new file mode 100644
index 00000000..a5002c15
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_resampler.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_RESAMPLER_H_
+#define WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_RESAMPLER_H_
+
+#include "webrtc/modules/audio_processing/aec/aec_core.h"
+
+// Offset, in samples, added to FRAME_LEN when WebRtcAec_ResampleLinear()
+// copies new input into its buffer. The delay estimators in
+// echo_cancellation.c subtract it from the delay while resampling is active.
+enum {
+ kResamplingDelay = 1
+};
+// Length, in samples, of the resampler's internal buffer; also used to size
+// the far-end pre-buffer in WebRtcAec_Create().
+enum {
+ kResamplerBufferSize = FRAME_LEN * 4
+};
+
+// Unless otherwise specified, functions return 0 on success and -1 on error.
+// The instance returned by WebRtcAec_CreateResampler() is not initialized;
+// WebRtcAec_InitResampler() resets all of its state.
+void* WebRtcAec_CreateResampler(); // Returns NULL on error.
+int WebRtcAec_InitResampler(void* resampInst, int deviceSampleRateHz);
+void WebRtcAec_FreeResampler(void* resampInst);
+
+// Estimates skew from raw measurement.
+// |*skewEst| is written only once enough raw values have been collected;
+// until then the call just records |rawSkew| and returns 0.
+int WebRtcAec_GetSkew(void* resampInst, int rawSkew, float* skewEst);
+
+// Resamples input using linear interpolation.
+// |size| input samples are consumed with a rate ratio of 1 + |skew|; the
+// number of samples written to |outspeech| is returned in |*size_out|.
+void WebRtcAec_ResampleLinear(void* resampInst,
+ const float* inspeech,
+ size_t size,
+ float skew,
+ float* outspeech,
+ size_t* size_out);
+
+#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_RESAMPLER_H_
diff --git a/third_party/webrtc/src/webrtc/modules/audio_processing/aec/echo_cancellation.c b/third_party/webrtc/src/webrtc/modules/audio_processing/aec/echo_cancellation.c
new file mode 100644
index 00000000..0f5cd31d
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/modules/audio_processing/aec/echo_cancellation.c
@@ -0,0 +1,923 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+/*
+ * Contains the API functions for the AEC.
+ */
+#include "webrtc/modules/audio_processing/aec/include/echo_cancellation.h"
+
+#include <math.h>
+#ifdef WEBRTC_AEC_DEBUG_DUMP
+#include <stdio.h>
+#endif
+#include <stdlib.h>
+#include <string.h>
+
+#include "webrtc/common_audio/ring_buffer.h"
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+#include "webrtc/modules/audio_processing/aec/aec_core.h"
+#include "webrtc/modules/audio_processing/aec/aec_resampler.h"
+#include "webrtc/modules/audio_processing/aec/echo_cancellation_internal.h"
+#include "webrtc/typedefs.h"
+
+// Measured delays [ms]
+// Device Chrome GTP
+// MacBook Air 10
+// MacBook Retina 10 100
+// MacPro 30?
+//
+// Win7 Desktop 70 80?
+// Win7 T430s 110
+// Win8 T420s 70
+//
+// Daisy 50
+// Pixel (w/ preproc?) 240
+// Pixel (w/o preproc?) 110 110
+
+// The extended filter mode gives us the flexibility to ignore the system's
+// reported delays. We do this for platforms which we believe provide results
+// which are incompatible with the AEC's expectations. Based on measurements
+// (some provided above) we set a conservative (i.e. lower than measured)
+// fixed delay.
+//
+// WEBRTC_UNTRUSTED_DELAY will only have an impact when |extended_filter_mode|
+// is enabled. See the note along with |DelayCorrection| in
+// echo_cancellation_impl.h for more details on the mode.
+//
+// Justification:
+// Chromium/Mac: Here, the true latency is so low (~10-20 ms), that it plays
+// havoc with the AEC's buffering. To avoid this, we set a fixed delay of 20 ms
+// and then compensate by rewinding by 10 ms (in wideband) through
+// kDelayDiffOffsetSamples. This trick does not seem to work for larger rewind
+// values, but fortunately this is sufficient.
+//
+// Chromium/Linux(ChromeOS): The values we get on this platform don't correspond
+// well to reality. The variance doesn't match the AEC's buffer changes, and the
+// bulk values tend to be too low. However, the range across different hardware
+// appears to be too large to choose a single value.
+//
+// GTP/Linux(ChromeOS): TBD, but for the moment we will trust the values.
+#if defined(WEBRTC_CHROMIUM_BUILD) && defined(WEBRTC_MAC)
+#define WEBRTC_UNTRUSTED_DELAY
+#endif
+
+// Offset, in samples, added to the known delay in ProcessExtended() before it
+// is handed to the AEC core.
+#if defined(WEBRTC_UNTRUSTED_DELAY) && defined(WEBRTC_MAC)
+static const int kDelayDiffOffsetSamples = -160;
+#else
+// Not enabled for now.
+static const int kDelayDiffOffsetSamples = 0;
+#endif
+
+// Delay substituted in ProcessExtended() when the reported delay is not
+// trusted (or reaches kMaxTrustedDelayMs); also the minimum startup size.
+#if defined(WEBRTC_MAC)
+static const int kFixedDelayMs = 20;
+#else
+static const int kFixedDelayMs = 50;
+#endif
+// Lower clamp applied to the reported delay in ProcessExtended().
+#if !defined(WEBRTC_UNTRUSTED_DELAY)
+static const int kMinTrustedDelayMs = 20;
+#endif
+// Reported delays above this trigger a warning in WebRtcAec_Process() and are
+// clamped (normal mode) or replaced by kFixedDelayMs (extended mode).
+static const int kMaxTrustedDelayMs = 500;
+
+// Maximum length of resampled signal. Must be an integer multiple of frames
+// (ceil(1/(1 + MIN_SKEW)*2) + 1)*FRAME_LEN
+// The factor of 2 handles wb, and the + 1 is as a safety margin
+// TODO(bjornv): Replace with kResamplerBufferSize
+#define MAX_RESAMP_LEN (5 * FRAME_LEN)
+
+// Upper bound for Aec::bufSizeStart computed during the startup phase.
+static const int kMaxBufSizeStart = 62; // In partitions
+static const int sampMsNb = 8; // samples per ms in nb
+// Marker stored in Aec::initFlag by WebRtcAec_Init(); the API functions
+// compare against it to reject instances that were never initialized.
+static const int initCheck = 42;
+
+// Counts created instances so each one gets distinct debug dump file names.
+#ifdef WEBRTC_AEC_DEBUG_DUMP
+int webrtc_aec_instance_count = 0;
+#endif
+
+// Estimates delay to set the position of the far-end buffer read pointer
+// (controlled by knownDelay)
+static void EstBufDelayNormal(Aec* aecInst);
+static void EstBufDelayExtended(Aec* aecInst);
+// Per-mode back ends of WebRtcAec_Process(). ProcessNormal() is used when the
+// extended filter is disabled and returns 0 or -1; ProcessExtended() is used
+// otherwise and reports no status.
+static int ProcessNormal(Aec* self,
+ const float* const* near,
+ size_t num_bands,
+ float* const* out,
+ size_t num_samples,
+ int16_t reported_delay_ms,
+ int32_t skew);
+static void ProcessExtended(Aec* self,
+ const float* const* near,
+ size_t num_bands,
+ float* const* out,
+ size_t num_samples,
+ int16_t reported_delay_ms,
+ int32_t skew);
+
+// Allocates an AEC instance together with its core, its resampler and its
+// far-end pre-buffer. Returns the instance, or NULL if any allocation fails.
+// The instance is marked uninitialized (|initFlag| = 0); WebRtcAec_Init() must
+// be called before it is used.
+void* WebRtcAec_Create() {
+  Aec* aecpc = malloc(sizeof(Aec));
+
+  if (!aecpc) {
+    return NULL;
+  }
+
+  // The struct comes straight from malloc() and is uninitialized. Each
+  // failure path therefore releases exactly what has been created so far
+  // rather than calling WebRtcAec_Free(), which would read members (buffer,
+  // resampler, debug files) that have not been assigned yet.
+  aecpc->aec = WebRtcAec_CreateAec();
+  if (!aecpc->aec) {
+    free(aecpc);
+    return NULL;
+  }
+  aecpc->resampler = WebRtcAec_CreateResampler();
+  if (!aecpc->resampler) {
+    WebRtcAec_FreeAec(aecpc->aec);
+    free(aecpc);
+    return NULL;
+  }
+  // Create far-end pre-buffer. The buffer size has to be large enough for
+  // largest possible drift compensation (kResamplerBufferSize) + "almost" an
+  // FFT buffer (PART_LEN2 - 1).
+  aecpc->far_pre_buf =
+      WebRtc_CreateBuffer(PART_LEN2 + kResamplerBufferSize, sizeof(float));
+  if (!aecpc->far_pre_buf) {
+    WebRtcAec_FreeResampler(aecpc->resampler);
+    WebRtcAec_FreeAec(aecpc->aec);
+    free(aecpc);
+    return NULL;
+  }
+
+  aecpc->initFlag = 0;
+  aecpc->lastError = 0;
+
+#ifdef WEBRTC_AEC_DEBUG_DUMP
+  {
+    // One set of dump files per instance, numbered by creation order.
+    char filename[64];
+    sprintf(filename, "aec_buf%d.dat", webrtc_aec_instance_count);
+    aecpc->bufFile = fopen(filename, "wb");
+    sprintf(filename, "aec_skew%d.dat", webrtc_aec_instance_count);
+    aecpc->skewFile = fopen(filename, "wb");
+    sprintf(filename, "aec_delay%d.dat", webrtc_aec_instance_count);
+    aecpc->delayFile = fopen(filename, "wb");
+    webrtc_aec_instance_count++;
+  }
+#endif
+
+  return aecpc;
+}
+
+// Releases an instance created by WebRtcAec_Create(), including its far-end
+// pre-buffer, AEC core, resampler and (in debug dump builds) its dump files.
+// A NULL |aecInst| is ignored.
+void WebRtcAec_Free(void* aecInst) {
+  Aec* aecpc = aecInst;
+
+  if (aecpc == NULL) {
+    return;
+  }
+
+  WebRtc_FreeBuffer(aecpc->far_pre_buf);
+
+#ifdef WEBRTC_AEC_DEBUG_DUMP
+  // WebRtcAec_Create() does not check its fopen() calls, so any of these may
+  // be NULL, and fclose(NULL) is undefined behavior.
+  if (aecpc->bufFile != NULL) {
+    fclose(aecpc->bufFile);
+  }
+  if (aecpc->skewFile != NULL) {
+    fclose(aecpc->skewFile);
+  }
+  if (aecpc->delayFile != NULL) {
+    fclose(aecpc->delayFile);
+  }
+#endif
+
+  WebRtcAec_FreeAec(aecpc->aec);
+  WebRtcAec_FreeResampler(aecpc->resampler);
+  free(aecpc);
+}
+
+// Initializes (or re-initializes) an instance created by WebRtcAec_Create().
+//
+// |sampFreq| must be 8000, 16000, 32000 or 48000 and |scSampFreq| must lie in
+// [1, 96000]. Resets the AEC core, the resampler, the far-end pre-buffer and
+// all delay/skew bookkeeping, then applies the default configuration.
+// Returns 0 on success; on failure sets |lastError| and returns -1.
+int32_t WebRtcAec_Init(void* aecInst, int32_t sampFreq, int32_t scSampFreq) {
+ Aec* aecpc = aecInst;
+ AecConfig aecConfig;
+
+ if (sampFreq != 8000 &&
+ sampFreq != 16000 &&
+ sampFreq != 32000 &&
+ sampFreq != 48000) {
+ aecpc->lastError = AEC_BAD_PARAMETER_ERROR;
+ return -1;
+ }
+ aecpc->sampFreq = sampFreq;
+
+ if (scSampFreq < 1 || scSampFreq > 96000) {
+ aecpc->lastError = AEC_BAD_PARAMETER_ERROR;
+ return -1;
+ }
+ aecpc->scSampFreq = scSampFreq;
+
+ // Initialize echo canceller core
+ if (WebRtcAec_InitAec(aecpc->aec, aecpc->sampFreq) == -1) {
+ aecpc->lastError = AEC_UNSPECIFIED_ERROR;
+ return -1;
+ }
+
+ if (WebRtcAec_InitResampler(aecpc->resampler, aecpc->scSampFreq) == -1) {
+ aecpc->lastError = AEC_UNSPECIFIED_ERROR;
+ return -1;
+ }
+
+ WebRtc_InitBuffer(aecpc->far_pre_buf);
+ WebRtc_MoveReadPtr(aecpc->far_pre_buf, -PART_LEN); // Start overlap.
+
+ // Must be set before WebRtcAec_set_config() is called below, because that
+ // function rejects instances whose initFlag differs from initCheck.
+ aecpc->initFlag = initCheck; // indicates that initialization has been done
+
+ // 32 kHz and 48 kHz input is handled at a 16 kHz split rate.
+ if (aecpc->sampFreq == 32000 || aecpc->sampFreq == 48000) {
+ aecpc->splitSampFreq = 16000;
+ } else {
+ aecpc->splitSampFreq = sampFreq;
+ }
+
+ aecpc->delayCtr = 0;
+ aecpc->sampFactor = (aecpc->scSampFreq * 1.0f) / aecpc->splitSampFreq;
+ // Sampling frequency multiplier (SWB is processed as 160 frame size).
+ aecpc->rate_factor = aecpc->splitSampFreq / 8000;
+
+ aecpc->sum = 0;
+ aecpc->counter = 0;
+ aecpc->checkBuffSize = 1;
+ aecpc->firstVal = 0;
+
+ // We skip the startup_phase completely (setting to 0) if DA-AEC is enabled,
+ // but not extended_filter mode.
+ aecpc->startup_phase = WebRtcAec_extended_filter_enabled(aecpc->aec) ||
+ !WebRtcAec_delay_agnostic_enabled(aecpc->aec);
+ aecpc->bufSizeStart = 0;
+ aecpc->checkBufSizeCtr = 0;
+ aecpc->msInSndCardBuf = 0;
+ aecpc->filtDelay = -1; // -1 indicates an initialized state.
+ aecpc->timeForDelayChange = 0;
+ aecpc->knownDelay = 0;
+ aecpc->lastDelayDiff = 0;
+
+ aecpc->skewFrCtr = 0;
+ aecpc->resample = kAecFalse;
+ aecpc->highSkewCtr = 0;
+ aecpc->skew = 0;
+
+ aecpc->farend_started = 0;
+
+ // Default settings.
+ aecConfig.nlpMode = kAecNlpModerate;
+ aecConfig.skewMode = kAecFalse;
+ aecConfig.metricsMode = kAecFalse;
+ aecConfig.delay_logging = kAecFalse;
+
+ if (WebRtcAec_set_config(aecpc, aecConfig) == -1) {
+ aecpc->lastError = AEC_UNSPECIFIED_ERROR;
+ return -1;
+ }
+
+ return 0;
+}
+
+// only buffer L band for farend
+//
+// Queues |nrOfSamples| (80 or 160) far-end samples. When skew compensation is
+// active the samples are first resampled, which may change their count. The
+// data is appended to |far_pre_buf| and every complete PART_LEN2 block is
+// forwarded to the AEC core, with successive blocks overlapping by PART_LEN.
+// Returns 0 on success; on failure sets |lastError| and returns -1.
+int32_t WebRtcAec_BufferFarend(void* aecInst,
+ const float* farend,
+ size_t nrOfSamples) {
+ Aec* aecpc = aecInst;
+ size_t newNrOfSamples = nrOfSamples;
+ float new_farend[MAX_RESAMP_LEN];
+ const float* farend_ptr = farend;
+
+ if (farend == NULL) {
+ aecpc->lastError = AEC_NULL_POINTER_ERROR;
+ return -1;
+ }
+
+ if (aecpc->initFlag != initCheck) {
+ aecpc->lastError = AEC_UNINITIALIZED_ERROR;
+ return -1;
+ }
+
+ // number of samples == 160 for SWB input
+ if (nrOfSamples != 80 && nrOfSamples != 160) {
+ aecpc->lastError = AEC_BAD_PARAMETER_ERROR;
+ return -1;
+ }
+
+ if (aecpc->skewMode == kAecTrue && aecpc->resample == kAecTrue) {
+ // Resample and get a new number of samples
+ WebRtcAec_ResampleLinear(aecpc->resampler,
+ farend,
+ nrOfSamples,
+ aecpc->skew,
+ new_farend,
+ &newNrOfSamples);
+ farend_ptr = new_farend;
+ }
+
+ aecpc->farend_started = 1;
+ // Account for the newly queued samples in the core's system delay.
+ WebRtcAec_SetSystemDelay(
+ aecpc->aec, WebRtcAec_system_delay(aecpc->aec) + (int)newNrOfSamples);
+
+ // Write the time-domain data to |far_pre_buf|.
+ WebRtc_WriteBuffer(aecpc->far_pre_buf, farend_ptr, newNrOfSamples);
+
+ // Transform to frequency domain if we have enough data.
+ while (WebRtc_available_read(aecpc->far_pre_buf) >= PART_LEN2) {
+ // We have enough data to pass to the FFT, hence read PART_LEN2 samples.
+ {
+ // |ptmp| is set by WebRtc_ReadBuffer(); |tmp| is scratch storage it may
+ // use for the returned data.
+ float* ptmp = NULL;
+ float tmp[PART_LEN2];
+ WebRtc_ReadBuffer(aecpc->far_pre_buf, (void**)&ptmp, tmp, PART_LEN2);
+ WebRtcAec_BufferFarendPartition(aecpc->aec, ptmp);
+#ifdef WEBRTC_AEC_DEBUG_DUMP
+ WebRtc_WriteBuffer(
+ WebRtcAec_far_time_buf(aecpc->aec), &ptmp[PART_LEN], 1);
+#endif
+ }
+
+ // Rewind |far_pre_buf| PART_LEN samples for overlap before continuing.
+ WebRtc_MoveReadPtr(aecpc->far_pre_buf, -PART_LEN);
+ }
+
+ return 0;
+}
+
+// Runs echo cancellation on one near-end frame of |nrOfSamples| (80 or 160)
+// samples per band and writes the result to |out|.
+//
+// |msInSndCardBuf| is the reported sound card delay in ms. A value outside
+// [0, kMaxTrustedDelayMs] is treated as a warning: the frame is still
+// processed but |lastError| is set and -1 is returned. Negative delays are
+// replaced by 0 here; too-large delays are handled by the mode back ends.
+// Hard errors (NULL |out|, uninitialized instance, bad frame size) return -1
+// without processing.
+int32_t WebRtcAec_Process(void* aecInst,
+                          const float* const* nearend,
+                          size_t num_bands,
+                          float* const* out,
+                          size_t nrOfSamples,
+                          int16_t msInSndCardBuf,
+                          int32_t skew) {
+  Aec* aecpc = aecInst;
+  int32_t retVal = 0;
+
+  if (out == NULL) {
+    aecpc->lastError = AEC_NULL_POINTER_ERROR;
+    return -1;
+  }
+  if (aecpc->initFlag != initCheck) {
+    aecpc->lastError = AEC_UNINITIALIZED_ERROR;
+    return -1;
+  }
+  // number of samples == 160 for SWB input
+  if (!(nrOfSamples == 80 || nrOfSamples == 160)) {
+    aecpc->lastError = AEC_BAD_PARAMETER_ERROR;
+    return -1;
+  }
+
+  if (msInSndCardBuf < 0 || msInSndCardBuf > kMaxTrustedDelayMs) {
+    // Only the lower bound is enforced here; the upper clamping is done in
+    // ProcessExtended/Normal().
+    if (msInSndCardBuf < 0) {
+      msInSndCardBuf = 0;
+    }
+    aecpc->lastError = AEC_BAD_PARAMETER_WARNING;
+    retVal = -1;
+  }
+
+  // This returns the value of aec->extended_filter_enabled.
+  if (!WebRtcAec_extended_filter_enabled(aecpc->aec)) {
+    const int status = ProcessNormal(
+        aecpc, nearend, num_bands, out, nrOfSamples, msInSndCardBuf, skew);
+    if (status != 0) {
+      retVal = -1;
+    }
+  } else {
+    ProcessExtended(
+        aecpc, nearend, num_bands, out, nrOfSamples, msInSndCardBuf, skew);
+  }
+
+#ifdef WEBRTC_AEC_DEBUG_DUMP
+  {
+    int16_t far_buf_size_ms = (int16_t)(WebRtcAec_system_delay(aecpc->aec) /
+                                        (sampMsNb * aecpc->rate_factor));
+    (void)fwrite(&far_buf_size_ms, 2, 1, aecpc->bufFile);
+    (void)fwrite(
+        &aecpc->knownDelay, sizeof(aecpc->knownDelay), 1, aecpc->delayFile);
+  }
+#endif
+
+  return retVal;
+}
+
+// Applies |config| to an initialized instance.
+//
+// Every field is validated. |skewMode| is checked and stored first, so it
+// takes effect even when a later field is rejected. The remaining fields are
+// forwarded to the AEC core only if all of them are valid.
+// Returns 0 on success; on failure sets |lastError| and returns -1.
+int WebRtcAec_set_config(void* handle, AecConfig config) {
+  Aec* self = (Aec*)handle;
+  int nlp_ok;
+  int metrics_ok;
+  int logging_ok;
+
+  if (self->initFlag != initCheck) {
+    self->lastError = AEC_UNINITIALIZED_ERROR;
+    return -1;
+  }
+
+  if (!(config.skewMode == kAecFalse || config.skewMode == kAecTrue)) {
+    self->lastError = AEC_BAD_PARAMETER_ERROR;
+    return -1;
+  }
+  self->skewMode = config.skewMode;
+
+  nlp_ok = config.nlpMode == kAecNlpConservative ||
+           config.nlpMode == kAecNlpModerate ||
+           config.nlpMode == kAecNlpAggressive;
+  metrics_ok =
+      config.metricsMode == kAecFalse || config.metricsMode == kAecTrue;
+  logging_ok =
+      config.delay_logging == kAecFalse || config.delay_logging == kAecTrue;
+  if (!nlp_ok || !metrics_ok || !logging_ok) {
+    self->lastError = AEC_BAD_PARAMETER_ERROR;
+    return -1;
+  }
+
+  WebRtcAec_SetConfigCore(
+      self->aec, config.nlpMode, config.metricsMode, config.delay_logging);
+  return 0;
+}
+
+// Stores the AEC core's current echo state in |*status|.
+// Returns 0 on success; sets |lastError| and returns -1 if |status| is NULL
+// or the instance has not been initialized.
+int WebRtcAec_get_echo_status(void* handle, int* status) {
+  Aec* aecpc = (Aec*)handle;
+
+  if (!status) {
+    aecpc->lastError = AEC_NULL_POINTER_ERROR;
+    return -1;
+  }
+  if (aecpc->initFlag != initCheck) {
+    aecpc->lastError = AEC_UNINITIALIZED_ERROR;
+    return -1;
+  }
+
+  *status = WebRtcAec_echo_state(aecpc->aec);
+  return 0;
+}
+
+// Average level reported for |stats|: a 0.7 / 0.3 mix of the upper-part mean
+// and the regular average, truncated to int, or kOffsetLevel unless both
+// inputs exceed kOffsetLevel.
+static int MetricsAverageLevel(const Stats* stats) {
+  const float kUpWeight = 0.7f;
+  float mixed;
+
+  if ((stats->himean > kOffsetLevel) && (stats->average > kOffsetLevel)) {
+    mixed = kUpWeight * stats->himean + (1 - kUpWeight) * stats->average;
+    return (int)mixed;
+  }
+  return kOffsetLevel;
+}
+
+// Minimum level reported for |stats|: the tracked minimum truncated to int
+// while it is below -kOffsetLevel, otherwise kOffsetLevel.
+static int MetricsMinLevel(const Stats* stats) {
+  if (stats->min < (kOffsetLevel * (-1))) {
+    return (int)stats->min;
+  }
+  return kOffsetLevel;
+}
+
+// Fills |metrics| with the ERL, ERLE, RERL and A_NLP statistics of the AEC
+// core. RERL is the sum of the reported ERL and ERLE averages when both exceed
+// kOffsetLevel (kOffsetLevel otherwise) and is copied into all four of its
+// fields.
+// Returns 0 on success and -1 if |handle| or |metrics| is NULL or the
+// instance has not been initialized; |lastError| is set in the latter two
+// cases.
+int WebRtcAec_GetMetrics(void* handle, AecMetrics* metrics) {
+  Aec* self = (Aec*)handle;
+  Stats erl;
+  Stats erle;
+  Stats a_nlp;
+  int rerl;
+
+  if (handle == NULL) {
+    return -1;
+  }
+  if (metrics == NULL) {
+    self->lastError = AEC_NULL_POINTER_ERROR;
+    return -1;
+  }
+  if (self->initFlag != initCheck) {
+    self->lastError = AEC_UNINITIALIZED_ERROR;
+    return -1;
+  }
+
+  WebRtcAec_GetEchoStats(self->aec, &erl, &erle, &a_nlp);
+
+  // ERL
+  metrics->erl.instant = (int)erl.instant;
+  metrics->erl.average = MetricsAverageLevel(&erl);
+  metrics->erl.max = (int)erl.max;
+  metrics->erl.min = MetricsMinLevel(&erl);
+
+  // ERLE
+  metrics->erle.instant = (int)erle.instant;
+  metrics->erle.average = MetricsAverageLevel(&erle);
+  metrics->erle.max = (int)erle.max;
+  metrics->erle.min = MetricsMinLevel(&erle);
+
+  // RERL
+  rerl = kOffsetLevel;
+  if ((metrics->erl.average > kOffsetLevel) &&
+      (metrics->erle.average > kOffsetLevel)) {
+    rerl = metrics->erl.average + metrics->erle.average;
+  }
+  metrics->rerl.average = rerl;
+
+  // No other statistics needed, but returned for completeness.
+  metrics->rerl.instant = rerl;
+  metrics->rerl.max = rerl;
+  metrics->rerl.min = rerl;
+
+  // A_NLP
+  metrics->aNlp.instant = (int)a_nlp.instant;
+  metrics->aNlp.average = MetricsAverageLevel(&a_nlp);
+  metrics->aNlp.max = (int)a_nlp.max;
+  metrics->aNlp.min = MetricsMinLevel(&a_nlp);
+
+  return 0;
+}
+
+// Retrieves the delay statistics of the AEC core into |*median|, |*std| and
+// |*fraction_poor_delays|.
+// Returns 0 on success. Sets |lastError| and returns -1 if |median| or |std|
+// is NULL, if the instance has not been initialized, or if the core reports
+// failure (delay logging disabled).
+int WebRtcAec_GetDelayMetrics(void* handle,
+                              int* median,
+                              int* std,
+                              float* fraction_poor_delays) {
+  Aec* self = handle;
+  int core_status;
+
+  if (median == NULL || std == NULL) {
+    self->lastError = AEC_NULL_POINTER_ERROR;
+    return -1;
+  }
+  if (self->initFlag != initCheck) {
+    self->lastError = AEC_UNINITIALIZED_ERROR;
+    return -1;
+  }
+
+  core_status = WebRtcAec_GetDelayMetricsCore(
+      self->aec, median, std, fraction_poor_delays);
+  if (core_status == -1) {
+    // Logging disabled.
+    self->lastError = AEC_UNSUPPORTED_FUNCTION_ERROR;
+    return -1;
+  }
+
+  return 0;
+}
+
+// Returns the error or warning code most recently stored in the instance.
+int32_t WebRtcAec_get_error_code(void* aecInst) {
+  return ((Aec*)aecInst)->lastError;
+}
+
+// Returns the AEC core owned by |handle|, or NULL when |handle| is NULL.
+AecCore* WebRtcAec_aec_core(void* handle) {
+  Aec* self = (Aec*)handle;
+  return self ? self->aec : NULL;
+}
+
+// Processes one near-end frame in normal (non-extended) mode.
+//
+// The reported delay is clamped to kMaxTrustedDelayMs and increased by 10 ms.
+// In skew mode the skew estimate is refreshed once 25 frames have passed.
+// While |startup_phase| is set, the near-end signal is copied to |out|
+// unprocessed and the function waits for the reported delay to stabilise
+// before positioning the far-end read pointer; afterwards every frame is run
+// through the AEC core.
+// Returns the status of WebRtcAec_GetSkew() when it was called, 0 otherwise.
+static int ProcessNormal(Aec* aecpc,
+ const float* const* nearend,
+ size_t num_bands,
+ float* const* out,
+ size_t nrOfSamples,
+ int16_t msInSndCardBuf,
+ int32_t skew) {
+ int retVal = 0;
+ size_t i;
+ size_t nBlocks10ms;
+ // Limit resampling to doubling/halving of signal
+ const float minSkewEst = -0.5f;
+ const float maxSkewEst = 1.0f;
+
+ msInSndCardBuf =
+ msInSndCardBuf > kMaxTrustedDelayMs ? kMaxTrustedDelayMs : msInSndCardBuf;
+ // TODO(andrew): we need to investigate if this +10 is really wanted.
+ msInSndCardBuf += 10;
+ aecpc->msInSndCardBuf = msInSndCardBuf;
+
+ if (aecpc->skewMode == kAecTrue) {
+ // Skip the first 25 frames before feeding the skew estimator.
+ if (aecpc->skewFrCtr < 25) {
+ aecpc->skewFrCtr++;
+ } else {
+ retVal = WebRtcAec_GetSkew(aecpc->resampler, skew, &aecpc->skew);
+ if (retVal == -1) {
+ aecpc->skew = 0;
+ aecpc->lastError = AEC_BAD_PARAMETER_WARNING;
+ }
+
+ aecpc->skew /= aecpc->sampFactor * nrOfSamples;
+
+ // Resampling is switched off while the skew magnitude is below 1e-3.
+ if (aecpc->skew < 1.0e-3 && aecpc->skew > -1.0e-3) {
+ aecpc->resample = kAecFalse;
+ } else {
+ aecpc->resample = kAecTrue;
+ }
+
+ if (aecpc->skew < minSkewEst) {
+ aecpc->skew = minSkewEst;
+ } else if (aecpc->skew > maxSkewEst) {
+ aecpc->skew = maxSkewEst;
+ }
+
+#ifdef WEBRTC_AEC_DEBUG_DUMP
+ (void)fwrite(&aecpc->skew, sizeof(aecpc->skew), 1, aecpc->skewFile);
+#endif
+ }
+ }
+
+ nBlocks10ms = nrOfSamples / (FRAME_LEN * aecpc->rate_factor);
+
+ if (aecpc->startup_phase) {
+ for (i = 0; i < num_bands; ++i) {
+ // Only needed if they don't already point to the same place.
+ if (nearend[i] != out[i]) {
+ memcpy(out[i], nearend[i], sizeof(nearend[i][0]) * nrOfSamples);
+ }
+ }
+
+ // The AEC is in the start up mode
+ // AEC is disabled until the system delay is OK
+
+ // Mechanism to ensure that the system delay is reasonably stable.
+ if (aecpc->checkBuffSize) {
+ aecpc->checkBufSizeCtr++;
+ // Before we fill up the far-end buffer we require the system delay
+ // to be stable (+/-8 ms) compared to the first value. This
+ // comparison is made during the following 6 consecutive 10 ms
+ // blocks. If it seems to be stable then we start to fill up the
+ // far-end buffer.
+ if (aecpc->counter == 0) {
+ aecpc->firstVal = aecpc->msInSndCardBuf;
+ aecpc->sum = 0;
+ }
+
+ // The tolerance is the larger of 20% of the current delay and sampMsNb.
+ if (abs(aecpc->firstVal - aecpc->msInSndCardBuf) <
+ WEBRTC_SPL_MAX(0.2 * aecpc->msInSndCardBuf, sampMsNb)) {
+ aecpc->sum += aecpc->msInSndCardBuf;
+ aecpc->counter++;
+ } else {
+ aecpc->counter = 0;
+ }
+
+ if (aecpc->counter * nBlocks10ms >= 6) {
+ // The far-end buffer size is determined in partitions of
+ // PART_LEN samples. Use 75% of the average value of the system
+ // delay as buffer size to start with.
+ aecpc->bufSizeStart =
+ WEBRTC_SPL_MIN((3 * aecpc->sum * aecpc->rate_factor * 8) /
+ (4 * aecpc->counter * PART_LEN),
+ kMaxBufSizeStart);
+ // Buffer size has now been determined.
+ aecpc->checkBuffSize = 0;
+ }
+
+ if (aecpc->checkBufSizeCtr * nBlocks10ms > 50) {
+ // For really bad systems, don't disable the echo canceller for
+ // more than 0.5 sec.
+ aecpc->bufSizeStart = WEBRTC_SPL_MIN(
+ (aecpc->msInSndCardBuf * aecpc->rate_factor * 3) / 40,
+ kMaxBufSizeStart);
+ aecpc->checkBuffSize = 0;
+ }
+ }
+
+ // If |checkBuffSize| changed in the if-statement above.
+ if (!aecpc->checkBuffSize) {
+ // The system delay is now reasonably stable (or has been unstable
+ // for too long). When the far-end buffer is filled with
+ // approximately the same amount of data as reported by the system
+ // we end the startup phase.
+ int overhead_elements =
+ WebRtcAec_system_delay(aecpc->aec) / PART_LEN - aecpc->bufSizeStart;
+ if (overhead_elements == 0) {
+ // Enable the AEC
+ aecpc->startup_phase = 0;
+ } else if (overhead_elements > 0) {
+ // TODO(bjornv): Do we need a check on how much we actually
+ // moved the read pointer? It should always be possible to move
+ // the pointer |overhead_elements| since we have only added data
+ // to the buffer and no delay compensation nor AEC processing
+ // has been done.
+ WebRtcAec_MoveFarReadPtr(aecpc->aec, overhead_elements);
+
+ // Enable the AEC
+ aecpc->startup_phase = 0;
+ }
+ // A negative value means too little far-end data is buffered so far;
+ // the startup phase continues on the next call.
+ }
+ } else {
+ // AEC is enabled.
+ EstBufDelayNormal(aecpc);
+
+ // Call the AEC.
+ // TODO(bjornv): Re-structure such that we don't have to pass
+ // |aecpc->knownDelay| as input. Change name to something like
+ // |system_buffer_diff|.
+ WebRtcAec_ProcessFrames(aecpc->aec,
+ nearend,
+ num_bands,
+ nrOfSamples,
+ aecpc->knownDelay,
+ out);
+ }
+
+ return retVal;
+}
+
+// Processes one near-end frame in extended filter mode.
+//
+// The reported delay is either replaced by kFixedDelayMs (untrusted builds)
+// or clamped from below by kMinTrustedDelayMs and replaced by kFixedDelayMs
+// when it reaches kMaxTrustedDelayMs. Until far-end data has been buffered
+// the near-end signal is copied to |out| unchanged. On the first processed
+// frame the far-end read pointer is positioned once; afterwards every frame
+// is run through the AEC core. |skew| is not used by this function.
+static void ProcessExtended(Aec* self,
+ const float* const* near,
+ size_t num_bands,
+ float* const* out,
+ size_t num_samples,
+ int16_t reported_delay_ms,
+ int32_t skew) {
+ size_t i;
+ const int delay_diff_offset = kDelayDiffOffsetSamples;
+#if defined(WEBRTC_UNTRUSTED_DELAY)
+ reported_delay_ms = kFixedDelayMs;
+#else
+ // This is the usual mode where we trust the reported system delay values.
+ // Due to the longer filter, we no longer add 10 ms to the reported delay
+ // to reduce chance of non-causality. Instead we apply a minimum here to avoid
+ // issues with the read pointer jumping around needlessly.
+ reported_delay_ms = reported_delay_ms < kMinTrustedDelayMs
+ ? kMinTrustedDelayMs
+ : reported_delay_ms;
+ // If the reported delay appears to be bogus, we attempt to recover by using
+ // the measured fixed delay values. We use >= here because higher layers
+ // may already clamp to this maximum value, and we would otherwise not
+ // detect it here.
+ reported_delay_ms = reported_delay_ms >= kMaxTrustedDelayMs
+ ? kFixedDelayMs
+ : reported_delay_ms;
+#endif
+ self->msInSndCardBuf = reported_delay_ms;
+
+ // Nothing to cancel against until WebRtcAec_BufferFarend() has been called.
+ if (!self->farend_started) {
+ for (i = 0; i < num_bands; ++i) {
+ // Only needed if they don't already point to the same place.
+ if (near[i] != out[i]) {
+ memcpy(out[i], near[i], sizeof(near[i][0]) * num_samples);
+ }
+ }
+ return;
+ }
+ if (self->startup_phase) {
+ // In the extended mode, there isn't a startup "phase", just a special
+ // action on the first frame. In the trusted delay case, we'll take the
+ // current reported delay, unless it's less than our conservative
+ // measurement.
+ int startup_size_ms =
+ reported_delay_ms < kFixedDelayMs ? kFixedDelayMs : reported_delay_ms;
+#if defined(WEBRTC_ANDROID)
+ int target_delay = startup_size_ms * self->rate_factor * 8;
+#else
+ // To avoid putting the AEC in a non-causal state we're being slightly
+ // conservative and scale by 2. On Android we use a fixed delay and
+ // therefore there is no need to scale the target_delay.
+ int target_delay = startup_size_ms * self->rate_factor * 8 / 2;
+#endif
+ // Number of PART_LEN partitions buffered beyond the target delay.
+ int overhead_elements =
+ (WebRtcAec_system_delay(self->aec) - target_delay) / PART_LEN;
+ WebRtcAec_MoveFarReadPtr(self->aec, overhead_elements);
+ self->startup_phase = 0;
+ }
+
+ EstBufDelayExtended(self);
+
+ {
+ // |delay_diff_offset| gives us the option to manually rewind the delay on
+ // very low delay platforms which can't be expressed purely through
+ // |reported_delay_ms|.
+ const int adjusted_known_delay =
+ WEBRTC_SPL_MAX(0, self->knownDelay + delay_diff_offset);
+
+ WebRtcAec_ProcessFrames(self->aec,
+ near,
+ num_bands,
+ num_samples,
+ adjusted_known_delay,
+ out);
+ }
+}
+
+// Updates |knownDelay| (in samples) for normal mode.
+//
+// The instantaneous delay is the reported sound card delay minus the AEC
+// core's system delay, corrected for the frame about to be read and for the
+// resampler. It is smoothed with a 0.8 / 0.2 recursive filter into
+// |filtDelay|. |knownDelay| is only moved (to filtDelay - 160, floored at 0)
+// after |timeForDelayChange| has exceeded 25, i.e. after the filtered delay
+// has differed from it by more than 224 or by less than 96 for that many
+// calls.
+static void EstBufDelayNormal(Aec* aecpc) {
+ int nSampSndCard = aecpc->msInSndCardBuf * sampMsNb * aecpc->rate_factor;
+ int current_delay = nSampSndCard - WebRtcAec_system_delay(aecpc->aec);
+ int delay_difference = 0;
+
+ // Before we proceed with the delay estimate filtering we:
+ // 1) Compensate for the frame that will be read.
+ // 2) Compensate for drift resampling.
+ // 3) Compensate for non-causality if needed, since the estimated delay can't
+ // be negative.
+
+ // 1) Compensating for the frame(s) that will be read/processed.
+ current_delay += FRAME_LEN * aecpc->rate_factor;
+
+ // 2) Account for resampling frame delay.
+ if (aecpc->skewMode == kAecTrue && aecpc->resample == kAecTrue) {
+ current_delay -= kResamplingDelay;
+ }
+
+ // 3) Compensate for non-causality, if needed, by flushing one block.
+ if (current_delay < PART_LEN) {
+ current_delay += WebRtcAec_MoveFarReadPtr(aecpc->aec, 1) * PART_LEN;
+ }
+
+ // We use -1 to signal an initialized state in the "extended" implementation;
+ // compensate for that.
+ aecpc->filtDelay = aecpc->filtDelay < 0 ? 0 : aecpc->filtDelay;
+ aecpc->filtDelay =
+ WEBRTC_SPL_MAX(0, (short)(0.8 * aecpc->filtDelay + 0.2 * current_delay));
+
+ // Hysteresis: the counter restarts whenever the difference jumps from one
+ // side of the [96, 224] band to the other or lies inside it.
+ delay_difference = aecpc->filtDelay - aecpc->knownDelay;
+ if (delay_difference > 224) {
+ if (aecpc->lastDelayDiff < 96) {
+ aecpc->timeForDelayChange = 0;
+ } else {
+ aecpc->timeForDelayChange++;
+ }
+ } else if (delay_difference < 96 && aecpc->knownDelay > 0) {
+ if (aecpc->lastDelayDiff > 224) {
+ aecpc->timeForDelayChange = 0;
+ } else {
+ aecpc->timeForDelayChange++;
+ }
+ } else {
+ aecpc->timeForDelayChange = 0;
+ }
+ aecpc->lastDelayDiff = delay_difference;
+
+ if (aecpc->timeForDelayChange > 25) {
+ aecpc->knownDelay = WEBRTC_SPL_MAX((int)aecpc->filtDelay - 160, 0);
+ }
+}
+
+// Updates |knownDelay| (in samples) for extended filter mode.
+//
+// Same scheme as EstBufDelayNormal() with different constants: two blocks are
+// flushed on non-causality, the filter is seeded with half the current delay
+// on the first call (|filtDelay| == -1) and then smoothed with 0.95 / 0.05,
+// the hysteresis band is [128, 384], and |knownDelay| is moved to
+// filtDelay - 256 (floored at 0) once |timeForDelayChange| exceeds 25.
+static void EstBufDelayExtended(Aec* self) {
+ int reported_delay = self->msInSndCardBuf * sampMsNb * self->rate_factor;
+ int current_delay = reported_delay - WebRtcAec_system_delay(self->aec);
+ int delay_difference = 0;
+
+ // Before we proceed with the delay estimate filtering we:
+ // 1) Compensate for the frame that will be read.
+ // 2) Compensate for drift resampling.
+ // 3) Compensate for non-causality if needed, since the estimated delay can't
+ // be negative.
+
+ // 1) Compensating for the frame(s) that will be read/processed.
+ current_delay += FRAME_LEN * self->rate_factor;
+
+ // 2) Account for resampling frame delay.
+ if (self->skewMode == kAecTrue && self->resample == kAecTrue) {
+ current_delay -= kResamplingDelay;
+ }
+
+ // 3) Compensate for non-causality, if needed, by flushing two blocks.
+ if (current_delay < PART_LEN) {
+ current_delay += WebRtcAec_MoveFarReadPtr(self->aec, 2) * PART_LEN;
+ }
+
+ // -1 is the value WebRtcAec_Init() stores, i.e. no estimate exists yet.
+ if (self->filtDelay == -1) {
+ self->filtDelay = WEBRTC_SPL_MAX(0, 0.5 * current_delay);
+ } else {
+ self->filtDelay = WEBRTC_SPL_MAX(
+ 0, (short)(0.95 * self->filtDelay + 0.05 * current_delay));
+ }
+
+ // Hysteresis: the counter restarts whenever the difference jumps from one
+ // side of the [128, 384] band to the other or lies inside it.
+ delay_difference = self->filtDelay - self->knownDelay;
+ if (delay_difference > 384) {
+ if (self->lastDelayDiff < 128) {
+ self->timeForDelayChange = 0;
+ } else {
+ self->timeForDelayChange++;
+ }
+ } else if (delay_difference < 128 && self->knownDelay > 0) {
+ if (self->lastDelayDiff > 384) {
+ self->timeForDelayChange = 0;
+ } else {
+ self->timeForDelayChange++;
+ }
+ } else {
+ self->timeForDelayChange = 0;
+ }
+ self->lastDelayDiff = delay_difference;
+
+ if (self->timeForDelayChange > 25) {
+ self->knownDelay = WEBRTC_SPL_MAX((int)self->filtDelay - 256, 0);
+ }
+}
diff --git a/third_party/webrtc/src/webrtc/modules/audio_processing/aec/echo_cancellation_internal.h b/third_party/webrtc/src/webrtc/modules/audio_processing/aec/echo_cancellation_internal.h
new file mode 100644
index 00000000..95a6cf33
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/modules/audio_processing/aec/echo_cancellation_internal.h
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AEC_ECHO_CANCELLATION_INTERNAL_H_
+#define WEBRTC_MODULES_AUDIO_PROCESSING_AEC_ECHO_CANCELLATION_INTERNAL_H_
+
+#include "webrtc/common_audio/ring_buffer.h"
+#include "webrtc/modules/audio_processing/aec/aec_core.h"
+
+typedef struct {
+ int delayCtr;
+ int sampFreq;
+ int splitSampFreq;
+ int scSampFreq;
+ float sampFactor; // scSampRate / sampFreq
+ short skewMode;
+ int bufSizeStart;
+ int knownDelay;
+ int rate_factor;
+
+ short initFlag; // indicates if AEC has been initialized
+
+ // Variables used for averaging far end buffer size
+ short counter;
+ int sum;
+ short firstVal;
+ short checkBufSizeCtr;
+
+ // Variables used for delay shifts
+ short msInSndCardBuf;
+ short filtDelay; // Filtered delay estimate.
+ int timeForDelayChange;
+ int startup_phase;
+ int checkBuffSize;
+ short lastDelayDiff;
+
+#ifdef WEBRTC_AEC_DEBUG_DUMP
+ FILE* bufFile;
+ FILE* delayFile;
+ FILE* skewFile;
+#endif
+
+ // Structures
+ void* resampler;
+
+ int skewFrCtr;
+ int resample; // if the skew is small enough we don't resample
+ int highSkewCtr;
+ float skew;
+
+ RingBuffer* far_pre_buf; // Time domain far-end pre-buffer.
+
+ int lastError;
+
+ int farend_started;
+
+ AecCore* aec;
+} Aec;
+
+#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AEC_ECHO_CANCELLATION_INTERNAL_H_
diff --git a/third_party/webrtc/src/webrtc/modules/audio_processing/aec/echo_cancellation_unittest.cc b/third_party/webrtc/src/webrtc/modules/audio_processing/aec/echo_cancellation_unittest.cc
new file mode 100644
index 00000000..315ac3e9
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/modules/audio_processing/aec/echo_cancellation_unittest.cc
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+// TODO(bjornv): Make this a comprehensive test.
+
+#include "webrtc/modules/audio_processing/aec/include/echo_cancellation.h"
+
+#include <stdlib.h>
+#include <time.h>
+
+extern "C" {
+#include "webrtc/modules/audio_processing/aec/aec_core.h"
+}
+
+#include "testing/gtest/include/gtest/gtest.h"
+#include "webrtc/base/checks.h"
+
+namespace webrtc {
+
+TEST(EchoCancellationTest, CreateAndFreeHasExpectedBehavior) {
+ void* handle = WebRtcAec_Create();
+ ASSERT_TRUE(handle);
+ WebRtcAec_Free(nullptr);
+ WebRtcAec_Free(handle);
+}
+
+TEST(EchoCancellationTest, ApplyAecCoreHandle) {
+ void* handle = WebRtcAec_Create();
+ ASSERT_TRUE(handle);
+ EXPECT_TRUE(WebRtcAec_aec_core(NULL) == NULL);
+ AecCore* aec_core = WebRtcAec_aec_core(handle);
+ EXPECT_TRUE(aec_core != NULL);
+ // A simple test to verify that we can set and get a value from the lower
+ // level |aec_core| handle.
+ int delay = 111;
+ WebRtcAec_SetSystemDelay(aec_core, delay);
+ EXPECT_EQ(delay, WebRtcAec_system_delay(aec_core));
+ WebRtcAec_Free(handle);
+}
+
+} // namespace webrtc
diff --git a/third_party/webrtc/src/webrtc/modules/audio_processing/aec/include/echo_cancellation.h b/third_party/webrtc/src/webrtc/modules/audio_processing/aec/include/echo_cancellation.h
new file mode 100644
index 00000000..a340cf84
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/modules/audio_processing/aec/include/echo_cancellation.h
@@ -0,0 +1,245 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AEC_INCLUDE_ECHO_CANCELLATION_H_
+#define WEBRTC_MODULES_AUDIO_PROCESSING_AEC_INCLUDE_ECHO_CANCELLATION_H_
+
+#include <stddef.h>
+
+#include "webrtc/typedefs.h"
+
+// Errors
+#define AEC_UNSPECIFIED_ERROR 12000
+#define AEC_UNSUPPORTED_FUNCTION_ERROR 12001
+#define AEC_UNINITIALIZED_ERROR 12002
+#define AEC_NULL_POINTER_ERROR 12003
+#define AEC_BAD_PARAMETER_ERROR 12004
+
+// Warnings
+#define AEC_BAD_PARAMETER_WARNING 12050
+
+enum {
+ kAecNlpConservative = 0,
+ kAecNlpModerate,
+ kAecNlpAggressive
+};
+
+enum {
+ kAecFalse = 0,
+ kAecTrue
+};
+
+typedef struct {
+ int16_t nlpMode; // default kAecNlpModerate
+ int16_t skewMode; // default kAecFalse
+ int16_t metricsMode; // default kAecFalse
+ int delay_logging; // default kAecFalse
+ // float realSkew;
+} AecConfig;
+
+typedef struct {
+ int instant;
+ int average;
+ int max;
+ int min;
+} AecLevel;
+
+typedef struct {
+ AecLevel rerl;
+ AecLevel erl;
+ AecLevel erle;
+ AecLevel aNlp;
+} AecMetrics;
+
+struct AecCore;
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Allocates the memory needed by the AEC. The memory needs to be initialized
+ * separately using the WebRtcAec_Init() function. Returns a pointer to the
+ * object or NULL on error.
+ */
+void* WebRtcAec_Create();
+
+/*
+ * This function releases the memory allocated by WebRtcAec_Create().
+ *
+ * Inputs Description
+ * -------------------------------------------------------------------
+ * void* aecInst Pointer to the AEC instance
+ */
+void WebRtcAec_Free(void* aecInst);
+
+/*
+ * Initializes an AEC instance.
+ *
+ * Inputs Description
+ * -------------------------------------------------------------------
+ * void* aecInst Pointer to the AEC instance
+ * int32_t sampFreq Sampling frequency of data
+ * int32_t scSampFreq Soundcard sampling frequency
+ *
+ * Outputs Description
+ * -------------------------------------------------------------------
+ * int32_t return 0: OK
+ * -1: error
+ */
+int32_t WebRtcAec_Init(void* aecInst, int32_t sampFreq, int32_t scSampFreq);
+
+/*
+ * Inserts an 80 or 160 sample block of data into the farend buffer.
+ *
+ * Inputs Description
+ * -------------------------------------------------------------------
+ * void* aecInst Pointer to the AEC instance
+ * const float* farend In buffer containing one frame of
+ * farend signal for L band
+ * size_t nrOfSamples Number of samples in farend buffer
+ *
+ * Outputs Description
+ * -------------------------------------------------------------------
+ * int32_t return 0: OK
+ * -1: error
+ */
+int32_t WebRtcAec_BufferFarend(void* aecInst,
+ const float* farend,
+ size_t nrOfSamples);
+
+/*
+ * Runs the echo canceller on an 80 or 160 sample block of data.
+ *
+ * Inputs Description
+ * -------------------------------------------------------------------
+ * void* aecInst Pointer to the AEC instance
+ * const float* const* nearend In buffer containing one frame of
+ * nearend+echo signal for each band
+ * size_t num_bands Number of bands in nearend buffer
+ * size_t nrOfSamples Number of samples in nearend buffer
+ * int16_t msInSndCardBuf Delay estimate for sound card and
+ * system buffers
+ * int32_t skew Difference between number of samples played
+ * and recorded at the soundcard (for clock skew
+ * compensation)
+ *
+ * Outputs Description
+ * -------------------------------------------------------------------
+ * float* const* out Out buffer, one frame of processed nearend
+ * for each band
+ * int32_t return 0: OK
+ * -1: error
+ */
+int32_t WebRtcAec_Process(void* aecInst,
+ const float* const* nearend,
+ size_t num_bands,
+ float* const* out,
+ size_t nrOfSamples,
+ int16_t msInSndCardBuf,
+ int32_t skew);
+
+/*
+ * This function enables the user to set certain parameters on-the-fly.
+ *
+ * Inputs Description
+ * -------------------------------------------------------------------
+ * void* handle Pointer to the AEC instance
+ * AecConfig config Config instance that contains all
+ * properties to be set
+ *
+ * Outputs Description
+ * -------------------------------------------------------------------
+ * int return 0: OK
+ * -1: error
+ */
+int WebRtcAec_set_config(void* handle, AecConfig config);
+
+/*
+ * Gets the current echo status of the nearend signal.
+ *
+ * Inputs Description
+ * -------------------------------------------------------------------
+ * void* handle Pointer to the AEC instance
+ *
+ * Outputs Description
+ * -------------------------------------------------------------------
+ * int* status 0: Almost certainly nearend single-talk
+ * 1: Might not be nearend single-talk
+ * int return 0: OK
+ * -1: error
+ */
+int WebRtcAec_get_echo_status(void* handle, int* status);
+
+/*
+ * Gets the current echo metrics for the session.
+ *
+ * Inputs Description
+ * -------------------------------------------------------------------
+ * void* handle Pointer to the AEC instance
+ *
+ * Outputs Description
+ * -------------------------------------------------------------------
+ * AecMetrics* metrics Struct which will be filled out with the
+ * current echo metrics.
+ * int return 0: OK
+ * -1: error
+ */
+int WebRtcAec_GetMetrics(void* handle, AecMetrics* metrics);
+
+/*
+ * Gets the current delay metrics for the session.
+ *
+ * Inputs Description
+ * -------------------------------------------------------------------
+ * void* handle Pointer to the AEC instance
+ *
+ * Outputs Description
+ * -------------------------------------------------------------------
+ * int* median Delay median value.
+ * int* std Delay standard deviation.
+ * float* fraction_poor_delays Fraction of the delay estimates that may
+ * cause the AEC to perform poorly.
+ *
+ * int return 0: OK
+ * -1: error
+ */
+int WebRtcAec_GetDelayMetrics(void* handle,
+ int* median,
+ int* std,
+ float* fraction_poor_delays);
+
+/*
+ * Gets the last error code.
+ *
+ * Inputs Description
+ * -------------------------------------------------------------------
+ * void* aecInst Pointer to the AEC instance
+ *
+ * Outputs Description
+ * -------------------------------------------------------------------
+ * int32_t return 12000-12050: error or warning code
+ */
+int32_t WebRtcAec_get_error_code(void* aecInst);
+
+// Returns a pointer to the low level AEC handle.
+//
+// Input:
+// - handle : Pointer to the AEC instance.
+//
+// Return value:
+// - AecCore pointer : NULL for error.
+//
+struct AecCore* WebRtcAec_aec_core(void* handle);
+
+#ifdef __cplusplus
+}
+#endif
+#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AEC_INCLUDE_ECHO_CANCELLATION_H_
diff --git a/third_party/webrtc/src/webrtc/modules/audio_processing/aec/system_delay_unittest.cc b/third_party/webrtc/src/webrtc/modules/audio_processing/aec/system_delay_unittest.cc
new file mode 100644
index 00000000..07e3cf8a
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/modules/audio_processing/aec/system_delay_unittest.cc
@@ -0,0 +1,602 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "testing/gtest/include/gtest/gtest.h"
+extern "C" {
+#include "webrtc/modules/audio_processing/aec/aec_core.h"
+}
+#include "webrtc/modules/audio_processing/aec/echo_cancellation_internal.h"
+#include "webrtc/modules/audio_processing/aec/include/echo_cancellation.h"
+#include "webrtc/test/testsupport/gtest_disable.h"
+#include "webrtc/typedefs.h"
+
+namespace {
+
+class SystemDelayTest : public ::testing::Test {
+ protected:
+ SystemDelayTest();
+ virtual void SetUp();
+ virtual void TearDown();
+
+ // Initialization of AEC handle with respect to |sample_rate_hz|. Since the
+ // device sample rate is unimportant we set that value to 48000 Hz.
+ void Init(int sample_rate_hz);
+
+ // Makes one render call and one capture call in that specific order.
+ void RenderAndCapture(int device_buffer_ms);
+
+ // Fills up the far-end buffer with respect to the default device buffer size.
+ size_t BufferFillUp();
+
+ // Runs and verifies the behavior in a stable startup procedure.
+ void RunStableStartup();
+
+ // Maps buffer size in ms into samples, taking the unprocessed frame into
+ // account.
+ int MapBufferSizeToSamples(int size_in_ms, bool extended_filter);
+
+ void* handle_;
+ Aec* self_;
+ size_t samples_per_frame_;
+ // Dummy input/output speech data.
+ static const int kSamplesPerChunk = 160;
+ float far_[kSamplesPerChunk];
+ float near_[kSamplesPerChunk];
+ float out_[kSamplesPerChunk];
+ const float* near_ptr_;
+ float* out_ptr_;
+};
+
+SystemDelayTest::SystemDelayTest()
+ : handle_(NULL), self_(NULL), samples_per_frame_(0) {
+ // Dummy input data are set with more or less arbitrary non-zero values.
+ for (int i = 0; i < kSamplesPerChunk; i++) {
+ far_[i] = 257.0;
+ near_[i] = 514.0;
+ }
+ memset(out_, 0, sizeof(out_));
+ near_ptr_ = near_;
+ out_ptr_ = out_;
+}
+
+void SystemDelayTest::SetUp() {
+ handle_ = WebRtcAec_Create();
+ ASSERT_TRUE(handle_);
+ self_ = reinterpret_cast<Aec*>(handle_);
+}
+
+void SystemDelayTest::TearDown() {
+ // Free AEC
+ WebRtcAec_Free(handle_);
+ handle_ = NULL;
+}
+
+// In SWB mode nothing is added to the buffer handling with respect to
+// functionality compared to WB. We therefore only verify behavior in NB and WB.
+static const int kSampleRateHz[] = {8000, 16000};
+static const size_t kNumSampleRates =
+ sizeof(kSampleRateHz) / sizeof(*kSampleRateHz);
+
+// Default audio device buffer size used.
+static const int kDeviceBufMs = 100;
+
+// Requirement for a stable device convergence time in ms. Should converge in
+// less than |kStableConvergenceMs|.
+static const int kStableConvergenceMs = 100;
+
+// Maximum convergence time in ms. This means that we should leave the startup
+// phase after |kMaxConvergenceMs| independent of device buffer stability
+// conditions.
+static const int kMaxConvergenceMs = 500;
+
+void SystemDelayTest::Init(int sample_rate_hz) {
+ // Initialize AEC
+ EXPECT_EQ(0, WebRtcAec_Init(handle_, sample_rate_hz, 48000));
+ EXPECT_EQ(0, WebRtcAec_system_delay(self_->aec));
+
+ // One frame equals 10 ms of data.
+ samples_per_frame_ = static_cast<size_t>(sample_rate_hz / 100);
+}
+
+void SystemDelayTest::RenderAndCapture(int device_buffer_ms) {
+ EXPECT_EQ(0, WebRtcAec_BufferFarend(handle_, far_, samples_per_frame_));
+ EXPECT_EQ(0,
+ WebRtcAec_Process(handle_,
+ &near_ptr_,
+ 1,
+ &out_ptr_,
+ samples_per_frame_,
+ device_buffer_ms,
+ 0));
+}
+
+size_t SystemDelayTest::BufferFillUp() {
+ // To make sure we have a full buffer when we verify stability we first fill
+ // up the far-end buffer with the same amount as we will report in through
+ // Process().
+ size_t buffer_size = 0;
+ for (int i = 0; i < kDeviceBufMs / 10; i++) {
+ EXPECT_EQ(0, WebRtcAec_BufferFarend(handle_, far_, samples_per_frame_));
+ buffer_size += samples_per_frame_;
+ EXPECT_EQ(static_cast<int>(buffer_size),
+ WebRtcAec_system_delay(self_->aec));
+ }
+ return buffer_size;
+}
+
+void SystemDelayTest::RunStableStartup() {
+ // To make sure we have a full buffer when we verify stability we first fill
+ // up the far-end buffer with the same amount as we will report in through
+ // Process().
+ size_t buffer_size = BufferFillUp();
+
+ if (WebRtcAec_delay_agnostic_enabled(self_->aec) == 1) {
+ // In extended_filter mode we set the buffer size after the first processed
+ // 10 ms chunk. Hence, we don't need to wait for the reported system delay
+ // values to become stable.
+ RenderAndCapture(kDeviceBufMs);
+ buffer_size += samples_per_frame_;
+ EXPECT_EQ(0, self_->startup_phase);
+ } else {
+ // A stable device should be accepted and put in a regular process mode
+ // within |kStableConvergenceMs|.
+ int process_time_ms = 0;
+ for (; process_time_ms < kStableConvergenceMs; process_time_ms += 10) {
+ RenderAndCapture(kDeviceBufMs);
+ buffer_size += samples_per_frame_;
+ if (self_->startup_phase == 0) {
+ // We have left the startup phase.
+ break;
+ }
+ }
+ // Verify convergence time.
+ EXPECT_GT(kStableConvergenceMs, process_time_ms);
+ }
+ // Verify that the buffer has been flushed.
+ EXPECT_GE(static_cast<int>(buffer_size),
+ WebRtcAec_system_delay(self_->aec));
+}
+
+ int SystemDelayTest::MapBufferSizeToSamples(int size_in_ms,
+ bool extended_filter) {
+ // If extended_filter is disabled we add an extra 10 ms for the unprocessed
+ // frame. That is simply how the algorithm is constructed.
+ return static_cast<int>(
+ (size_in_ms + (extended_filter ? 0 : 10)) * samples_per_frame_ / 10);
+}
+
+// The tests should meet basic requirements and not be adjusted to what is
+// actually implemented. If we don't get good code coverage this way we either
+// lack in tests or have unnecessary code.
+// General requirements:
+// 1) If we add far-end data the system delay should be increased with the same
+// amount we add.
+// 2) If the far-end buffer is full we should flush the oldest data to make room
+// for the new. In this case the system delay is unaffected.
+// 3) There should exist a startup phase in which the buffer size is to be
+// determined. In this phase no cancellation should be performed.
+// 4) Under stable conditions (small variations in device buffer sizes) the AEC
+// should determine an appropriate local buffer size within
+// |kStableConvergenceMs| ms.
+// 5) Under unstable conditions the AEC should make a decision within
+// |kMaxConvergenceMs| ms.
+// 6) If the local buffer runs out of data we should stuff the buffer with older
+// frames.
+// 7) The system delay should within |kMaxConvergenceMs| ms heal from
+// disturbances like drift, data glitches, toggling events and outliers.
+// 8) The system delay should never become negative.
+
+TEST_F(SystemDelayTest, CorrectIncreaseWhenBufferFarend) {
+ // When we add data to the AEC buffer the internal system delay should be
+ // incremented with the same amount as the size of data.
+ // This process should be independent of DA-AEC and extended_filter mode.
+ for (int extended_filter = 0; extended_filter <= 1; ++extended_filter) {
+ WebRtcAec_enable_extended_filter(self_->aec, extended_filter);
+ EXPECT_EQ(extended_filter, WebRtcAec_extended_filter_enabled(self_->aec));
+ for (int da_aec = 0; da_aec <= 1; ++da_aec) {
+ WebRtcAec_enable_delay_agnostic(self_->aec, da_aec);
+ EXPECT_EQ(da_aec, WebRtcAec_delay_agnostic_enabled(self_->aec));
+ for (size_t i = 0; i < kNumSampleRates; i++) {
+ Init(kSampleRateHz[i]);
+ // Loop through a couple of calls to make sure the system delay
+ // increments correctly.
+ for (int j = 1; j <= 5; j++) {
+ EXPECT_EQ(0,
+ WebRtcAec_BufferFarend(handle_, far_, samples_per_frame_));
+ EXPECT_EQ(static_cast<int>(j * samples_per_frame_),
+ WebRtcAec_system_delay(self_->aec));
+ }
+ }
+ }
+ }
+}
+
+// TODO(bjornv): Add a test to verify behavior if the far-end buffer is full
+// when adding new data.
+
+TEST_F(SystemDelayTest, CorrectDelayAfterStableStartup) {
+ // We run the system in a stable startup. After that we verify that the system
+ // delay meets the requirements.
+ // This process should be independent of DA-AEC and extended_filter mode.
+ for (int extended_filter = 0; extended_filter <= 1; ++extended_filter) {
+ WebRtcAec_enable_extended_filter(self_->aec, extended_filter);
+ EXPECT_EQ(extended_filter, WebRtcAec_extended_filter_enabled(self_->aec));
+ for (int da_aec = 0; da_aec <= 1; ++da_aec) {
+ WebRtcAec_enable_delay_agnostic(self_->aec, da_aec);
+ EXPECT_EQ(da_aec, WebRtcAec_delay_agnostic_enabled(self_->aec));
+ for (size_t i = 0; i < kNumSampleRates; i++) {
+ Init(kSampleRateHz[i]);
+ RunStableStartup();
+
+ // Verify system delay with respect to requirements, i.e., the
+ // |system_delay| is in the interval [75%, 100%] of what's reported on
+ // the average.
+ // In extended_filter mode we target 50% and measure after one processed
+ // 10 ms chunk.
+ int average_reported_delay =
+ static_cast<int>(kDeviceBufMs * samples_per_frame_ / 10);
+ EXPECT_GE(average_reported_delay, WebRtcAec_system_delay(self_->aec));
+ int lower_bound = WebRtcAec_extended_filter_enabled(self_->aec)
+ ? average_reported_delay / 2 - samples_per_frame_
+ : average_reported_delay * 3 / 4;
+ EXPECT_LE(lower_bound, WebRtcAec_system_delay(self_->aec));
+ }
+ }
+ }
+}
+
+TEST_F(SystemDelayTest, CorrectDelayAfterUnstableStartup) {
+ // This test does not apply in extended_filter mode, since we only use the
+ // first 10 ms chunk to determine a reasonable buffer size. Neither does
+ // it apply if DA-AEC is on because that overrides the startup procedure.
+ WebRtcAec_enable_extended_filter(self_->aec, 0);
+ EXPECT_EQ(0, WebRtcAec_extended_filter_enabled(self_->aec));
+ WebRtcAec_enable_delay_agnostic(self_->aec, 0);
+ EXPECT_EQ(0, WebRtcAec_delay_agnostic_enabled(self_->aec));
+
+ // In an unstable system we would start processing after |kMaxConvergenceMs|.
+ // On the last frame the AEC buffer is adjusted to 60% of the last reported
+ // device buffer size.
+ // We construct an unstable system by altering the device buffer size between
+ // two values |kDeviceBufMs| +- 25 ms.
+ for (size_t i = 0; i < kNumSampleRates; i++) {
+ Init(kSampleRateHz[i]);
+
+ // To make sure we have a full buffer when we verify stability we first fill
+ // up the far-end buffer with the same amount as we will report in on the
+ // average through Process().
+ size_t buffer_size = BufferFillUp();
+
+ int buffer_offset_ms = 25;
+ int reported_delay_ms = 0;
+ int process_time_ms = 0;
+ for (; process_time_ms <= kMaxConvergenceMs; process_time_ms += 10) {
+ reported_delay_ms = kDeviceBufMs + buffer_offset_ms;
+ RenderAndCapture(reported_delay_ms);
+ buffer_size += samples_per_frame_;
+ buffer_offset_ms = -buffer_offset_ms;
+ if (self_->startup_phase == 0) {
+ // We have left the startup phase.
+ break;
+ }
+ }
+ // Verify convergence time.
+ EXPECT_GE(kMaxConvergenceMs, process_time_ms);
+ // Verify that the buffer has been flushed.
+ EXPECT_GE(static_cast<int>(buffer_size),
+ WebRtcAec_system_delay(self_->aec));
+
+ // Verify system delay with respect to requirements, i.e., the
+ // |system_delay| is in the interval [60%, 100%] of what's last reported.
+ EXPECT_GE(static_cast<int>(reported_delay_ms * samples_per_frame_ / 10),
+ WebRtcAec_system_delay(self_->aec));
+ EXPECT_LE(
+ static_cast<int>(reported_delay_ms * samples_per_frame_ / 10 * 3 / 5),
+ WebRtcAec_system_delay(self_->aec));
+ }
+}
+
+TEST_F(SystemDelayTest, CorrectDelayAfterStableBufferBuildUp) {
+ // This test does not apply in extended_filter mode, since we only use the
+ // first 10 ms chunk to determine a reasonable buffer size. Neither does
+ // it apply if DA-AEC is on because that overrides the startup procedure.
+ WebRtcAec_enable_extended_filter(self_->aec, 0);
+ EXPECT_EQ(0, WebRtcAec_extended_filter_enabled(self_->aec));
+ WebRtcAec_enable_delay_agnostic(self_->aec, 0);
+ EXPECT_EQ(0, WebRtcAec_delay_agnostic_enabled(self_->aec));
+
+ // In this test we start by establishing the device buffer size during stable
+ // conditions, but with an empty internal far-end buffer. Once that is done we
+ // verify that the system delay is increased correctly until we have reached an
+ // internal buffer size of 75% of what's been reported.
+ for (size_t i = 0; i < kNumSampleRates; i++) {
+ Init(kSampleRateHz[i]);
+
+ // We assume that running |kStableConvergenceMs| calls will put the
+ // algorithm in a state where the device buffer size has been determined. We
+ // can make that assumption since we have a separate stability test.
+ int process_time_ms = 0;
+ for (; process_time_ms < kStableConvergenceMs; process_time_ms += 10) {
+ EXPECT_EQ(0,
+ WebRtcAec_Process(handle_,
+ &near_ptr_,
+ 1,
+ &out_ptr_,
+ samples_per_frame_,
+ kDeviceBufMs,
+ 0));
+ }
+ // Verify that a buffer size has been established.
+ EXPECT_EQ(0, self_->checkBuffSize);
+
+ // We now have established the required buffer size. Let us verify that we
+ // fill up before leaving the startup phase for normal processing.
+ size_t buffer_size = 0;
+ size_t target_buffer_size = kDeviceBufMs * samples_per_frame_ / 10 * 3 / 4;
+ process_time_ms = 0;
+ for (; process_time_ms <= kMaxConvergenceMs; process_time_ms += 10) {
+ RenderAndCapture(kDeviceBufMs);
+ buffer_size += samples_per_frame_;
+ if (self_->startup_phase == 0) {
+ // We have left the startup phase.
+ break;
+ }
+ }
+ // Verify convergence time.
+ EXPECT_GT(kMaxConvergenceMs, process_time_ms);
+ // Verify that the buffer has reached the desired size.
+ EXPECT_LE(static_cast<int>(target_buffer_size),
+ WebRtcAec_system_delay(self_->aec));
+
+ // Verify normal behavior (system delay is kept constant) after startup by
+ // running a couple of calls to BufferFarend() and Process().
+ for (int j = 0; j < 6; j++) {
+ int system_delay_before_calls = WebRtcAec_system_delay(self_->aec);
+ RenderAndCapture(kDeviceBufMs);
+ EXPECT_EQ(system_delay_before_calls, WebRtcAec_system_delay(self_->aec));
+ }
+ }
+}
+
+TEST_F(SystemDelayTest, CorrectDelayWhenBufferUnderrun) {
+ // Here we test a buffer under run scenario. If we keep on calling
+ // WebRtcAec_Process() we will finally run out of data, but should
+ // automatically stuff the buffer. We verify this behavior by checking if the
+ // system delay goes negative.
+ // This process should be independent of DA-AEC and extended_filter mode.
+ for (int extended_filter = 0; extended_filter <= 1; ++extended_filter) {
+ WebRtcAec_enable_extended_filter(self_->aec, extended_filter);
+ EXPECT_EQ(extended_filter, WebRtcAec_extended_filter_enabled(self_->aec));
+ for (int da_aec = 0; da_aec <= 1; ++da_aec) {
+ WebRtcAec_enable_delay_agnostic(self_->aec, da_aec);
+ EXPECT_EQ(da_aec, WebRtcAec_delay_agnostic_enabled(self_->aec));
+ for (size_t i = 0; i < kNumSampleRates; i++) {
+ Init(kSampleRateHz[i]);
+ RunStableStartup();
+
+ // The AEC has now left the Startup phase. We now have at most
+ // |kStableConvergenceMs| in the buffer. Keep on calling Process() until
+ // we run out of data and verify that the system delay is non-negative.
+ for (int j = 0; j <= kStableConvergenceMs; j += 10) {
+ EXPECT_EQ(0, WebRtcAec_Process(handle_, &near_ptr_, 1, &out_ptr_,
+ samples_per_frame_, kDeviceBufMs, 0));
+ EXPECT_LE(0, WebRtcAec_system_delay(self_->aec));
+ }
+ }
+ }
+ }
+}
+
+TEST_F(SystemDelayTest, CorrectDelayDuringDrift) {
+ // This drift test should verify that the system delay is never exceeding the
+ // device buffer. The drift is simulated by decreasing the reported device
+ // buffer size by 1 ms every 100 ms. If the device buffer size goes below 30
+ // ms we jump (add) 10 ms to give a repeated pattern.
+
+ // This process should be independent of DA-AEC and extended_filter mode.
+ for (int extended_filter = 0; extended_filter <= 1; ++extended_filter) {
+ WebRtcAec_enable_extended_filter(self_->aec, extended_filter);
+ EXPECT_EQ(extended_filter, WebRtcAec_extended_filter_enabled(self_->aec));
+ for (int da_aec = 0; da_aec <= 1; ++da_aec) {
+ WebRtcAec_enable_delay_agnostic(self_->aec, da_aec);
+ EXPECT_EQ(da_aec, WebRtcAec_delay_agnostic_enabled(self_->aec));
+ for (size_t i = 0; i < kNumSampleRates; i++) {
+ Init(kSampleRateHz[i]);
+ RunStableStartup();
+
+ // We have left the startup phase and proceed with normal processing.
+ int jump = 0;
+ for (int j = 0; j < 1000; j++) {
+ // Drift = -1 ms per 100 ms of data.
+ int device_buf_ms = kDeviceBufMs - (j / 10) + jump;
+ int device_buf = MapBufferSizeToSamples(device_buf_ms,
+ extended_filter == 1);
+
+ if (device_buf_ms < 30) {
+ // Add 10 ms data, taking effect next frame.
+ jump += 10;
+ }
+ RenderAndCapture(device_buf_ms);
+
+ // Verify that the system delay does not exceed the device buffer.
+ EXPECT_GE(device_buf, WebRtcAec_system_delay(self_->aec));
+
+ // Verify that the system delay is non-negative.
+ EXPECT_LE(0, WebRtcAec_system_delay(self_->aec));
+ }
+ }
+ }
+ }
+}
+
+TEST_F(SystemDelayTest, ShouldRecoverAfterGlitch) {
+ // This glitch test should verify that the system delay recovers if there is
+ // a glitch in data. The data glitch is constructed as 200 ms of buffering
+ // after which the stable procedure continues. The glitch is never reported by
+ // the device.
+ // The system is said to be in a non-causal state if the difference between
+ // the device buffer and system delay is less than a block (64 samples).
+
+ // This process should be independent of DA-AEC and extended_filter mode.
+ for (int extended_filter = 0; extended_filter <= 1; ++extended_filter) {
+ WebRtcAec_enable_extended_filter(self_->aec, extended_filter);
+ EXPECT_EQ(extended_filter, WebRtcAec_extended_filter_enabled(self_->aec));
+ for (int da_aec = 0; da_aec <= 1; ++da_aec) {
+ WebRtcAec_enable_delay_agnostic(self_->aec, da_aec);
+ EXPECT_EQ(da_aec, WebRtcAec_delay_agnostic_enabled(self_->aec));
+ for (size_t i = 0; i < kNumSampleRates; i++) {
+ Init(kSampleRateHz[i]);
+ RunStableStartup();
+ int device_buf = MapBufferSizeToSamples(kDeviceBufMs,
+ extended_filter == 1);
+ // Glitch state.
+ for (int j = 0; j < 20; j++) {
+ EXPECT_EQ(0,
+ WebRtcAec_BufferFarend(handle_, far_, samples_per_frame_));
+ // No need to verify system delay, since that is done in a separate
+ // test.
+ }
+ // Verify that we are in a non-causal state, i.e.,
+ // |system_delay| > |device_buf|.
+ EXPECT_LT(device_buf, WebRtcAec_system_delay(self_->aec));
+
+ // Recover state. Should recover at least 4 ms of data per 10 ms, hence
+ // a glitch of 200 ms will take at most 200 * 10 / 4 = 500 ms to recover
+ // from.
+ bool non_causal = true; // We are currently in a non-causal state.
+ for (int j = 0; j < 50; j++) {
+ int system_delay_before = WebRtcAec_system_delay(self_->aec);
+ RenderAndCapture(kDeviceBufMs);
+ int system_delay_after = WebRtcAec_system_delay(self_->aec);
+ // We have recovered if
+ // |device_buf| - |system_delay_after| >= PART_LEN (1 block).
+ // During recovery, |system_delay_after| < |system_delay_before|,
+ // otherwise they are equal.
+ if (non_causal) {
+ EXPECT_LT(system_delay_after, system_delay_before);
+ if (device_buf - system_delay_after >= PART_LEN) {
+ non_causal = false;
+ }
+ } else {
+ EXPECT_EQ(system_delay_before, system_delay_after);
+ }
+ // Verify that the system delay is non-negative.
+ EXPECT_LE(0, WebRtcAec_system_delay(self_->aec));
+ }
+ // Check that we have recovered.
+ EXPECT_FALSE(non_causal);
+ }
+ }
+ }
+}
+
+TEST_F(SystemDelayTest, UnaffectedWhenSpuriousDeviceBufferValues) {
+ // This test does not apply in extended_filter mode, since we only use the
+ // first 10 ms chunk to determine a reasonable buffer size.
+ const int extended_filter = 0;
+ WebRtcAec_enable_extended_filter(self_->aec, extended_filter);
+ EXPECT_EQ(extended_filter, WebRtcAec_extended_filter_enabled(self_->aec));
+
+ // Should be DA-AEC independent.
+ for (int da_aec = 0; da_aec <= 1; ++da_aec) {
+ WebRtcAec_enable_delay_agnostic(self_->aec, da_aec);
+ EXPECT_EQ(da_aec, WebRtcAec_delay_agnostic_enabled(self_->aec));
+ // This spurious device buffer data test aims at verifying that the system
+ // delay is unaffected by large outliers.
+ // The system is said to be in a non-causal state if the difference between
+ // the device buffer and system delay is less than a block (64 samples).
+ for (size_t i = 0; i < kNumSampleRates; i++) {
+ Init(kSampleRateHz[i]);
+ RunStableStartup();
+ int device_buf = MapBufferSizeToSamples(kDeviceBufMs,
+ extended_filter == 1);
+
+ // Normal state. We are currently not in a non-causal state.
+ bool non_causal = false;
+
+ // Run 1 s and replace device buffer size with 500 ms every 100 ms.
+ for (int j = 0; j < 100; j++) {
+ int system_delay_before_calls = WebRtcAec_system_delay(self_->aec);
+ int device_buf_ms = j % 10 == 0 ? 500 : kDeviceBufMs;
+ RenderAndCapture(device_buf_ms);
+
+ // Check for non-causality.
+ if (device_buf - WebRtcAec_system_delay(self_->aec) < PART_LEN) {
+ non_causal = true;
+ }
+ EXPECT_FALSE(non_causal);
+ EXPECT_EQ(system_delay_before_calls,
+ WebRtcAec_system_delay(self_->aec));
+
+ // Verify that the system delay is non-negative.
+ EXPECT_LE(0, WebRtcAec_system_delay(self_->aec));
+ }
+ }
+ }
+}
+
+TEST_F(SystemDelayTest, CorrectImpactWhenTogglingDeviceBufferValues) {
+ // This test aims at verifying that the system delay is "unaffected" by
+ // toggling values reported by the device.
+ // The test is constructed such that every other device buffer value is zero
+ // and then 2 * |kDeviceBufMs|, hence the size is constant on the average. The
+ // zero values will force us into a non-causal state and thereby lowering the
+ // system delay until we basically run out of data. Once that happens the
+ // buffer will be stuffed.
+ // TODO(bjornv): This test will have a better impact if we verified that the
+ // delay estimate goes up when the system delay goes down to meet the average
+ // device buffer size.
+
+ // This test does not apply if DA-AEC is enabled and extended_filter mode
+ // disabled.
+ for (int extended_filter = 0; extended_filter <= 1; ++extended_filter) {
+ WebRtcAec_enable_extended_filter(self_->aec, extended_filter);
+ EXPECT_EQ(extended_filter, WebRtcAec_extended_filter_enabled(self_->aec));
+ for (int da_aec = 0; da_aec <= 1; ++da_aec) {
+ WebRtcAec_enable_delay_agnostic(self_->aec, da_aec);
+ EXPECT_EQ(da_aec, WebRtcAec_delay_agnostic_enabled(self_->aec));
+ if (extended_filter == 0 && da_aec == 1) {
+ continue;
+ }
+ for (size_t i = 0; i < kNumSampleRates; i++) {
+ Init(kSampleRateHz[i]);
+ RunStableStartup();
+ const int device_buf = MapBufferSizeToSamples(kDeviceBufMs,
+ extended_filter == 1);
+
+ // Normal state. We are currently not in a non-causal state.
+ bool non_causal = false;
+
+ // Loop through 100 frames (both render and capture), which equals 1 s
+ // of data. Every odd frame we set the device buffer size to
+ // 2 * |kDeviceBufMs| and even frames we set the device buffer size to
+ // zero.
+ for (int j = 0; j < 100; j++) {
+ int system_delay_before_calls = WebRtcAec_system_delay(self_->aec);
+ int device_buf_ms = 2 * (j % 2) * kDeviceBufMs;
+ RenderAndCapture(device_buf_ms);
+
+ // Check for non-causality, compared with the average device buffer
+ // size.
+ non_causal |= (device_buf - WebRtcAec_system_delay(self_->aec) < 64);
+ EXPECT_GE(system_delay_before_calls,
+ WebRtcAec_system_delay(self_->aec));
+
+ // Verify that the system delay is non-negative.
+ EXPECT_LE(0, WebRtcAec_system_delay(self_->aec));
+ }
+ // Verify we are not in a non-causal state.
+ EXPECT_FALSE(non_causal);
+ }
+ }
+ }
+}
+
+} // namespace
diff --git a/third_party/webrtc/src/webrtc/modules/audio_processing/aecm/aecm_core.c b/third_party/webrtc/src/webrtc/modules/audio_processing/aecm/aecm_core.c
new file mode 100644
index 00000000..b801f07a
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/modules/audio_processing/aecm/aecm_core.c
@@ -0,0 +1,1233 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/modules/audio_processing/aecm/aecm_core.h"
+
+#include <assert.h>
+#include <stddef.h>
+#include <stdlib.h>
+
+#include "webrtc/common_audio/ring_buffer.h"
+#include "webrtc/common_audio/signal_processing/include/real_fft.h"
+#include "webrtc/modules/audio_processing/aecm/include/echo_control_mobile.h"
+#include "webrtc/modules/audio_processing/utility/delay_estimator_wrapper.h"
+#include "webrtc/system_wrappers/interface/compile_assert_c.h"
+#include "webrtc/system_wrappers/interface/cpu_features_wrapper.h"
+#include "webrtc/typedefs.h"
+
+#ifdef AEC_DEBUG  // File handles that exist only in AEC_DEBUG builds.
+FILE *dfile;  // Not referenced in the visible part of this file.
+FILE *testfile;  // Not referenced in the visible part of this file.
+#endif  // AEC_DEBUG
+
+const int16_t WebRtcAecm_kCosTable[] = {  // 360 values (40 rows of 9); looks like cos(i degrees) scaled by 8192 -- inferred from the data, confirm with users of the table.
+ 8192, 8190, 8187, 8180, 8172, 8160, 8147, 8130, 8112,
+ 8091, 8067, 8041, 8012, 7982, 7948, 7912, 7874, 7834,
+ 7791, 7745, 7697, 7647, 7595, 7540, 7483, 7424, 7362,
+ 7299, 7233, 7164, 7094, 7021, 6947, 6870, 6791, 6710,
+ 6627, 6542, 6455, 6366, 6275, 6182, 6087, 5991, 5892,
+ 5792, 5690, 5586, 5481, 5374, 5265, 5155, 5043, 4930,
+ 4815, 4698, 4580, 4461, 4341, 4219, 4096, 3971, 3845,
+ 3719, 3591, 3462, 3331, 3200, 3068, 2935, 2801, 2667,
+ 2531, 2395, 2258, 2120, 1981, 1842, 1703, 1563, 1422,
+ 1281, 1140, 998, 856, 713, 571, 428, 285, 142,
+ 0, -142, -285, -428, -571, -713, -856, -998, -1140,
+ -1281, -1422, -1563, -1703, -1842, -1981, -2120, -2258, -2395,
+ -2531, -2667, -2801, -2935, -3068, -3200, -3331, -3462, -3591,
+ -3719, -3845, -3971, -4095, -4219, -4341, -4461, -4580, -4698,
+ -4815, -4930, -5043, -5155, -5265, -5374, -5481, -5586, -5690,
+ -5792, -5892, -5991, -6087, -6182, -6275, -6366, -6455, -6542,
+ -6627, -6710, -6791, -6870, -6947, -7021, -7094, -7164, -7233,
+ -7299, -7362, -7424, -7483, -7540, -7595, -7647, -7697, -7745,
+ -7791, -7834, -7874, -7912, -7948, -7982, -8012, -8041, -8067,
+ -8091, -8112, -8130, -8147, -8160, -8172, -8180, -8187, -8190,
+ -8191, -8190, -8187, -8180, -8172, -8160, -8147, -8130, -8112,
+ -8091, -8067, -8041, -8012, -7982, -7948, -7912, -7874, -7834,
+ -7791, -7745, -7697, -7647, -7595, -7540, -7483, -7424, -7362,
+ -7299, -7233, -7164, -7094, -7021, -6947, -6870, -6791, -6710,
+ -6627, -6542, -6455, -6366, -6275, -6182, -6087, -5991, -5892,
+ -5792, -5690, -5586, -5481, -5374, -5265, -5155, -5043, -4930,
+ -4815, -4698, -4580, -4461, -4341, -4219, -4096, -3971, -3845,
+ -3719, -3591, -3462, -3331, -3200, -3068, -2935, -2801, -2667,
+ -2531, -2395, -2258, -2120, -1981, -1842, -1703, -1563, -1422,
+ -1281, -1140, -998, -856, -713, -571, -428, -285, -142,
+ 0, 142, 285, 428, 571, 713, 856, 998, 1140,
+ 1281, 1422, 1563, 1703, 1842, 1981, 2120, 2258, 2395,
+ 2531, 2667, 2801, 2935, 3068, 3200, 3331, 3462, 3591,
+ 3719, 3845, 3971, 4095, 4219, 4341, 4461, 4580, 4698,
+ 4815, 4930, 5043, 5155, 5265, 5374, 5481, 5586, 5690,
+ 5792, 5892, 5991, 6087, 6182, 6275, 6366, 6455, 6542,
+ 6627, 6710, 6791, 6870, 6947, 7021, 7094, 7164, 7233,
+ 7299, 7362, 7424, 7483, 7540, 7595, 7647, 7697, 7745,
+ 7791, 7834, 7874, 7912, 7948, 7982, 8012, 8041, 8067,
+ 8091, 8112, 8130, 8147, 8160, 8172, 8180, 8187, 8190
+};
+
+const int16_t WebRtcAecm_kSinTable[] = {  // 360 values (45 rows of 8); looks like sin(i degrees) scaled by 8192 -- inferred from the data, confirm with users of the table.
+ 0, 142, 285, 428, 571, 713, 856, 998,
+ 1140, 1281, 1422, 1563, 1703, 1842, 1981, 2120,
+ 2258, 2395, 2531, 2667, 2801, 2935, 3068, 3200,
+ 3331, 3462, 3591, 3719, 3845, 3971, 4095, 4219,
+ 4341, 4461, 4580, 4698, 4815, 4930, 5043, 5155,
+ 5265, 5374, 5481, 5586, 5690, 5792, 5892, 5991,
+ 6087, 6182, 6275, 6366, 6455, 6542, 6627, 6710,
+ 6791, 6870, 6947, 7021, 7094, 7164, 7233, 7299,
+ 7362, 7424, 7483, 7540, 7595, 7647, 7697, 7745,
+ 7791, 7834, 7874, 7912, 7948, 7982, 8012, 8041,
+ 8067, 8091, 8112, 8130, 8147, 8160, 8172, 8180,
+ 8187, 8190, 8191, 8190, 8187, 8180, 8172, 8160,
+ 8147, 8130, 8112, 8091, 8067, 8041, 8012, 7982,
+ 7948, 7912, 7874, 7834, 7791, 7745, 7697, 7647,
+ 7595, 7540, 7483, 7424, 7362, 7299, 7233, 7164,
+ 7094, 7021, 6947, 6870, 6791, 6710, 6627, 6542,
+ 6455, 6366, 6275, 6182, 6087, 5991, 5892, 5792,
+ 5690, 5586, 5481, 5374, 5265, 5155, 5043, 4930,
+ 4815, 4698, 4580, 4461, 4341, 4219, 4096, 3971,
+ 3845, 3719, 3591, 3462, 3331, 3200, 3068, 2935,
+ 2801, 2667, 2531, 2395, 2258, 2120, 1981, 1842,
+ 1703, 1563, 1422, 1281, 1140, 998, 856, 713,
+ 571, 428, 285, 142, 0, -142, -285, -428,
+ -571, -713, -856, -998, -1140, -1281, -1422, -1563,
+ -1703, -1842, -1981, -2120, -2258, -2395, -2531, -2667,
+ -2801, -2935, -3068, -3200, -3331, -3462, -3591, -3719,
+ -3845, -3971, -4095, -4219, -4341, -4461, -4580, -4698,
+ -4815, -4930, -5043, -5155, -5265, -5374, -5481, -5586,
+ -5690, -5792, -5892, -5991, -6087, -6182, -6275, -6366,
+ -6455, -6542, -6627, -6710, -6791, -6870, -6947, -7021,
+ -7094, -7164, -7233, -7299, -7362, -7424, -7483, -7540,
+ -7595, -7647, -7697, -7745, -7791, -7834, -7874, -7912,
+ -7948, -7982, -8012, -8041, -8067, -8091, -8112, -8130,
+ -8147, -8160, -8172, -8180, -8187, -8190, -8191, -8190,
+ -8187, -8180, -8172, -8160, -8147, -8130, -8112, -8091,
+ -8067, -8041, -8012, -7982, -7948, -7912, -7874, -7834,
+ -7791, -7745, -7697, -7647, -7595, -7540, -7483, -7424,
+ -7362, -7299, -7233, -7164, -7094, -7021, -6947, -6870,
+ -6791, -6710, -6627, -6542, -6455, -6366, -6275, -6182,
+ -6087, -5991, -5892, -5792, -5690, -5586, -5481, -5374,
+ -5265, -5155, -5043, -4930, -4815, -4698, -4580, -4461,
+ -4341, -4219, -4096, -3971, -3845, -3719, -3591, -3462,
+ -3331, -3200, -3068, -2935, -2801, -2667, -2531, -2395,
+ -2258, -2120, -1981, -1842, -1703, -1563, -1422, -1281,
+ -1140, -998, -856, -713, -571, -428, -285, -142
+};
+
+// Initial echo channel shape for 8 kHz: WebRtcAecm_InitCore() hands this table (65 values listed) to WebRtcAecm_InitEchoPathCore() when samplingFreq == 8000.
+static const int16_t kChannelStored8kHz[PART_LEN1] = {
+ 2040, 1815, 1590, 1498, 1405, 1395, 1385, 1418,
+ 1451, 1506, 1562, 1644, 1726, 1804, 1882, 1918,
+ 1953, 1982, 2010, 2025, 2040, 2034, 2027, 2021,
+ 2014, 1997, 1980, 1925, 1869, 1800, 1732, 1683,
+ 1635, 1604, 1572, 1545, 1517, 1481, 1444, 1405,
+ 1367, 1331, 1294, 1270, 1245, 1239, 1233, 1247,
+ 1260, 1282, 1303, 1338, 1373, 1407, 1441, 1470,
+ 1499, 1524, 1549, 1565, 1582, 1601, 1621, 1649,
+ 1676
+};
+
+// Initial echo channel shape for 16 kHz: WebRtcAecm_InitCore() hands this table (65 values listed) to WebRtcAecm_InitEchoPathCore() when samplingFreq is not 8000.
+static const int16_t kChannelStored16kHz[PART_LEN1] = {
+ 2040, 1590, 1405, 1385, 1451, 1562, 1726, 1882,
+ 1953, 2010, 2040, 2027, 2014, 1980, 1869, 1732,
+ 1635, 1572, 1517, 1444, 1367, 1294, 1245, 1233,
+ 1260, 1303, 1373, 1441, 1499, 1549, 1582, 1621,
+ 1676, 1741, 1802, 1861, 1921, 1983, 2040, 2102,
+ 2170, 2265, 2375, 2515, 2651, 2781, 2922, 3075,
+ 3253, 3471, 3738, 3976, 4151, 4258, 4308, 4288,
+ 4270, 4253, 4237, 4179, 4086, 3947, 3757, 3484,
+ 3153
+};
+
+// Advances the circular far end history by one slot and stores
+// |far_spectrum| together with its Q-domain in that slot.
+//
+// Inputs:
+//      - self          : AECM core instance that owns the history buffers
+//      - far_spectrum  : Far end spectrum to store (PART_LEN1 values are read)
+//      - far_q         : Q-domain of |far_spectrum|
+//
+void WebRtcAecm_UpdateFarHistory(AecmCore* self,
+                                 uint16_t* far_spectrum,
+                                 int far_q) {
+  // Step to the next slot; past the last slot we start over at slot 0.
+  int slot = self->far_history_pos + 1;
+  if (slot >= MAX_DELAY) {
+    slot = 0;
+  }
+  self->far_history_pos = slot;
+
+  // Remember the Q-domain of this entry.
+  self->far_q_domains[slot] = far_q;
+  // Copy the spectrum into the slot's PART_LEN1 wide region of the history.
+  memcpy(self->far_history + slot * PART_LEN1,
+         far_spectrum,
+         PART_LEN1 * sizeof(uint16_t));
+}
+
+// Returns a pointer to the far end spectrum aligned to current near end
+// spectrum, i.e. the history entry that was stored |delay| updates before the
+// most recent one. The function WebRtc_DelayEstimatorProcessFix(...) should
+// have been called before AlignedFarend(...). Otherwise, you get the pointer to
+// the previous frame. The memory is only valid until the next call of
+// WebRtc_DelayEstimatorProcessFix(...).
+//
+// Inputs:
+//      - self          : Pointer to the AECM instance.
+//      - delay         : Current delay estimate, counted in history entries.
+//
+// Output:
+//      - far_q         : The Q-domain of the aligned far end spectrum. Left
+//                        untouched when NULL is returned.
+//
+// Return value:
+//      - far_spectrum  : Pointer to the aligned far end spectrum
+//                        NULL - Error (the requested entry lies outside the
+//                               history, e.g. |delay| is out of range or no
+//                               entry has been stored since initialization)
+//
+const uint16_t* WebRtcAecm_AlignedFarend(AecmCore* self,
+                                         int* far_q,
+                                         int delay) {
+  int buffer_position = 0;
+  assert(self != NULL);
+  buffer_position = self->far_history_pos - delay;
+
+  // The history is circular: wrap a negative index around once.
+  if (buffer_position < 0) {
+    buffer_position += MAX_DELAY;
+  }
+  // An index that is still out of range would read outside |far_q_domains|
+  // and |far_history|; report it as the documented error instead.
+  if (buffer_position < 0 || buffer_position >= MAX_DELAY) {
+    return NULL;
+  }
+  // Get Q-domain
+  *far_q = self->far_q_domains[buffer_position];
+  // Return far end spectrum
+  return &(self->far_history[buffer_position * PART_LEN1]);
+}
+
+// Function pointers to the active kernels: WebRtcAecm_InitCore() sets them to the C versions and then, where compiled in, to the NEON or MIPS variants.
+CalcLinearEnergies WebRtcAecm_CalcLinearEnergies;
+StoreAdaptiveChannel WebRtcAecm_StoreAdaptiveChannel;
+ResetAdaptiveChannel WebRtcAecm_ResetAdaptiveChannel;
+
+// Allocates an AECM core instance together with its four frame ring buffers,
+// the far end / near end delay estimators and the real FFT state, and points
+// the work-buffer members at aligned positions inside their backing arrays.
+//
+// Return value: the new instance, or NULL if any allocation failed. On
+// failure everything created so far is released through
+// WebRtcAecm_FreeCore().
+AecmCore* WebRtcAecm_CreateCore() {
+  // Zero-initialize the instance so that, on a partial failure,
+  // WebRtcAecm_FreeCore() sees NULL rather than indeterminate values for the
+  // members that were never created.
+  // NOTE(review): relies on the WebRtc_Free*() / WebRtcSpl_FreeRealFFT()
+  // helpers accepting NULL -- confirm against their implementations.
+  AecmCore* aecm = calloc(1, sizeof(*aecm));
+  if (aecm == NULL) {
+    return NULL;
+  }
+
+  aecm->farFrameBuf = WebRtc_CreateBuffer(FRAME_LEN + PART_LEN,
+                                          sizeof(int16_t));
+  if (!aecm->farFrameBuf) {
+    goto fail;
+  }
+
+  aecm->nearNoisyFrameBuf = WebRtc_CreateBuffer(FRAME_LEN + PART_LEN,
+                                                sizeof(int16_t));
+  if (!aecm->nearNoisyFrameBuf) {
+    goto fail;
+  }
+
+  aecm->nearCleanFrameBuf = WebRtc_CreateBuffer(FRAME_LEN + PART_LEN,
+                                                sizeof(int16_t));
+  if (!aecm->nearCleanFrameBuf) {
+    goto fail;
+  }
+
+  aecm->outFrameBuf = WebRtc_CreateBuffer(FRAME_LEN + PART_LEN,
+                                          sizeof(int16_t));
+  if (!aecm->outFrameBuf) {
+    goto fail;
+  }
+
+  aecm->delay_estimator_farend = WebRtc_CreateDelayEstimatorFarend(PART_LEN1,
+                                                                   MAX_DELAY);
+  if (aecm->delay_estimator_farend == NULL) {
+    goto fail;
+  }
+  aecm->delay_estimator =
+      WebRtc_CreateDelayEstimator(aecm->delay_estimator_farend, 0);
+  if (aecm->delay_estimator == NULL) {
+    goto fail;
+  }
+  // TODO(bjornv): Explicitly disable robust delay validation until no
+  // performance regression has been established.  Then remove the line.
+  WebRtc_enable_robust_validation(aecm->delay_estimator, 0);
+
+  aecm->real_fft = WebRtcSpl_CreateRealFFT(PART_LEN_SHIFT);
+  if (aecm->real_fft == NULL) {
+    goto fail;
+  }
+
+  // Init some aecm pointers. 16 and 32 byte alignment is only necessary
+  // for Neon code currently. Each pointer is the address of its backing
+  // array rounded up to the next multiple of 32 (or 16) bytes.
+  aecm->xBuf = (int16_t*) (((uintptr_t)aecm->xBuf_buf + 31) & ~ 31);
+  aecm->dBufClean = (int16_t*) (((uintptr_t)aecm->dBufClean_buf + 31) & ~ 31);
+  aecm->dBufNoisy = (int16_t*) (((uintptr_t)aecm->dBufNoisy_buf + 31) & ~ 31);
+  aecm->outBuf = (int16_t*) (((uintptr_t)aecm->outBuf_buf + 15) & ~ 15);
+  aecm->channelStored = (int16_t*) (((uintptr_t)
+                                     aecm->channelStored_buf + 15) & ~ 15);
+  aecm->channelAdapt16 = (int16_t*) (((uintptr_t)
+                                      aecm->channelAdapt16_buf + 15) & ~ 15);
+  aecm->channelAdapt32 = (int32_t*) (((uintptr_t)
+                                      aecm->channelAdapt32_buf + 31) & ~ 31);
+
+  return aecm;
+
+fail:
+  // Releases whatever was created above, and the instance itself.
+  WebRtcAecm_FreeCore(aecm);
+  return NULL;
+}
+
+// Loads |echo_path| (PART_LEN1 values) into the stored channel and into both
+// adaptive channels, then resets the channel storing state.
+void WebRtcAecm_InitEchoPathCore(AecmCore* aecm, const int16_t* echo_path) {
+  const size_t channel_bytes = sizeof(int16_t) * PART_LEN1;
+  int bin;
+
+  // The stored channel and the 16-bit adaptive channel both start out as a
+  // copy of |echo_path|.
+  memcpy(aecm->channelStored, echo_path, channel_bytes);
+  memcpy(aecm->channelAdapt16, echo_path, channel_bytes);
+  // The 32-bit adaptive channel carries the same values in its upper 16 bits.
+  for (bin = 0; bin < PART_LEN1; ++bin) {
+    aecm->channelAdapt32[bin] = (int32_t)aecm->channelAdapt16[bin] << 16;
+  }
+
+  // Reset channel storing variables.
+  aecm->mseChannelCount = 0;
+  aecm->mseThreshold = WEBRTC_SPL_WORD32_MAX;
+  aecm->mseStoredOld = 1000;
+  aecm->mseAdaptOld = 1000;
+}
+
+// C version of the WebRtcAecm_CalcLinearEnergies kernel.
+//
+// For every bin in [0, PART_LEN1) it writes the echo estimate through the
+// stored channel to |echo_est| and adds to the three energy sums:
+//   - far_energy         += far_spectrum[i]
+//   - echo_energy_adapt  += channelAdapt16[i] * far_spectrum[i]
+//   - echo_energy_stored += echo_est[i]
+// The sums are accumulated on top of whatever the caller passes in.
+static void CalcLinearEnergiesC(AecmCore* aecm,
+                                const uint16_t* far_spectrum,
+                                int32_t* echo_est,
+                                uint32_t* far_energy,
+                                uint32_t* echo_energy_adapt,
+                                uint32_t* echo_energy_stored) {
+  int bin;
+
+  for (bin = 0; bin < PART_LEN1; ++bin) {
+    const uint16_t far_bin = far_spectrum[bin];
+    // Echo estimate through the stored channel.
+    const int32_t stored_echo =
+        WEBRTC_SPL_MUL_16_U16(aecm->channelStored[bin], far_bin);
+
+    echo_est[bin] = stored_echo;
+    *far_energy += (uint32_t)far_bin;
+    *echo_energy_adapt += aecm->channelAdapt16[bin] * far_bin;
+    *echo_energy_stored += (uint32_t)stored_echo;
+  }
+}
+
+// C version of the WebRtcAecm_StoreAdaptiveChannel kernel: copies the 16-bit
+// adaptive channel into the stored channel and recomputes |echo_est| from the
+// stored channel and |far_spectrum|.
+static void StoreAdaptiveChannelC(AecmCore* aecm,
+                                  const uint16_t* far_spectrum,
+                                  int32_t* echo_est) {
+  int bin;
+
+  // During startup we store the channel every block.
+  memcpy(aecm->channelStored,
+         aecm->channelAdapt16,
+         PART_LEN1 * sizeof(int16_t));
+  // Recalculate echo estimate for bins 0..PART_LEN inclusive. One plain loop
+  // covers the same bins as a 4x unrolled loop plus a trailing bin, because
+  // PART_LEN is a multiple of 4 (WebRtcAecm_InitCore asserts PART_LEN % 16).
+  for (bin = 0; bin <= PART_LEN; ++bin) {
+    echo_est[bin] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[bin],
+                                          far_spectrum[bin]);
+  }
+}
+
+// C version of the WebRtcAecm_ResetAdaptiveChannel kernel: overwrites both
+// adaptive channels with the stored channel.
+static void ResetAdaptiveChannelC(AecmCore* aecm) {
+  int bin;
+
+  // The stored channel has a significantly lower MSE than the adaptive one for
+  // two consecutive calculations. Reset the adaptive channel.
+  memcpy(aecm->channelAdapt16,
+         aecm->channelStored,
+         PART_LEN1 * sizeof(int16_t));
+  // Restore the W32 channel for bins 0..PART_LEN inclusive; the stored value
+  // goes into the upper 16 bits. One plain loop covers the same bins as a 4x
+  // unrolled loop plus a trailing bin, because PART_LEN is a multiple of 4.
+  for (bin = 0; bin <= PART_LEN; ++bin) {
+    aecm->channelAdapt32[bin] = (int32_t)aecm->channelStored[bin] << 16;
+  }
+}
+
+// Points all three kernel function pointers at their ARM Neon versions.
+#if defined(WEBRTC_DETECT_NEON) || defined(WEBRTC_HAS_NEON)
+static void WebRtcAecm_InitNeon(void) {
+  WebRtcAecm_CalcLinearEnergies = WebRtcAecm_CalcLinearEnergiesNeon;
+  WebRtcAecm_StoreAdaptiveChannel = WebRtcAecm_StoreAdaptiveChannelNeon;
+  WebRtcAecm_ResetAdaptiveChannel = WebRtcAecm_ResetAdaptiveChannelNeon;
+}
+#endif
+
+// Points the kernel function pointers at their MIPS versions. The linear
+// energy kernel is always replaced; the two channel kernels only when
+// MIPS_DSP_R1_LE is defined.
+#ifdef MIPS32_LE
+static void WebRtcAecm_InitMips(void) {
+  WebRtcAecm_CalcLinearEnergies = WebRtcAecm_CalcLinearEnergies_mips;
+#ifdef MIPS_DSP_R1_LE
+  WebRtcAecm_StoreAdaptiveChannel = WebRtcAecm_StoreAdaptiveChannel_mips;
+  WebRtcAecm_ResetAdaptiveChannel = WebRtcAecm_ResetAdaptiveChannel_mips;
+#endif
+}
+#endif
+
+// WebRtcAecm_InitCore(...)
+//
+// Initializes an AECM instance created with WebRtcAecm_CreateCore(...): resets
+// the buffers, histories, energy trackers and suppression gain state, loads
+// the default echo channel for the sampling frequency and selects the kernel
+// implementations (C, Neon or MIPS).
+//
+// Input:
+//      - aecm          : Pointer to the AECM instance
+//      - samplingFreq  : Sampling frequency in Hz; must be 8000 or 16000
+//
+// Output:
+//      - aecm          : Initialized instance
+//
+// Return value         :  0 - Ok
+//                        -1 - Error (unsupported sampling frequency, or a
+//                             delay estimator failed to initialize)
+//
+int WebRtcAecm_InitCore(AecmCore* const aecm, int samplingFreq) {
+  int i = 0;
+  int32_t tmp32 = PART_LEN1 * PART_LEN1;
+  int16_t tmp16 = PART_LEN1;
+
+  // Sanity check of sampling frequency. Nothing in |aecm| has been touched
+  // yet when this fails.
+  if (samplingFreq != 8000 && samplingFreq != 16000) {
+    return -1;
+  }
+  // 1 for 8 kHz, 2 for 16 kHz.
+  aecm->mult = (int16_t)(samplingFreq / 8000);
+
+  aecm->farBufWritePos = 0;
+  aecm->farBufReadPos = 0;
+  aecm->knownDelay = 0;
+  aecm->lastKnownDelay = 0;
+
+  WebRtc_InitBuffer(aecm->farFrameBuf);
+  WebRtc_InitBuffer(aecm->nearNoisyFrameBuf);
+  WebRtc_InitBuffer(aecm->nearCleanFrameBuf);
+  WebRtc_InitBuffer(aecm->outFrameBuf);
+
+  memset(aecm->xBuf_buf, 0, sizeof(aecm->xBuf_buf));
+  memset(aecm->dBufClean_buf, 0, sizeof(aecm->dBufClean_buf));
+  memset(aecm->dBufNoisy_buf, 0, sizeof(aecm->dBufNoisy_buf));
+  memset(aecm->outBuf_buf, 0, sizeof(aecm->outBuf_buf));
+
+  aecm->seed = 666;
+  aecm->totCount = 0;
+
+  if (WebRtc_InitDelayEstimatorFarend(aecm->delay_estimator_farend) != 0) {
+    return -1;
+  }
+  if (WebRtc_InitDelayEstimator(aecm->delay_estimator) != 0) {
+    return -1;
+  }
+  // Set far end histories to zero
+  memset(aecm->far_history, 0, sizeof(uint16_t) * PART_LEN1 * MAX_DELAY);
+  memset(aecm->far_q_domains, 0, sizeof(int) * MAX_DELAY);
+  // One past the last slot, so the first WebRtcAecm_UpdateFarHistory() call
+  // wraps around and writes slot 0.
+  aecm->far_history_pos = MAX_DELAY;
+
+  aecm->nlpFlag = 1;
+  aecm->fixedDelay = -1;
+
+  aecm->dfaCleanQDomain = 0;
+  aecm->dfaCleanQDomainOld = 0;
+  aecm->dfaNoisyQDomain = 0;
+  aecm->dfaNoisyQDomainOld = 0;
+
+  memset(aecm->nearLogEnergy, 0, sizeof(aecm->nearLogEnergy));
+  aecm->farLogEnergy = 0;
+  memset(aecm->echoAdaptLogEnergy, 0, sizeof(aecm->echoAdaptLogEnergy));
+  memset(aecm->echoStoredLogEnergy, 0, sizeof(aecm->echoStoredLogEnergy));
+
+  // Initialize the echo channels with a stored shape.
+  if (samplingFreq == 8000) {
+    WebRtcAecm_InitEchoPathCore(aecm, kChannelStored8kHz);
+  } else {
+    WebRtcAecm_InitEchoPathCore(aecm, kChannelStored16kHz);
+  }
+
+  memset(aecm->echoFilt, 0, sizeof(aecm->echoFilt));
+  memset(aecm->nearFilt, 0, sizeof(aecm->nearFilt));
+  aecm->noiseEstCtr = 0;
+
+  aecm->cngMode = AecmTrue;
+
+  memset(aecm->noiseEstTooLowCtr, 0, sizeof(aecm->noiseEstTooLowCtr));
+  memset(aecm->noiseEstTooHighCtr, 0, sizeof(aecm->noiseEstTooHighCtr));
+  // Shape the initial noise level to an approximate pink noise.
+  // |tmp32| always equals |tmp16| squared: subtracting 2 * (n - 1) + 1 from
+  // n^2 gives (n - 1)^2. The level therefore falls over the first bins and
+  // stays at the last value for the remaining ones.
+  for (i = 0; i < (PART_LEN1 >> 1) - 1; i++) {
+    aecm->noiseEst[i] = (tmp32 << 8);
+    tmp16--;
+    tmp32 -= (int32_t)((tmp16 << 1) + 1);
+  }
+  for (; i < PART_LEN1; i++) {
+    aecm->noiseEst[i] = (tmp32 << 8);
+  }
+
+  aecm->farEnergyMin = WEBRTC_SPL_WORD16_MAX;
+  aecm->farEnergyMax = WEBRTC_SPL_WORD16_MIN;
+  aecm->farEnergyMaxMin = 0;
+  // This prevents false speech detection at the beginning.
+  aecm->farEnergyVAD = FAR_ENERGY_MIN;
+  aecm->farEnergyMSE = 0;
+  aecm->currentVADValue = 0;
+  aecm->vadUpdateCount = 0;
+  aecm->firstVAD = 1;
+
+  aecm->startupState = 0;
+  aecm->supGain = SUPGAIN_DEFAULT;
+  aecm->supGainOld = SUPGAIN_DEFAULT;
+
+  aecm->supGainErrParamA = SUPGAIN_ERROR_PARAM_A;
+  aecm->supGainErrParamD = SUPGAIN_ERROR_PARAM_D;
+  aecm->supGainErrParamDiffAB = SUPGAIN_ERROR_PARAM_A - SUPGAIN_ERROR_PARAM_B;
+  aecm->supGainErrParamDiffBD = SUPGAIN_ERROR_PARAM_B - SUPGAIN_ERROR_PARAM_D;
+
+  // Assert a preprocessor definition at compile-time. It's an assumption
+  // used in assembly code, so check the assembly files before any change.
+  COMPILE_ASSERT(PART_LEN % 16 == 0);
+
+  // Initialize function pointers: start with the portable C kernels, then
+  // let the platform specific initializers below replace them.
+  WebRtcAecm_CalcLinearEnergies = CalcLinearEnergiesC;
+  WebRtcAecm_StoreAdaptiveChannel = StoreAdaptiveChannelC;
+  WebRtcAecm_ResetAdaptiveChannel = ResetAdaptiveChannelC;
+
+#ifdef WEBRTC_DETECT_NEON
+  // Neon support is detected at run time.
+  uint64_t features = WebRtc_GetCPUFeaturesARM();
+  if ((features & kCPUFeatureNEON) != 0) {
+    WebRtcAecm_InitNeon();
+  }
+#elif defined(WEBRTC_HAS_NEON)
+  WebRtcAecm_InitNeon();
+#endif
+
+#if defined(MIPS32_LE)
+  WebRtcAecm_InitMips();
+#endif
+  return 0;
+}
+
+// Stores |delay| as the fixed delay and |nlpFlag| as the NLP flag of |aecm|.
+// Always returns 0.
+// TODO(bjornv): This function is currently not used. Add support for these
+// parameters from a higher level
+int WebRtcAecm_Control(AecmCore* aecm, int delay, int nlpFlag) {
+  aecm->fixedDelay = delay;
+  aecm->nlpFlag = nlpFlag;
+  return 0;
+}
+
+// Releases the ring buffers, the delay estimators and the FFT state owned by
+// |aecm|, then the instance itself. A NULL |aecm| is ignored.
+void WebRtcAecm_FreeCore(AecmCore* aecm) {
+  if (aecm != NULL) {
+    // Frame ring buffers.
+    WebRtc_FreeBuffer(aecm->farFrameBuf);
+    WebRtc_FreeBuffer(aecm->nearNoisyFrameBuf);
+    WebRtc_FreeBuffer(aecm->nearCleanFrameBuf);
+    WebRtc_FreeBuffer(aecm->outFrameBuf);
+
+    // The delay estimator is released before its far end part.
+    WebRtc_FreeDelayEstimator(aecm->delay_estimator);
+    WebRtc_FreeDelayEstimatorFarend(aecm->delay_estimator_farend);
+    WebRtcSpl_FreeRealFFT(aecm->real_fft);
+
+    free(aecm);
+  }
+}
+
+// Processes one frame of FRAME_LEN samples.
+//
+// The far end frame is buffered and an older one matching aecm->knownDelay is
+// fetched. Far and near frames are then queued and consumed in blocks of
+// PART_LEN samples by WebRtcAecm_ProcessBlock(), and FRAME_LEN processed
+// samples are written to |out|.
+//
+// |nearendClean| may be NULL, in which case NULL is passed on as the clean
+// near end block.
+//
+// Returns 0 on success, -1 if WebRtcAecm_ProcessBlock() reported an error.
+int WebRtcAecm_ProcessFrame(AecmCore* aecm,
+                            const int16_t* farend,
+                            const int16_t* nearendNoisy,
+                            const int16_t* nearendClean,
+                            int16_t* out) {
+  // The 8 extra samples (16 bytes) leave room to round the start of the
+  // block up to the next 16-byte boundary.
+  int16_t outBlock_buf[PART_LEN + 8];
+  int16_t* outBlock = (int16_t*) (((uintptr_t) outBlock_buf + 15) & ~ 15);
+
+  int16_t farFrame[FRAME_LEN];
+  const int16_t* out_ptr = NULL;
+  int size = 0;
+  const int has_clean = (nearendClean != NULL);
+
+  // Buffer the current frame.
+  // Fetch an older one corresponding to the delay.
+  WebRtcAecm_BufferFarFrame(aecm, farend, FRAME_LEN);
+  WebRtcAecm_FetchFarFrame(aecm, farFrame, FRAME_LEN, aecm->knownDelay);
+
+  // Buffer the synchronized far and near frames,
+  // to pass the smaller blocks individually.
+  WebRtc_WriteBuffer(aecm->farFrameBuf, farFrame, FRAME_LEN);
+  WebRtc_WriteBuffer(aecm->nearNoisyFrameBuf, nearendNoisy, FRAME_LEN);
+  if (has_clean) {
+    WebRtc_WriteBuffer(aecm->nearCleanFrameBuf, nearendClean, FRAME_LEN);
+  }
+
+  // Process as many blocks as possible.
+  while (WebRtc_available_read(aecm->farFrameBuf) >= PART_LEN) {
+    int16_t far_block[PART_LEN];
+    int16_t near_noisy_block[PART_LEN];
+    int16_t near_clean_block[PART_LEN];
+    const int16_t* far_block_ptr = NULL;
+    const int16_t* near_noisy_block_ptr = NULL;
+    // Stays NULL when there is no clean near end signal.
+    const int16_t* near_clean_block_ptr = NULL;
+
+    WebRtc_ReadBuffer(aecm->farFrameBuf,
+                      (void**) &far_block_ptr,
+                      far_block,
+                      PART_LEN);
+    WebRtc_ReadBuffer(aecm->nearNoisyFrameBuf,
+                      (void**) &near_noisy_block_ptr,
+                      near_noisy_block,
+                      PART_LEN);
+    if (has_clean) {
+      WebRtc_ReadBuffer(aecm->nearCleanFrameBuf,
+                        (void**) &near_clean_block_ptr,
+                        near_clean_block,
+                        PART_LEN);
+    }
+
+    if (WebRtcAecm_ProcessBlock(aecm,
+                                far_block_ptr,
+                                near_noisy_block_ptr,
+                                near_clean_block_ptr,
+                                outBlock) == -1) {
+      return -1;
+    }
+
+    WebRtc_WriteBuffer(aecm->outFrameBuf, outBlock, PART_LEN);
+  }
+
+  // Stuff the out buffer if we have less than a frame to output.
+  // This should only happen for the first frame.
+  size = (int) WebRtc_available_read(aecm->outFrameBuf);
+  if (size < FRAME_LEN) {
+    WebRtc_MoveReadPtr(aecm->outFrameBuf, size - FRAME_LEN);
+  }
+
+  // Obtain an output frame.
+  WebRtc_ReadBuffer(aecm->outFrameBuf, (void**) &out_ptr, out, FRAME_LEN);
+  if (out_ptr != out) {
+    // ReadBuffer() hasn't copied to |out| in this case.
+    memcpy(out, out_ptr, FRAME_LEN * sizeof(int16_t));
+  }
+
+  return 0;
+}
+
+// WebRtcAecm_AsymFilt(...)
+//
+// Performs asymmetric filtering: the filtered value moves towards the input by
+// a fraction of their difference, with separate fractions for moving up and
+// moving down.
+//
+// Inputs:
+//      - filtOld       : Previous filtered value. WEBRTC_SPL_WORD16_MAX and
+//                        WEBRTC_SPL_WORD16_MIN make the filter return |inVal|
+//                        unchanged.
+//      - inVal         : New input value.
+//      - stepSizePos   : Right shift applied to the difference when the input
+//                        is not below the previous value.
+//      - stepSizeNeg   : Right shift applied to the difference when the input
+//                        is below the previous value.
+//
+// Return: - Filtered value.
+//
+int16_t WebRtcAecm_AsymFilt(const int16_t filtOld, const int16_t inVal,
+                            const int16_t stepSizePos,
+                            const int16_t stepSizeNeg) {
+  if (filtOld == WEBRTC_SPL_WORD16_MAX || filtOld == WEBRTC_SPL_WORD16_MIN) {
+    return inVal;
+  }
+  if (filtOld > inVal) {
+    // Move down towards the input.
+    return (int16_t)(filtOld - ((filtOld - inVal) >> stepSizeNeg));
+  }
+  // Move up towards the input (or stay when both are equal).
+  return (int16_t)(filtOld + ((inVal - filtOld) >> stepSizePos));
+}
+
+// ExtractFractionPart(a, zeros)
+//
+// Shifts |a| left by |zeros| (its number of leading zeros), drops the top bit
+// and returns the next 8 bits, i.e. the fraction part as an int16_t scaled to
+// Q8. There is no check that |zeros| actually matches |a|.
+static int16_t ExtractFractionPart(uint32_t a, int zeros) {
+  const uint32_t normalized = a << zeros;
+  const uint32_t below_top_bit = normalized & 0x7FFFFFFF;
+  return (int16_t)(below_top_bit >> 23);
+}
+
+// Calculates and returns the log2 of |energy| in Q8, offset by the constant
+// PART_LEN_SHIFT << 7. The input |energy| is supposed to be in Q(|q_domain|).
+// For a zero |energy| only the offset is returned.
+static int16_t LogOfEnergyInQ8(uint32_t energy, int q_domain) {
+  static const int16_t kLogLowValue = PART_LEN_SHIFT << 7;
+  int zeros;
+  int16_t frac;
+
+  if (energy == 0) {
+    return kLogLowValue;
+  }
+  // Integer part from the position of the top bit, fraction from the bits
+  // right below it, minus the Q-domain of the input.
+  zeros = WebRtcSpl_NormU32(energy);
+  frac = ExtractFractionPart(energy, zeros);
+  return (int16_t)(kLogLowValue +
+                   (((31 - zeros) << 8) + frac - (q_domain << 8)));
+}
+
+// WebRtcAecm_CalcEnergies(...)
+//
+// Computes the Q8 log energies of the near end, the delayed far end and the two
+// echo estimates (adaptive and stored channel), stores the near end and echo
+// values at the head of their history buffers, and then updates the far end
+// energy levels and the internal VAD decision.
+// @param aecm [i/o] Handle of the AECM instance.
+// @param far_spectrum [in] Pointer to farend spectrum.
+// @param far_q [in] Q-domain of farend spectrum.
+// @param nearEner [in] Near end energy for current block; its log is taken
+// with Q-domain aecm->dfaNoisyQDomain.
+// @param echoEst [out] Estimated echo in Q(far_q+RESOLUTION_CHANNEL16).
+//
+void WebRtcAecm_CalcEnergies(AecmCore* aecm,
+ const uint16_t* far_spectrum,
+ const int16_t far_q,
+ const uint32_t nearEner,
+ int32_t* echoEst) {
+ // Energy sums filled in by WebRtcAecm_CalcLinearEnergies() below.
+ uint32_t tmpAdapt = 0;
+ uint32_t tmpStored = 0;
+ uint32_t tmpFar = 0;
+
+ int i;
+
+ int16_t tmp16;
+ int16_t increase_max_shifts = 4;  // Shift counts passed to WebRtcAecm_AsymFilt() for the min/max trackers.
+ int16_t decrease_max_shifts = 11;
+ int16_t increase_min_shifts = 11;
+ int16_t decrease_min_shifts = 3;
+
+ // Get log of near end energy and store in buffer
+
+ // Shift buffer by one entry so that index 0 is free for the newest value
+ memmove(aecm->nearLogEnergy + 1, aecm->nearLogEnergy,
+ sizeof(int16_t) * (MAX_BUF_LEN - 1));
+
+ // Logarithm of integrated magnitude spectrum (nearEner)
+ aecm->nearLogEnergy[0] = LogOfEnergyInQ8(nearEner, aecm->dfaNoisyQDomain);
+
+ WebRtcAecm_CalcLinearEnergies(aecm, far_spectrum, echoEst, &tmpFar, &tmpAdapt, &tmpStored);
+
+ // Shift the echo energy buffers the same way
+ memmove(aecm->echoAdaptLogEnergy + 1, aecm->echoAdaptLogEnergy,
+ sizeof(int16_t) * (MAX_BUF_LEN - 1));
+ memmove(aecm->echoStoredLogEnergy + 1, aecm->echoStoredLogEnergy,
+ sizeof(int16_t) * (MAX_BUF_LEN - 1));
+
+ // Logarithm of delayed far end energy
+ aecm->farLogEnergy = LogOfEnergyInQ8(tmpFar, far_q);
+
+ // Logarithm of estimated echo energy through adapted channel
+ aecm->echoAdaptLogEnergy[0] = LogOfEnergyInQ8(tmpAdapt,
+ RESOLUTION_CHANNEL16 + far_q);
+
+ // Logarithm of estimated echo energy through stored channel
+ aecm->echoStoredLogEnergy[0] =
+ LogOfEnergyInQ8(tmpStored, RESOLUTION_CHANNEL16 + far_q);
+
+ // Update farend energy levels (min, max, vad, mse); skipped entirely while the far end is at or below FAR_ENERGY_MIN
+ if (aecm->farLogEnergy > FAR_ENERGY_MIN)
+ {
+ if (aecm->startupState == 0)  // Different shift counts are used while in startup state 0.
+ {
+ increase_max_shifts = 2;
+ decrease_min_shifts = 2;
+ increase_min_shifts = 8;
+ }
+
+ aecm->farEnergyMin = WebRtcAecm_AsymFilt(aecm->farEnergyMin, aecm->farLogEnergy,
+ increase_min_shifts, decrease_min_shifts);
+ aecm->farEnergyMax = WebRtcAecm_AsymFilt(aecm->farEnergyMax, aecm->farLogEnergy,
+ increase_max_shifts, decrease_max_shifts);
+ aecm->farEnergyMaxMin = (aecm->farEnergyMax - aecm->farEnergyMin);
+
+ // Dynamic VAD region size: FAR_ENERGY_VAD_REGION plus an extra term that grows as farEnergyMin drops below 2560
+ tmp16 = 2560 - aecm->farEnergyMin;  // 2560 == 10 << 8, i.e. 10 in Q8.
+ if (tmp16 > 0)
+ {
+ tmp16 = (int16_t)((tmp16 * FAR_ENERGY_VAD_REGION) >> 9);
+ } else
+ {
+ tmp16 = 0;
+ }
+ tmp16 += FAR_ENERGY_VAD_REGION;
+
+ if ((aecm->startupState == 0) | (aecm->vadUpdateCount > 1024))
+ {
+ // In startup phase or VAD update halted
+ aecm->farEnergyVAD = aecm->farEnergyMin + tmp16;
+ } else
+ {
+ if (aecm->farEnergyVAD > aecm->farLogEnergy)
+ {
+ aecm->farEnergyVAD +=
+ (aecm->farLogEnergy + tmp16 - aecm->farEnergyVAD) >> 6;
+ aecm->vadUpdateCount = 0;
+ } else
+ {
+ aecm->vadUpdateCount++;
+ }
+ }
+ // Put MSE threshold higher than VAD
+ aecm->farEnergyMSE = aecm->farEnergyVAD + (1 << 8);
+ }
+
+ // Update VAD variables
+ if (aecm->farLogEnergy > aecm->farEnergyVAD)
+ {
+ if ((aecm->startupState == 0) | (aecm->farEnergyMaxMin > FAR_ENERGY_DIFF))
+ {
+ // We are in startup or have significant dynamics in input speech level
+ aecm->currentVADValue = 1;
+ }
+ } else
+ {
+ aecm->currentVADValue = 0;
+ }
+ if ((aecm->currentVADValue) && (aecm->firstVAD))
+ {
+ aecm->firstVAD = 0;
+ if (aecm->echoAdaptLogEnergy[0] > aecm->nearLogEnergy[0])
+ {
+ // The estimated echo has higher energy than the near end signal.
+ // This means that the initialization was too aggressive. Scale
+ // down by a factor 8
+ for (i = 0; i < PART_LEN1; i++)
+ {
+ aecm->channelAdapt16[i] >>= 3;
+ }
+ // Compensate the adapted echo energy level accordingly.
+ aecm->echoAdaptLogEnergy[0] -= (3 << 8);  // log2(8) == 3, expressed in Q8.
+ aecm->firstVAD = 1;  // Re-arm, so this check runs again on the next block with active VAD.
+ }
+ }
+}
+
+// WebRtcAecm_CalcStepSize(...)
+//
+// This function calculates the step size used in the NLMS based channel
+// estimation.
+//
+//
+// @param  aecm  [in]    Handle of the AECM instance.
+// @return               Stepsize in log2(), i.e. number of shifts. 0 means
+//                       that the channel is not updated.
+//
+//
+int16_t WebRtcAecm_CalcStepSize(AecmCore* const aecm) {
+  int16_t mu;
+
+  if (!aecm->currentVADValue) {
+    // Far end energy level too low, no channel update
+    return 0;
+  }
+  if (!(aecm->startupState > 0)) {
+    // In startup state 0 the maximum step size is used.
+    return MU_MAX;
+  }
+
+  if (aecm->farEnergyMin >= aecm->farEnergyMax) {
+    mu = MU_MIN;
+  } else {
+    // Scale by where the far end log energy sits between the tracked
+    // minimum and maximum.
+    const int16_t above_min = (aecm->farLogEnergy - aecm->farEnergyMin);
+    int32_t scaled = above_min * MU_DIFF;
+    scaled = WebRtcSpl_DivW32W16(scaled, aecm->farEnergyMaxMin);
+    // The -1 is an alternative to rounding. This way we get a larger
+    // stepsize, so we in some sense compensate for truncation in NLMS
+    mu = MU_MIN - 1 - (int16_t)(scaled);
+  }
+
+  // Equivalent with maximum step size of 2^-MU_MAX
+  return (mu < MU_MAX) ? MU_MAX : mu;
+}
+
+// WebRtcAecm_UpdateChannel(...)
+//
+// This function performs channel estimation. NLMS and decision on channel storage.
+//
+//
+// @param aecm [i/o] Handle of the AECM instance.
+// @param far_spectrum [in] Absolute value of the farend signal in Q(far_q)
+// @param far_q [in] Q-domain of the farend signal
+// @param dfa [in] Absolute value of the nearend signal (Q[aecm->dfaQDomain])
+// @param mu [in] NLMS step size.
+// @param echoEst [i/o] Estimated echo in Q(far_q+RESOLUTION_CHANNEL16).
+//
+void WebRtcAecm_UpdateChannel(AecmCore* aecm,
+ const uint16_t* far_spectrum,
+ const int16_t far_q,
+ const uint16_t* const dfa,
+ const int16_t mu,
+ int32_t* echoEst) {
+ uint32_t tmpU32no1, tmpU32no2;
+ int32_t tmp32no1, tmp32no2;
+ int32_t mseStored;
+ int32_t mseAdapt;
+
+ int i;
+
+ int16_t zerosFar, zerosNum, zerosCh, zerosDfa;
+ int16_t shiftChFar, shiftNum, shift2ResChan;
+ int16_t tmp16no1;
+ int16_t xfaQ, dfaQ;
+
+ // This is the channel estimation algorithm. It is based on NLMS but has a variable step
+ // length, which was calculated above.
+ if (mu)
+ {
+ for (i = 0; i < PART_LEN1; i++)
+ {
+ // Determine norm of channel and farend to make sure we don't get overflow in
+ // multiplication
+ zerosCh = WebRtcSpl_NormU32(aecm->channelAdapt32[i]);
+ zerosFar = WebRtcSpl_NormU32((uint32_t)far_spectrum[i]);
+ if (zerosCh + zerosFar > 31)
+ {
+ // Multiplication is safe
+ tmpU32no1 = WEBRTC_SPL_UMUL_32_16(aecm->channelAdapt32[i],
+ far_spectrum[i]);
+ shiftChFar = 0;
+ } else
+ {
+ // We need to shift down before multiplication
+ shiftChFar = 32 - zerosCh - zerosFar;
+ tmpU32no1 = (aecm->channelAdapt32[i] >> shiftChFar) *
+ far_spectrum[i];
+ }
+ // Determine Q-domain of numerator
+ zerosNum = WebRtcSpl_NormU32(tmpU32no1);
+ if (dfa[i])
+ {
+ zerosDfa = WebRtcSpl_NormU32((uint32_t)dfa[i]);
+ } else
+ {
+ zerosDfa = 32;
+ }
+ tmp16no1 = zerosDfa - 2 + aecm->dfaNoisyQDomain -
+ RESOLUTION_CHANNEL32 - far_q + shiftChFar;
+ if (zerosNum > tmp16no1 + 1)
+ {
+ xfaQ = tmp16no1;
+ dfaQ = zerosDfa - 2;
+ } else
+ {
+ xfaQ = zerosNum - 2;
+ dfaQ = RESOLUTION_CHANNEL32 + far_q - aecm->dfaNoisyQDomain -
+ shiftChFar + xfaQ;
+ }
+ // Add in the same Q-domain
+ tmpU32no1 = WEBRTC_SPL_SHIFT_W32(tmpU32no1, xfaQ);
+ tmpU32no2 = WEBRTC_SPL_SHIFT_W32((uint32_t)dfa[i], dfaQ);
+ tmp32no1 = (int32_t)tmpU32no2 - (int32_t)tmpU32no1;
+ zerosNum = WebRtcSpl_NormW32(tmp32no1);
+ if ((tmp32no1) && (far_spectrum[i] > (CHANNEL_VAD << far_q)))
+ {
+ //
+ // Update is needed
+ //
+ // This is what we would like to compute
+ //
+ // tmp32no1 = dfa[i] - (aecm->channelAdapt[i] * far_spectrum[i])
+ // tmp32norm = (i + 1)
+ // aecm->channelAdapt[i] += (2^mu) * tmp32no1
+ // / (tmp32norm * far_spectrum[i])
+ //
+
+ // Make sure we don't get overflow in multiplication.
+ if (zerosNum + zerosFar > 31)
+ {
+ if (tmp32no1 > 0)
+ {
+ tmp32no2 = (int32_t)WEBRTC_SPL_UMUL_32_16(tmp32no1,
+ far_spectrum[i]);
+ } else
+ {
+ tmp32no2 = -(int32_t)WEBRTC_SPL_UMUL_32_16(-tmp32no1,
+ far_spectrum[i]);
+ }
+ shiftNum = 0;
+ } else
+ {
+ shiftNum = 32 - (zerosNum + zerosFar);
+ if (tmp32no1 > 0)
+ {
+ tmp32no2 = (tmp32no1 >> shiftNum) * far_spectrum[i];
+ } else
+ {
+ tmp32no2 = -((-tmp32no1 >> shiftNum) * far_spectrum[i]);
+ }
+ }
+ // Normalize with respect to frequency bin
+ tmp32no2 = WebRtcSpl_DivW32W16(tmp32no2, i + 1);
+ // Make sure we are in the right Q-domain
+ shift2ResChan = shiftNum + shiftChFar - xfaQ - mu - ((30 - zerosFar) << 1);
+ if (WebRtcSpl_NormW32(tmp32no2) < shift2ResChan)
+ {
+ tmp32no2 = WEBRTC_SPL_WORD32_MAX;
+ } else
+ {
+ tmp32no2 = WEBRTC_SPL_SHIFT_W32(tmp32no2, shift2ResChan);
+ }
+ aecm->channelAdapt32[i] =
+ WebRtcSpl_AddSatW32(aecm->channelAdapt32[i], tmp32no2);
+ if (aecm->channelAdapt32[i] < 0)
+ {
+ // We can never have negative channel gain
+ aecm->channelAdapt32[i] = 0;
+ }
+ aecm->channelAdapt16[i] =
+ (int16_t)(aecm->channelAdapt32[i] >> 16);
+ }
+ }
+ }
+ // END: Adaptive channel update
+
+ // Determine if we should store or restore the channel
+ if ((aecm->startupState == 0) & (aecm->currentVADValue))
+ {
+ // During startup we store the channel every block,
+ // and we recalculate echo estimate
+ WebRtcAecm_StoreAdaptiveChannel(aecm, far_spectrum, echoEst);
+ } else
+ {
+ if (aecm->farLogEnergy < aecm->farEnergyMSE)
+ {
+ aecm->mseChannelCount = 0;
+ } else
+ {
+ aecm->mseChannelCount++;
+ }
+ // Enough data for validation. Store channel if we can.
+ if (aecm->mseChannelCount >= (MIN_MSE_COUNT + 10))
+ {
+ // We have enough data.
+ // Calculate MSE of "Adapt" and "Stored" versions.
+ // It is actually not MSE, but average absolute error.
+ mseStored = 0;
+ mseAdapt = 0;
+ for (i = 0; i < MIN_MSE_COUNT; i++)
+ {
+ tmp32no1 = ((int32_t)aecm->echoStoredLogEnergy[i]
+ - (int32_t)aecm->nearLogEnergy[i]);
+ tmp32no2 = WEBRTC_SPL_ABS_W32(tmp32no1);
+ mseStored += tmp32no2;
+
+ tmp32no1 = ((int32_t)aecm->echoAdaptLogEnergy[i]
+ - (int32_t)aecm->nearLogEnergy[i]);
+ tmp32no2 = WEBRTC_SPL_ABS_W32(tmp32no1);
+ mseAdapt += tmp32no2;
+ }
+ if (((mseStored << MSE_RESOLUTION) < (MIN_MSE_DIFF * mseAdapt))
+ & ((aecm->mseStoredOld << MSE_RESOLUTION) < (MIN_MSE_DIFF
+ * aecm->mseAdaptOld)))
+ {
+ // The stored channel has a significantly lower MSE than the adaptive one for
+ // two consecutive calculations. Reset the adaptive channel.
+ WebRtcAecm_ResetAdaptiveChannel(aecm);
+ } else if (((MIN_MSE_DIFF * mseStored) > (mseAdapt << MSE_RESOLUTION)) & (mseAdapt
+ < aecm->mseThreshold) & (aecm->mseAdaptOld < aecm->mseThreshold))
+ {
+ // The adaptive channel has a significantly lower MSE than the stored one.
+ // The MSE for the adaptive channel has also been low for two consecutive
+ // calculations. Store the adaptive channel.
+ WebRtcAecm_StoreAdaptiveChannel(aecm, far_spectrum, echoEst);
+
+ // Update threshold
+ if (aecm->mseThreshold == WEBRTC_SPL_WORD32_MAX)
+ {
+ aecm->mseThreshold = (mseAdapt + aecm->mseAdaptOld);
+ } else
+ {
+ int scaled_threshold = aecm->mseThreshold * 5 / 8;
+ aecm->mseThreshold +=
+ ((mseAdapt - scaled_threshold) * 205) >> 8;
+ }
+
+ }
+
+ // Reset counter
+ aecm->mseChannelCount = 0;
+
+ // Store the MSE values.
+ aecm->mseStoredOld = mseStored;
+ aecm->mseAdaptOld = mseAdapt;
+ }
+ }
+ // END: Determine if we should store or reset channel estimate.
+}
+
+ // WebRtcAecm_CalcSuppressionGain(...)
+ //
+ // This function calculates the suppression gain that is used in the Wiener
+ // filter and updates the smoothed gain state stored in |aecm|.
+ //
+ // @param aecm [in/out] Handle of the AECM instance. Reads currentVADValue,
+ //                      nearLogEnergy[0], echoStoredLogEnergy[0] and the
+ //                      supGainErrParam* values; updates supGainOld and
+ //                      supGain.
+ // @return              The updated aecm->supGain: suppression gain with which
+ //                      to scale the noise level (Q14).
+ //
+ int16_t WebRtcAecm_CalcSuppressionGain(AecmCore* const aecm) {
+     int32_t tmp32no1;
+
+     int16_t supGain = SUPGAIN_DEFAULT;
+     int16_t tmp16no1;
+     int16_t dE = 0;
+
+     // Determine suppression gain used in the Wiener filter. The gain is based
+     // on a mix of far end energy and echo estimation error.
+     // Adjust for the far end signal level. A low signal level indicates no far
+     // end signal, hence we set the suppression gain to 0.
+     if (!aecm->currentVADValue)
+     {
+         supGain = 0;
+     } else
+     {
+         // Adjust for possible double talk. If we have large variations in
+         // estimation error we likely have double talk (or poor channel).
+         tmp16no1 = (aecm->nearLogEnergy[0] - aecm->echoStoredLogEnergy[0] - ENERGY_DEV_OFFSET);
+         dE = WEBRTC_SPL_ABS_W16(tmp16no1);
+
+         if (dE < ENERGY_DEV_TOL)
+         {
+             // Likely no double talk. The better estimation, the more we can
+             // suppress signal.
+             if (dE < SUPGAIN_EPC_DT)
+             {
+                 // Interpolate downwards from supGainErrParamA; the added
+                 // half-divisor rounds the division to nearest.
+                 tmp32no1 = aecm->supGainErrParamDiffAB * dE;
+                 tmp32no1 += (SUPGAIN_EPC_DT >> 1);
+                 tmp16no1 = (int16_t)WebRtcSpl_DivW32W16(tmp32no1, SUPGAIN_EPC_DT);
+                 supGain = aecm->supGainErrParamA - tmp16no1;
+             } else
+             {
+                 // Interpolate upwards from supGainErrParamD, again with
+                 // round-to-nearest division.
+                 tmp32no1 = aecm->supGainErrParamDiffBD * (ENERGY_DEV_TOL - dE);
+                 tmp32no1 += ((ENERGY_DEV_TOL - SUPGAIN_EPC_DT) >> 1);
+                 tmp16no1 = (int16_t)WebRtcSpl_DivW32W16(tmp32no1, (ENERGY_DEV_TOL
+                                                                   - SUPGAIN_EPC_DT));
+                 supGain = aecm->supGainErrParamD + tmp16no1;
+             }
+         } else
+         {
+             // Likely in double talk. Use default value
+             supGain = aecm->supGainErrParamD;
+         }
+     }
+
+     // Use the larger of the current and the previous raw gain as the target.
+     tmp16no1 = (supGain > aecm->supGainOld) ? supGain : aecm->supGainOld;
+     aecm->supGainOld = supGain;
+
+     // Move the smoothed gain 1/16 of the way towards the target. The original
+     // code branched on (tmp16no1 < aecm->supGain) but executed this same
+     // statement in both branches, so the branch has been removed.
+     aecm->supGain += (int16_t)((tmp16no1 - aecm->supGain) >> 4);
+
+     // END: Update suppression gain
+
+     return aecm->supGain;
+ }
+
+ // Appends |farLen| samples from |farend| to the circular buffer aecm->farBuf
+ // (FAR_BUF_LEN samples long), starting at aecm->farBufWritePos and wrapping
+ // to index 0 whenever the end of the buffer is reached.
+ void WebRtcAecm_BufferFarFrame(AecmCore* const aecm,
+                                const int16_t* const farend,
+                                const int farLen) {
+     int writeLen = farLen, writePos = 0;
+
+     // Check if the write position must be wrapped
+     while (aecm->farBufWritePos + writeLen > FAR_BUF_LEN)
+     {
+         // Write to remaining buffer space before wrapping
+         writeLen = FAR_BUF_LEN - aecm->farBufWritePos;
+         memcpy(aecm->farBuf + aecm->farBufWritePos, farend + writePos,
+                sizeof(int16_t) * writeLen);
+         aecm->farBufWritePos = 0;
+         // Accumulate the source offset so that the bookkeeping stays correct
+         // even if the loop has to wrap more than once.
+         writePos += writeLen;
+         writeLen = farLen - writePos;
+     }
+
+     memcpy(aecm->farBuf + aecm->farBufWritePos, farend + writePos,
+            sizeof(int16_t) * writeLen);
+     aecm->farBufWritePos += writeLen;
+ }
+
+ // Copies |farLen| samples from the circular buffer aecm->farBuf into
+ // |farend|. Before reading, the read position is moved back by the change in
+ // |knownDelay| since the previous call and wrapped into [0, FAR_BUF_LEN).
+ // Updates aecm->farBufReadPos and aecm->lastKnownDelay.
+ void WebRtcAecm_FetchFarFrame(AecmCore* const aecm,
+                               int16_t* const farend,
+                               const int farLen,
+                               const int knownDelay) {
+     int readLen = farLen;
+     int readPos = 0;
+     int delayChange = knownDelay - aecm->lastKnownDelay;
+
+     aecm->farBufReadPos -= delayChange;
+
+     // Check if delay forces a read position wrap
+     while (aecm->farBufReadPos < 0)
+     {
+         aecm->farBufReadPos += FAR_BUF_LEN;
+     }
+     while (aecm->farBufReadPos > FAR_BUF_LEN - 1)
+     {
+         aecm->farBufReadPos -= FAR_BUF_LEN;
+     }
+
+     aecm->lastKnownDelay = knownDelay;
+
+     // Check if read position must be wrapped
+     while (aecm->farBufReadPos + readLen > FAR_BUF_LEN)
+     {
+         // Read from remaining buffer space before wrapping
+         readLen = FAR_BUF_LEN - aecm->farBufReadPos;
+         memcpy(farend + readPos, aecm->farBuf + aecm->farBufReadPos,
+                sizeof(int16_t) * readLen);
+         aecm->farBufReadPos = 0;
+         // Accumulate the destination offset so that the bookkeeping stays
+         // correct even if the loop has to wrap more than once.
+         readPos += readLen;
+         readLen = farLen - readPos;
+     }
+     memcpy(farend + readPos, aecm->farBuf + aecm->farBufReadPos,
+            sizeof(int16_t) * readLen);
+     aecm->farBufReadPos += readLen;
+ }
diff --git a/third_party/webrtc/src/webrtc/modules/audio_processing/aecm/aecm_core.h b/third_party/webrtc/src/webrtc/modules/audio_processing/aecm/aecm_core.h
new file mode 100644
index 00000000..b52bb62d
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/modules/audio_processing/aecm/aecm_core.h
@@ -0,0 +1,434 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+// Performs echo control (suppression) with fft routines in fixed-point.
+
+#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AECM_AECM_CORE_H_
+#define WEBRTC_MODULES_AUDIO_PROCESSING_AECM_AECM_CORE_H_
+
+#include "webrtc/common_audio/ring_buffer.h"
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+#include "webrtc/modules/audio_processing/aecm/aecm_defines.h"
+#include "webrtc/typedefs.h"
+
+#ifdef _MSC_VER // visual c++
+#define ALIGN8_BEG __declspec(align(8))
+#define ALIGN8_END
+#else // gcc or icc
+#define ALIGN8_BEG
+#define ALIGN8_END __attribute__((aligned(8)))
+#endif
+
+ // Complex value with 16-bit real and imaginary parts.
+ // NOTE: aecm_core_c.c casts ComplexInt16* to int16_t* when calling the FFT
+ // routines, so the member order (real first, then imag) must not change.
+ typedef struct {
+ int16_t real;
+ int16_t imag;
+ } ComplexInt16;
+
+ // State of one AECM instance. Allocated by WebRtcAecm_CreateCore() and
+ // initialized by WebRtcAecm_InitCore().
+ typedef struct {
+ // Write/read indices into |farBuf|, maintained by
+ // WebRtcAecm_BufferFarFrame() and WebRtcAecm_FetchFarFrame().
+ int farBufWritePos;
+ int farBufReadPos;
+ int knownDelay;
+ // |knownDelay| argument of the previous WebRtcAecm_FetchFarFrame() call.
+ int lastKnownDelay;
+ int firstVAD; // Parameter to control poorly initialized channels
+
+ RingBuffer* farFrameBuf;
+ RingBuffer* nearNoisyFrameBuf;
+ RingBuffer* nearCleanFrameBuf;
+ RingBuffer* outFrameBuf;
+
+ // Circular far end sample buffer (see farBufWritePos/farBufReadPos).
+ int16_t farBuf[FAR_BUF_LEN];
+
+ int16_t mult;
+ uint32_t seed;
+
+ // Delay estimation variables
+ void* delay_estimator_farend;
+ void* delay_estimator;
+ uint16_t currentDelay;
+ // Far end history variables
+ // TODO(bjornv): Replace |far_history| with ring_buffer.
+ uint16_t far_history[PART_LEN1 * MAX_DELAY];
+ int far_history_pos;
+ int far_q_domains[MAX_DELAY];
+
+ int16_t nlpFlag;
+ // A value >= 0 overrides the estimated delay in WebRtcAecm_ProcessBlock().
+ int16_t fixedDelay;
+
+ uint32_t totCount;
+
+ // Q-domains of the clean/noisy near end spectra for the current and the
+ // previous block; updated in WebRtcAecm_ProcessBlock().
+ int16_t dfaCleanQDomain;
+ int16_t dfaCleanQDomainOld;
+ int16_t dfaNoisyQDomain;
+ int16_t dfaNoisyQDomainOld;
+
+ int16_t nearLogEnergy[MAX_BUF_LEN];
+ int16_t farLogEnergy;
+ int16_t echoAdaptLogEnergy[MAX_BUF_LEN];
+ int16_t echoStoredLogEnergy[MAX_BUF_LEN];
+
+ // The extra 16 or 32 bytes in the following buffers are for alignment based
+ // Neon code.
+ // It's designed this way since the current GCC compiler can't align a
+ // buffer in 16 or 32 byte boundaries properly.
+ int16_t channelStored_buf[PART_LEN1 + 8];
+ int16_t channelAdapt16_buf[PART_LEN1 + 8];
+ int32_t channelAdapt32_buf[PART_LEN1 + 8];
+ int16_t xBuf_buf[PART_LEN2 + 16]; // farend
+ int16_t dBufClean_buf[PART_LEN2 + 16]; // nearend
+ int16_t dBufNoisy_buf[PART_LEN2 + 16]; // nearend
+ int16_t outBuf_buf[PART_LEN + 8];
+
+ // Pointers to the above buffers
+ int16_t *channelStored;
+ int16_t *channelAdapt16;
+ int32_t *channelAdapt32;
+ int16_t *xBuf;
+ int16_t *dBufClean;
+ int16_t *dBufNoisy;
+ int16_t *outBuf;
+
+ int32_t echoFilt[PART_LEN1];
+ int16_t nearFilt[PART_LEN1];
+ int32_t noiseEst[PART_LEN1];
+ int noiseEstTooLowCtr[PART_LEN1];
+ int noiseEstTooHighCtr[PART_LEN1];
+ int16_t noiseEstCtr;
+ int16_t cngMode;
+
+ // Error sums of the adaptive/stored channel from the previous comparison in
+ // WebRtcAecm_UpdateChannel(), and the threshold used for storing a channel.
+ int32_t mseAdaptOld;
+ int32_t mseStoredOld;
+ int32_t mseThreshold;
+
+ int16_t farEnergyMin;
+ int16_t farEnergyMax;
+ int16_t farEnergyMaxMin;
+ int16_t farEnergyVAD;
+ int16_t farEnergyMSE;
+ int currentVADValue;
+ int16_t vadUpdateCount;
+
+ int16_t startupState;
+ int16_t mseChannelCount;
+ // Smoothed suppression gain returned by WebRtcAecm_CalcSuppressionGain(),
+ // and the unsmoothed gain computed by its previous call.
+ int16_t supGain;
+ int16_t supGainOld;
+
+ int16_t supGainErrParamA;
+ int16_t supGainErrParamD;
+ int16_t supGainErrParamDiffAB;
+ int16_t supGainErrParamDiffBD;
+
+ struct RealFFT* real_fft;
+
+ #ifdef AEC_DEBUG
+ FILE *farFile;
+ FILE *nearFile;
+ FILE *outFile;
+ #endif
+ } AecmCore;
+
+////////////////////////////////////////////////////////////////////////////////
+// WebRtcAecm_CreateCore()
+//
+// Allocates the memory needed by the AECM. The memory needs to be
+// initialized separately using the WebRtcAecm_InitCore() function.
+// Returns a pointer to the instance and a nullptr at failure.
+AecmCore* WebRtcAecm_CreateCore();
+
+////////////////////////////////////////////////////////////////////////////////
+// WebRtcAecm_InitCore(...)
+//
+ // This function initializes the AECM instance created with
+// WebRtcAecm_CreateCore()
+// Input:
+// - aecm : Pointer to the AECM instance
+// - samplingFreq : Sampling Frequency
+//
+// Output:
+// - aecm : Initialized instance
+//
+// Return value : 0 - Ok
+// -1 - Error
+//
+int WebRtcAecm_InitCore(AecmCore* const aecm, int samplingFreq);
+
+////////////////////////////////////////////////////////////////////////////////
+// WebRtcAecm_FreeCore(...)
+//
+// This function releases the memory allocated by WebRtcAecm_CreateCore()
+// Input:
+// - aecm : Pointer to the AECM instance
+//
+void WebRtcAecm_FreeCore(AecmCore* aecm);
+
+int WebRtcAecm_Control(AecmCore* aecm, int delay, int nlpFlag);
+
+////////////////////////////////////////////////////////////////////////////////
+// WebRtcAecm_InitEchoPathCore(...)
+//
+// This function resets the echo channel adaptation with the specified channel.
+// Input:
+// - aecm : Pointer to the AECM instance
+// - echo_path : Pointer to the data that should initialize the echo
+// path
+//
+// Output:
+// - aecm : Initialized instance
+//
+void WebRtcAecm_InitEchoPathCore(AecmCore* aecm, const int16_t* echo_path);
+
+////////////////////////////////////////////////////////////////////////////////
+// WebRtcAecm_ProcessFrame(...)
+//
+// This function processes frames and sends blocks to
+// WebRtcAecm_ProcessBlock(...)
+//
+// Inputs:
+// - aecm : Pointer to the AECM instance
+// - farend : In buffer containing one frame of echo signal
+// - nearendNoisy : In buffer containing one frame of nearend+echo signal
+// without NS
+// - nearendClean : In buffer containing one frame of nearend+echo signal
+// with NS
+//
+// Output:
+// - out : Out buffer, one frame of nearend signal :
+//
+//
+int WebRtcAecm_ProcessFrame(AecmCore* aecm,
+ const int16_t* farend,
+ const int16_t* nearendNoisy,
+ const int16_t* nearendClean,
+ int16_t* out);
+
+////////////////////////////////////////////////////////////////////////////////
+// WebRtcAecm_ProcessBlock(...)
+//
+// This function is called for every block within one frame
+// This function is called by WebRtcAecm_ProcessFrame(...)
+//
+// Inputs:
+// - aecm : Pointer to the AECM instance
+// - farend : In buffer containing one block of echo signal
+// - nearendNoisy : In buffer containing one frame of nearend+echo signal
+// without NS
+// - nearendClean : In buffer containing one frame of nearend+echo signal
+// with NS
+//
+// Output:
+// - out : Out buffer, one block of nearend signal :
+//
+//
+int WebRtcAecm_ProcessBlock(AecmCore* aecm,
+ const int16_t* farend,
+ const int16_t* nearendNoisy,
+ const int16_t* noisyClean,
+ int16_t* out);
+
+////////////////////////////////////////////////////////////////////////////////
+// WebRtcAecm_BufferFarFrame()
+//
+// Inserts a frame of data into farend buffer.
+//
+// Inputs:
+// - aecm : Pointer to the AECM instance
+// - farend : In buffer containing one frame of farend signal
+// - farLen : Length of frame
+//
+void WebRtcAecm_BufferFarFrame(AecmCore* const aecm,
+ const int16_t* const farend,
+ const int farLen);
+
+////////////////////////////////////////////////////////////////////////////////
+// WebRtcAecm_FetchFarFrame()
+//
+// Read the farend buffer to account for known delay
+//
+// Inputs:
+// - aecm : Pointer to the AECM instance
+ // - farend : Out buffer that receives one frame of farend signal
+// - farLen : Length of frame
+// - knownDelay : known delay
+//
+void WebRtcAecm_FetchFarFrame(AecmCore* const aecm,
+ int16_t* const farend,
+ const int farLen,
+ const int knownDelay);
+
+// All the functions below are intended to be private
+
+////////////////////////////////////////////////////////////////////////////////
+// WebRtcAecm_UpdateFarHistory()
+//
+// Moves the pointer to the next entry and inserts |far_spectrum| and
+// corresponding Q-domain in its buffer.
+//
+// Inputs:
+ // - self : Pointer to the AECM instance
+// - far_spectrum : Pointer to the far end spectrum
+// - far_q : Q-domain of far end spectrum
+//
+void WebRtcAecm_UpdateFarHistory(AecmCore* self,
+ uint16_t* far_spectrum,
+ int far_q);
+
+////////////////////////////////////////////////////////////////////////////////
+// WebRtcAecm_AlignedFarend()
+//
+// Returns a pointer to the far end spectrum aligned to current near end
+// spectrum. The function WebRtc_DelayEstimatorProcessFix(...) should have been
+// called before AlignedFarend(...). Otherwise, you get the pointer to the
+// previous frame. The memory is only valid until the next call of
+// WebRtc_DelayEstimatorProcessFix(...).
+//
+// Inputs:
+// - self : Pointer to the AECM instance.
+// - delay : Current delay estimate.
+//
+// Output:
+// - far_q : The Q-domain of the aligned far end spectrum
+//
+// Return value:
+// - far_spectrum : Pointer to the aligned far end spectrum
+// NULL - Error
+//
+const uint16_t* WebRtcAecm_AlignedFarend(AecmCore* self, int* far_q, int delay);
+
+///////////////////////////////////////////////////////////////////////////////
+// WebRtcAecm_CalcSuppressionGain()
+//
+// This function calculates the suppression gain that is used in the
+// Wiener filter.
+//
+// Inputs:
+// - aecm : Pointer to the AECM instance.
+//
+// Return value:
+// - supGain : Suppression gain with which to scale the noise
+// level (Q14).
+//
+int16_t WebRtcAecm_CalcSuppressionGain(AecmCore* const aecm);
+
+///////////////////////////////////////////////////////////////////////////////
+// WebRtcAecm_CalcEnergies()
+//
+// This function calculates the log of energies for nearend, farend and
+// estimated echoes. There is also an update of energy decision levels,
+// i.e. internal VAD.
+//
+// Inputs:
+// - aecm : Pointer to the AECM instance.
+// - far_spectrum : Pointer to farend spectrum.
+// - far_q : Q-domain of farend spectrum.
+// - nearEner : Near end energy for current block in
+// Q(aecm->dfaQDomain).
+//
+// Output:
+// - echoEst : Estimated echo in Q(xfa_q+RESOLUTION_CHANNEL16).
+//
+void WebRtcAecm_CalcEnergies(AecmCore* aecm,
+ const uint16_t* far_spectrum,
+ const int16_t far_q,
+ const uint32_t nearEner,
+ int32_t* echoEst);
+
+///////////////////////////////////////////////////////////////////////////////
+// WebRtcAecm_CalcStepSize()
+//
+// This function calculates the step size used in channel estimation
+//
+// Inputs:
+// - aecm : Pointer to the AECM instance.
+//
+// Return value:
+// - mu : Stepsize in log2(), i.e. number of shifts.
+//
+int16_t WebRtcAecm_CalcStepSize(AecmCore* const aecm);
+
+///////////////////////////////////////////////////////////////////////////////
+// WebRtcAecm_UpdateChannel(...)
+//
+// This function performs channel estimation.
+// NLMS and decision on channel storage.
+//
+// Inputs:
+// - aecm : Pointer to the AECM instance.
+// - far_spectrum : Absolute value of the farend signal in Q(far_q)
+// - far_q : Q-domain of the farend signal
+// - dfa : Absolute value of the nearend signal
+// (Q[aecm->dfaQDomain])
+// - mu : NLMS step size.
+// Input/Output:
+// - echoEst : Estimated echo in Q(far_q+RESOLUTION_CHANNEL16).
+//
+void WebRtcAecm_UpdateChannel(AecmCore* aecm,
+ const uint16_t* far_spectrum,
+ const int16_t far_q,
+ const uint16_t* const dfa,
+ const int16_t mu,
+ int32_t* echoEst);
+
+extern const int16_t WebRtcAecm_kCosTable[];
+extern const int16_t WebRtcAecm_kSinTable[];
+
+///////////////////////////////////////////////////////////////////////////////
+// Some function pointers, for internal functions shared by ARM NEON and
+// generic C code.
+//
+typedef void (*CalcLinearEnergies)(AecmCore* aecm,
+ const uint16_t* far_spectrum,
+ int32_t* echoEst,
+ uint32_t* far_energy,
+ uint32_t* echo_energy_adapt,
+ uint32_t* echo_energy_stored);
+extern CalcLinearEnergies WebRtcAecm_CalcLinearEnergies;
+
+typedef void (*StoreAdaptiveChannel)(AecmCore* aecm,
+ const uint16_t* far_spectrum,
+ int32_t* echo_est);
+extern StoreAdaptiveChannel WebRtcAecm_StoreAdaptiveChannel;
+
+typedef void (*ResetAdaptiveChannel)(AecmCore* aecm);
+extern ResetAdaptiveChannel WebRtcAecm_ResetAdaptiveChannel;
+
+// For the above function pointers, functions for generic platforms are declared
+// and defined as static in file aecm_core.c, while those for ARM Neon platforms
+// are declared below and defined in file aecm_core_neon.c.
+#if defined(WEBRTC_DETECT_NEON) || defined(WEBRTC_HAS_NEON)
+void WebRtcAecm_CalcLinearEnergiesNeon(AecmCore* aecm,
+ const uint16_t* far_spectrum,
+ int32_t* echo_est,
+ uint32_t* far_energy,
+ uint32_t* echo_energy_adapt,
+ uint32_t* echo_energy_stored);
+
+void WebRtcAecm_StoreAdaptiveChannelNeon(AecmCore* aecm,
+ const uint16_t* far_spectrum,
+ int32_t* echo_est);
+
+void WebRtcAecm_ResetAdaptiveChannelNeon(AecmCore* aecm);
+#endif
+
+#if defined(MIPS32_LE)
+void WebRtcAecm_CalcLinearEnergies_mips(AecmCore* aecm,
+ const uint16_t* far_spectrum,
+ int32_t* echo_est,
+ uint32_t* far_energy,
+ uint32_t* echo_energy_adapt,
+ uint32_t* echo_energy_stored);
+#if defined(MIPS_DSP_R1_LE)
+void WebRtcAecm_StoreAdaptiveChannel_mips(AecmCore* aecm,
+ const uint16_t* far_spectrum,
+ int32_t* echo_est);
+
+void WebRtcAecm_ResetAdaptiveChannel_mips(AecmCore* aecm);
+#endif
+#endif
+
+#endif
diff --git a/third_party/webrtc/src/webrtc/modules/audio_processing/aecm/aecm_core_c.c b/third_party/webrtc/src/webrtc/modules/audio_processing/aecm/aecm_core_c.c
new file mode 100644
index 00000000..eb2bd918
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/modules/audio_processing/aecm/aecm_core_c.c
@@ -0,0 +1,771 @@
+/*
+ * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/modules/audio_processing/aecm/aecm_core.h"
+
+#include <assert.h>
+#include <stddef.h>
+#include <stdlib.h>
+
+#include "webrtc/common_audio/ring_buffer.h"
+#include "webrtc/common_audio/signal_processing/include/real_fft.h"
+#include "webrtc/modules/audio_processing/aecm/include/echo_control_mobile.h"
+#include "webrtc/modules/audio_processing/utility/delay_estimator_wrapper.h"
+#include "webrtc/system_wrappers/interface/compile_assert_c.h"
+#include "webrtc/system_wrappers/interface/cpu_features_wrapper.h"
+#include "webrtc/typedefs.h"
+
+// Square root of Hanning window in Q14.
+#if defined(WEBRTC_DETECT_NEON) || defined(WEBRTC_HAS_NEON)
+// Table is defined in an ARM assembly file.
+extern const ALIGN8_BEG int16_t WebRtcAecm_kSqrtHanning[] ALIGN8_END;
+#else
+static const ALIGN8_BEG int16_t WebRtcAecm_kSqrtHanning[] ALIGN8_END = {
+ 0, 399, 798, 1196, 1594, 1990, 2386, 2780, 3172,
+ 3562, 3951, 4337, 4720, 5101, 5478, 5853, 6224,
+ 6591, 6954, 7313, 7668, 8019, 8364, 8705, 9040,
+ 9370, 9695, 10013, 10326, 10633, 10933, 11227, 11514,
+ 11795, 12068, 12335, 12594, 12845, 13089, 13325, 13553,
+ 13773, 13985, 14189, 14384, 14571, 14749, 14918, 15079,
+ 15231, 15373, 15506, 15631, 15746, 15851, 15947, 16034,
+ 16111, 16179, 16237, 16286, 16325, 16354, 16373, 16384
+};
+#endif
+
+#ifdef AECM_WITH_ABS_APPROX
+//Q15 alpha = 0.99439986968132 const Factor for magnitude approximation
+static const uint16_t kAlpha1 = 32584;
+//Q15 beta = 0.12967166976970 const Factor for magnitude approximation
+static const uint16_t kBeta1 = 4249;
+//Q15 alpha = 0.94234827210087 const Factor for magnitude approximation
+static const uint16_t kAlpha2 = 30879;
+//Q15 beta = 0.33787806009150 const Factor for magnitude approximation
+static const uint16_t kBeta2 = 11072;
+//Q15 alpha = 0.82247698684306 const Factor for magnitude approximation
+static const uint16_t kAlpha3 = 26951;
+//Q15 beta = 0.57762063060713 const Factor for magnitude approximation
+static const uint16_t kBeta3 = 18927;
+#endif
+
+static const int16_t kNoiseEstQDomain = 15;
+static const int16_t kNoiseEstIncCount = 5;
+
+static void ComfortNoise(AecmCore* aecm,
+ const uint16_t* dfa,
+ ComplexInt16* out,
+ const int16_t* lambda);
+
+ // Scales and windows 2 * PART_LEN time domain samples and transforms them to
+ // the frequency domain.
+ //
+ // aecm                 [in]   AECM instance; only aecm->real_fft is used.
+ // fft                  [out]  Work buffer that receives the scaled, windowed
+ //                             time signal fed to the FFT.
+ // time_signal          [in]   2 * PART_LEN time domain samples.
+ // freq_signal          [out]  FFT output; the imaginary parts of the first
+ //                             PART_LEN bins are negated.
+ // time_signal_scaling  [in]   Number of bits the samples are scaled up by
+ //                             before windowing. Must be non-negative.
+ static void WindowAndFFT(AecmCore* aecm,
+                          int16_t* fft,
+                          const int16_t* time_signal,
+                          ComplexInt16* freq_signal,
+                          int time_signal_scaling) {
+   int i = 0;
+   // Scale by multiplication rather than by left shift: left-shifting a
+   // negative sample is undefined behaviour in C, multiplying is not.
+   const int scale_factor = 1 << time_signal_scaling;
+
+   // FFT of signal
+   for (i = 0; i < PART_LEN; i++) {
+     // Window time domain signal and insert into real part of
+     // transformation array |fft|. The first half uses the rising part of the
+     // sqrt-Hanning table (Q14), the second half the same table mirrored.
+     int16_t scaled_time_signal = time_signal[i] * scale_factor;
+     fft[i] = (int16_t)((scaled_time_signal * WebRtcAecm_kSqrtHanning[i]) >> 14);
+     scaled_time_signal = time_signal[i + PART_LEN] * scale_factor;
+     fft[PART_LEN + i] = (int16_t)((
+         scaled_time_signal * WebRtcAecm_kSqrtHanning[PART_LEN - i]) >> 14);
+   }
+
+   // Do forward FFT, then take only the first PART_LEN complex samples,
+   // and change signs of the imaginary parts.
+   WebRtcSpl_RealForwardFFT(aecm->real_fft, fft, (int16_t*)freq_signal);
+   for (i = 0; i < PART_LEN; i++) {
+     freq_signal[i].imag = -freq_signal[i].imag;
+   }
+ }
+
+ // Transforms the frequency domain signal |efw| back to the time domain,
+ // windows it, overlap-adds it with aecm->outBuf into |output| and stores the
+ // windowed second half in aecm->outBuf for the next block. Finally the newest
+ // half of the far end and near end time buffers is moved to the old position.
+ //
+ // aecm          [in/out]  AECM instance.
+ // fft           [out]     Work buffer holding the input to the inverse FFT.
+ // efw           [in/out]  Frequency domain signal; its memory is reused for
+ //                         the inverse FFT output.
+ // output        [out]     PART_LEN output samples.
+ // nearendClean  [in]      If NULL, aecm->dBufClean is left untouched.
+ static void InverseFFTAndWindow(AecmCore* aecm,
+                                 int16_t* fft,
+                                 ComplexInt16* efw,
+                                 int16_t* output,
+                                 const int16_t* nearendClean) {
+   // The inverse FFT writes its result on top of |efw|, whose contents have
+   // been copied into |fft| by then.
+   int16_t* const time_out = (int16_t*)efw;
+   int32_t sample32;
+   int ifft_scale;
+   int q_shift;
+   int k;
+
+   // Synthesis: interleave real parts and negated imaginary parts.
+   for (k = 0; k < PART_LEN; k++) {
+     fft[2 * k] = efw[k].real;
+     fft[2 * k + 1] = -efw[k].imag;
+   }
+   fft[PART_LEN2] = efw[PART_LEN].real;
+   fft[PART_LEN2 + 1] = -efw[PART_LEN].imag;
+
+   // Inverse FFT. The returned scale is needed to bring the samples back to
+   // the output Q-domain below.
+   ifft_scale = WebRtcSpl_RealInverseFFT(aecm->real_fft, fft, time_out);
+
+   for (k = 0; k < PART_LEN; k++) {
+     q_shift = ifft_scale - aecm->dfaCleanQDomain;
+
+     // First half: window, rescale and overlap-add with the saved tail.
+     time_out[k] = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(
+         time_out[k], WebRtcAecm_kSqrtHanning[k], 14);
+     sample32 = WEBRTC_SPL_SHIFT_W32((int32_t)time_out[k], q_shift);
+     output[k] = (int16_t)WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX,
+                                         sample32 + aecm->outBuf[k],
+                                         WEBRTC_SPL_WORD16_MIN);
+
+     // Second half: window with the mirrored table, rescale and save as the
+     // tail for the next block.
+     sample32 = (time_out[PART_LEN + k] *
+                 WebRtcAecm_kSqrtHanning[PART_LEN - k]) >> 14;
+     sample32 = WEBRTC_SPL_SHIFT_W32(sample32, q_shift);
+     aecm->outBuf[k] = (int16_t)WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX,
+                                               sample32,
+                                               WEBRTC_SPL_WORD16_MIN);
+   }
+
+   // Move the current block to the old position
+   // (aecm->outBuf is shifted elsewhere)
+   memcpy(aecm->xBuf, aecm->xBuf + PART_LEN, sizeof(int16_t) * PART_LEN);
+   memcpy(aecm->dBufNoisy,
+          aecm->dBufNoisy + PART_LEN,
+          sizeof(int16_t) * PART_LEN);
+   if (nearendClean) {
+     memcpy(aecm->dBufClean,
+            aecm->dBufClean + PART_LEN,
+            sizeof(int16_t) * PART_LEN);
+   }
+ }
+
+ // Transforms a time domain signal into the frequency domain, outputting the
+ // complex valued signal, absolute value and sum of absolute values.
+ //
+ // aecm [in] AECM instance, passed on to WindowAndFFT()
+ // time_signal [in] Pointer to time domain signal
+ // freq_signal [out] Pointer to the complex valued frequency domain
+ // array
+ // freq_signal_abs [out] Pointer to absolute value of frequency domain
+ // array
+ // freq_signal_sum_abs [out] Pointer to the sum of all absolute values in
+ // the frequency domain array
+ // return value The Q-domain of current frequency values, i.e.
+ // the scaling applied to |time_signal| before the
+ // FFT (0 unless AECM_DYNAMIC_Q is defined)
+ //
+ static int TimeToFrequencyDomain(AecmCore* aecm,
+ const int16_t* time_signal,
+ ComplexInt16* freq_signal,
+ uint16_t* freq_signal_abs,
+ uint32_t* freq_signal_sum_abs) {
+ int i = 0;
+ int time_signal_scaling = 0;
+
+ int32_t tmp32no1 = 0;
+ int32_t tmp32no2 = 0;
+
+ // In fft_buf, +16 for 32-byte alignment.
+ int16_t fft_buf[PART_LEN4 + 16];
+ // Round the buffer address up to the next multiple of 32 bytes.
+ int16_t *fft = (int16_t *) (((uintptr_t) fft_buf + 31) & ~31);
+
+ int16_t tmp16no1;
+ #ifndef WEBRTC_ARCH_ARM_V7
+ int16_t tmp16no2;
+ #endif
+ #ifdef AECM_WITH_ABS_APPROX
+ int16_t max_value = 0;
+ int16_t min_value = 0;
+ uint16_t alpha = 0;
+ uint16_t beta = 0;
+ #endif
+
+ #ifdef AECM_DYNAMIC_Q
+ // Scale the signal up by the headroom of its largest absolute sample.
+ tmp16no1 = WebRtcSpl_MaxAbsValueW16(time_signal, PART_LEN2);
+ time_signal_scaling = WebRtcSpl_NormW16(tmp16no1);
+ #endif
+
+ WindowAndFFT(aecm, fft, time_signal, freq_signal, time_signal_scaling);
+
+ // Extract imaginary and real part, calculate the magnitude for
+ // all frequency bins
+ // Bins 0 and PART_LEN are treated as purely real: their imaginary parts are
+ // cleared and the magnitude is the absolute value of the real part.
+ freq_signal[0].imag = 0;
+ freq_signal[PART_LEN].imag = 0;
+ freq_signal_abs[0] = (uint16_t)WEBRTC_SPL_ABS_W16(freq_signal[0].real);
+ freq_signal_abs[PART_LEN] = (uint16_t)WEBRTC_SPL_ABS_W16(
+ freq_signal[PART_LEN].real);
+ (*freq_signal_sum_abs) = (uint32_t)(freq_signal_abs[0]) +
+ (uint32_t)(freq_signal_abs[PART_LEN]);
+
+ for (i = 1; i < PART_LEN; i++)
+ {
+ // Shortcuts: if one component is zero the magnitude is the absolute
+ // value of the other one.
+ if (freq_signal[i].real == 0)
+ {
+ freq_signal_abs[i] = (uint16_t)WEBRTC_SPL_ABS_W16(freq_signal[i].imag);
+ }
+ else if (freq_signal[i].imag == 0)
+ {
+ freq_signal_abs[i] = (uint16_t)WEBRTC_SPL_ABS_W16(freq_signal[i].real);
+ }
+ else
+ {
+ // Approximation for magnitude of complex fft output
+ // magn = sqrt(real^2 + imag^2)
+ // magn ~= alpha * max(|imag|,|real|) + beta * min(|imag|,|real|)
+ //
+ // The parameters alpha and beta are stored in Q15
+
+ #ifdef AECM_WITH_ABS_APPROX
+ tmp16no1 = WEBRTC_SPL_ABS_W16(freq_signal[i].real);
+ tmp16no2 = WEBRTC_SPL_ABS_W16(freq_signal[i].imag);
+
+ if(tmp16no1 > tmp16no2)
+ {
+ max_value = tmp16no1;
+ min_value = tmp16no2;
+ } else
+ {
+ max_value = tmp16no2;
+ min_value = tmp16no1;
+ }
+
+ // Magnitude in Q(-6)
+ // Pick the alpha/beta pair according to the ratio between the larger
+ // and the smaller component.
+ if ((max_value >> 2) > min_value)
+ {
+ alpha = kAlpha1;
+ beta = kBeta1;
+ } else if ((max_value >> 1) > min_value)
+ {
+ alpha = kAlpha2;
+ beta = kBeta2;
+ } else
+ {
+ alpha = kAlpha3;
+ beta = kBeta3;
+ }
+ tmp16no1 = (int16_t)((max_value * alpha) >> 15);
+ tmp16no2 = (int16_t)((min_value * beta) >> 15);
+ freq_signal_abs[i] = (uint16_t)tmp16no1 + (uint16_t)tmp16no2;
+ #else
+ // Exact path: tmp32no2 = real^2 + imag^2, then integer square root.
+ #ifdef WEBRTC_ARCH_ARM_V7
+ __asm __volatile(
+ "smulbb %[tmp32no1], %[real], %[real]\n\t"
+ "smlabb %[tmp32no2], %[imag], %[imag], %[tmp32no1]\n\t"
+ :[tmp32no1]"+&r"(tmp32no1),
+ [tmp32no2]"=r"(tmp32no2)
+ :[real]"r"(freq_signal[i].real),
+ [imag]"r"(freq_signal[i].imag)
+ );
+ #else
+ tmp16no1 = WEBRTC_SPL_ABS_W16(freq_signal[i].real);
+ tmp16no2 = WEBRTC_SPL_ABS_W16(freq_signal[i].imag);
+ tmp32no1 = tmp16no1 * tmp16no1;
+ tmp32no2 = tmp16no2 * tmp16no2;
+ tmp32no2 = WebRtcSpl_AddSatW32(tmp32no1, tmp32no2);
+ #endif // WEBRTC_ARCH_ARM_V7
+ tmp32no1 = WebRtcSpl_SqrtFloor(tmp32no2);
+
+ freq_signal_abs[i] = (uint16_t)tmp32no1;
+ #endif // AECM_WITH_ABS_APPROX
+ }
+ (*freq_signal_sum_abs) += (uint32_t)freq_signal_abs[i];
+ }
+
+ return time_signal_scaling;
+ }
+
+int WebRtcAecm_ProcessBlock(AecmCore* aecm,
+ const int16_t* farend,
+ const int16_t* nearendNoisy,
+ const int16_t* nearendClean,
+ int16_t* output) {
+ int i;
+
+ uint32_t xfaSum;
+ uint32_t dfaNoisySum;
+ uint32_t dfaCleanSum;
+ uint32_t echoEst32Gained;
+ uint32_t tmpU32;
+
+ int32_t tmp32no1;
+
+ uint16_t xfa[PART_LEN1];
+ uint16_t dfaNoisy[PART_LEN1];
+ uint16_t dfaClean[PART_LEN1];
+ uint16_t* ptrDfaClean = dfaClean;
+ const uint16_t* far_spectrum_ptr = NULL;
+
+ // 32 byte aligned buffers (with +8 or +16).
+ // TODO(kma): define fft with ComplexInt16.
+ int16_t fft_buf[PART_LEN4 + 2 + 16]; // +2 to make a loop safe.
+ int32_t echoEst32_buf[PART_LEN1 + 8];
+ int32_t dfw_buf[PART_LEN2 + 8];
+ int32_t efw_buf[PART_LEN2 + 8];
+
+ int16_t* fft = (int16_t*) (((uintptr_t) fft_buf + 31) & ~ 31);
+ int32_t* echoEst32 = (int32_t*) (((uintptr_t) echoEst32_buf + 31) & ~ 31);
+ ComplexInt16* dfw = (ComplexInt16*)(((uintptr_t)dfw_buf + 31) & ~31);
+ ComplexInt16* efw = (ComplexInt16*)(((uintptr_t)efw_buf + 31) & ~31);
+
+ int16_t hnl[PART_LEN1];
+ int16_t numPosCoef = 0;
+ int16_t nlpGain = ONE_Q14;
+ int delay;
+ int16_t tmp16no1;
+ int16_t tmp16no2;
+ int16_t mu;
+ int16_t supGain;
+ int16_t zeros32, zeros16;
+ int16_t zerosDBufNoisy, zerosDBufClean, zerosXBuf;
+ int far_q;
+ int16_t resolutionDiff, qDomainDiff, dfa_clean_q_domain_diff;
+
+ const int kMinPrefBand = 4;
+ const int kMaxPrefBand = 24;
+ int32_t avgHnl32 = 0;
+
+ // Determine startup state. There are three states:
+ // (0) the first CONV_LEN blocks
+ // (1) another CONV_LEN blocks
+ // (2) the rest
+
+ if (aecm->startupState < 2)
+ {
+ aecm->startupState = (aecm->totCount >= CONV_LEN) +
+ (aecm->totCount >= CONV_LEN2);
+ }
+ // END: Determine startup state
+
+ // Buffer near and far end signals
+ memcpy(aecm->xBuf + PART_LEN, farend, sizeof(int16_t) * PART_LEN);
+ memcpy(aecm->dBufNoisy + PART_LEN, nearendNoisy, sizeof(int16_t) * PART_LEN);
+ if (nearendClean != NULL)
+ {
+ memcpy(aecm->dBufClean + PART_LEN,
+ nearendClean,
+ sizeof(int16_t) * PART_LEN);
+ }
+
+ // Transform far end signal from time domain to frequency domain.
+ far_q = TimeToFrequencyDomain(aecm,
+ aecm->xBuf,
+ dfw,
+ xfa,
+ &xfaSum);
+
+ // Transform noisy near end signal from time domain to frequency domain.
+ zerosDBufNoisy = TimeToFrequencyDomain(aecm,
+ aecm->dBufNoisy,
+ dfw,
+ dfaNoisy,
+ &dfaNoisySum);
+ aecm->dfaNoisyQDomainOld = aecm->dfaNoisyQDomain;
+ aecm->dfaNoisyQDomain = (int16_t)zerosDBufNoisy;
+
+
+ if (nearendClean == NULL)
+ {
+ ptrDfaClean = dfaNoisy;
+ aecm->dfaCleanQDomainOld = aecm->dfaNoisyQDomainOld;
+ aecm->dfaCleanQDomain = aecm->dfaNoisyQDomain;
+ dfaCleanSum = dfaNoisySum;
+ } else
+ {
+ // Transform clean near end signal from time domain to frequency domain.
+ zerosDBufClean = TimeToFrequencyDomain(aecm,
+ aecm->dBufClean,
+ dfw,
+ dfaClean,
+ &dfaCleanSum);
+ aecm->dfaCleanQDomainOld = aecm->dfaCleanQDomain;
+ aecm->dfaCleanQDomain = (int16_t)zerosDBufClean;
+ }
+
+ // Get the delay
+ // Save far-end history and estimate delay
+ WebRtcAecm_UpdateFarHistory(aecm, xfa, far_q);
+ if (WebRtc_AddFarSpectrumFix(aecm->delay_estimator_farend,
+ xfa,
+ PART_LEN1,
+ far_q) == -1) {
+ return -1;
+ }
+ delay = WebRtc_DelayEstimatorProcessFix(aecm->delay_estimator,
+ dfaNoisy,
+ PART_LEN1,
+ zerosDBufNoisy);
+ if (delay == -1)
+ {
+ return -1;
+ }
+ else if (delay == -2)
+ {
+ // If the delay is unknown, we assume zero.
+ // NOTE: this will have to be adjusted if we ever add lookahead.
+ delay = 0;
+ }
+
+ if (aecm->fixedDelay >= 0)
+ {
+ // Use fixed delay
+ delay = aecm->fixedDelay;
+ }
+
+ // Get aligned far end spectrum
+ far_spectrum_ptr = WebRtcAecm_AlignedFarend(aecm, &far_q, delay);
+ zerosXBuf = (int16_t) far_q;
+ if (far_spectrum_ptr == NULL)
+ {
+ return -1;
+ }
+
+ // Calculate log(energy) and update energy threshold levels
+ WebRtcAecm_CalcEnergies(aecm,
+ far_spectrum_ptr,
+ zerosXBuf,
+ dfaNoisySum,
+ echoEst32);
+
+ // Calculate stepsize
+ mu = WebRtcAecm_CalcStepSize(aecm);
+
+ // Update counters
+ aecm->totCount++;
+
+ // This is the channel estimation algorithm.
+ // It is base on NLMS but has a variable step length,
+ // which was calculated above.
+ WebRtcAecm_UpdateChannel(aecm,
+ far_spectrum_ptr,
+ zerosXBuf,
+ dfaNoisy,
+ mu,
+ echoEst32);
+ supGain = WebRtcAecm_CalcSuppressionGain(aecm);
+
+
+ // Calculate Wiener filter hnl[]
+ for (i = 0; i < PART_LEN1; i++)
+ {
+ // Far end signal through channel estimate in Q8
+ // How much can we shift right to preserve resolution
+ tmp32no1 = echoEst32[i] - aecm->echoFilt[i];
+ aecm->echoFilt[i] += (tmp32no1 * 50) >> 8;
+
+ zeros32 = WebRtcSpl_NormW32(aecm->echoFilt[i]) + 1;
+ zeros16 = WebRtcSpl_NormW16(supGain) + 1;
+ if (zeros32 + zeros16 > 16)
+ {
+ // Multiplication is safe
+ // Result in
+ // Q(RESOLUTION_CHANNEL+RESOLUTION_SUPGAIN+
+ // aecm->xfaQDomainBuf[diff])
+ echoEst32Gained = WEBRTC_SPL_UMUL_32_16((uint32_t)aecm->echoFilt[i],
+ (uint16_t)supGain);
+ resolutionDiff = 14 - RESOLUTION_CHANNEL16 - RESOLUTION_SUPGAIN;
+ resolutionDiff += (aecm->dfaCleanQDomain - zerosXBuf);
+ } else
+ {
+ tmp16no1 = 17 - zeros32 - zeros16;
+ resolutionDiff = 14 + tmp16no1 - RESOLUTION_CHANNEL16 -
+ RESOLUTION_SUPGAIN;
+ resolutionDiff += (aecm->dfaCleanQDomain - zerosXBuf);
+ if (zeros32 > tmp16no1)
+ {
+ echoEst32Gained = WEBRTC_SPL_UMUL_32_16((uint32_t)aecm->echoFilt[i],
+ supGain >> tmp16no1);
+ } else
+ {
+ // Result in Q-(RESOLUTION_CHANNEL+RESOLUTION_SUPGAIN-16)
+ echoEst32Gained = (aecm->echoFilt[i] >> tmp16no1) * supGain;
+ }
+ }
+
+ zeros16 = WebRtcSpl_NormW16(aecm->nearFilt[i]);
+ assert(zeros16 >= 0); // |zeros16| is a norm, hence non-negative.
+ dfa_clean_q_domain_diff = aecm->dfaCleanQDomain - aecm->dfaCleanQDomainOld;
+ if (zeros16 < dfa_clean_q_domain_diff && aecm->nearFilt[i]) {
+ tmp16no1 = aecm->nearFilt[i] << zeros16;
+ qDomainDiff = zeros16 - dfa_clean_q_domain_diff;
+ tmp16no2 = ptrDfaClean[i] >> -qDomainDiff;
+ } else {
+ tmp16no1 = dfa_clean_q_domain_diff < 0
+ ? aecm->nearFilt[i] >> -dfa_clean_q_domain_diff
+ : aecm->nearFilt[i] << dfa_clean_q_domain_diff;
+ qDomainDiff = 0;
+ tmp16no2 = ptrDfaClean[i];
+ }
+ tmp32no1 = (int32_t)(tmp16no2 - tmp16no1);
+ tmp16no2 = (int16_t)(tmp32no1 >> 4);
+ tmp16no2 += tmp16no1;
+ zeros16 = WebRtcSpl_NormW16(tmp16no2);
+ if ((tmp16no2) & (-qDomainDiff > zeros16)) {
+ aecm->nearFilt[i] = WEBRTC_SPL_WORD16_MAX;
+ } else {
+ aecm->nearFilt[i] = qDomainDiff < 0 ? tmp16no2 << -qDomainDiff
+ : tmp16no2 >> qDomainDiff;
+ }
+
+ // Wiener filter coefficients, resulting hnl in Q14
+ if (echoEst32Gained == 0)
+ {
+ hnl[i] = ONE_Q14;
+ } else if (aecm->nearFilt[i] == 0)
+ {
+ hnl[i] = 0;
+ } else
+ {
+ // Multiply the suppression gain
+ // Rounding
+ echoEst32Gained += (uint32_t)(aecm->nearFilt[i] >> 1);
+ tmpU32 = WebRtcSpl_DivU32U16(echoEst32Gained,
+ (uint16_t)aecm->nearFilt[i]);
+
+ // Current resolution is
+ // Q-(RESOLUTION_CHANNEL+RESOLUTION_SUPGAIN- max(0,17-zeros16- zeros32))
+ // Make sure we are in Q14
+ tmp32no1 = (int32_t)WEBRTC_SPL_SHIFT_W32(tmpU32, resolutionDiff);
+ if (tmp32no1 > ONE_Q14)
+ {
+ hnl[i] = 0;
+ } else if (tmp32no1 < 0)
+ {
+ hnl[i] = ONE_Q14;
+ } else
+ {
+ // 1-echoEst/dfa
+ hnl[i] = ONE_Q14 - (int16_t)tmp32no1;
+ if (hnl[i] < 0)
+ {
+ hnl[i] = 0;
+ }
+ }
+ }
+ if (hnl[i])
+ {
+ numPosCoef++;
+ }
+ }
+ // Only in wideband. Prevent the gain in upper band from being larger than
+ // in lower band.
+ if (aecm->mult == 2)
+ {
+ // TODO(bjornv): Investigate if the scaling of hnl[i] below can cause
+ // speech distortion in double-talk.
+ for (i = 0; i < PART_LEN1; i++)
+ {
+ hnl[i] = (int16_t)((hnl[i] * hnl[i]) >> 14);
+ }
+
+ for (i = kMinPrefBand; i <= kMaxPrefBand; i++)
+ {
+ avgHnl32 += (int32_t)hnl[i];
+ }
+ assert(kMaxPrefBand - kMinPrefBand + 1 > 0);
+ avgHnl32 /= (kMaxPrefBand - kMinPrefBand + 1);
+
+ for (i = kMaxPrefBand; i < PART_LEN1; i++)
+ {
+ if (hnl[i] > (int16_t)avgHnl32)
+ {
+ hnl[i] = (int16_t)avgHnl32;
+ }
+ }
+ }
+
+ // Calculate NLP gain, result is in Q14
+ if (aecm->nlpFlag)
+ {
+ for (i = 0; i < PART_LEN1; i++)
+ {
+ // Truncate values close to zero and one.
+ if (hnl[i] > NLP_COMP_HIGH)
+ {
+ hnl[i] = ONE_Q14;
+ } else if (hnl[i] < NLP_COMP_LOW)
+ {
+ hnl[i] = 0;
+ }
+
+ // Remove outliers
+ if (numPosCoef < 3)
+ {
+ nlpGain = 0;
+ } else
+ {
+ nlpGain = ONE_Q14;
+ }
+
+ // NLP
+ if ((hnl[i] == ONE_Q14) && (nlpGain == ONE_Q14))
+ {
+ hnl[i] = ONE_Q14;
+ } else
+ {
+ hnl[i] = (int16_t)((hnl[i] * nlpGain) >> 14);
+ }
+
+ // multiply with Wiener coefficients
+ efw[i].real = (int16_t)(WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(dfw[i].real,
+ hnl[i], 14));
+ efw[i].imag = (int16_t)(WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(dfw[i].imag,
+ hnl[i], 14));
+ }
+ }
+ else
+ {
+ // multiply with Wiener coefficients
+ for (i = 0; i < PART_LEN1; i++)
+ {
+ efw[i].real = (int16_t)(WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(dfw[i].real,
+ hnl[i], 14));
+ efw[i].imag = (int16_t)(WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(dfw[i].imag,
+ hnl[i], 14));
+ }
+ }
+
+ if (aecm->cngMode == AecmTrue)
+ {
+ ComfortNoise(aecm, ptrDfaClean, efw, hnl);
+ }
+
+ InverseFFTAndWindow(aecm, fft, efw, output, nearendClean);
+
+ return 0;
+}
+// Updates aecm->noiseEst[] from |dfa| and adds random-phase noise scaled by (ONE_Q14 - lambda[i]) to |out|.
+static void ComfortNoise(AecmCore* aecm,
+ const uint16_t* dfa,
+ ComplexInt16* out,
+ const int16_t* lambda) {
+ int16_t i;
+ int16_t tmp16;
+ int32_t tmp32;
+
+ int16_t randW16[PART_LEN];
+ int16_t uReal[PART_LEN1];  // Real part of the generated noise.
+ int16_t uImag[PART_LEN1];  // Imaginary part of the generated noise.
+ int32_t outLShift32;
+ int16_t noiseRShift16[PART_LEN1];
+
+ int16_t shiftFromNearToNoise = kNoiseEstQDomain - aecm->dfaCleanQDomain;  // Left shift into the noise domain.
+ int16_t minTrackShift;
+
+ assert(shiftFromNearToNoise >= 0);
+ assert(shiftFromNearToNoise < 16);
+
+ if (aecm->noiseEstCtr < 100)
+ {
+ // Track the minimum more quickly initially.
+ aecm->noiseEstCtr++;
+ minTrackShift = 6;
+ } else
+ {
+ minTrackShift = 9;
+ }
+
+ // Estimate noise power.
+ for (i = 0; i < PART_LEN1; i++)
+ {
+ // Shift to the noise domain.
+ tmp32 = (int32_t)dfa[i];
+ outLShift32 = tmp32 << shiftFromNearToNoise;
+
+ if (outLShift32 < aecm->noiseEst[i])
+ {
+ // Reset "too low" counter
+ aecm->noiseEstTooLowCtr[i] = 0;
+ // Track the minimum.
+ if (aecm->noiseEst[i] < (1 << minTrackShift))
+ {
+ // For small values, decrease noiseEst[i] every
+ // |kNoiseEstIncCount| block. The regular approach below can not
+ // go further down due to truncation.
+ aecm->noiseEstTooHighCtr[i]++;
+ if (aecm->noiseEstTooHighCtr[i] >= kNoiseEstIncCount)
+ {
+ aecm->noiseEst[i]--;
+ aecm->noiseEstTooHighCtr[i] = 0; // Reset the counter
+ }
+ }
+ else
+ {
+ aecm->noiseEst[i] -= ((aecm->noiseEst[i] - outLShift32)  // Step down by 2^-minTrackShift of the gap.
+ >> minTrackShift);
+ }
+ } else
+ {
+ // Reset "too high" counter
+ aecm->noiseEstTooHighCtr[i] = 0;
+ // Ramp slowly upwards until we hit the minimum again.
+ if ((aecm->noiseEst[i] >> 19) > 0)
+ {
+ // Avoid overflow.
+ // Multiplication with 2049 will cause wrap around. Scale
+ // down first and then multiply
+ aecm->noiseEst[i] >>= 11;
+ aecm->noiseEst[i] *= 2049;
+ }
+ else if ((aecm->noiseEst[i] >> 11) > 0)
+ {
+ // Large enough for relative increase
+ aecm->noiseEst[i] *= 2049;  // Together with the shift below: scale by 2049/2048.
+ aecm->noiseEst[i] >>= 11;
+ }
+ else
+ {
+ // Make incremental increases based on size every
+ // |kNoiseEstIncCount| block
+ aecm->noiseEstTooLowCtr[i]++;
+ if (aecm->noiseEstTooLowCtr[i] >= kNoiseEstIncCount)
+ {
+ aecm->noiseEst[i] += (aecm->noiseEst[i] >> 9) + 1;
+ aecm->noiseEstTooLowCtr[i] = 0; // Reset counter
+ }
+ }
+ }
+ }
+
+ for (i = 0; i < PART_LEN1; i++)
+ {
+ tmp32 = aecm->noiseEst[i] >> shiftFromNearToNoise;  // Undo the noise-domain shift.
+ if (tmp32 > 32767)
+ {
+ tmp32 = 32767;  // Clamp to the int16_t maximum and write the clamp back.
+ aecm->noiseEst[i] = tmp32 << shiftFromNearToNoise;
+ }
+ noiseRShift16[i] = (int16_t)tmp32;
+
+ tmp16 = ONE_Q14 - lambda[i];
+ noiseRShift16[i] = (int16_t)((tmp16 * noiseRShift16[i]) >> 14);
+ }
+
+ // Generate a uniform random array on [0 2^15-1].
+ WebRtcSpl_RandUArray(randW16, PART_LEN, &aecm->seed);
+
+ // Generate noise according to estimated energy.
+ uReal[0] = 0; // Reject LF noise.
+ uImag[0] = 0;
+ for (i = 1; i < PART_LEN1; i++)
+ {
+ // Get a random index for the cos and sin tables over [0 359].
+ tmp16 = (int16_t)((359 * randW16[i - 1]) >> 15);
+
+ // Tables are in Q13.
+ uReal[i] = (int16_t)((noiseRShift16[i] * WebRtcAecm_kCosTable[tmp16]) >>
+ 13);
+ uImag[i] = (int16_t)((-noiseRShift16[i] * WebRtcAecm_kSinTable[tmp16]) >>
+ 13);
+ }
+ uImag[PART_LEN] = 0;  // The last bin keeps only its real noise component.
+
+ for (i = 0; i < PART_LEN1; i++)
+ {
+ out[i].real = WebRtcSpl_AddSatW16(out[i].real, uReal[i]);  // Saturating add onto the spectrum.
+ out[i].imag = WebRtcSpl_AddSatW16(out[i].imag, uImag[i]);
+ }
+}
+
diff --git a/third_party/webrtc/src/webrtc/modules/audio_processing/aecm/aecm_core_mips.c b/third_party/webrtc/src/webrtc/modules/audio_processing/aecm/aecm_core_mips.c
new file mode 100644
index 00000000..3c2343a8
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/modules/audio_processing/aecm/aecm_core_mips.c
@@ -0,0 +1,1566 @@
+/*
+ * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/modules/audio_processing/aecm/aecm_core.h"
+
+#include <assert.h>
+
+#include "webrtc/modules/audio_processing/aecm/include/echo_control_mobile.h"
+#include "webrtc/modules/audio_processing/utility/delay_estimator_wrapper.h"
+
+static const ALIGN8_BEG int16_t WebRtcAecm_kSqrtHanning[] ALIGN8_END = {  // 65 entries rising from 0 to 16384.
+ 0, 399, 798, 1196, 1594, 1990, 2386, 2780, 3172,
+ 3562, 3951, 4337, 4720, 5101, 5478, 5853, 6224,
+ 6591, 6954, 7313, 7668, 8019, 8364, 8705, 9040,
+ 9370, 9695, 10013, 10326, 10633, 10933, 11227, 11514,
+ 11795, 12068, 12335, 12594, 12845, 13089, 13325, 13553,
+ 13773, 13985, 14189, 14384, 14571, 14749, 14918, 15079,
+ 15231, 15373, 15506, 15631, 15746, 15851, 15947, 16034,
+ 16111, 16179, 16237, 16286, 16325, 16354, 16373, 16384
+};
+
+static const int16_t kNoiseEstQDomain = 15;
+static const int16_t kNoiseEstIncCount = 5;
+
+static int16_t coefTable[] = {  // 128 byte offsets into |fft|; WindowAndFFT reads two per iteration.
+ 0, 4, 256, 260, 128, 132, 384, 388,
+ 64, 68, 320, 324, 192, 196, 448, 452,
+ 32, 36, 288, 292, 160, 164, 416, 420,
+ 96, 100, 352, 356, 224, 228, 480, 484,
+ 16, 20, 272, 276, 144, 148, 400, 404,
+ 80, 84, 336, 340, 208, 212, 464, 468,
+ 48, 52, 304, 308, 176, 180, 432, 436,
+ 112, 116, 368, 372, 240, 244, 496, 500,
+ 8, 12, 264, 268, 136, 140, 392, 396,
+ 72, 76, 328, 332, 200, 204, 456, 460,
+ 40, 44, 296, 300, 168, 172, 424, 428,
+ 104, 108, 360, 364, 232, 236, 488, 492,
+ 24, 28, 280, 284, 152, 156, 408, 412,
+ 88, 92, 344, 348, 216, 220, 472, 476,
+ 56, 60, 312, 316, 184, 188, 440, 444,
+ 120, 124, 376, 380, 248, 252, 504, 508
+};
+
+static int16_t coefTable_ifft[] = {  // 128 byte offsets into |fft|; InverseFFTAndWindow reads them in pairs.
+ 0, 512, 256, 508, 128, 252, 384, 380,
+ 64, 124, 320, 444, 192, 188, 448, 316,
+ 32, 60, 288, 476, 160, 220, 416, 348,
+ 96, 92, 352, 412, 224, 156, 480, 284,
+ 16, 28, 272, 492, 144, 236, 400, 364,
+ 80, 108, 336, 428, 208, 172, 464, 300,
+ 48, 44, 304, 460, 176, 204, 432, 332,
+ 112, 76, 368, 396, 240, 140, 496, 268,
+ 8, 12, 264, 500, 136, 244, 392, 372,
+ 72, 116, 328, 436, 200, 180, 456, 308,
+ 40, 52, 296, 468, 168, 212, 424, 340,
+ 104, 84, 360, 404, 232, 148, 488, 276,
+ 24, 20, 280, 484, 152, 228, 408, 356,
+ 88, 100, 344, 420, 216, 164, 472, 292,
+ 56, 36, 312, 452, 184, 196, 440, 324,
+ 120, 68, 376, 388, 248, 132, 504, 260
+};
+
+static void ComfortNoise(AecmCore* aecm,  // Forward declaration only.
+ const uint16_t* dfa,
+ ComplexInt16* out,
+ const int16_t* lambda);
+// Windows 2 * 64 samples of |time_signal| into |fft| via coefTable, runs the FFT, copies the result to |freq_signal|.
+static void WindowAndFFT(AecmCore* aecm,
+ int16_t* fft,
+ const int16_t* time_signal,
+ ComplexInt16* freq_signal,
+ int time_signal_scaling) {
+ int i, j;
+ int32_t tmp1, tmp2, tmp3, tmp4;
+ int16_t* pfrfi;
+ ComplexInt16* pfreq_signal;
+ int16_t f_coef, s_coef;
+ int32_t load_ptr, store_ptr1, store_ptr2, shift, shift1;
+ int32_t hann, hann1, coefs;
+
+ memset(fft, 0, sizeof(int16_t) * PART_LEN4);  // Slots not written by the scatter below stay zero.
+
+ // FFT of signal
+ __asm __volatile (
+ ".set push \n\t"
+ ".set noreorder \n\t"
+ "addiu %[shift], %[time_signal_scaling], -14 \n\t"  // shift = time_signal_scaling - 14
+ "addiu %[i], $zero, 64 \n\t"  // 64 iterations, two samples each
+ "addiu %[load_ptr], %[time_signal], 0 \n\t"
+ "addiu %[hann], %[hanning], 0 \n\t"  // window read forwards from entry 0
+ "addiu %[hann1], %[hanning], 128 \n\t"  // window read backwards from byte offset 128 (entry 64)
+ "addiu %[coefs], %[coefTable], 0 \n\t"
+ "bltz %[shift], 2f \n\t"  // negative shift: take the right-shift loop at 2:
+ " negu %[shift1], %[shift] \n\t"  // delay slot: shift1 = -shift
+ "1: \n\t"  // loop variant for shift >= 0 (sllv)
+ "lh %[tmp1], 0(%[load_ptr]) \n\t"
+ "lh %[tmp2], 0(%[hann]) \n\t"
+ "lh %[tmp3], 128(%[load_ptr]) \n\t"  // sample 64 positions further on
+ "lh %[tmp4], 0(%[hann1]) \n\t"
+ "addiu %[i], %[i], -1 \n\t"
+ "mul %[tmp1], %[tmp1], %[tmp2] \n\t"
+ "mul %[tmp3], %[tmp3], %[tmp4] \n\t"
+ "lh %[f_coef], 0(%[coefs]) \n\t"
+ "lh %[s_coef], 2(%[coefs]) \n\t"
+ "addiu %[load_ptr], %[load_ptr], 2 \n\t"
+ "addiu %[hann], %[hann], 2 \n\t"
+ "addiu %[hann1], %[hann1], -2 \n\t"
+ "addu %[store_ptr1], %[fft], %[f_coef] \n\t"  // destination = fft + byte offset from coefTable
+ "addu %[store_ptr2], %[fft], %[s_coef] \n\t"
+ "sllv %[tmp1], %[tmp1], %[shift] \n\t"
+ "sllv %[tmp3], %[tmp3], %[shift] \n\t"
+ "sh %[tmp1], 0(%[store_ptr1]) \n\t"
+ "sh %[tmp3], 0(%[store_ptr2]) \n\t"
+ "bgtz %[i], 1b \n\t"
+ " addiu %[coefs], %[coefs], 4 \n\t"  // delay slot: next pair of offsets
+ "b 3f \n\t"
+ " nop \n\t"
+ "2: \n\t"  // loop variant for shift < 0 (srav by shift1)
+ "lh %[tmp1], 0(%[load_ptr]) \n\t"
+ "lh %[tmp2], 0(%[hann]) \n\t"
+ "lh %[tmp3], 128(%[load_ptr]) \n\t"
+ "lh %[tmp4], 0(%[hann1]) \n\t"
+ "addiu %[i], %[i], -1 \n\t"
+ "mul %[tmp1], %[tmp1], %[tmp2] \n\t"
+ "mul %[tmp3], %[tmp3], %[tmp4] \n\t"
+ "lh %[f_coef], 0(%[coefs]) \n\t"
+ "lh %[s_coef], 2(%[coefs]) \n\t"
+ "addiu %[load_ptr], %[load_ptr], 2 \n\t"
+ "addiu %[hann], %[hann], 2 \n\t"
+ "addiu %[hann1], %[hann1], -2 \n\t"
+ "addu %[store_ptr1], %[fft], %[f_coef] \n\t"
+ "addu %[store_ptr2], %[fft], %[s_coef] \n\t"
+ "srav %[tmp1], %[tmp1], %[shift1] \n\t"
+ "srav %[tmp3], %[tmp3], %[shift1] \n\t"
+ "sh %[tmp1], 0(%[store_ptr1]) \n\t"
+ "sh %[tmp3], 0(%[store_ptr2]) \n\t"
+ "bgtz %[i], 2b \n\t"
+ " addiu %[coefs], %[coefs], 4 \n\t"
+ "3: \n\t"
+ ".set pop \n\t"
+ : [load_ptr] "=&r" (load_ptr), [shift] "=&r" (shift), [hann] "=&r" (hann),
+ [hann1] "=&r" (hann1), [shift1] "=&r" (shift1), [coefs] "=&r" (coefs),
+ [tmp1] "=&r" (tmp1), [tmp2] "=&r" (tmp2), [tmp3] "=&r" (tmp3),
+ [tmp4] "=&r" (tmp4), [i] "=&r" (i), [f_coef] "=&r" (f_coef),
+ [s_coef] "=&r" (s_coef), [store_ptr1] "=&r" (store_ptr1),
+ [store_ptr2] "=&r" (store_ptr2)
+ : [time_signal] "r" (time_signal), [coefTable] "r" (coefTable),
+ [time_signal_scaling] "r" (time_signal_scaling),
+ [hanning] "r" (WebRtcAecm_kSqrtHanning), [fft] "r" (fft)
+ : "memory", "hi", "lo"
+ );
+
+ WebRtcSpl_ComplexFFT(fft, PART_LEN_SHIFT, 1);
+ pfrfi = fft;
+ pfreq_signal = freq_signal;
+
+ __asm __volatile (  // Copies 128 halfwords to freq_signal, negating the second halfword of every pair.
+ ".set push \n\t"
+ ".set noreorder \n\t"
+ "addiu %[j], $zero, 128 \n\t"  // 128 halfwords, 8 per iteration
+ "1: \n\t"
+ "lh %[tmp1], 0(%[pfrfi]) \n\t"
+ "lh %[tmp2], 2(%[pfrfi]) \n\t"
+ "lh %[tmp3], 4(%[pfrfi]) \n\t"
+ "lh %[tmp4], 6(%[pfrfi]) \n\t"
+ "subu %[tmp2], $zero, %[tmp2] \n\t"  // negate halfword at offset 2
+ "sh %[tmp1], 0(%[pfreq_signal]) \n\t"
+ "sh %[tmp2], 2(%[pfreq_signal]) \n\t"
+ "subu %[tmp4], $zero, %[tmp4] \n\t"  // negate halfword at offset 6
+ "sh %[tmp3], 4(%[pfreq_signal]) \n\t"
+ "sh %[tmp4], 6(%[pfreq_signal]) \n\t"
+ "lh %[tmp1], 8(%[pfrfi]) \n\t"
+ "lh %[tmp2], 10(%[pfrfi]) \n\t"
+ "lh %[tmp3], 12(%[pfrfi]) \n\t"
+ "lh %[tmp4], 14(%[pfrfi]) \n\t"
+ "addiu %[j], %[j], -8 \n\t"
+ "subu %[tmp2], $zero, %[tmp2] \n\t"
+ "sh %[tmp1], 8(%[pfreq_signal]) \n\t"
+ "sh %[tmp2], 10(%[pfreq_signal]) \n\t"
+ "subu %[tmp4], $zero, %[tmp4] \n\t"
+ "sh %[tmp3], 12(%[pfreq_signal]) \n\t"
+ "sh %[tmp4], 14(%[pfreq_signal]) \n\t"
+ "addiu %[pfreq_signal], %[pfreq_signal], 16 \n\t"
+ "bgtz %[j], 1b \n\t"
+ " addiu %[pfrfi], %[pfrfi], 16 \n\t"  // delay slot: advance the source pointer
+ ".set pop \n\t"
+ : [tmp1] "=&r" (tmp1), [tmp2] "=&r" (tmp2), [tmp3] "=&r" (tmp3),
+ [j] "=&r" (j), [pfrfi] "+r" (pfrfi), [pfreq_signal] "+r" (pfreq_signal),
+ [tmp4] "=&r" (tmp4)
+ :
+ : "memory"
+ );
+}
+// Scatters |efw| and its conjugate into |fft|, runs the inverse FFT, then windows and adds aecm->outBuf into |output|.
+static void InverseFFTAndWindow(AecmCore* aecm,
+ int16_t* fft,
+ ComplexInt16* efw,
+ int16_t* output,
+ const int16_t* nearendClean) {
+ int i, outCFFT;
+ int32_t tmp1, tmp2, tmp3, tmp4, tmp_re, tmp_im;
+ int16_t* pcoefTable_ifft = coefTable_ifft;
+ int16_t* pfft = fft;
+ int16_t* ppfft = fft;
+ ComplexInt16* pefw = efw;
+ int32_t out_aecm;
+ int16_t* paecm_buf = aecm->outBuf;
+ const int16_t* p_kSqrtHanning = WebRtcAecm_kSqrtHanning;  // Window read forwards.
+ const int16_t* pp_kSqrtHanning = &WebRtcAecm_kSqrtHanning[PART_LEN];  // Window read backwards.
+ int16_t* output1 = output;
+
+ __asm __volatile (
+ ".set push \n\t"
+ ".set noreorder \n\t"
+ "addiu %[i], $zero, 64 \n\t"  // 64 bins, 4 per iteration
+ "1: \n\t"
+ "lh %[tmp1], 0(%[pcoefTable_ifft]) \n\t"  // tmp1, tmp2: two byte offsets into fft
+ "lh %[tmp2], 2(%[pcoefTable_ifft]) \n\t"
+ "lh %[tmp_re], 0(%[pefw]) \n\t"
+ "lh %[tmp_im], 2(%[pefw]) \n\t"
+ "addu %[pfft], %[fft], %[tmp2] \n\t"
+ "sh %[tmp_re], 0(%[pfft]) \n\t"  // (re, im) stored at fft + tmp2
+ "sh %[tmp_im], 2(%[pfft]) \n\t"
+ "addu %[pfft], %[fft], %[tmp1] \n\t"
+ "sh %[tmp_re], 0(%[pfft]) \n\t"  // (re, -im) stored at fft + tmp1
+ "subu %[tmp_im], $zero, %[tmp_im] \n\t"
+ "sh %[tmp_im], 2(%[pfft]) \n\t"
+ "lh %[tmp1], 4(%[pcoefTable_ifft]) \n\t"
+ "lh %[tmp2], 6(%[pcoefTable_ifft]) \n\t"
+ "lh %[tmp_re], 4(%[pefw]) \n\t"
+ "lh %[tmp_im], 6(%[pefw]) \n\t"
+ "addu %[pfft], %[fft], %[tmp2] \n\t"
+ "sh %[tmp_re], 0(%[pfft]) \n\t"
+ "sh %[tmp_im], 2(%[pfft]) \n\t"
+ "addu %[pfft], %[fft], %[tmp1] \n\t"
+ "sh %[tmp_re], 0(%[pfft]) \n\t"
+ "subu %[tmp_im], $zero, %[tmp_im] \n\t"
+ "sh %[tmp_im], 2(%[pfft]) \n\t"
+ "lh %[tmp1], 8(%[pcoefTable_ifft]) \n\t"
+ "lh %[tmp2], 10(%[pcoefTable_ifft]) \n\t"
+ "lh %[tmp_re], 8(%[pefw]) \n\t"
+ "lh %[tmp_im], 10(%[pefw]) \n\t"
+ "addu %[pfft], %[fft], %[tmp2] \n\t"
+ "sh %[tmp_re], 0(%[pfft]) \n\t"
+ "sh %[tmp_im], 2(%[pfft]) \n\t"
+ "addu %[pfft], %[fft], %[tmp1] \n\t"
+ "sh %[tmp_re], 0(%[pfft]) \n\t"
+ "subu %[tmp_im], $zero, %[tmp_im] \n\t"
+ "sh %[tmp_im], 2(%[pfft]) \n\t"
+ "lh %[tmp1], 12(%[pcoefTable_ifft]) \n\t"
+ "lh %[tmp2], 14(%[pcoefTable_ifft]) \n\t"
+ "lh %[tmp_re], 12(%[pefw]) \n\t"
+ "lh %[tmp_im], 14(%[pefw]) \n\t"
+ "addu %[pfft], %[fft], %[tmp2] \n\t"
+ "sh %[tmp_re], 0(%[pfft]) \n\t"
+ "sh %[tmp_im], 2(%[pfft]) \n\t"
+ "addu %[pfft], %[fft], %[tmp1] \n\t"
+ "sh %[tmp_re], 0(%[pfft]) \n\t"
+ "subu %[tmp_im], $zero, %[tmp_im] \n\t"
+ "sh %[tmp_im], 2(%[pfft]) \n\t"
+ "addiu %[pcoefTable_ifft], %[pcoefTable_ifft], 16 \n\t"
+ "addiu %[i], %[i], -4 \n\t"
+ "bgtz %[i], 1b \n\t"
+ " addiu %[pefw], %[pefw], 16 \n\t"  // delay slot: next four bins
+ ".set pop \n\t"
+ : [tmp1] "=&r" (tmp1), [tmp2] "=&r" (tmp2), [pfft] "+r" (pfft),
+ [i] "=&r" (i), [tmp_re] "=&r" (tmp_re), [tmp_im] "=&r" (tmp_im),
+ [pefw] "+r" (pefw), [pcoefTable_ifft] "+r" (pcoefTable_ifft),
+ [fft] "+r" (fft)
+ :
+ : "memory"
+ );
+
+ fft[2] = efw[PART_LEN].real;  // Bin PART_LEN is not covered by the loop above; placed at halfwords 2 and 3.
+ fft[3] = -efw[PART_LEN].imag;
+
+ outCFFT = WebRtcSpl_ComplexIFFT(fft, PART_LEN_SHIFT, 1);
+ pfft = fft;
+
+ __asm __volatile (  // Packs every other halfword of fft (offsets 0, 4, 8, 12) into consecutive halfwords, in place.
+ ".set push \n\t"
+ ".set noreorder \n\t"
+ "addiu %[i], $zero, 128 \n\t"  // 128 values, 4 per iteration
+ "1: \n\t"
+ "lh %[tmp1], 0(%[ppfft]) \n\t"
+ "lh %[tmp2], 4(%[ppfft]) \n\t"
+ "lh %[tmp3], 8(%[ppfft]) \n\t"
+ "lh %[tmp4], 12(%[ppfft]) \n\t"
+ "addiu %[i], %[i], -4 \n\t"
+ "sh %[tmp1], 0(%[pfft]) \n\t"
+ "sh %[tmp2], 2(%[pfft]) \n\t"
+ "sh %[tmp3], 4(%[pfft]) \n\t"
+ "sh %[tmp4], 6(%[pfft]) \n\t"
+ "addiu %[ppfft], %[ppfft], 16 \n\t"
+ "bgtz %[i], 1b \n\t"
+ " addiu %[pfft], %[pfft], 8 \n\t"
+ ".set pop \n\t"
+ : [tmp1] "=&r" (tmp1), [tmp2] "=&r" (tmp2), [pfft] "+r" (pfft),
+ [i] "=&r" (i), [tmp3] "=&r" (tmp3), [tmp4] "=&r" (tmp4),
+ [ppfft] "+r" (ppfft)
+ :
+ : "memory"
+ );
+
+ pfft = fft;
+ out_aecm = (int32_t)(outCFFT - aecm->dfaCleanQDomain);  // Shift applied below: left if >= 0, right otherwise.
+
+ __asm __volatile (
+ ".set push \n\t"
+ ".set noreorder \n\t"
+ "addiu %[i], $zero, 64 \n\t"  // 64 samples, 2 per iteration
+ "11: \n\t"
+ "lh %[tmp1], 0(%[pfft]) \n\t"
+ "lh %[tmp2], 0(%[p_kSqrtHanning]) \n\t"
+ "addiu %[i], %[i], -2 \n\t"
+ "mul %[tmp1], %[tmp1], %[tmp2] \n\t"
+ "lh %[tmp3], 2(%[pfft]) \n\t"
+ "lh %[tmp4], 2(%[p_kSqrtHanning]) \n\t"
+ "mul %[tmp3], %[tmp3], %[tmp4] \n\t"
+ "addiu %[tmp1], %[tmp1], 8192 \n\t"  // + 8192 then >> 14: rounded product
+ "sra %[tmp1], %[tmp1], 14 \n\t"
+ "addiu %[tmp3], %[tmp3], 8192 \n\t"
+ "sra %[tmp3], %[tmp3], 14 \n\t"
+ "bgez %[out_aecm], 1f \n\t"  // out_aecm >= 0: shift left at 1:
+ " negu %[tmp2], %[out_aecm] \n\t"
+ "srav %[tmp1], %[tmp1], %[tmp2] \n\t"
+ "b 2f \n\t"
+ " srav %[tmp3], %[tmp3], %[tmp2] \n\t"
+ "1: \n\t"
+ "sllv %[tmp1], %[tmp1], %[out_aecm] \n\t"
+ "sllv %[tmp3], %[tmp3], %[out_aecm] \n\t"
+ "2: \n\t"
+ "lh %[tmp4], 0(%[paecm_buf]) \n\t"  // add the samples held in aecm->outBuf
+ "lh %[tmp2], 2(%[paecm_buf]) \n\t"
+ "addu %[tmp3], %[tmp3], %[tmp2] \n\t"
+ "addu %[tmp1], %[tmp1], %[tmp4] \n\t"
+#if defined(MIPS_DSP_R1_LE)
+ "shll_s.w %[tmp1], %[tmp1], 16 \n\t"
+ "sra %[tmp1], %[tmp1], 16 \n\t"
+ "shll_s.w %[tmp3], %[tmp3], 16 \n\t"
+ "sra %[tmp3], %[tmp3], 16 \n\t"
+#else // #if defined(MIPS_DSP_R1_LE)
+ "sra %[tmp4], %[tmp1], 31 \n\t"  // manual clamp: if (x >> 15) != sign, x = 0x7fff ^ sign
+ "sra %[tmp2], %[tmp1], 15 \n\t"
+ "beq %[tmp4], %[tmp2], 3f \n\t"
+ " ori %[tmp2], $zero, 0x7fff \n\t"
+ "xor %[tmp1], %[tmp2], %[tmp4] \n\t"
+ "3: \n\t"
+ "sra %[tmp2], %[tmp3], 31 \n\t"
+ "sra %[tmp4], %[tmp3], 15 \n\t"
+ "beq %[tmp2], %[tmp4], 4f \n\t"
+ " ori %[tmp4], $zero, 0x7fff \n\t"
+ "xor %[tmp3], %[tmp4], %[tmp2] \n\t"
+ "4: \n\t"
+#endif // #if defined(MIPS_DSP_R1_LE)
+ "sh %[tmp1], 0(%[pfft]) \n\t"  // result written to both fft and output
+ "sh %[tmp1], 0(%[output1]) \n\t"
+ "sh %[tmp3], 2(%[pfft]) \n\t"
+ "sh %[tmp3], 2(%[output1]) \n\t"
+ "lh %[tmp1], 128(%[pfft]) \n\t"  // second half: 128 bytes (64 samples) further on
+ "lh %[tmp2], 0(%[pp_kSqrtHanning]) \n\t"
+ "mul %[tmp1], %[tmp1], %[tmp2] \n\t"
+ "lh %[tmp3], 130(%[pfft]) \n\t"
+ "lh %[tmp4], -2(%[pp_kSqrtHanning]) \n\t"
+ "mul %[tmp3], %[tmp3], %[tmp4] \n\t"
+ "sra %[tmp1], %[tmp1], 14 \n\t"  // >> 14 without the 8192 rounding term used above
+ "sra %[tmp3], %[tmp3], 14 \n\t"
+ "bgez %[out_aecm], 5f \n\t"
+ " negu %[tmp2], %[out_aecm] \n\t"
+ "srav %[tmp3], %[tmp3], %[tmp2] \n\t"
+ "b 6f \n\t"
+ " srav %[tmp1], %[tmp1], %[tmp2] \n\t"
+ "5: \n\t"
+ "sllv %[tmp1], %[tmp1], %[out_aecm] \n\t"
+ "sllv %[tmp3], %[tmp3], %[out_aecm] \n\t"
+ "6: \n\t"
+#if defined(MIPS_DSP_R1_LE)
+ "shll_s.w %[tmp1], %[tmp1], 16 \n\t"
+ "sra %[tmp1], %[tmp1], 16 \n\t"
+ "shll_s.w %[tmp3], %[tmp3], 16 \n\t"
+ "sra %[tmp3], %[tmp3], 16 \n\t"
+#else // #if defined(MIPS_DSP_R1_LE)
+ "sra %[tmp4], %[tmp1], 31 \n\t"
+ "sra %[tmp2], %[tmp1], 15 \n\t"
+ "beq %[tmp4], %[tmp2], 7f \n\t"
+ " ori %[tmp2], $zero, 0x7fff \n\t"
+ "xor %[tmp1], %[tmp2], %[tmp4] \n\t"
+ "7: \n\t"
+ "sra %[tmp2], %[tmp3], 31 \n\t"
+ "sra %[tmp4], %[tmp3], 15 \n\t"
+ "beq %[tmp2], %[tmp4], 8f \n\t"
+ " ori %[tmp4], $zero, 0x7fff \n\t"
+ "xor %[tmp3], %[tmp4], %[tmp2] \n\t"
+ "8: \n\t"
+#endif // #if defined(MIPS_DSP_R1_LE)
+ "sh %[tmp1], 0(%[paecm_buf]) \n\t"  // windowed second half stored into aecm->outBuf
+ "sh %[tmp3], 2(%[paecm_buf]) \n\t"
+ "addiu %[output1], %[output1], 4 \n\t"
+ "addiu %[paecm_buf], %[paecm_buf], 4 \n\t"
+ "addiu %[pfft], %[pfft], 4 \n\t"
+ "addiu %[p_kSqrtHanning], %[p_kSqrtHanning], 4 \n\t"
+ "bgtz %[i], 11b \n\t"
+ " addiu %[pp_kSqrtHanning], %[pp_kSqrtHanning], -4 \n\t"
+ ".set pop \n\t"
+ : [tmp1] "=&r" (tmp1), [tmp2] "=&r" (tmp2), [pfft] "+r" (pfft),
+ [output1] "+r" (output1), [tmp3] "=&r" (tmp3), [tmp4] "=&r" (tmp4),
+ [paecm_buf] "+r" (paecm_buf), [i] "=&r" (i),
+ [pp_kSqrtHanning] "+r" (pp_kSqrtHanning),
+ [p_kSqrtHanning] "+r" (p_kSqrtHanning)
+ : [out_aecm] "r" (out_aecm),
+ [WebRtcAecm_kSqrtHanning] "r" (WebRtcAecm_kSqrtHanning)  // NOTE(review): not referenced in the template above.
+ : "hi", "lo","memory"
+ );
+
+ // Copy the current block to the old position
+ // (aecm->outBuf is shifted elsewhere)
+ memcpy(aecm->xBuf, aecm->xBuf + PART_LEN, sizeof(int16_t) * PART_LEN);
+ memcpy(aecm->dBufNoisy,
+ aecm->dBufNoisy + PART_LEN,
+ sizeof(int16_t) * PART_LEN);
+ if (nearendClean != NULL) {  // dBufClean is only shifted when a clean signal was supplied.
+ memcpy(aecm->dBufClean,
+ aecm->dBufClean + PART_LEN,
+ sizeof(int16_t) * PART_LEN);
+ }
+}
+// Fills echo_est[i] = channelStored[i] * far_spectrum[i] and adds far/adaptive/stored sums to the three outputs.
+void WebRtcAecm_CalcLinearEnergies_mips(AecmCore* aecm,
+ const uint16_t* far_spectrum,
+ int32_t* echo_est,
+ uint32_t* far_energy,
+ uint32_t* echo_energy_adapt,
+ uint32_t* echo_energy_stored) {
+ int i;
+ uint32_t par1 = (*far_energy);  // Running sum of far_spectrum[].
+ uint32_t par2 = (*echo_energy_adapt);  // Running sum of channelAdapt16[] * far_spectrum[].
+ uint32_t par3 = (*echo_energy_stored);  // Running sum of channelStored[] * far_spectrum[].
+ int16_t* ch_stored_p = &(aecm->channelStored[0]);
+ int16_t* ch_adapt_p = &(aecm->channelAdapt16[0]);
+ uint16_t* spectrum_p = (uint16_t*)(&(far_spectrum[0]));
+ int32_t* echo_p = &(echo_est[0]);
+ int32_t temp0, stored0, echo0, adept0, spectrum0;
+ int32_t stored1, adept1, spectrum1, echo1, temp1;
+
+ // Get energy for the delayed far end signal and estimated
+ // echo using both stored and adapted channels.
+ for (i = 0; i < PART_LEN; i+= 4) {  // Four bins per iteration (two pairs in the asm body).
+ __asm __volatile (
+ ".set push \n\t"
+ ".set noreorder \n\t"
+ "lh %[stored0], 0(%[ch_stored_p]) \n\t"
+ "lhu %[adept0], 0(%[ch_adapt_p]) \n\t"  // unsigned load, unlike the signed lh used for channelStored
+ "lhu %[spectrum0], 0(%[spectrum_p]) \n\t"
+ "lh %[stored1], 2(%[ch_stored_p]) \n\t"
+ "lhu %[adept1], 2(%[ch_adapt_p]) \n\t"
+ "lhu %[spectrum1], 2(%[spectrum_p]) \n\t"
+ "mul %[echo0], %[stored0], %[spectrum0] \n\t"
+ "mul %[temp0], %[adept0], %[spectrum0] \n\t"
+ "mul %[echo1], %[stored1], %[spectrum1] \n\t"
+ "mul %[temp1], %[adept1], %[spectrum1] \n\t"
+ "addu %[par1], %[par1], %[spectrum0] \n\t"
+ "addu %[par1], %[par1], %[spectrum1] \n\t"
+ "addiu %[echo_p], %[echo_p], 16 \n\t"  // advanced early; the stores below use negative offsets
+ "addu %[par3], %[par3], %[echo0] \n\t"
+ "addu %[par2], %[par2], %[temp0] \n\t"
+ "addu %[par3], %[par3], %[echo1] \n\t"
+ "addu %[par2], %[par2], %[temp1] \n\t"
+ "usw %[echo0], -16(%[echo_p]) \n\t"
+ "usw %[echo1], -12(%[echo_p]) \n\t"
+ "lh %[stored0], 4(%[ch_stored_p]) \n\t"  // second pair of bins
+ "lhu %[adept0], 4(%[ch_adapt_p]) \n\t"
+ "lhu %[spectrum0], 4(%[spectrum_p]) \n\t"
+ "lh %[stored1], 6(%[ch_stored_p]) \n\t"
+ "lhu %[adept1], 6(%[ch_adapt_p]) \n\t"
+ "lhu %[spectrum1], 6(%[spectrum_p]) \n\t"
+ "mul %[echo0], %[stored0], %[spectrum0] \n\t"
+ "mul %[temp0], %[adept0], %[spectrum0] \n\t"
+ "mul %[echo1], %[stored1], %[spectrum1] \n\t"
+ "mul %[temp1], %[adept1], %[spectrum1] \n\t"
+ "addu %[par1], %[par1], %[spectrum0] \n\t"
+ "addu %[par1], %[par1], %[spectrum1] \n\t"
+ "addiu %[ch_stored_p], %[ch_stored_p], 8 \n\t"
+ "addiu %[ch_adapt_p], %[ch_adapt_p], 8 \n\t"
+ "addiu %[spectrum_p], %[spectrum_p], 8 \n\t"
+ "addu %[par3], %[par3], %[echo0] \n\t"
+ "addu %[par2], %[par2], %[temp0] \n\t"
+ "addu %[par3], %[par3], %[echo1] \n\t"
+ "addu %[par2], %[par2], %[temp1] \n\t"
+ "usw %[echo0], -8(%[echo_p]) \n\t"
+ "usw %[echo1], -4(%[echo_p]) \n\t"
+ ".set pop \n\t"
+ : [temp0] "=&r" (temp0), [stored0] "=&r" (stored0),
+ [adept0] "=&r" (adept0), [spectrum0] "=&r" (spectrum0),
+ [echo0] "=&r" (echo0), [echo_p] "+r" (echo_p), [par3] "+r" (par3),
+ [par1] "+r" (par1), [par2] "+r" (par2), [stored1] "=&r" (stored1),
+ [adept1] "=&r" (adept1), [echo1] "=&r" (echo1),
+ [spectrum1] "=&r" (spectrum1), [temp1] "=&r" (temp1),
+ [ch_stored_p] "+r" (ch_stored_p), [ch_adapt_p] "+r" (ch_adapt_p),
+ [spectrum_p] "+r" (spectrum_p)
+ :
+ : "hi", "lo", "memory"
+ );
+ }
+
+ echo_est[PART_LEN] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[PART_LEN],  // Last bin handled in C.
+ far_spectrum[PART_LEN]);
+ par1 += (uint32_t)(far_spectrum[PART_LEN]);
+ par2 += aecm->channelAdapt16[PART_LEN] * far_spectrum[PART_LEN];
+ par3 += (uint32_t)echo_est[PART_LEN];
+
+ (*far_energy) = par1;
+ (*echo_energy_adapt) = par2;
+ (*echo_energy_stored) = par3;
+}
+
+#if defined(MIPS_DSP_R1_LE)
+void WebRtcAecm_StoreAdaptiveChannel_mips(AecmCore* aecm,
+ const uint16_t* far_spectrum,
+ int32_t* echo_est) {  // Copies channelAdapt16 into channelStored, then recomputes echo_est[].
+ int i;
+ int16_t* temp1;
+ uint16_t* temp8;
+ int32_t temp0, temp2, temp3, temp4, temp5, temp6;
+ int32_t* temp7 = &(echo_est[0]);  // Output cursor.
+ temp1 = &(aecm->channelStored[0]);  // Channel cursor.
+ temp8 = (uint16_t*)(&far_spectrum[0]);  // Spectrum cursor.
+
+ // During startup we store the channel every block.
+ memcpy(aecm->channelStored, aecm->channelAdapt16,
+ sizeof(int16_t) * PART_LEN1);
+ // Recalculate echo estimate
+ for (i = 0; i < PART_LEN; i += 4) {  // Four bins per iteration.
+ __asm __volatile (
+ "ulw %[temp0], 0(%[temp8]) \n\t"  // one word = two 16-bit spectrum values
+ "ulw %[temp2], 0(%[temp1]) \n\t"  // one word = two 16-bit channel values
+ "ulw %[temp4], 4(%[temp8]) \n\t"
+ "ulw %[temp5], 4(%[temp1]) \n\t"
+ "muleq_s.w.phl %[temp3], %[temp2], %[temp0] \n\t"  // NOTE(review): presumably yields 2*a*b; confirm in the DSP ASE manual.
+ "muleq_s.w.phr %[temp0], %[temp2], %[temp0] \n\t"
+ "muleq_s.w.phl %[temp6], %[temp5], %[temp4] \n\t"
+ "muleq_s.w.phr %[temp4], %[temp5], %[temp4] \n\t"
+ "addiu %[temp7], %[temp7], 16 \n\t"
+ "addiu %[temp1], %[temp1], 8 \n\t"
+ "addiu %[temp8], %[temp8], 8 \n\t"
+ "sra %[temp3], %[temp3], 1 \n\t"  // halve each product
+ "sra %[temp0], %[temp0], 1 \n\t"
+ "sra %[temp6], %[temp6], 1 \n\t"
+ "sra %[temp4], %[temp4], 1 \n\t"
+ "usw %[temp3], -12(%[temp7]) \n\t"  // .phl result goes to the higher word, .phr to the lower
+ "usw %[temp0], -16(%[temp7]) \n\t"
+ "usw %[temp6], -4(%[temp7]) \n\t"
+ "usw %[temp4], -8(%[temp7]) \n\t"
+ : [temp0] "=&r" (temp0), [temp2] "=&r" (temp2), [temp3] "=&r" (temp3),
+ [temp4] "=&r" (temp4), [temp5] "=&r" (temp5), [temp6] "=&r" (temp6),
+ [temp1] "+r" (temp1), [temp8] "+r" (temp8), [temp7] "+r" (temp7)
+ :
+ : "hi", "lo", "memory"
+ );
+ }
+ echo_est[i] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i],  // i == PART_LEN here: last bin handled in C.
+ far_spectrum[i]);
+}
+// Copies channelStored into channelAdapt16 and rebuilds channelAdapt32 from channelStored.
+void WebRtcAecm_ResetAdaptiveChannel_mips(AecmCore* aecm) {
+ int i;
+ int32_t* temp3;
+ int16_t* temp0;
+ int32_t temp1, temp2, temp4, temp5;
+
+ temp0 = &(aecm->channelStored[0]);  // Source cursor (16-bit channel).
+ temp3 = &(aecm->channelAdapt32[0]);  // Destination cursor (32-bit channel).
+
+ // The stored channel has a significantly lower MSE than the adaptive one for
+ // two consecutive calculations. Reset the adaptive channel.
+ memcpy(aecm->channelAdapt16,
+ aecm->channelStored,
+ sizeof(int16_t) * PART_LEN1);
+
+ // Restore the W32 channel
+ for (i = 0; i < PART_LEN; i += 4) {  // Four coefficients per iteration.
+ __asm __volatile (
+ "ulw %[temp1], 0(%[temp0]) \n\t"  // one word = two 16-bit coefficients
+ "ulw %[temp4], 4(%[temp0]) \n\t"
+ "preceq.w.phl %[temp2], %[temp1] \n\t"  // NOTE(review): presumably halfword << 16, like the C tail below; confirm.
+ "preceq.w.phr %[temp1], %[temp1] \n\t"
+ "preceq.w.phl %[temp5], %[temp4] \n\t"
+ "preceq.w.phr %[temp4], %[temp4] \n\t"
+ "addiu %[temp0], %[temp0], 8 \n\t"
+ "usw %[temp2], 4(%[temp3]) \n\t"  // .phl result goes to the higher word, .phr to the lower
+ "usw %[temp1], 0(%[temp3]) \n\t"
+ "usw %[temp5], 12(%[temp3]) \n\t"
+ "usw %[temp4], 8(%[temp3]) \n\t"
+ "addiu %[temp3], %[temp3], 16 \n\t"
+ : [temp1] "=&r" (temp1), [temp2] "=&r" (temp2),
+ [temp4] "=&r" (temp4), [temp5] "=&r" (temp5),
+ [temp3] "+r" (temp3), [temp0] "+r" (temp0)
+ :
+ : "memory"
+ );
+ }
+
+ aecm->channelAdapt32[i] = (int32_t)aecm->channelStored[i] << 16;  // i == PART_LEN here: last coefficient in C.
+}
+#endif // #if defined(MIPS_DSP_R1_LE)
+
+// Transforms a time domain signal into the frequency domain, outputting the
+// complex valued signal, absolute value and sum of absolute values.
+//
+// aecm [in] Pointer to the AECM instance, forwarded to
+// WindowAndFFT
+// time_signal [in] Pointer to time domain signal
+// freq_signal [out] Pointer to the complex frequency domain array
+// (ComplexInt16 entries with .real / .imag)
+// freq_signal_abs [out] Pointer to absolute value of frequency domain
+// array
+// freq_signal_sum_abs [out] Pointer to the sum of all absolute values in
+// the frequency domain array
+// return value The Q-domain of current frequency values
+// (time_signal_scaling; stays 0 unless
+// AECM_DYNAMIC_Q is defined)
+//
+static int TimeToFrequencyDomain(AecmCore* aecm,
+ const int16_t* time_signal,
+ ComplexInt16* freq_signal,
+ uint16_t* freq_signal_abs,
+ uint32_t* freq_signal_sum_abs) {
+ int i = 0;
+ int time_signal_scaling = 0;
+
+ // In fft_buf, +16 for 32-byte alignment.
+ int16_t fft_buf[PART_LEN4 + 16];
+ int16_t *fft = (int16_t *) (((uintptr_t) fft_buf + 31) & ~31);
+
+ int16_t tmp16no1;
+#if !defined(MIPS_DSP_R2_LE)
+ int32_t tmp32no1;
+ int32_t tmp32no2;
+ int16_t tmp16no2;
+#else
+ int32_t tmp32no10, tmp32no11, tmp32no12, tmp32no13;
+ int32_t tmp32no20, tmp32no21, tmp32no22, tmp32no23;
+ int16_t* freqp;
+ uint16_t* freqabsp;
+ uint32_t freqt0, freqt1, freqt2, freqt3;
+ uint32_t freqs;
+#endif
+
+#ifdef AECM_DYNAMIC_Q
+ // Scale the input by the number of redundant sign bits of its largest
+ // absolute sample.
+ tmp16no1 = WebRtcSpl_MaxAbsValueW16(time_signal, PART_LEN2);
+ time_signal_scaling = WebRtcSpl_NormW16(tmp16no1);
+#endif
+
+ WindowAndFFT(aecm, fft, time_signal, freq_signal, time_signal_scaling);
+
+ // Extract imaginary and real part,
+ // calculate the magnitude for all frequency bins
+ // Bins 0 and PART_LEN get a zero imaginary part, so their magnitude is just
+ // |real|.
+ freq_signal[0].imag = 0;
+ freq_signal[PART_LEN].imag = 0;
+ freq_signal[PART_LEN].real = fft[PART_LEN2];
+ freq_signal_abs[0] = (uint16_t)WEBRTC_SPL_ABS_W16(freq_signal[0].real);
+ freq_signal_abs[PART_LEN] = (uint16_t)WEBRTC_SPL_ABS_W16(
+ freq_signal[PART_LEN].real);
+ (*freq_signal_sum_abs) = (uint32_t)(freq_signal_abs[0]) +
+ (uint32_t)(freq_signal_abs[PART_LEN]);
+
+#if !defined(MIPS_DSP_R2_LE)
+ for (i = 1; i < PART_LEN; i++) {
+ if (freq_signal[i].real == 0)
+ {
+ freq_signal_abs[i] = (uint16_t)WEBRTC_SPL_ABS_W16(
+ freq_signal[i].imag);
+ }
+ else if (freq_signal[i].imag == 0)
+ {
+ freq_signal_abs[i] = (uint16_t)WEBRTC_SPL_ABS_W16(
+ freq_signal[i].real);
+ }
+ else
+ {
+ // Magnitude of the complex fft output:
+ // magn = sqrt(real^2 + imag^2)
+ // computed as the floor square root of the saturated sum of squares
+ // (no alpha/beta approximation is used here).
+ tmp16no1 = WEBRTC_SPL_ABS_W16(freq_signal[i].real);
+ tmp16no2 = WEBRTC_SPL_ABS_W16(freq_signal[i].imag);
+ tmp32no1 = tmp16no1 * tmp16no1;
+ tmp32no2 = tmp16no2 * tmp16no2;
+ tmp32no2 = WebRtcSpl_AddSatW32(tmp32no1, tmp32no2);
+ tmp32no1 = WebRtcSpl_SqrtFloor(tmp32no2);
+
+ freq_signal_abs[i] = (uint16_t)tmp32no1;
+ }
+ (*freq_signal_sum_abs) += (uint32_t)freq_signal_abs[i];
+ }
+#else // #if !defined(MIPS_DSP_R2_LE)
+ // DSP R2 path: the running sum is kept in freqs and written back at the end.
+ freqs = (uint32_t)(freq_signal_abs[0]) +
+ (uint32_t)(freq_signal_abs[PART_LEN]);
+ freqp = &(freq_signal[1].real);
+
+ // Bins 1..3 are handled ahead of the loop so that the loop below can process
+ // four bins per iteration starting at bin 4. Each 32-bit load fetches one
+ // bin (real and imag halfwords).
+ // NOTE(review): dpaq_s.w.ph presumably accumulates the doubled products of
+ // both halfword pairs, so that extr.w with shift 1 yields real^2 + imag^2 --
+ // confirm against the MIPS DSP ASE manual.
+ __asm __volatile (
+ "lw %[freqt0], 0(%[freqp]) \n\t"
+ "lw %[freqt1], 4(%[freqp]) \n\t"
+ "lw %[freqt2], 8(%[freqp]) \n\t"
+ "mult $ac0, $zero, $zero \n\t"
+ "mult $ac1, $zero, $zero \n\t"
+ "mult $ac2, $zero, $zero \n\t"
+ "dpaq_s.w.ph $ac0, %[freqt0], %[freqt0] \n\t"
+ "dpaq_s.w.ph $ac1, %[freqt1], %[freqt1] \n\t"
+ "dpaq_s.w.ph $ac2, %[freqt2], %[freqt2] \n\t"
+ "addiu %[freqp], %[freqp], 12 \n\t"
+ "extr.w %[tmp32no20], $ac0, 1 \n\t"
+ "extr.w %[tmp32no21], $ac1, 1 \n\t"
+ "extr.w %[tmp32no22], $ac2, 1 \n\t"
+ : [freqt0] "=&r" (freqt0), [freqt1] "=&r" (freqt1),
+ [freqt2] "=&r" (freqt2), [freqp] "+r" (freqp),
+ [tmp32no20] "=r" (tmp32no20), [tmp32no21] "=r" (tmp32no21),
+ [tmp32no22] "=r" (tmp32no22)
+ :
+ : "memory", "hi", "lo", "$ac1hi", "$ac1lo", "$ac2hi", "$ac2lo"
+ );
+
+ tmp32no10 = WebRtcSpl_SqrtFloor(tmp32no20);
+ tmp32no11 = WebRtcSpl_SqrtFloor(tmp32no21);
+ tmp32no12 = WebRtcSpl_SqrtFloor(tmp32no22);
+ freq_signal_abs[1] = (uint16_t)tmp32no10;
+ freq_signal_abs[2] = (uint16_t)tmp32no11;
+ freq_signal_abs[3] = (uint16_t)tmp32no12;
+ freqs += (uint32_t)tmp32no10;
+ freqs += (uint32_t)tmp32no11;
+ freqs += (uint32_t)tmp32no12;
+ freqabsp = &(freq_signal_abs[4]);
+ for (i = 4; i < PART_LEN; i+=4)
+ {
+ // Sum of squares for four bins at once; freqp and freqabsp are advanced
+ // past the four bins inside this block.
+ __asm __volatile (
+ "ulw %[freqt0], 0(%[freqp]) \n\t"
+ "ulw %[freqt1], 4(%[freqp]) \n\t"
+ "ulw %[freqt2], 8(%[freqp]) \n\t"
+ "ulw %[freqt3], 12(%[freqp]) \n\t"
+ "mult $ac0, $zero, $zero \n\t"
+ "mult $ac1, $zero, $zero \n\t"
+ "mult $ac2, $zero, $zero \n\t"
+ "mult $ac3, $zero, $zero \n\t"
+ "dpaq_s.w.ph $ac0, %[freqt0], %[freqt0] \n\t"
+ "dpaq_s.w.ph $ac1, %[freqt1], %[freqt1] \n\t"
+ "dpaq_s.w.ph $ac2, %[freqt2], %[freqt2] \n\t"
+ "dpaq_s.w.ph $ac3, %[freqt3], %[freqt3] \n\t"
+ "addiu %[freqp], %[freqp], 16 \n\t"
+ "addiu %[freqabsp], %[freqabsp], 8 \n\t"
+ "extr.w %[tmp32no20], $ac0, 1 \n\t"
+ "extr.w %[tmp32no21], $ac1, 1 \n\t"
+ "extr.w %[tmp32no22], $ac2, 1 \n\t"
+ "extr.w %[tmp32no23], $ac3, 1 \n\t"
+ : [freqt0] "=&r" (freqt0), [freqt1] "=&r" (freqt1),
+ [freqt2] "=&r" (freqt2), [freqt3] "=&r" (freqt3),
+ [tmp32no20] "=r" (tmp32no20), [tmp32no21] "=r" (tmp32no21),
+ [tmp32no22] "=r" (tmp32no22), [tmp32no23] "=r" (tmp32no23),
+ [freqabsp] "+r" (freqabsp), [freqp] "+r" (freqp)
+ :
+ : "memory", "hi", "lo", "$ac1hi", "$ac1lo",
+ "$ac2hi", "$ac2lo", "$ac3hi", "$ac3lo"
+ );
+
+ tmp32no10 = WebRtcSpl_SqrtFloor(tmp32no20);
+ tmp32no11 = WebRtcSpl_SqrtFloor(tmp32no21);
+ tmp32no12 = WebRtcSpl_SqrtFloor(tmp32no22);
+ tmp32no13 = WebRtcSpl_SqrtFloor(tmp32no23);
+
+ // Store the four magnitudes (freqabsp was already advanced, hence the
+ // negative offsets) and add them to the running sum.
+ __asm __volatile (
+ "sh %[tmp32no10], -8(%[freqabsp]) \n\t"
+ "sh %[tmp32no11], -6(%[freqabsp]) \n\t"
+ "sh %[tmp32no12], -4(%[freqabsp]) \n\t"
+ "sh %[tmp32no13], -2(%[freqabsp]) \n\t"
+ "addu %[freqs], %[freqs], %[tmp32no10] \n\t"
+ "addu %[freqs], %[freqs], %[tmp32no11] \n\t"
+ "addu %[freqs], %[freqs], %[tmp32no12] \n\t"
+ "addu %[freqs], %[freqs], %[tmp32no13] \n\t"
+ : [freqs] "+r" (freqs)
+ : [tmp32no10] "r" (tmp32no10), [tmp32no11] "r" (tmp32no11),
+ [tmp32no12] "r" (tmp32no12), [tmp32no13] "r" (tmp32no13),
+ [freqabsp] "r" (freqabsp)
+ : "memory"
+ );
+ }
+
+ (*freq_signal_sum_abs) = freqs;
+#endif
+
+ return time_signal_scaling;
+}
+
+// Processes one block of PART_LEN samples: buffers the inputs, transforms them
+// to the frequency domain, estimates the delay, updates the channel estimate,
+// computes the Wiener filter gains hnl[], applies them (with optional NLP and
+// comfort noise) and synthesizes the output through InverseFFTAndWindow.
+//
+// aecm [in/out] AECM instance state
+// farend [in] PART_LEN far-end samples
+// nearendNoisy [in] PART_LEN noisy near-end samples
+// nearendClean [in] PART_LEN clean near-end samples, or NULL to use the
+// noisy signal in its place
+// output [out] output buffer, forwarded to InverseFFTAndWindow
+// return value 0 on success; -1 if adding the far spectrum, the
+// delay estimation or the far-end alignment fails
+//
+int WebRtcAecm_ProcessBlock(AecmCore* aecm,
+ const int16_t* farend,
+ const int16_t* nearendNoisy,
+ const int16_t* nearendClean,
+ int16_t* output) {
+ int i;
+ uint32_t xfaSum;
+ uint32_t dfaNoisySum;
+ uint32_t dfaCleanSum;
+ uint32_t echoEst32Gained;
+ uint32_t tmpU32;
+ int32_t tmp32no1;
+
+ uint16_t xfa[PART_LEN1];
+ uint16_t dfaNoisy[PART_LEN1];
+ uint16_t dfaClean[PART_LEN1];
+ uint16_t* ptrDfaClean = dfaClean;
+ const uint16_t* far_spectrum_ptr = NULL;
+
+ // 32 byte aligned buffers (with +8 or +16).
+ int16_t fft_buf[PART_LEN4 + 2 + 16]; // +2 to make a loop safe.
+ int32_t echoEst32_buf[PART_LEN1 + 8];
+ int32_t dfw_buf[PART_LEN2 + 8];
+ int32_t efw_buf[PART_LEN2 + 8];
+
+ // Round each buffer address up to the next 32-byte boundary. uintptr_t is
+ // used (as in TimeToFrequencyDomain) so the pointer value is never truncated.
+ int16_t* fft = (int16_t*)(((uintptr_t)fft_buf + 31) & ~(uintptr_t)31);
+ int32_t* echoEst32 =
+ (int32_t*)(((uintptr_t)echoEst32_buf + 31) & ~(uintptr_t)31);
+ ComplexInt16* dfw =
+ (ComplexInt16*)(((uintptr_t)dfw_buf + 31) & ~(uintptr_t)31);
+ ComplexInt16* efw =
+ (ComplexInt16*)(((uintptr_t)efw_buf + 31) & ~(uintptr_t)31);
+
+ int16_t hnl[PART_LEN1];
+ int16_t numPosCoef = 0;
+ int delay;
+ int16_t tmp16no1;
+ int16_t tmp16no2;
+ int16_t mu;
+ int16_t supGain;
+ int16_t zeros32, zeros16;
+ int16_t zerosDBufNoisy, zerosDBufClean, zerosXBuf;
+ int far_q;
+ int16_t resolutionDiff, qDomainDiff, dfa_clean_q_domain_diff;
+
+ const int kMinPrefBand = 4;
+ const int kMaxPrefBand = 24;
+ int32_t avgHnl32 = 0;
+
+ int32_t temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8;
+ int16_t* ptr;
+ int16_t* ptr1;
+ int16_t* er_ptr;
+ int16_t* dr_ptr;
+
+ // Cursors advanced by the inline assembly further down: ptr/ptr1 walk
+ // hnl[], er_ptr walks efw[] and dr_ptr walks dfw[].
+ ptr = &hnl[0];
+ ptr1 = &hnl[0];
+ er_ptr = &efw[0].real;
+ dr_ptr = &dfw[0].real;
+
+ // Determine startup state. There are three states:
+ // (0) the first CONV_LEN blocks
+ // (1) another CONV_LEN blocks
+ // (2) the rest
+
+ if (aecm->startupState < 2) {
+ aecm->startupState = (aecm->totCount >= CONV_LEN) +
+ (aecm->totCount >= CONV_LEN2);
+ }
+ // END: Determine startup state
+
+ // Buffer near and far end signals
+ memcpy(aecm->xBuf + PART_LEN, farend, sizeof(int16_t) * PART_LEN);
+ memcpy(aecm->dBufNoisy + PART_LEN,
+ nearendNoisy,
+ sizeof(int16_t) * PART_LEN);
+ if (nearendClean != NULL) {
+ memcpy(aecm->dBufClean + PART_LEN,
+ nearendClean,
+ sizeof(int16_t) * PART_LEN);
+ }
+
+ // Transform far end signal from time domain to frequency domain.
+ // Only xfa / xfaSum are kept from this call; dfw serves as scratch here and
+ // is overwritten by the near end transform below.
+ far_q = TimeToFrequencyDomain(aecm,
+ aecm->xBuf,
+ dfw,
+ xfa,
+ &xfaSum);
+
+ // Transform noisy near end signal from time domain to frequency domain.
+ zerosDBufNoisy = TimeToFrequencyDomain(aecm,
+ aecm->dBufNoisy,
+ dfw,
+ dfaNoisy,
+ &dfaNoisySum);
+ aecm->dfaNoisyQDomainOld = aecm->dfaNoisyQDomain;
+ aecm->dfaNoisyQDomain = (int16_t)zerosDBufNoisy;
+
+ if (nearendClean == NULL) {
+ ptrDfaClean = dfaNoisy;
+ aecm->dfaCleanQDomainOld = aecm->dfaNoisyQDomainOld;
+ aecm->dfaCleanQDomain = aecm->dfaNoisyQDomain;
+ dfaCleanSum = dfaNoisySum;
+ } else {
+ // Transform clean near end signal from time domain to frequency domain.
+ zerosDBufClean = TimeToFrequencyDomain(aecm,
+ aecm->dBufClean,
+ dfw,
+ dfaClean,
+ &dfaCleanSum);
+ aecm->dfaCleanQDomainOld = aecm->dfaCleanQDomain;
+ aecm->dfaCleanQDomain = (int16_t)zerosDBufClean;
+ }
+
+ // Get the delay
+ // Save far-end history and estimate delay
+ WebRtcAecm_UpdateFarHistory(aecm, xfa, far_q);
+
+ if (WebRtc_AddFarSpectrumFix(aecm->delay_estimator_farend, xfa, PART_LEN1,
+ far_q) == -1) {
+ return -1;
+ }
+ delay = WebRtc_DelayEstimatorProcessFix(aecm->delay_estimator,
+ dfaNoisy,
+ PART_LEN1,
+ zerosDBufNoisy);
+ if (delay == -1) {
+ return -1;
+ }
+ else if (delay == -2) {
+ // If the delay is unknown, we assume zero.
+ // NOTE: this will have to be adjusted if we ever add lookahead.
+ delay = 0;
+ }
+
+ if (aecm->fixedDelay >= 0) {
+ // Use fixed delay
+ delay = aecm->fixedDelay;
+ }
+
+ // Get aligned far end spectrum
+ far_spectrum_ptr = WebRtcAecm_AlignedFarend(aecm, &far_q, delay);
+ zerosXBuf = (int16_t) far_q;
+
+ if (far_spectrum_ptr == NULL) {
+ return -1;
+ }
+
+ // Calculate log(energy) and update energy threshold levels
+ WebRtcAecm_CalcEnergies(aecm,
+ far_spectrum_ptr,
+ zerosXBuf,
+ dfaNoisySum,
+ echoEst32);
+ // Calculate stepsize
+ mu = WebRtcAecm_CalcStepSize(aecm);
+
+ // Update counters
+ aecm->totCount++;
+
+ // This is the channel estimation algorithm.
+ // It is based on NLMS but has a variable step length,
+ // which was calculated above.
+ WebRtcAecm_UpdateChannel(aecm,
+ far_spectrum_ptr,
+ zerosXBuf,
+ dfaNoisy,
+ mu,
+ echoEst32);
+
+ supGain = WebRtcAecm_CalcSuppressionGain(aecm);
+
+ // Calculate Wiener filter hnl[]
+ for (i = 0; i < PART_LEN1; i++) {
+ // Far end signal through channel estimate in Q8
+ // How much can we shift right to preserve resolution
+ tmp32no1 = echoEst32[i] - aecm->echoFilt[i];
+ aecm->echoFilt[i] += (tmp32no1 * 50) >> 8;
+
+ zeros32 = WebRtcSpl_NormW32(aecm->echoFilt[i]) + 1;
+ zeros16 = WebRtcSpl_NormW16(supGain) + 1;
+ if (zeros32 + zeros16 > 16) {
+ // Multiplication is safe
+ // Result in
+ // Q(RESOLUTION_CHANNEL+RESOLUTION_SUPGAIN+aecm->xfaQDomainBuf[diff])
+ echoEst32Gained = WEBRTC_SPL_UMUL_32_16((uint32_t)aecm->echoFilt[i],
+ (uint16_t)supGain);
+ resolutionDiff = 14 - RESOLUTION_CHANNEL16 - RESOLUTION_SUPGAIN;
+ resolutionDiff += (aecm->dfaCleanQDomain - zerosXBuf);
+ } else {
+ tmp16no1 = 17 - zeros32 - zeros16;
+ resolutionDiff = 14 + tmp16no1 - RESOLUTION_CHANNEL16 -
+ RESOLUTION_SUPGAIN;
+ resolutionDiff += (aecm->dfaCleanQDomain - zerosXBuf);
+ if (zeros32 > tmp16no1) {
+ echoEst32Gained = WEBRTC_SPL_UMUL_32_16(
+ (uint32_t)aecm->echoFilt[i],
+ supGain >> tmp16no1);
+ } else {
+ // Result in Q-(RESOLUTION_CHANNEL+RESOLUTION_SUPGAIN-16)
+ echoEst32Gained = (aecm->echoFilt[i] >> tmp16no1) * supGain;
+ }
+ }
+
+ // Bring nearFilt[i] and the current near end magnitude into a common
+ // Q-domain before smoothing.
+ zeros16 = WebRtcSpl_NormW16(aecm->nearFilt[i]);
+ assert(zeros16 >= 0); // |zeros16| is a norm, hence non-negative.
+ dfa_clean_q_domain_diff = aecm->dfaCleanQDomain - aecm->dfaCleanQDomainOld;
+ if (zeros16 < dfa_clean_q_domain_diff && aecm->nearFilt[i]) {
+ tmp16no1 = aecm->nearFilt[i] << zeros16;
+ qDomainDiff = zeros16 - dfa_clean_q_domain_diff;
+ tmp16no2 = ptrDfaClean[i] >> -qDomainDiff;
+ } else {
+ tmp16no1 = dfa_clean_q_domain_diff < 0
+ ? aecm->nearFilt[i] >> -dfa_clean_q_domain_diff
+ : aecm->nearFilt[i] << dfa_clean_q_domain_diff;
+ qDomainDiff = 0;
+ tmp16no2 = ptrDfaClean[i];
+ }
+
+ // First order smoothing: nearFilt += (dfa - nearFilt) / 16.
+ tmp32no1 = (int32_t)(tmp16no2 - tmp16no1);
+ tmp16no2 = (int16_t)(tmp32no1 >> 4);
+ tmp16no2 += tmp16no1;
+ zeros16 = WebRtcSpl_NormW16(tmp16no2);
+ // Saturate when a non-zero value cannot be shifted left by -qDomainDiff
+ // without overflowing. This must be a logical AND: a bitwise AND with the
+ // 0/1 comparison result would only test the lowest bit of tmp16no2.
+ if (tmp16no2 != 0 && (-qDomainDiff > zeros16)) {
+ aecm->nearFilt[i] = WEBRTC_SPL_WORD16_MAX;
+ } else {
+ aecm->nearFilt[i] = qDomainDiff < 0 ? tmp16no2 << -qDomainDiff
+ : tmp16no2 >> qDomainDiff;
+ }
+
+ // Wiener filter coefficients, resulting hnl in Q14
+ if (echoEst32Gained == 0) {
+ hnl[i] = ONE_Q14;
+ numPosCoef++;
+ } else if (aecm->nearFilt[i] == 0) {
+ hnl[i] = 0;
+ } else {
+ // Multiply the suppression gain
+ // Rounding
+ echoEst32Gained += (uint32_t)(aecm->nearFilt[i] >> 1);
+ tmpU32 = WebRtcSpl_DivU32U16(echoEst32Gained,
+ (uint16_t)aecm->nearFilt[i]);
+
+ // Current resolution is
+ // Q-(RESOLUTION_CHANNEL + RESOLUTION_SUPGAIN
+ // - max(0, 17 - zeros16 - zeros32))
+ // Make sure we are in Q14
+ tmp32no1 = (int32_t)WEBRTC_SPL_SHIFT_W32(tmpU32, resolutionDiff);
+ if (tmp32no1 > ONE_Q14) {
+ hnl[i] = 0;
+ } else if (tmp32no1 < 0) {
+ hnl[i] = ONE_Q14;
+ numPosCoef++;
+ } else {
+ // 1-echoEst/dfa
+ hnl[i] = ONE_Q14 - (int16_t)tmp32no1;
+ if (hnl[i] <= 0) {
+ hnl[i] = 0;
+ } else {
+ numPosCoef++;
+ }
+ }
+ }
+ }
+
+ // Only in wideband. Prevent the gain in upper band from being larger than
+ // in lower band.
+ if (aecm->mult == 2) {
+ // TODO(bjornv): Investigate if the scaling of hnl[i] below can cause
+ // speech distortion in double-talk.
+ // Square every hnl[] entry in Q14: hnl = (hnl * hnl) >> 14, eight entries
+ // per iteration; the remaining PART_LEN1 & 7 entries follow one at a time.
+ for (i = 0; i < (PART_LEN1 >> 3); i++) {
+ __asm __volatile (
+ "lh %[temp1], 0(%[ptr1]) \n\t"
+ "lh %[temp2], 2(%[ptr1]) \n\t"
+ "lh %[temp3], 4(%[ptr1]) \n\t"
+ "lh %[temp4], 6(%[ptr1]) \n\t"
+ "lh %[temp5], 8(%[ptr1]) \n\t"
+ "lh %[temp6], 10(%[ptr1]) \n\t"
+ "lh %[temp7], 12(%[ptr1]) \n\t"
+ "lh %[temp8], 14(%[ptr1]) \n\t"
+ "mul %[temp1], %[temp1], %[temp1] \n\t"
+ "mul %[temp2], %[temp2], %[temp2] \n\t"
+ "mul %[temp3], %[temp3], %[temp3] \n\t"
+ "mul %[temp4], %[temp4], %[temp4] \n\t"
+ "mul %[temp5], %[temp5], %[temp5] \n\t"
+ "mul %[temp6], %[temp6], %[temp6] \n\t"
+ "mul %[temp7], %[temp7], %[temp7] \n\t"
+ "mul %[temp8], %[temp8], %[temp8] \n\t"
+ "sra %[temp1], %[temp1], 14 \n\t"
+ "sra %[temp2], %[temp2], 14 \n\t"
+ "sra %[temp3], %[temp3], 14 \n\t"
+ "sra %[temp4], %[temp4], 14 \n\t"
+ "sra %[temp5], %[temp5], 14 \n\t"
+ "sra %[temp6], %[temp6], 14 \n\t"
+ "sra %[temp7], %[temp7], 14 \n\t"
+ "sra %[temp8], %[temp8], 14 \n\t"
+ "sh %[temp1], 0(%[ptr1]) \n\t"
+ "sh %[temp2], 2(%[ptr1]) \n\t"
+ "sh %[temp3], 4(%[ptr1]) \n\t"
+ "sh %[temp4], 6(%[ptr1]) \n\t"
+ "sh %[temp5], 8(%[ptr1]) \n\t"
+ "sh %[temp6], 10(%[ptr1]) \n\t"
+ "sh %[temp7], 12(%[ptr1]) \n\t"
+ "sh %[temp8], 14(%[ptr1]) \n\t"
+ "addiu %[ptr1], %[ptr1], 16 \n\t"
+ : [temp1] "=&r" (temp1), [temp2] "=&r" (temp2), [temp3] "=&r" (temp3),
+ [temp4] "=&r" (temp4), [temp5] "=&r" (temp5), [temp6] "=&r" (temp6),
+ [temp7] "=&r" (temp7), [temp8] "=&r" (temp8), [ptr1] "+r" (ptr1)
+ :
+ : "memory", "hi", "lo"
+ );
+ }
+ for(i = 0; i < (PART_LEN1 & 7); i++) {
+ __asm __volatile (
+ "lh %[temp1], 0(%[ptr1]) \n\t"
+ "mul %[temp1], %[temp1], %[temp1] \n\t"
+ "sra %[temp1], %[temp1], 14 \n\t"
+ "sh %[temp1], 0(%[ptr1]) \n\t"
+ "addiu %[ptr1], %[ptr1], 2 \n\t"
+ : [temp1] "=&r" (temp1), [ptr1] "+r" (ptr1)
+ :
+ : "memory", "hi", "lo"
+ );
+ }
+
+ // Average gain over the preferred band, used as a ceiling for the bins
+ // from kMaxPrefBand upwards.
+ for (i = kMinPrefBand; i <= kMaxPrefBand; i++) {
+ avgHnl32 += (int32_t)hnl[i];
+ }
+
+ assert(kMaxPrefBand - kMinPrefBand + 1 > 0);
+ avgHnl32 /= (kMaxPrefBand - kMinPrefBand + 1);
+
+ for (i = kMaxPrefBand; i < PART_LEN1; i++) {
+ if (hnl[i] > (int16_t)avgHnl32) {
+ hnl[i] = (int16_t)avgHnl32;
+ }
+ }
+ }
+
+ // Calculate NLP gain, result is in Q14
+ if (aecm->nlpFlag) {
+ if (numPosCoef < 3) {
+ for (i = 0; i < PART_LEN1; i++) {
+ efw[i].real = 0;
+ efw[i].imag = 0;
+ hnl[i] = 0;
+ }
+ } else {
+ // Per bin: hnl above ONE_Q14 (0x4000) is clamped to ONE_Q14, hnl below
+ // 3277 is zeroed together with the output bin, and otherwise
+ // efw = (dfw * hnl) >> 14 with rounding.
+ // NOTE(review): on the "3:" path (hnl > ONE_Q14) the branch is taken
+ // before the delay-slot increment of dr_ptr executes, so dr_ptr is not
+ // advanced for that bin. The gain loop above never stores a value above
+ // ONE_Q14, so this path looks unreachable -- confirm before relying on it.
+ for (i = 0; i < PART_LEN1; i++) {
+#if defined(MIPS_DSP_R1_LE)
+ __asm __volatile (
+ ".set push \n\t"
+ ".set noreorder \n\t"
+ "lh %[temp1], 0(%[ptr]) \n\t"
+ "lh %[temp2], 0(%[dr_ptr]) \n\t"
+ "slti %[temp4], %[temp1], 0x4001 \n\t"
+ "beqz %[temp4], 3f \n\t"
+ " lh %[temp3], 2(%[dr_ptr]) \n\t"
+ "slti %[temp5], %[temp1], 3277 \n\t"
+ "bnez %[temp5], 2f \n\t"
+ " addiu %[dr_ptr], %[dr_ptr], 4 \n\t"
+ "mul %[temp2], %[temp2], %[temp1] \n\t"
+ "mul %[temp3], %[temp3], %[temp1] \n\t"
+ "shra_r.w %[temp2], %[temp2], 14 \n\t"
+ "shra_r.w %[temp3], %[temp3], 14 \n\t"
+ "b 4f \n\t"
+ " nop \n\t"
+ "2: \n\t"
+ "addu %[temp1], $zero, $zero \n\t"
+ "addu %[temp2], $zero, $zero \n\t"
+ "addu %[temp3], $zero, $zero \n\t"
+ "b 1f \n\t"
+ " nop \n\t"
+ "3: \n\t"
+ "addiu %[temp1], $0, 0x4000 \n\t"
+ "1: \n\t"
+ "sh %[temp1], 0(%[ptr]) \n\t"
+ "4: \n\t"
+ "sh %[temp2], 0(%[er_ptr]) \n\t"
+ "sh %[temp3], 2(%[er_ptr]) \n\t"
+ "addiu %[ptr], %[ptr], 2 \n\t"
+ "addiu %[er_ptr], %[er_ptr], 4 \n\t"
+ ".set pop \n\t"
+ : [temp1] "=&r" (temp1), [temp2] "=&r" (temp2), [temp3] "=&r" (temp3),
+ [temp4] "=&r" (temp4), [temp5] "=&r" (temp5), [ptr] "+r" (ptr),
+ [er_ptr] "+r" (er_ptr), [dr_ptr] "+r" (dr_ptr)
+ :
+ : "memory", "hi", "lo"
+ );
+#else
+ // Same as above without the DSP rounding shift: 0x2000 is added before
+ // the arithmetic shift by 14.
+ __asm __volatile (
+ ".set push \n\t"
+ ".set noreorder \n\t"
+ "lh %[temp1], 0(%[ptr]) \n\t"
+ "lh %[temp2], 0(%[dr_ptr]) \n\t"
+ "slti %[temp4], %[temp1], 0x4001 \n\t"
+ "beqz %[temp4], 3f \n\t"
+ " lh %[temp3], 2(%[dr_ptr]) \n\t"
+ "slti %[temp5], %[temp1], 3277 \n\t"
+ "bnez %[temp5], 2f \n\t"
+ " addiu %[dr_ptr], %[dr_ptr], 4 \n\t"
+ "mul %[temp2], %[temp2], %[temp1] \n\t"
+ "mul %[temp3], %[temp3], %[temp1] \n\t"
+ "addiu %[temp2], %[temp2], 0x2000 \n\t"
+ "addiu %[temp3], %[temp3], 0x2000 \n\t"
+ "sra %[temp2], %[temp2], 14 \n\t"
+ "sra %[temp3], %[temp3], 14 \n\t"
+ "b 4f \n\t"
+ " nop \n\t"
+ "2: \n\t"
+ "addu %[temp1], $zero, $zero \n\t"
+ "addu %[temp2], $zero, $zero \n\t"
+ "addu %[temp3], $zero, $zero \n\t"
+ "b 1f \n\t"
+ " nop \n\t"
+ "3: \n\t"
+ "addiu %[temp1], $0, 0x4000 \n\t"
+ "1: \n\t"
+ "sh %[temp1], 0(%[ptr]) \n\t"
+ "4: \n\t"
+ "sh %[temp2], 0(%[er_ptr]) \n\t"
+ "sh %[temp3], 2(%[er_ptr]) \n\t"
+ "addiu %[ptr], %[ptr], 2 \n\t"
+ "addiu %[er_ptr], %[er_ptr], 4 \n\t"
+ ".set pop \n\t"
+ : [temp1] "=&r" (temp1), [temp2] "=&r" (temp2), [temp3] "=&r" (temp3),
+ [temp4] "=&r" (temp4), [temp5] "=&r" (temp5), [ptr] "+r" (ptr),
+ [er_ptr] "+r" (er_ptr), [dr_ptr] "+r" (dr_ptr)
+ :
+ : "memory", "hi", "lo"
+ );
+#endif
+ }
+ }
+ }
+ else {
+ // multiply with Wiener coefficients
+ for (i = 0; i < PART_LEN1; i++) {
+ efw[i].real = (int16_t)
+ (WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(dfw[i].real,
+ hnl[i],
+ 14));
+ efw[i].imag = (int16_t)
+ (WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(dfw[i].imag,
+ hnl[i],
+ 14));
+ }
+ }
+
+ if (aecm->cngMode == AecmTrue) {
+ ComfortNoise(aecm, ptrDfaClean, efw, hnl);
+ }
+
+ InverseFFTAndWindow(aecm, fft, efw, output, nearendClean);
+
+ return 0;
+}
+
+// Generate comfort noise and add to output signal.
+//
+// aecm [in/out] AECM instance; noiseEst, noiseEstTooLowCtr,
+// noiseEstTooHighCtr, noiseEstCtr and seed are updated
+// dfa [in] near end magnitude spectrum (bins 1..PART_LEN are read)
+// out [in/out] complex spectrum the generated noise is added to
+// lambda [in] per-bin gain; the noise is weighted by
+// (ONE_Q14 - lambda[i])
+//
+static void ComfortNoise(AecmCore* aecm,
+ const uint16_t* dfa,
+ ComplexInt16* out,
+ const int16_t* lambda) {
+ int16_t i;
+ int16_t tmp16, tmp161, tmp162, tmp163, nrsh1, nrsh2;
+ int32_t tmp32, tmp321, tnoise, tnoise1;
+ int32_t tmp322, tmp323, *tmp1;
+ int16_t* dfap;
+ int16_t* lambdap;
+ const int32_t c2049 = 2049;
+ const int32_t c359 = 359;
+ const int32_t c114 = ONE_Q14;
+
+ int16_t randW16[PART_LEN];
+ int16_t uReal[PART_LEN1];
+ int16_t uImag[PART_LEN1];
+ int32_t outLShift32;
+
+ int16_t shiftFromNearToNoise = kNoiseEstQDomain - aecm->dfaCleanQDomain;
+ int16_t minTrackShift = 9;
+
+ assert(shiftFromNearToNoise >= 0);
+ assert(shiftFromNearToNoise < 16);
+
+ if (aecm->noiseEstCtr < 100) {
+ // Track the minimum more quickly initially.
+ aecm->noiseEstCtr++;
+ minTrackShift = 6;
+ }
+
+ // Generate a uniform random array on [0 2^15-1].
+ WebRtcSpl_RandUArray(randW16, PART_LEN, &aecm->seed);
+ int16_t* randW16p = (int16_t*)randW16;
+#if defined (MIPS_DSP_R1_LE)
+ int16_t* kCosTablep = (int16_t*)WebRtcAecm_kCosTable;
+ int16_t* kSinTablep = (int16_t*)WebRtcAecm_kSinTable;
+#endif // #if defined(MIPS_DSP_R1_LE)
+ // All cursors start at bin 1; bin 0 gets no noise.
+ tmp1 = (int32_t*)aecm->noiseEst + 1;
+ dfap = (int16_t*)dfa + 1;
+ lambdap = (int16_t*)lambda + 1;
+ // Estimate noise power.
+ // Two bins (i and i + 1) are processed per iteration.
+ for (i = 1; i < PART_LEN1; i+=2) {
+ // Shift to the noise domain.
+ __asm __volatile (
+ "lh %[tmp32], 0(%[dfap]) \n\t"
+ "lw %[tnoise], 0(%[tmp1]) \n\t"
+ "sllv %[outLShift32], %[tmp32], %[shiftFromNearToNoise] \n\t"
+ : [tmp32] "=&r" (tmp32), [outLShift32] "=r" (outLShift32),
+ [tnoise] "=&r" (tnoise)
+ : [tmp1] "r" (tmp1), [dfap] "r" (dfap),
+ [shiftFromNearToNoise] "r" (shiftFromNearToNoise)
+ : "memory"
+ );
+
+ if (outLShift32 < tnoise) {
+ // Reset "too low" counter
+ aecm->noiseEstTooLowCtr[i] = 0;
+ // Track the minimum.
+ if (tnoise < (1 << minTrackShift)) {
+ // For small values, decrease noiseEst[i] every
+ // |kNoiseEstIncCount| block. The regular approach below can not
+ // go further down due to truncation.
+ aecm->noiseEstTooHighCtr[i]++;
+ if (aecm->noiseEstTooHighCtr[i] >= kNoiseEstIncCount) {
+ tnoise--;
+ aecm->noiseEstTooHighCtr[i] = 0; // Reset the counter
+ }
+ } else {
+ // tnoise -= (tnoise - outLShift32) >> minTrackShift
+ __asm __volatile (
+ "subu %[tmp32], %[tnoise], %[outLShift32] \n\t"
+ "srav %[tmp32], %[tmp32], %[minTrackShift] \n\t"
+ "subu %[tnoise], %[tnoise], %[tmp32] \n\t"
+ : [tmp32] "=&r" (tmp32), [tnoise] "+r" (tnoise)
+ : [outLShift32] "r" (outLShift32), [minTrackShift] "r" (minTrackShift)
+ );
+ }
+ } else {
+ // Reset "too high" counter
+ aecm->noiseEstTooHighCtr[i] = 0;
+ // Ramp slowly upwards until we hit the minimum again.
+ if ((tnoise >> 19) <= 0) {
+ if ((tnoise >> 11) > 0) {
+ // Large enough for relative increase
+ __asm __volatile (
+ "mul %[tnoise], %[tnoise], %[c2049] \n\t"
+ "sra %[tnoise], %[tnoise], 11 \n\t"
+ : [tnoise] "+r" (tnoise)
+ : [c2049] "r" (c2049)
+ : "hi", "lo"
+ );
+ } else {
+ // Make incremental increases based on size every
+ // |kNoiseEstIncCount| block
+ aecm->noiseEstTooLowCtr[i]++;
+ if (aecm->noiseEstTooLowCtr[i] >= kNoiseEstIncCount) {
+ // tnoise += (tnoise >> 9) + 1
+ __asm __volatile (
+ "sra %[tmp32], %[tnoise], 9 \n\t"
+ "addi %[tnoise], %[tnoise], 1 \n\t"
+ "addu %[tnoise], %[tnoise], %[tmp32] \n\t"
+ : [tnoise] "+r" (tnoise), [tmp32] "=&r" (tmp32)
+ :
+ );
+ aecm->noiseEstTooLowCtr[i] = 0; // Reset counter
+ }
+ }
+ } else {
+ // Avoid overflow.
+ // Multiplication with 2049 will cause wrap around. Scale
+ // down first and then multiply
+ __asm __volatile (
+ "sra %[tnoise], %[tnoise], 11 \n\t"
+ "mul %[tnoise], %[tnoise], %[c2049] \n\t"
+ : [tnoise] "+r" (tnoise)
+ : [c2049] "r" (c2049)
+ : "hi", "lo"
+ );
+ }
+ }
+
+ // Shift to the noise domain.
+ // Second bin of the pair (i + 1); dfap is advanced past both bins here.
+ __asm __volatile (
+ "lh %[tmp32], 2(%[dfap]) \n\t"
+ "lw %[tnoise1], 4(%[tmp1]) \n\t"
+ "addiu %[dfap], %[dfap], 4 \n\t"
+ "sllv %[outLShift32], %[tmp32], %[shiftFromNearToNoise] \n\t"
+ : [tmp32] "=&r" (tmp32), [dfap] "+r" (dfap),
+ [outLShift32] "=r" (outLShift32), [tnoise1] "=&r" (tnoise1)
+ : [tmp1] "r" (tmp1), [shiftFromNearToNoise] "r" (shiftFromNearToNoise)
+ : "memory"
+ );
+
+ if (outLShift32 < tnoise1) {
+ // Reset "too low" counter
+ aecm->noiseEstTooLowCtr[i + 1] = 0;
+ // Track the minimum.
+ if (tnoise1 < (1 << minTrackShift)) {
+ // For small values, decrease noiseEst[i] every
+ // |kNoiseEstIncCount| block. The regular approach below can not
+ // go further down due to truncation.
+ aecm->noiseEstTooHighCtr[i + 1]++;
+ if (aecm->noiseEstTooHighCtr[i + 1] >= kNoiseEstIncCount) {
+ tnoise1--;
+ aecm->noiseEstTooHighCtr[i + 1] = 0; // Reset the counter
+ }
+ } else {
+ __asm __volatile (
+ "subu %[tmp32], %[tnoise1], %[outLShift32] \n\t"
+ "srav %[tmp32], %[tmp32], %[minTrackShift] \n\t"
+ "subu %[tnoise1], %[tnoise1], %[tmp32] \n\t"
+ : [tmp32] "=&r" (tmp32), [tnoise1] "+r" (tnoise1)
+ : [outLShift32] "r" (outLShift32), [minTrackShift] "r" (minTrackShift)
+ );
+ }
+ } else {
+ // Reset "too high" counter
+ aecm->noiseEstTooHighCtr[i + 1] = 0;
+ // Ramp slowly upwards until we hit the minimum again.
+ if ((tnoise1 >> 19) <= 0) {
+ if ((tnoise1 >> 11) > 0) {
+ // Large enough for relative increase
+ __asm __volatile (
+ "mul %[tnoise1], %[tnoise1], %[c2049] \n\t"
+ "sra %[tnoise1], %[tnoise1], 11 \n\t"
+ : [tnoise1] "+r" (tnoise1)
+ : [c2049] "r" (c2049)
+ : "hi", "lo"
+ );
+ } else {
+ // Make incremental increases based on size every
+ // |kNoiseEstIncCount| block
+ aecm->noiseEstTooLowCtr[i + 1]++;
+ if (aecm->noiseEstTooLowCtr[i + 1] >= kNoiseEstIncCount) {
+ __asm __volatile (
+ "sra %[tmp32], %[tnoise1], 9 \n\t"
+ "addi %[tnoise1], %[tnoise1], 1 \n\t"
+ "addu %[tnoise1], %[tnoise1], %[tmp32] \n\t"
+ : [tnoise1] "+r" (tnoise1), [tmp32] "=&r" (tmp32)
+ :
+ );
+ aecm->noiseEstTooLowCtr[i + 1] = 0; // Reset counter
+ }
+ }
+ } else {
+ // Avoid overflow.
+ // Multiplication with 2049 will cause wrap around. Scale
+ // down first and then multiply
+ __asm __volatile (
+ "sra %[tnoise1], %[tnoise1], 11 \n\t"
+ "mul %[tnoise1], %[tnoise1], %[c2049] \n\t"
+ : [tnoise1] "+r" (tnoise1)
+ : [c2049] "r" (c2049)
+ : "hi", "lo"
+ );
+ }
+ }
+
+ // Write both updated estimates back, form the weights
+ // ONE_Q14 - lambda[i] / lambda[i + 1], and shift the estimates back from
+ // the noise domain. lambdap and tmp1 advance by two bins.
+ __asm __volatile (
+ "lh %[tmp16], 0(%[lambdap]) \n\t"
+ "lh %[tmp161], 2(%[lambdap]) \n\t"
+ "sw %[tnoise], 0(%[tmp1]) \n\t"
+ "sw %[tnoise1], 4(%[tmp1]) \n\t"
+ "subu %[tmp16], %[c114], %[tmp16] \n\t"
+ "subu %[tmp161], %[c114], %[tmp161] \n\t"
+ "srav %[tmp32], %[tnoise], %[shiftFromNearToNoise] \n\t"
+ "srav %[tmp321], %[tnoise1], %[shiftFromNearToNoise] \n\t"
+ "addiu %[lambdap], %[lambdap], 4 \n\t"
+ "addiu %[tmp1], %[tmp1], 8 \n\t"
+ : [tmp16] "=&r" (tmp16), [tmp161] "=&r" (tmp161), [tmp1] "+r" (tmp1),
+ [tmp32] "=&r" (tmp32), [tmp321] "=&r" (tmp321), [lambdap] "+r" (lambdap)
+ : [tnoise] "r" (tnoise), [tnoise1] "r" (tnoise1), [c114] "r" (c114),
+ [shiftFromNearToNoise] "r" (shiftFromNearToNoise)
+ : "memory"
+ );
+
+ // Clamp to the int16 range and store the clamped estimate.
+ if (tmp32 > 32767) {
+ tmp32 = 32767;
+ aecm->noiseEst[i] = tmp32 << shiftFromNearToNoise;
+ }
+ if (tmp321 > 32767) {
+ tmp321 = 32767;
+ aecm->noiseEst[i+1] = tmp321 << shiftFromNearToNoise;
+ }
+
+ // nrsh = (noise * (ONE_Q14 - lambda)) >> 14.
+ // tmp32 and tmp321 are overwritten by the mul instructions, so they are
+ // declared as read-write operands; GCC requires that input-only operands
+ // are left unmodified.
+ __asm __volatile (
+ "mul %[tmp32], %[tmp32], %[tmp16] \n\t"
+ "mul %[tmp321], %[tmp321], %[tmp161] \n\t"
+ "sra %[nrsh1], %[tmp32], 14 \n\t"
+ "sra %[nrsh2], %[tmp321], 14 \n\t"
+ : [nrsh1] "=&r" (nrsh1), [nrsh2] "=r" (nrsh2),
+ [tmp32] "+r" (tmp32), [tmp321] "+r" (tmp321)
+ : [tmp16] "r" (tmp16), [tmp161] "r" (tmp161)
+ : "memory", "hi", "lo"
+ );
+
+ // Map two random values to table indices: (rand * 359) >> 15.
+ __asm __volatile (
+ "lh %[tmp32], 0(%[randW16p]) \n\t"
+ "lh %[tmp321], 2(%[randW16p]) \n\t"
+ "addiu %[randW16p], %[randW16p], 4 \n\t"
+ "mul %[tmp32], %[tmp32], %[c359] \n\t"
+ "mul %[tmp321], %[tmp321], %[c359] \n\t"
+ "sra %[tmp16], %[tmp32], 15 \n\t"
+ "sra %[tmp161], %[tmp321], 15 \n\t"
+ : [randW16p] "+r" (randW16p), [tmp32] "=&r" (tmp32),
+ [tmp16] "=r" (tmp16), [tmp161] "=r" (tmp161), [tmp321] "=&r" (tmp321)
+ : [c359] "r" (c359)
+ : "memory", "hi", "lo"
+ );
+
+#if !defined(MIPS_DSP_R1_LE)
+ tmp32 = WebRtcAecm_kCosTable[tmp16];
+ tmp321 = WebRtcAecm_kSinTable[tmp16];
+ tmp322 = WebRtcAecm_kCosTable[tmp161];
+ tmp323 = WebRtcAecm_kSinTable[tmp161];
+#else
+ // Indexed halfword loads from the tables. The indices are doubled in place
+ // to obtain byte offsets, so tmp16 / tmp161 are read-write operands.
+ __asm __volatile (
+ "sll %[tmp16], %[tmp16], 1 \n\t"
+ "sll %[tmp161], %[tmp161], 1 \n\t"
+ "lhx %[tmp32], %[tmp16](%[kCosTablep]) \n\t"
+ "lhx %[tmp321], %[tmp16](%[kSinTablep]) \n\t"
+ "lhx %[tmp322], %[tmp161](%[kCosTablep]) \n\t"
+ "lhx %[tmp323], %[tmp161](%[kSinTablep]) \n\t"
+ : [tmp32] "=&r" (tmp32), [tmp321] "=&r" (tmp321),
+ [tmp322] "=&r" (tmp322), [tmp323] "=&r" (tmp323),
+ [tmp16] "+r" (tmp16), [tmp161] "+r" (tmp161)
+ : [kCosTablep] "r" (kCosTablep), [kSinTablep] "r" (kSinTablep)
+ : "memory"
+ );
+#endif
+ // real = (cos * nrsh) >> 13, imag = (sin * -nrsh) >> 13 for both bins.
+ __asm __volatile (
+ "mul %[tmp32], %[tmp32], %[nrsh1] \n\t"
+ "negu %[tmp162], %[nrsh1] \n\t"
+ "mul %[tmp322], %[tmp322], %[nrsh2] \n\t"
+ "negu %[tmp163], %[nrsh2] \n\t"
+ "sra %[tmp32], %[tmp32], 13 \n\t"
+ "mul %[tmp321], %[tmp321], %[tmp162] \n\t"
+ "sra %[tmp322], %[tmp322], 13 \n\t"
+ "mul %[tmp323], %[tmp323], %[tmp163] \n\t"
+ "sra %[tmp321], %[tmp321], 13 \n\t"
+ "sra %[tmp323], %[tmp323], 13 \n\t"
+ : [tmp32] "+r" (tmp32), [tmp321] "+r" (tmp321), [tmp162] "=&r" (tmp162),
+ [tmp322] "+r" (tmp322), [tmp323] "+r" (tmp323), [tmp163] "=&r" (tmp163)
+ : [nrsh1] "r" (nrsh1), [nrsh2] "r" (nrsh2)
+ : "hi", "lo"
+ );
+ // Tables are in Q13.
+ uReal[i] = (int16_t)tmp32;
+ uImag[i] = (int16_t)tmp321;
+ uReal[i + 1] = (int16_t)tmp322;
+ uImag[i + 1] = (int16_t)tmp323;
+ }
+
+ // Add the noise to the output. Each sum is formed in 32 bits; if it does not
+ // fit in int16 (sign word differs from tt >> 15) a fixed value is stored.
+ // NOTE(review): the overflow value is 16384 ^ sgn, i.e. 16384 or -16385,
+ // not the int16 limits -- confirm this matches the reference C version.
+ int32_t tt, sgn;
+ tt = out[0].real;
+ sgn = ((int)tt) >> 31;
+ out[0].real = sgn == (int16_t)(tt >> 15) ? (int16_t)tt : (16384 ^ sgn);
+ tt = out[0].imag;
+ sgn = ((int)tt) >> 31;
+ out[0].imag = sgn == (int16_t)(tt >> 15) ? (int16_t)tt : (16384 ^ sgn);
+ for (i = 1; i < PART_LEN; i++) {
+ tt = out[i].real + uReal[i];
+ sgn = ((int)tt) >> 31;
+ out[i].real = sgn == (int16_t)(tt >> 15) ? (int16_t)tt : (16384 ^ sgn);
+ tt = out[i].imag + uImag[i];
+ sgn = ((int)tt) >> 31;
+ out[i].imag = sgn == (int16_t)(tt >> 15) ? (int16_t)tt : (16384 ^ sgn);
+ }
+ // The last bin only receives the real noise component.
+ tt = out[PART_LEN].real + uReal[PART_LEN];
+ sgn = ((int)tt) >> 31;
+ out[PART_LEN].real = sgn == (int16_t)(tt >> 15) ? (int16_t)tt : (16384 ^ sgn);
+ tt = out[PART_LEN].imag;
+ sgn = ((int)tt) >> 31;
+ out[PART_LEN].imag = sgn == (int16_t)(tt >> 15) ? (int16_t)tt : (16384 ^ sgn);
+}
+
diff --git a/third_party/webrtc/src/webrtc/modules/audio_processing/aecm/aecm_core_neon.c b/third_party/webrtc/src/webrtc/modules/audio_processing/aecm/aecm_core_neon.c
new file mode 100644
index 00000000..1751fcf7
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/modules/audio_processing/aecm/aecm_core_neon.c
@@ -0,0 +1,212 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/modules/audio_processing/aecm/aecm_core.h"
+
+#include <arm_neon.h>
+#include <assert.h>
+
+#include "webrtc/common_audio/signal_processing/include/real_fft.h"
+
+// TODO(kma): Re-write the corresponding assembly file, the offset
+// generating script and makefile, to replace these C functions.
+
+// Square root of Hanning window in Q14.
+// 65 entries: a leading 0 followed by 64 increasing values, ending at
+// 16384 (1 << 14, i.e. 1.0 in Q14).
+const ALIGN8_BEG int16_t WebRtcAecm_kSqrtHanning[] ALIGN8_END = {
+ 0,
+ 399, 798, 1196, 1594, 1990, 2386, 2780, 3172,
+ 3562, 3951, 4337, 4720, 5101, 5478, 5853, 6224,
+ 6591, 6954, 7313, 7668, 8019, 8364, 8705, 9040,
+ 9370, 9695, 10013, 10326, 10633, 10933, 11227, 11514,
+ 11795, 12068, 12335, 12594, 12845, 13089, 13325, 13553,
+ 13773, 13985, 14189, 14384, 14571, 14749, 14918, 15079,
+ 15231, 15373, 15506, 15631, 15746, 15851, 15947, 16034,
+ 16111, 16179, 16237, 16286, 16325, 16354, 16373, 16384
+};
+
+// Stores the sum of the four 32-bit lanes of |v| in |*ptr| (the previous
+// content of |*ptr| is overwritten).
+static inline void AddLanes(uint32_t* ptr, uint32x4_t v) {
+#if defined(WEBRTC_ARCH_ARM64)
+  // AArch64 has a single across-vector add.
+  *ptr = vaddvq_u32(v);
+#else
+  // Fold the high half onto the low half, then add the remaining two lanes.
+  const uint32x2_t halves = vadd_u32(vget_low_u32(v), vget_high_u32(v));
+  const uint32x2_t total = vpadd_u32(halves, halves);
+  *ptr = vget_lane_u32(total, 0);
+#endif
+}
+
+// NEON version of the linear energy computation.
+//
+// For every one of the PART_LEN1 bins this writes
+// echo_est[i] = channelStored[i] * far_spectrum[i]
+// and produces three sums over all bins:
+// *far_energy = sum of far_spectrum[i]
+// *echo_energy_adapt = sum of channelAdapt16[i] * far_spectrum[i]
+// *echo_energy_stored = sum of echo_est[i]
+// The three outputs are assigned, not accumulated: whatever they held on
+// entry is discarded (see the AddLanes() calls below).
+void WebRtcAecm_CalcLinearEnergiesNeon(AecmCore* aecm,
+ const uint16_t* far_spectrum,
+ int32_t* echo_est,
+ uint32_t* far_energy,
+ uint32_t* echo_energy_adapt,
+ uint32_t* echo_energy_stored) {
+ int16_t* start_stored_p = aecm->channelStored;
+ int16_t* start_adapt_p = aecm->channelAdapt16;
+ int32_t* echo_est_p = echo_est;
+ const int16_t* end_stored_p = aecm->channelStored + PART_LEN;
+ const uint16_t* far_spectrum_p = far_spectrum;
+ int16x8_t store_v, adapt_v;
+ uint16x8_t spectrum_v;
+ uint32x4_t echo_est_v_low, echo_est_v_high;
+ uint32x4_t far_energy_v, echo_stored_v, echo_adapt_v;
+
+ // Per-lane running sums, reduced to scalars after the loop.
+ far_energy_v = vdupq_n_u32(0);
+ echo_adapt_v = vdupq_n_u32(0);
+ echo_stored_v = vdupq_n_u32(0);
+
+ // Get energy for the delayed far end signal and estimated
+ // echo using both stored and adapted channels.
+ // The C code:
+ // for (i = 0; i < PART_LEN1; i++) {
+ // echo_est[i] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i],
+ // far_spectrum[i]);
+ // (*far_energy) += (uint32_t)(far_spectrum[i]);
+ // *echo_energy_adapt += aecm->channelAdapt16[i] * far_spectrum[i];
+ // (*echo_energy_stored) += (uint32_t)echo_est[i];
+ // }
+ // Each iteration handles 8 bins. The loop covers the first PART_LEN bins;
+ // the last bin (index PART_LEN) is handled by the scalar code at the end.
+ while (start_stored_p < end_stored_p) {
+ spectrum_v = vld1q_u16(far_spectrum_p);
+ adapt_v = vld1q_s16(start_adapt_p);
+ store_v = vld1q_s16(start_stored_p);
+
+ // Widen the 16-bit spectrum values to 32 bits while accumulating.
+ far_energy_v = vaddw_u16(far_energy_v, vget_low_u16(spectrum_v));
+ far_energy_v = vaddw_u16(far_energy_v, vget_high_u16(spectrum_v));
+
+ // NOTE(review): the int16 channel values are reinterpreted as uint16 for
+ // the unsigned multiply; this presumably relies on the channel being
+ // non-negative - confirm.
+ echo_est_v_low = vmull_u16(vreinterpret_u16_s16(vget_low_s16(store_v)),
+ vget_low_u16(spectrum_v));
+ echo_est_v_high = vmull_u16(vreinterpret_u16_s16(vget_high_s16(store_v)),
+ vget_high_u16(spectrum_v));
+ vst1q_s32(echo_est_p, vreinterpretq_s32_u32(echo_est_v_low));
+ vst1q_s32(echo_est_p + 4, vreinterpretq_s32_u32(echo_est_v_high));
+
+ echo_stored_v = vaddq_u32(echo_est_v_low, echo_stored_v);
+ echo_stored_v = vaddq_u32(echo_est_v_high, echo_stored_v);
+
+ // Multiply-accumulate of the adaptive channel with the spectrum.
+ echo_adapt_v = vmlal_u16(echo_adapt_v,
+ vreinterpret_u16_s16(vget_low_s16(adapt_v)),
+ vget_low_u16(spectrum_v));
+ echo_adapt_v = vmlal_u16(echo_adapt_v,
+ vreinterpret_u16_s16(vget_high_s16(adapt_v)),
+ vget_high_u16(spectrum_v));
+
+ start_stored_p += 8;
+ start_adapt_p += 8;
+ far_spectrum_p += 8;
+ echo_est_p += 8;
+ }
+
+ // Horizontal sums; these assignments overwrite the three outputs.
+ AddLanes(far_energy, far_energy_v);
+ AddLanes(echo_energy_stored, echo_stored_v);
+ AddLanes(echo_energy_adapt, echo_adapt_v);
+
+ // Scalar handling of the remaining bin at index PART_LEN.
+ echo_est[PART_LEN] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[PART_LEN],
+ far_spectrum[PART_LEN]);
+ *echo_energy_stored += (uint32_t)echo_est[PART_LEN];
+ *far_energy += (uint32_t)far_spectrum[PART_LEN];
+ *echo_energy_adapt += aecm->channelAdapt16[PART_LEN] * far_spectrum[PART_LEN];
+}
+
+// NEON version of "store the adaptive channel": copies channelAdapt16 into
+// channelStored and recomputes
+// echo_est[i] = channelStored[i] * far_spectrum[i]
+// for all PART_LEN1 bins.
+void WebRtcAecm_StoreAdaptiveChannelNeon(AecmCore* aecm,
+ const uint16_t* far_spectrum,
+ int32_t* echo_est) {
+ // Alignment expectations on the buffers (checked in debug builds only).
+ assert((uintptr_t)echo_est % 32 == 0);
+ assert((uintptr_t)(aecm->channelStored) % 16 == 0);
+ assert((uintptr_t)(aecm->channelAdapt16) % 16 == 0);
+
+ // This is C code of following optimized code.
+ // During startup we store the channel every block.
+ // memcpy(aecm->channelStored,
+ // aecm->channelAdapt16,
+ // sizeof(int16_t) * PART_LEN1);
+ // Recalculate echo estimate
+ // for (i = 0; i < PART_LEN; i += 4) {
+ // echo_est[i] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i],
+ // far_spectrum[i]);
+ // echo_est[i + 1] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i + 1],
+ // far_spectrum[i + 1]);
+ // echo_est[i + 2] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i + 2],
+ // far_spectrum[i + 2]);
+ // echo_est[i + 3] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i + 3],
+ // far_spectrum[i + 3]);
+ // }
+ // echo_est[i] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i],
+ // far_spectrum[i]);
+ const uint16_t* far_spectrum_p = far_spectrum;
+ int16_t* start_adapt_p = aecm->channelAdapt16;
+ int16_t* start_stored_p = aecm->channelStored;
+ const int16_t* end_stored_p = aecm->channelStored + PART_LEN;
+ int32_t* echo_est_p = echo_est;
+
+ uint16x8_t far_spectrum_v;
+ int16x8_t adapt_v;
+ uint32x4_t echo_est_v_low, echo_est_v_high;
+
+ // 8 bins per iteration over the first PART_LEN bins.
+ while (start_stored_p < end_stored_p) {
+ far_spectrum_v = vld1q_u16(far_spectrum_p);
+ adapt_v = vld1q_s16(start_adapt_p);
+
+ // Copy adaptive -> stored.
+ vst1q_s16(start_stored_p, adapt_v);
+
+ // The product uses the values just copied, reinterpreted as uint16.
+ echo_est_v_low = vmull_u16(vget_low_u16(far_spectrum_v),
+ vget_low_u16(vreinterpretq_u16_s16(adapt_v)));
+ echo_est_v_high = vmull_u16(vget_high_u16(far_spectrum_v),
+ vget_high_u16(vreinterpretq_u16_s16(adapt_v)));
+
+ vst1q_s32(echo_est_p, vreinterpretq_s32_u32(echo_est_v_low));
+ vst1q_s32(echo_est_p + 4, vreinterpretq_s32_u32(echo_est_v_high));
+
+ far_spectrum_p += 8;
+ start_adapt_p += 8;
+ start_stored_p += 8;
+ echo_est_p += 8;
+ }
+ // Scalar handling of the remaining bin at index PART_LEN.
+ aecm->channelStored[PART_LEN] = aecm->channelAdapt16[PART_LEN];
+ echo_est[PART_LEN] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[PART_LEN],
+ far_spectrum[PART_LEN]);
+}
+
+// NEON version of "reset the adaptive channel": for all PART_LEN1 bins,
+// copies channelStored into channelAdapt16 and writes the same value shifted
+// left by 16 bits into channelAdapt32.
+void WebRtcAecm_ResetAdaptiveChannelNeon(AecmCore* aecm) {
+ // Alignment expectations on the buffers (checked in debug builds only).
+ assert((uintptr_t)(aecm->channelStored) % 16 == 0);
+ assert((uintptr_t)(aecm->channelAdapt16) % 16 == 0);
+ assert((uintptr_t)(aecm->channelAdapt32) % 32 == 0);
+
+ // The C code of following optimized code.
+ // for (i = 0; i < PART_LEN1; i++) {
+ // aecm->channelAdapt16[i] = aecm->channelStored[i];
+ // aecm->channelAdapt32[i] = WEBRTC_SPL_LSHIFT_W32(
+ // (int32_t)aecm->channelStored[i], 16);
+ // }
+
+ int16_t* start_stored_p = aecm->channelStored;
+ int16_t* start_adapt16_p = aecm->channelAdapt16;
+ int32_t* start_adapt32_p = aecm->channelAdapt32;
+ const int16_t* end_stored_p = start_stored_p + PART_LEN;
+
+ int16x8_t stored_v;
+ int32x4_t adapt32_v_low, adapt32_v_high;
+
+ // 8 bins per iteration over the first PART_LEN bins.
+ while (start_stored_p < end_stored_p) {
+ stored_v = vld1q_s16(start_stored_p);
+ vst1q_s16(start_adapt16_p, stored_v);
+
+ // Widen to 32 bits and shift left by 16 in a single instruction.
+ adapt32_v_low = vshll_n_s16(vget_low_s16(stored_v), 16);
+ adapt32_v_high = vshll_n_s16(vget_high_s16(stored_v), 16);
+
+ vst1q_s32(start_adapt32_p, adapt32_v_low);
+ vst1q_s32(start_adapt32_p + 4, adapt32_v_high);
+
+ start_stored_p += 8;
+ start_adapt16_p += 8;
+ start_adapt32_p += 8;
+ }
+ // Scalar handling of the remaining bin at index PART_LEN.
+ aecm->channelAdapt16[PART_LEN] = aecm->channelStored[PART_LEN];
+ aecm->channelAdapt32[PART_LEN] = (int32_t)aecm->channelStored[PART_LEN] << 16;
+}
diff --git a/third_party/webrtc/src/webrtc/modules/audio_processing/aecm/aecm_defines.h b/third_party/webrtc/src/webrtc/modules/audio_processing/aecm/aecm_defines.h
new file mode 100644
index 00000000..6d63990b
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/modules/audio_processing/aecm/aecm_defines.h
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AECM_AECM_DEFINES_H_
+#define WEBRTC_MODULES_AUDIO_PROCESSING_AECM_AECM_DEFINES_H_
+
+#define AECM_DYNAMIC_Q /* Turn on/off dynamic Q-domain. */
+
+/* Algorithm parameters */
+#define FRAME_LEN 80 /* Total frame length, 10 ms. */
+
+#define PART_LEN 64 /* Length of partition. */
+#define PART_LEN_SHIFT 7 /* Length of (PART_LEN * 2) in base 2. */
+
+#define PART_LEN1 (PART_LEN + 1) /* Unique fft coefficients. */
+#define PART_LEN2 (PART_LEN << 1) /* Length of partition * 2. */
+#define PART_LEN4 (PART_LEN << 2) /* Length of partition * 4. */
+#define FAR_BUF_LEN PART_LEN4 /* Length of buffers. */
+#define MAX_DELAY 100
+
+/* Counter parameters */
+#define CONV_LEN 512 /* Convergence length used at startup. */
+#define CONV_LEN2 (CONV_LEN << 1) /* Used at startup. */
+
+/* Energy parameters */
+#define MAX_BUF_LEN 64 /* History length of energy signals. */
+#define FAR_ENERGY_MIN 1025 /* Lowest Far energy level: At least 2 */
+ /* in energy. */
+#define FAR_ENERGY_DIFF 929 /* Allowed difference between max */
+ /* and min. */
+#define ENERGY_DEV_OFFSET 0 /* The energy error offset in Q8. */
+#define ENERGY_DEV_TOL 400 /* The energy estimation tolerance (Q8). */
+#define FAR_ENERGY_VAD_REGION 230 /* Far VAD tolerance region. */
+
+/* Stepsize parameters */
+#define MU_MIN 10 /* Min stepsize 2^-MU_MIN (far end energy */
+ /* dependent). */
+#define MU_MAX 1 /* Max stepsize 2^-MU_MAX (far end energy */
+ /* dependent). */
+#define MU_DIFF 9 /* MU_MIN - MU_MAX */
+
+/* Channel parameters */
+#define MIN_MSE_COUNT 20 /* Min number of consecutive blocks with enough */
+ /* far end energy to compare channel estimates. */
+/* NOTE(review): 29 / 2^MSE_RESOLUTION = 29 / 32 is about 0.906, not the 0.8 */
+/* stated below - confirm which of the value or the comment is intended. */
+#define MIN_MSE_DIFF 29 /* The ratio between adapted and stored channel to */
+ /* accept a new storage (0.8 in Q-MSE_RESOLUTION). */
+#define MSE_RESOLUTION 5 /* MSE parameter resolution. */
+#define RESOLUTION_CHANNEL16 12 /* W16 Channel in Q-RESOLUTION_CHANNEL16. */
+#define RESOLUTION_CHANNEL32 28 /* W32 Channel in Q-RESOLUTION_CHANNEL. */
+#define CHANNEL_VAD 16 /* Minimum energy in frequency band */
+ /* to update channel. */
+
+/* Suppression gain parameters: SUPGAIN parameters in Q-(RESOLUTION_SUPGAIN). */
+#define RESOLUTION_SUPGAIN 8 /* Channel in Q-(RESOLUTION_SUPGAIN). */
+#define SUPGAIN_DEFAULT (1 << RESOLUTION_SUPGAIN) /* Default. */
+/* NOTE(review): 3072 / 2^8 = 12, not the 8 stated below - confirm which of */
+/* the value or the comment is intended. */
+#define SUPGAIN_ERROR_PARAM_A 3072 /* Estimation error parameter */
+ /* (Maximum gain) (8 in Q8). */
+#define SUPGAIN_ERROR_PARAM_B 1536 /* Estimation error parameter */
+ /* (Gain before going down). */
+#define SUPGAIN_ERROR_PARAM_D SUPGAIN_DEFAULT /* Estimation error parameter */
+ /* (Should be the same as Default) (1 in Q8). */
+#define SUPGAIN_EPC_DT 200 /* SUPGAIN_ERROR_PARAM_C * ENERGY_DEV_TOL */
+
+/* Defines for "check delay estimation" */
+#define CORR_WIDTH 31 /* Number of samples to correlate over. */
+#define CORR_MAX 16 /* Maximum correlation offset. */
+#define CORR_MAX_BUF 63
+#define CORR_DEV 4
+#define CORR_MAX_LEVEL 20
+#define CORR_MAX_LOW 4
+/* 2 * CORR_MAX + 1. The whole expansion is parenthesized so that the macro */
+/* keeps its value inside a larger expression such as 2 * CORR_BUF_LEN. */
+#define CORR_BUF_LEN ((CORR_MAX << 1) + 1)
+/* Note that CORR_WIDTH + 2*CORR_MAX <= MAX_BUF_LEN. */
+
+/* 1.0 in Q14 fixed point (16384). */
+#define ONE_Q14 (1 << 14)
+
+/* NLP defines */
+#define NLP_COMP_LOW 3277 /* 0.2 in Q14 */
+#define NLP_COMP_HIGH ONE_Q14 /* 1 in Q14 */
+
+#endif
diff --git a/third_party/webrtc/src/webrtc/modules/audio_processing/aecm/echo_control_mobile.c b/third_party/webrtc/src/webrtc/modules/audio_processing/aecm/echo_control_mobile.c
new file mode 100644
index 00000000..83781e97
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/modules/audio_processing/aecm/echo_control_mobile.c
@@ -0,0 +1,702 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/modules/audio_processing/aecm/include/echo_control_mobile.h"
+
+#ifdef AEC_DEBUG
+#include <stdio.h>
+#endif
+#include <stdlib.h>
+
+#include "webrtc/common_audio/ring_buffer.h"
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+#include "webrtc/modules/audio_processing/aecm/aecm_core.h"
+
+#define BUF_SIZE_FRAMES 50 // buffer size (frames)
+// Maximum length of resampled signal. Must be an integer multiple of frames
+// (ceil(1/(1 + MIN_SKEW)*2) + 1)*FRAME_LEN
+// The factor of 2 handles wb, and the + 1 is as a safety margin
+#define MAX_RESAMP_LEN (5 * FRAME_LEN)
+
+static const size_t kBufSizeSamp = BUF_SIZE_FRAMES * FRAME_LEN; // buffer size (samples)
+static const int kSampMsNb = 8; // samples per ms in nb
+// Target suppression levels for nlp modes
+// log{0.001, 0.00001, 0.00000001}
+// NOTE(review): the two comment lines above describe a table that does not
+// follow them here; they look like leftovers.
+// Marker stored in AecMobile.initFlag by WebRtcAecm_Init(); the other API
+// functions compare initFlag against it to reject an uninitialized instance.
+static const int kInitCheck = 42;
+
+// Per-instance state of the AECM wrapper. Instances are allocated by
+// WebRtcAecm_Create() and handed to callers as void*.
+typedef struct
+{
+ int sampFreq; // sampling rate in Hz; WebRtcAecm_Init() accepts 8000 or 16000
+ int scSampFreq;
+ short bufSizeStart; // far-end buffer fill level (frames) that ends startup
+ int knownDelay; // updated by WebRtcAecm_EstBufDelay()
+
+ // Stores the last frame added to the farend buffer
+ short farendOld[2][FRAME_LEN];
+ short initFlag; // indicates if AEC has been initialized
+
+ // Variables used for averaging far end buffer size
+ short counter;
+ short sum;
+ short firstVal;
+ short checkBufSizeCtr;
+
+ // Variables used for delay shifts
+ short msInSndCardBuf; // clamped sound-card delay (ms) + 10, set in Process
+ short filtDelay;
+ int timeForDelayChange;
+ int ECstartup; // non-zero while in the startup phase (near-end passed through)
+ int checkBuffSize;
+ int delayChange;
+ short lastDelayDiff;
+
+ int16_t echoMode; // 0..4, set through WebRtcAecm_set_config()
+
+#ifdef AEC_DEBUG
+ FILE *bufFile;
+ FILE *delayFile;
+ FILE *preCompFile;
+ FILE *postCompFile;
+#endif // AEC_DEBUG
+ // Structures
+ RingBuffer *farendBuf; // far-end samples written by WebRtcAecm_BufferFarend()
+
+ int lastError; // last AECM_* code; read with WebRtcAecm_get_error_code()
+
+ AecmCore* aecmCore;
+} AecMobile;
+
+// Estimates delay to set the position of the farend buffer read pointer
+// (controlled by knownDelay)
+static int WebRtcAecm_EstBufDelay(AecMobile* aecmInst, short msInSndCardBuf);
+
+// Stuffs the farend buffer if the estimated delay is too large
+static int WebRtcAecm_DelayComp(AecMobile* aecmInst);
+
+// Allocates an AECM instance together with its core state and its far-end
+// ring buffer. The instance still has to be initialized with
+// WebRtcAecm_Init() before use.
+//
+// Returns the new instance, or NULL if any allocation fails. On failure
+// everything allocated so far is released again.
+void* WebRtcAecm_Create() {
+    AecMobile* aecm = malloc(sizeof(*aecm));
+    if (aecm == NULL) {
+        return NULL;
+    }
+
+    WebRtcSpl_Init();
+
+    // The failure paths below undo the partial construction by hand instead
+    // of calling WebRtcAecm_Free(): at these points farendBuf (and, in
+    // AEC_DEBUG builds, the FILE handles) have not been assigned yet, and
+    // WebRtcAecm_Free() would operate on those indeterminate values.
+    aecm->aecmCore = WebRtcAecm_CreateCore();
+    if (!aecm->aecmCore) {
+        free(aecm);
+        return NULL;
+    }
+
+    aecm->farendBuf = WebRtc_CreateBuffer(kBufSizeSamp,
+                                          sizeof(int16_t));
+    if (!aecm->farendBuf) {
+        WebRtcAecm_FreeCore(aecm->aecmCore);
+        free(aecm);
+        return NULL;
+    }
+
+    aecm->initFlag = 0;
+    aecm->lastError = 0;
+
+#ifdef AEC_DEBUG
+    aecm->aecmCore->farFile = fopen("aecFar.pcm","wb");
+    aecm->aecmCore->nearFile = fopen("aecNear.pcm","wb");
+    aecm->aecmCore->outFile = fopen("aecOut.pcm","wb");
+    //aecm->aecmCore->outLpFile = fopen("aecOutLp.pcm","wb");
+
+    aecm->bufFile = fopen("aecBuf.dat", "wb");
+    aecm->delayFile = fopen("aecDelay.dat", "wb");
+    aecm->preCompFile = fopen("preComp.pcm", "wb");
+    aecm->postCompFile = fopen("postComp.pcm", "wb");
+#endif  // AEC_DEBUG
+    return aecm;
+}
+
+// Releases an instance created by WebRtcAecm_Create(): the core state, the
+// far-end buffer and the instance itself. A NULL argument is ignored.
+void WebRtcAecm_Free(void* aecmInst) {
+ AecMobile* aecm = aecmInst;
+
+ if (aecm == NULL) {
+ return;
+ }
+
+#ifdef AEC_DEBUG
+ // NOTE(review): this dereferences aecmCore and closes every debug file
+ // without checking them, so it assumes a fully constructed instance whose
+ // fopen() calls all succeeded - confirm.
+ fclose(aecm->aecmCore->farFile);
+ fclose(aecm->aecmCore->nearFile);
+ fclose(aecm->aecmCore->outFile);
+ //fclose(aecm->aecmCore->outLpFile);
+
+ fclose(aecm->bufFile);
+ fclose(aecm->delayFile);
+ fclose(aecm->preCompFile);
+ fclose(aecm->postCompFile);
+#endif // AEC_DEBUG
+ WebRtcAecm_FreeCore(aecm->aecmCore);
+ WebRtc_FreeBuffer(aecm->farendBuf);
+ free(aecm);
+}
+
+// Initializes (or re-initializes) an AECM instance.
+//
+//   aecmInst  instance returned by WebRtcAecm_Create()
+//   sampFreq  sampling rate in Hz; must be 8000 or 16000
+//
+// Returns 0 on success and -1 on failure. Except for a NULL instance, the
+// failure reason is stored in the instance's lastError.
+int32_t WebRtcAecm_Init(void *aecmInst, int32_t sampFreq)
+{
+    AecMobile* aecm = aecmInst;
+    AecmConfig aecConfig;
+
+    if (aecm == NULL)
+    {
+        return -1;
+    }
+
+    if (sampFreq != 8000 && sampFreq != 16000)
+    {
+        aecm->lastError = AECM_BAD_PARAMETER_ERROR;
+        return -1;
+    }
+    aecm->sampFreq = sampFreq;
+
+    // Initialize AECM core
+    if (WebRtcAecm_InitCore(aecm->aecmCore, aecm->sampFreq) == -1)
+    {
+        aecm->lastError = AECM_UNSPECIFIED_ERROR;
+        return -1;
+    }
+
+    // Initialize farend buffer
+    WebRtc_InitBuffer(aecm->farendBuf);
+
+    // Must be set before WebRtcAecm_set_config() below, which rejects
+    // instances whose initFlag is not kInitCheck.
+    aecm->initFlag = kInitCheck;  // indicates that initialization has been done
+
+    aecm->delayChange = 1;
+
+    aecm->sum = 0;
+    aecm->counter = 0;
+    aecm->checkBuffSize = 1;
+    aecm->firstVal = 0;
+
+    aecm->ECstartup = 1;
+    aecm->bufSizeStart = 0;
+    aecm->checkBufSizeCtr = 0;
+    aecm->filtDelay = 0;
+    aecm->timeForDelayChange = 0;
+    aecm->knownDelay = 0;
+    aecm->lastDelayDiff = 0;
+
+    // Clear BOTH saved far-end frames. A fixed count of 160 bytes is only one
+    // frame of 80 16-bit samples, which would leave farendOld[1] (replayed
+    // for the second wideband sub-frame when the far-end buffer runs dry)
+    // uninitialized.
+    memset(aecm->farendOld, 0, sizeof(aecm->farendOld));
+
+    // Default settings.
+    aecConfig.cngMode = AecmTrue;
+    aecConfig.echoMode = 3;
+
+    if (WebRtcAecm_set_config(aecm, aecConfig) == -1)
+    {
+        aecm->lastError = AECM_UNSPECIFIED_ERROR;
+        return -1;
+    }
+
+    return 0;
+}
+
+// Queues far-end samples in the instance's far-end ring buffer.
+//
+//   aecmInst     initialized AECM instance
+//   farend       far-end samples to store
+//   nrOfSamples  number of samples in |farend|; must be 80 or 160
+//
+// Returns 0 on success and -1 on failure. Except for a NULL instance, the
+// failure reason is stored in the instance's lastError.
+int32_t WebRtcAecm_BufferFarend(void *aecmInst, const int16_t *farend,
+                                size_t nrOfSamples) {
+    AecMobile* self = aecmInst;
+
+    if (!self) {
+        return -1;
+    }
+    if (!farend) {
+        self->lastError = AECM_NULL_POINTER_ERROR;
+        return -1;
+    }
+    if (self->initFlag != kInitCheck) {
+        self->lastError = AECM_UNINITIALIZED_ERROR;
+        return -1;
+    }
+    if (!(nrOfSamples == 80 || nrOfSamples == 160)) {
+        self->lastError = AECM_BAD_PARAMETER_ERROR;
+        return -1;
+    }
+
+    // TODO: Is this really a good idea?
+    // Delay compensation only runs once the startup phase is over.
+    if (self->ECstartup == 0) {
+        WebRtcAecm_DelayComp(self);
+    }
+
+    WebRtc_WriteBuffer(self->farendBuf, farend, nrOfSamples);
+    return 0;
+}
+
+// Runs echo control on one block of near-end audio.
+//
+// aecmInst initialized AECM instance
+// nearendNoisy near-end input (required)
+// nearendClean optional second near-end input; may be NULL
+// out output buffer for nrOfSamples samples (required)
+// nrOfSamples must be 80 or 160
+// msInSndCardBuf reported sound-card delay in ms; values outside [0, 500]
+// are clamped
+//
+// Returns 0 on success and -1 on failure; parameter failures store a code in
+// lastError. An out-of-range msInSndCardBuf does not stop processing: the
+// value is clamped and the block is still handled, but the call returns -1
+// with lastError set to AECM_BAD_PARAMETER_WARNING.
+//
+// While the instance is in its startup phase (ECstartup != 0) the near-end
+// input is copied to |out| unchanged (nearendClean if given, otherwise
+// nearendNoisy).
+int32_t WebRtcAecm_Process(void *aecmInst, const int16_t *nearendNoisy,
+ const int16_t *nearendClean, int16_t *out,
+ size_t nrOfSamples, int16_t msInSndCardBuf)
+{
+ AecMobile* aecm = aecmInst;
+ int32_t retVal = 0;
+ size_t i;
+ short nmbrOfFilledBuffers;
+ size_t nBlocks10ms;
+ size_t nFrames;
+#ifdef AEC_DEBUG
+ short msInAECBuf;
+#endif
+
+ if (aecm == NULL)
+ {
+ return -1;
+ }
+
+ if (nearendNoisy == NULL)
+ {
+ aecm->lastError = AECM_NULL_POINTER_ERROR;
+ return -1;
+ }
+
+ if (out == NULL)
+ {
+ aecm->lastError = AECM_NULL_POINTER_ERROR;
+ return -1;
+ }
+
+ if (aecm->initFlag != kInitCheck)
+ {
+ aecm->lastError = AECM_UNINITIALIZED_ERROR;
+ return -1;
+ }
+
+ if (nrOfSamples != 80 && nrOfSamples != 160)
+ {
+ aecm->lastError = AECM_BAD_PARAMETER_ERROR;
+ return -1;
+ }
+
+ // Clamp the reported delay to [0, 500] ms. This is only a warning:
+ // processing continues, but the function will return -1 at the end.
+ if (msInSndCardBuf < 0)
+ {
+ msInSndCardBuf = 0;
+ aecm->lastError = AECM_BAD_PARAMETER_WARNING;
+ retVal = -1;
+ } else if (msInSndCardBuf > 500)
+ {
+ msInSndCardBuf = 500;
+ aecm->lastError = AECM_BAD_PARAMETER_WARNING;
+ retVal = -1;
+ }
+ // A fixed 10 ms is added to the (clamped) value before it is stored.
+ msInSndCardBuf += 10;
+ aecm->msInSndCardBuf = msInSndCardBuf;
+
+ nFrames = nrOfSamples / FRAME_LEN;
+ nBlocks10ms = nFrames / aecm->aecmCore->mult;
+
+ if (aecm->ECstartup)
+ {
+ // Startup: pass the near-end signal through (skipping the copy when the
+ // caller processes in place).
+ if (nearendClean == NULL)
+ {
+ if (out != nearendNoisy)
+ {
+ memcpy(out, nearendNoisy, sizeof(short) * nrOfSamples);
+ }
+ } else if (out != nearendClean)
+ {
+ memcpy(out, nearendClean, sizeof(short) * nrOfSamples);
+ }
+
+ nmbrOfFilledBuffers =
+ (short) WebRtc_available_read(aecm->farendBuf) / FRAME_LEN;
+ // The AECM is in the start up mode
+ // AECM is disabled until the soundcard buffer and farend buffers are OK
+
+ // Mechanism to ensure that the soundcard buffer is reasonably stable.
+ if (aecm->checkBuffSize)
+ {
+ aecm->checkBufSizeCtr++;
+ // Before we fill up the far end buffer we require the amount of data on the
+ // sound card to be stable (+/-8 ms) compared to the first value. This
+ // comparison is made during the following 4 consecutive frames. If it seems
+ // to be stable then we start to fill up the far end buffer.
+
+ if (aecm->counter == 0)
+ {
+ aecm->firstVal = aecm->msInSndCardBuf;
+ aecm->sum = 0;
+ }
+
+ // Tolerance is the larger of 20% of the current value and kSampMsNb.
+ if (abs(aecm->firstVal - aecm->msInSndCardBuf)
+ < WEBRTC_SPL_MAX(0.2 * aecm->msInSndCardBuf, kSampMsNb))
+ {
+ aecm->sum += aecm->msInSndCardBuf;
+ aecm->counter++;
+ } else
+ {
+ aecm->counter = 0;
+ }
+
+ if (aecm->counter * nBlocks10ms >= 6)
+ {
+ // The farend buffer size is determined in blocks of 80 samples
+ // Use 75% of the average value of the soundcard buffer
+ aecm->bufSizeStart
+ = WEBRTC_SPL_MIN((3 * aecm->sum
+ * aecm->aecmCore->mult) / (aecm->counter * 40), BUF_SIZE_FRAMES);
+ // buffersize has now been determined
+ aecm->checkBuffSize = 0;
+ }
+
+ if (aecm->checkBufSizeCtr * nBlocks10ms > 50)
+ {
+ // for really bad sound cards, don't disable echocanceller for more than 0.5 sec
+ aecm->bufSizeStart = WEBRTC_SPL_MIN((3 * aecm->msInSndCardBuf
+ * aecm->aecmCore->mult) / 40, BUF_SIZE_FRAMES);
+ aecm->checkBuffSize = 0;
+ }
+ }
+
+ // if checkBuffSize changed in the if-statement above
+ if (!aecm->checkBuffSize)
+ {
+ // soundcard buffer is now reasonably stable
+ // When the far end buffer is filled with approximately the same amount of
+ // data as the amount on the sound card we end the start up phase and start
+ // to cancel echoes.
+
+ if (nmbrOfFilledBuffers == aecm->bufSizeStart)
+ {
+ aecm->ECstartup = 0; // Enable the AECM
+ } else if (nmbrOfFilledBuffers > aecm->bufSizeStart)
+ {
+ // More far-end data than wanted: advance the read pointer so that
+ // exactly bufSizeStart frames remain, then leave startup.
+ WebRtc_MoveReadPtr(aecm->farendBuf,
+ (int) WebRtc_available_read(aecm->farendBuf)
+ - (int) aecm->bufSizeStart * FRAME_LEN);
+ aecm->ECstartup = 0;
+ }
+ }
+
+ } else
+ {
+ // AECM is enabled
+
+ // Note only 1 block supported for nb and 2 blocks for wb
+ for (i = 0; i < nFrames; i++)
+ {
+ int16_t farend[FRAME_LEN];
+ const int16_t* farend_ptr = NULL;
+
+ nmbrOfFilledBuffers =
+ (short) WebRtc_available_read(aecm->farendBuf) / FRAME_LEN;
+
+ // Check that there is data in the far end buffer
+ if (nmbrOfFilledBuffers > 0)
+ {
+ // Get the next 80 samples from the farend buffer
+ WebRtc_ReadBuffer(aecm->farendBuf, (void**) &farend_ptr, farend,
+ FRAME_LEN);
+
+ // Always store the last frame for use when we run out of data
+ memcpy(&(aecm->farendOld[i][0]), farend_ptr,
+ FRAME_LEN * sizeof(short));
+ } else
+ {
+ // We have no data so we use the last played frame
+ memcpy(farend, &(aecm->farendOld[i][0]), FRAME_LEN * sizeof(short));
+ farend_ptr = farend;
+ }
+
+ // Call buffer delay estimator when all data is extracted,
+ // i,e. i = 0 for NB and i = 1 for WB
+ if ((i == 0 && aecm->sampFreq == 8000) || (i == 1 && aecm->sampFreq == 16000))
+ {
+ WebRtcAecm_EstBufDelay(aecm, aecm->msInSndCardBuf);
+ }
+
+ // Call the AECM
+ /*WebRtcAecm_ProcessFrame(aecm->aecmCore, farend, &nearend[FRAME_LEN * i],
+ &out[FRAME_LEN * i], aecm->knownDelay);*/
+ // A core failure aborts immediately; lastError is not updated here.
+ if (WebRtcAecm_ProcessFrame(aecm->aecmCore,
+ farend_ptr,
+ &nearendNoisy[FRAME_LEN * i],
+ (nearendClean
+ ? &nearendClean[FRAME_LEN * i]
+ : NULL),
+ &out[FRAME_LEN * i]) == -1)
+ return -1;
+ }
+ }
+
+#ifdef AEC_DEBUG
+ msInAECBuf = (short) WebRtc_available_read(aecm->farendBuf) /
+ (kSampMsNb * aecm->aecmCore->mult);
+ fwrite(&msInAECBuf, 2, 1, aecm->bufFile);
+ fwrite(&(aecm->knownDelay), sizeof(aecm->knownDelay), 1, aecm->delayFile);
+#endif
+
+ return retVal;
+}
+
+// Writes one suppression-gain preset into the core. |gain| becomes both the
+// current and the previous gain; |a|, |b| and |d| are the already scaled
+// error parameters, stored as A, D and the differences A - B and B - D.
+static void WebRtcAecm_ApplySupGain(AecmCore* core, int gain,
+                                    int a, int b, int d) {
+    core->supGain = gain;
+    core->supGainOld = gain;
+    core->supGainErrParamA = a;
+    core->supGainErrParamD = d;
+    core->supGainErrParamDiffAB = a - b;
+    core->supGainErrParamDiffBD = b - d;
+}
+
+// Applies a configuration to an initialized AECM instance.
+//
+//   config.cngMode   AecmFalse or AecmTrue
+//   config.echoMode  0..4; selects the suppression-gain preset. Mode 3 uses
+//                    the SUPGAIN_* constants as they are, modes 2, 1 and 0
+//                    shift them right by 1, 2 and 3 bits, mode 4 shifts them
+//                    left by 1 bit.
+//
+// Returns 0 on success and -1 on failure. Except for a NULL instance, the
+// failure reason is stored in lastError. cngMode is written to the core
+// before echoMode is validated.
+int32_t WebRtcAecm_set_config(void *aecmInst, AecmConfig config) {
+    AecMobile* aecm = aecmInst;
+
+    if (!aecm) {
+        return -1;
+    }
+    if (aecm->initFlag != kInitCheck) {
+        aecm->lastError = AECM_UNINITIALIZED_ERROR;
+        return -1;
+    }
+
+    if (config.cngMode != AecmFalse && config.cngMode != AecmTrue) {
+        aecm->lastError = AECM_BAD_PARAMETER_ERROR;
+        return -1;
+    }
+    aecm->aecmCore->cngMode = config.cngMode;
+
+    if (config.echoMode < 0 || config.echoMode > 4) {
+        aecm->lastError = AECM_BAD_PARAMETER_ERROR;
+        return -1;
+    }
+    aecm->echoMode = config.echoMode;
+
+    switch (aecm->echoMode) {
+    case 0:
+        WebRtcAecm_ApplySupGain(aecm->aecmCore,
+                                SUPGAIN_DEFAULT >> 3,
+                                SUPGAIN_ERROR_PARAM_A >> 3,
+                                SUPGAIN_ERROR_PARAM_B >> 3,
+                                SUPGAIN_ERROR_PARAM_D >> 3);
+        break;
+    case 1:
+        WebRtcAecm_ApplySupGain(aecm->aecmCore,
+                                SUPGAIN_DEFAULT >> 2,
+                                SUPGAIN_ERROR_PARAM_A >> 2,
+                                SUPGAIN_ERROR_PARAM_B >> 2,
+                                SUPGAIN_ERROR_PARAM_D >> 2);
+        break;
+    case 2:
+        WebRtcAecm_ApplySupGain(aecm->aecmCore,
+                                SUPGAIN_DEFAULT >> 1,
+                                SUPGAIN_ERROR_PARAM_A >> 1,
+                                SUPGAIN_ERROR_PARAM_B >> 1,
+                                SUPGAIN_ERROR_PARAM_D >> 1);
+        break;
+    case 3:
+        WebRtcAecm_ApplySupGain(aecm->aecmCore,
+                                SUPGAIN_DEFAULT,
+                                SUPGAIN_ERROR_PARAM_A,
+                                SUPGAIN_ERROR_PARAM_B,
+                                SUPGAIN_ERROR_PARAM_D);
+        break;
+    case 4:
+        WebRtcAecm_ApplySupGain(aecm->aecmCore,
+                                SUPGAIN_DEFAULT << 1,
+                                SUPGAIN_ERROR_PARAM_A << 1,
+                                SUPGAIN_ERROR_PARAM_B << 1,
+                                SUPGAIN_ERROR_PARAM_D << 1);
+        break;
+    default:
+        // Unreachable: echoMode was range-checked above.
+        break;
+    }
+
+    return 0;
+}
+
+// Reads the current configuration of an initialized AECM instance into
+// |*config| (cngMode from the core, echoMode from the wrapper).
+//
+// Returns 0 on success and -1 on failure. Except for a NULL instance, the
+// failure reason is stored in lastError.
+int32_t WebRtcAecm_get_config(void *aecmInst, AecmConfig *config) {
+    AecMobile* self = aecmInst;
+
+    if (!self) {
+        return -1;
+    }
+    if (!config) {
+        self->lastError = AECM_NULL_POINTER_ERROR;
+        return -1;
+    }
+    if (self->initFlag != kInitCheck) {
+        self->lastError = AECM_UNINITIALIZED_ERROR;
+        return -1;
+    }
+
+    config->cngMode = self->aecmCore->cngMode;
+    config->echoMode = self->echoMode;
+    return 0;
+}
+
+// Hands a caller-supplied echo path to the AECM core.
+//
+//   aecmInst    initialized AECM instance
+//   echo_path   echo path data, interpreted as int16_t values
+//   size_bytes  size of |echo_path|; must equal
+//               WebRtcAecm_echo_path_size_bytes()
+//
+// Returns 0 on success and -1 on failure. Except for a NULL instance, the
+// failure reason is stored in lastError.
+int32_t WebRtcAecm_InitEchoPath(void* aecmInst,
+                                const void* echo_path,
+                                size_t size_bytes) {
+    AecMobile* self = aecmInst;
+    const int16_t* path;
+
+    if (!aecmInst) {
+        return -1;
+    }
+    if (!echo_path) {
+        self->lastError = AECM_NULL_POINTER_ERROR;
+        return -1;
+    }
+    if (size_bytes != WebRtcAecm_echo_path_size_bytes()) {
+        // Input channel size does not match the size of AECM
+        self->lastError = AECM_BAD_PARAMETER_ERROR;
+        return -1;
+    }
+    if (self->initFlag != kInitCheck) {
+        self->lastError = AECM_UNINITIALIZED_ERROR;
+        return -1;
+    }
+
+    path = echo_path;
+    WebRtcAecm_InitEchoPathCore(self->aecmCore, path);
+    return 0;
+}
+
+// Copies the core's stored channel (channelStored) into a caller buffer.
+//
+//   aecmInst    initialized AECM instance
+//   echo_path   destination buffer
+//   size_bytes  size of |echo_path|; must equal
+//               WebRtcAecm_echo_path_size_bytes()
+//
+// Returns 0 on success and -1 on failure. Except for a NULL instance, the
+// failure reason is stored in lastError.
+int32_t WebRtcAecm_GetEchoPath(void* aecmInst,
+                               void* echo_path,
+                               size_t size_bytes) {
+    AecMobile* self = aecmInst;
+
+    if (!aecmInst) {
+        return -1;
+    }
+    if (!echo_path) {
+        self->lastError = AECM_NULL_POINTER_ERROR;
+        return -1;
+    }
+    if (size_bytes != WebRtcAecm_echo_path_size_bytes()) {
+        // Input channel size does not match the size of AECM
+        self->lastError = AECM_BAD_PARAMETER_ERROR;
+        return -1;
+    }
+    if (self->initFlag != kInitCheck) {
+        self->lastError = AECM_UNINITIALIZED_ERROR;
+        return -1;
+    }
+
+    memcpy(echo_path, self->aecmCore->channelStored, size_bytes);
+    return 0;
+}
+
+// Returns the size in bytes of an echo path: PART_LEN1 int16_t values. This
+// is the size WebRtcAecm_InitEchoPath() and WebRtcAecm_GetEchoPath() require.
+size_t WebRtcAecm_echo_path_size_bytes()
+{
+ return (PART_LEN1 * sizeof(int16_t));
+}
+
+// Returns the last error or warning code recorded on the instance, or -1 if
+// |aecmInst| is NULL.
+int32_t WebRtcAecm_get_error_code(void *aecmInst) {
+    const AecMobile* self = aecmInst;
+
+    return (self != NULL) ? self->lastError : -1;
+}
+
+// Updates the filtered delay estimate (filtDelay) from the difference between
+// the sound-card delay and the amount of buffered far-end data, and, after a
+// sustained deviation, updates knownDelay. Always returns 0.
+static int WebRtcAecm_EstBufDelay(AecMobile* aecm, short msInSndCardBuf) {
+ short delayNew, nSampSndCard;
+ short nSampFar = (short) WebRtc_available_read(aecm->farendBuf);
+ short diff;
+
+ // Sound-card delay converted from ms to samples.
+ nSampSndCard = msInSndCardBuf * kSampMsNb * aecm->aecmCore->mult;
+
+ delayNew = nSampSndCard - nSampFar;
+
+ // Delay of less than one frame: move the far-end read pointer by one frame
+ // and account for it in the estimate.
+ if (delayNew < FRAME_LEN)
+ {
+ WebRtc_MoveReadPtr(aecm->farendBuf, FRAME_LEN);
+ delayNew += FRAME_LEN;
+ }
+
+ // Smoothing: 80% previous estimate, 20% new value, floored at 0.
+ aecm->filtDelay = WEBRTC_SPL_MAX(0, (8 * aecm->filtDelay + 2 * delayNew) / 10);
+
+ // timeForDelayChange counts consecutive calls in which the filtered delay
+ // stays outside the 96..224 band around knownDelay; it restarts when the
+ // deviation jumps from one side of the band to the other or returns into it.
+ diff = aecm->filtDelay - aecm->knownDelay;
+ if (diff > 224)
+ {
+ if (aecm->lastDelayDiff < 96)
+ {
+ aecm->timeForDelayChange = 0;
+ } else
+ {
+ aecm->timeForDelayChange++;
+ }
+ } else if (diff < 96 && aecm->knownDelay > 0)
+ {
+ if (aecm->lastDelayDiff > 224)
+ {
+ aecm->timeForDelayChange = 0;
+ } else
+ {
+ aecm->timeForDelayChange++;
+ }
+ } else
+ {
+ aecm->timeForDelayChange = 0;
+ }
+ aecm->lastDelayDiff = diff;
+
+ // After more than 25 such calls, re-anchor knownDelay 160 samples below the
+ // filtered delay (never negative).
+ if (aecm->timeForDelayChange > 25)
+ {
+ aecm->knownDelay = WEBRTC_SPL_MAX((int)aecm->filtDelay - 160, 0);
+ }
+ return 0;
+}
+
+// Stuffs the far-end buffer when the sound-card delay exceeds the buffered
+// far-end data by more than FAR_BUF_LEN minus one (rate-scaled) frame: the
+// read pointer is moved backwards by between FRAME_LEN and 10 * FRAME_LEN
+// samples and delayChange is set. Always returns 0.
+static int WebRtcAecm_DelayComp(AecMobile* aecm) {
+    const int maxStuffSamp = 10 * FRAME_LEN;
+    const int farSamples = (int) WebRtc_available_read(aecm->farendBuf);
+    const int sndCardSamples =
+        aecm->msInSndCardBuf * kSampMsNb * aecm->aecmCore->mult;
+    const int delay = sndCardSamples - farSamples;
+    int stuffSamples;
+
+    if (delay > FAR_BUF_LEN - FRAME_LEN * aecm->aecmCore->mult) {
+        // The difference of the buffer sizes is larger than the maximum
+        // allowed known delay. Compensate by stuffing the buffer.
+        stuffSamples = (int)(WEBRTC_SPL_MAX(((sndCardSamples >> 1) - farSamples),
+                                            FRAME_LEN));
+        stuffSamples = WEBRTC_SPL_MIN(stuffSamples, maxStuffSamp);
+
+        WebRtc_MoveReadPtr(aecm->farendBuf, -stuffSamples);
+        aecm->delayChange = 1;  // the delay needs to be updated
+    }
+
+    return 0;
+}
diff --git a/third_party/webrtc/src/webrtc/modules/audio_processing/aecm/include/echo_control_mobile.h b/third_party/webrtc/src/webrtc/modules/audio_processing/aecm/include/echo_control_mobile.h
new file mode 100644
index 00000000..7ae15c2a
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/modules/audio_processing/aecm/include/echo_control_mobile.h
@@ -0,0 +1,218 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AECM_INCLUDE_ECHO_CONTROL_MOBILE_H_
+#define WEBRTC_MODULES_AUDIO_PROCESSING_AECM_INCLUDE_ECHO_CONTROL_MOBILE_H_
+
+#include <stdlib.h>
+
+#include "webrtc/typedefs.h"
+
+enum {
+ AecmFalse = 0, // boolean "false"
+ AecmTrue = 1 // boolean "true" (the value implicit numbering also yields)
+};
+
+// Errors
+#define AECM_UNSPECIFIED_ERROR 12000
+#define AECM_UNSUPPORTED_FUNCTION_ERROR 12001
+#define AECM_UNINITIALIZED_ERROR 12002
+#define AECM_NULL_POINTER_ERROR 12003
+#define AECM_BAD_PARAMETER_ERROR 12004
+
+// Warnings
+#define AECM_BAD_PARAMETER_WARNING 12100
+
+typedef struct { // settings exchanged via WebRtcAecm_set_config() / WebRtcAecm_get_config()
+ int16_t cngMode; // AecmFalse, AecmTrue (default)
+ int16_t echoMode; // 0, 1, 2, 3 (default), 4
+} AecmConfig;
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Allocates the memory needed by the AECM. The memory needs to be
+ * initialized separately using the WebRtcAecm_Init() function.
+ * Returns a pointer to the instance and a nullptr at failure.
+ */
+void* WebRtcAecm_Create();
+
+/*
+ * This function releases the memory allocated by WebRtcAecm_Create()
+ *
+ * Inputs Description
+ * -------------------------------------------------------------------
+ * void* aecmInst Pointer to the AECM instance
+ */
+void WebRtcAecm_Free(void* aecmInst);
+
+/*
+ * Initializes an AECM instance.
+ *
+ * Inputs Description
+ * -------------------------------------------------------------------
+ * void* aecmInst Pointer to the AECM instance
+ * int32_t sampFreq Sampling frequency of data
+ *
+ * Outputs Description
+ * -------------------------------------------------------------------
+ * int32_t return 0: OK
+ * -1: error
+ */
+int32_t WebRtcAecm_Init(void* aecmInst, int32_t sampFreq);
+
+/*
+ * Inserts an 80 or 160 sample block of data into the farend buffer.
+ *
+ * Inputs Description
+ * -------------------------------------------------------------------
+ * void* aecmInst Pointer to the AECM instance
+ * int16_t* farend In buffer containing one frame of
+ * farend signal
+ * size_t nrOfSamples Number of samples in farend buffer
+ *
+ * Outputs Description
+ * -------------------------------------------------------------------
+ * int32_t return 0: OK
+ * -1: error
+ */
+int32_t WebRtcAecm_BufferFarend(void* aecmInst,
+ const int16_t* farend,
+ size_t nrOfSamples);
+
+/*
+ * Runs the AECM on an 80 or 160 sample block of data.
+ *
+ * Inputs Description
+ * -------------------------------------------------------------------
+ * void* aecmInst Pointer to the AECM instance
+ * int16_t* nearendNoisy In buffer containing one frame of
+ * reference nearend+echo signal. If
+ * noise reduction is active, provide
+ * the noisy signal here.
+ * int16_t* nearendClean In buffer containing one frame of
+ * nearend+echo signal. If noise
+ * reduction is active, provide the
+ * clean signal here. Otherwise pass a
+ * NULL pointer.
+ * size_t nrOfSamples Number of samples in nearend buffer
+ * int16_t msInSndCardBuf Delay estimate for sound card and
+ * system buffers
+ *
+ * Outputs Description
+ * -------------------------------------------------------------------
+ * int16_t* out Out buffer, one frame of processed nearend
+ * int32_t return 0: OK
+ * -1: error
+ */
+int32_t WebRtcAecm_Process(void* aecmInst,
+ const int16_t* nearendNoisy,
+ const int16_t* nearendClean,
+ int16_t* out,
+ size_t nrOfSamples,
+ int16_t msInSndCardBuf);
+
+/*
+ * This function enables the user to set certain parameters on-the-fly
+ *
+ * Inputs Description
+ * -------------------------------------------------------------------
+ * void* aecmInst Pointer to the AECM instance
+ * AecmConfig config Config instance that contains all
+ * properties to be set
+ *
+ * Outputs Description
+ * -------------------------------------------------------------------
+ * int32_t return 0: OK
+ * -1: error
+ */
+int32_t WebRtcAecm_set_config(void* aecmInst, AecmConfig config);
+
+/*
+ * This function enables the user to read the current parameters on-the-fly
+ *
+ * Inputs Description
+ * -------------------------------------------------------------------
+ * void* aecmInst Pointer to the AECM instance
+ *
+ * Outputs Description
+ * -------------------------------------------------------------------
+ * AecmConfig* config Pointer to the config instance that
+ * all properties will be written to
+ * int32_t return 0: OK
+ * -1: error
+ */
+int32_t WebRtcAecm_get_config(void *aecmInst, AecmConfig *config);
+
+/*
+ * This function enables the user to set the echo path on-the-fly.
+ *
+ * Inputs Description
+ * -------------------------------------------------------------------
+ * void* aecmInst Pointer to the AECM instance
+ * void* echo_path Pointer to the echo path to be set
+ * size_t size_bytes Size in bytes of the echo path
+ *
+ * Outputs Description
+ * -------------------------------------------------------------------
+ * int32_t return 0: OK
+ * -1: error
+ */
+int32_t WebRtcAecm_InitEchoPath(void* aecmInst,
+ const void* echo_path,
+ size_t size_bytes);
+
+/*
+ * This function enables the user to get the currently used echo path
+ * on-the-fly
+ *
+ * Inputs Description
+ * -------------------------------------------------------------------
+ * void* aecmInst Pointer to the AECM instance
+ * void* echo_path Pointer to echo path
+ * size_t size_bytes Size in bytes of the echo path
+ *
+ * Outputs Description
+ * -------------------------------------------------------------------
+ * int32_t return 0: OK
+ * -1: error
+ */
+int32_t WebRtcAecm_GetEchoPath(void* aecmInst,
+ void* echo_path,
+ size_t size_bytes);
+
+/*
+ * This function enables the user to get the echo path size in bytes
+ *
+ * Outputs Description
+ * -------------------------------------------------------------------
+ * size_t return Size in bytes
+ */
+size_t WebRtcAecm_echo_path_size_bytes();
+
+/*
+ * Gets the last error code.
+ *
+ * Inputs Description
+ * -------------------------------------------------------------------
+ * void* aecmInst Pointer to the AECM instance
+ *
+ * Outputs Description
+ * -------------------------------------------------------------------
+ * int32_t return 12000-12100: error code (see the AECM_* error/warning defines)
+ */
+int32_t WebRtcAecm_get_error_code(void *aecmInst);
+
+#ifdef __cplusplus
+}
+#endif
+#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AECM_INCLUDE_ECHO_CONTROL_MOBILE_H_
diff --git a/third_party/webrtc/src/webrtc/modules/audio_processing/logging/aec_logging.h b/third_party/webrtc/src/webrtc/modules/audio_processing/logging/aec_logging.h
new file mode 100644
index 00000000..3cf9ff89
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/modules/audio_processing/logging/aec_logging.h
@@ -0,0 +1,86 @@
+/*
+ * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_LOGGING_
+#define WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_LOGGING_
+
+#include <stdio.h>
+
+#include "webrtc/modules/audio_processing/logging/aec_logging_file_handling.h"
+
+// To enable AEC logging, invoke GYP with -Daec_debug_dump=1.
+#ifdef WEBRTC_AEC_DEBUG_DUMP
+// Dumps wav data to file.
+#define RTC_AEC_DEBUG_WAV_WRITE(file, data, num_samples) \
+ do { \
+ rtc_WavWriteSamples(file, data, num_samples); \
+ } while (0)
+
+// (Re)opens a wav file for writing using the specified sample rate.
+#define RTC_AEC_DEBUG_WAV_REOPEN(name, instance_index, process_rate, \
+ sample_rate, wav_file) \
+ do { \
+ WebRtcAec_ReopenWav(name, instance_index, process_rate, sample_rate, \
+ wav_file); \
+ } while (0)
+
+// Closes a wav file.
+#define RTC_AEC_DEBUG_WAV_CLOSE(wav_file) \
+ do { \
+ rtc_WavClose(wav_file); \
+ } while (0)
+
+// Dumps raw data to file.
+#define RTC_AEC_DEBUG_RAW_WRITE(file, data, data_size) \
+ do { \
+ (void) fwrite(data, data_size, 1, file); \
+ } while (0)
+
+// Opens a raw data file for writing, named after |name| and the instance counter.
+#define RTC_AEC_DEBUG_RAW_OPEN(name, instance_counter, file) \
+ do { \
+ WebRtcAec_RawFileOpen(name, instance_counter, file); \
+ } while (0)
+
+// Closes a raw data file.
+#define RTC_AEC_DEBUG_RAW_CLOSE(file) \
+ do { \
+ fclose(file); \
+ } while (0)
+
+#else // WEBRTC_AEC_DEBUG_DUMP
+#define RTC_AEC_DEBUG_WAV_WRITE(file, data, num_samples) \
+ do { \
+ } while (0)
+
+#define RTC_AEC_DEBUG_WAV_REOPEN(name, instance_index, process_rate, \
+ sample_rate, wav_file) \
+ do { /* dump disabled: same parameter order as the enabled macro, no code */ \
+ } while (0)
+
+#define RTC_AEC_DEBUG_WAV_CLOSE(wav_file) \
+ do { \
+ } while (0)
+
+#define RTC_AEC_DEBUG_RAW_WRITE(file, data, data_size) \
+ do { \
+ } while (0)
+
+#define RTC_AEC_DEBUG_RAW_OPEN(name, instance_counter, file) \
+ do { /* dump disabled: same parameter order as the enabled macro, no code */ \
+ } while (0)
+
+#define RTC_AEC_DEBUG_RAW_CLOSE(file) \
+ do { \
+ } while (0)
+
+#endif // WEBRTC_AEC_DEBUG_DUMP
+
+#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_LOGGING_
diff --git a/third_party/webrtc/src/webrtc/modules/audio_processing/logging/aec_logging_file_handling.cc b/third_party/webrtc/src/webrtc/modules/audio_processing/logging/aec_logging_file_handling.cc
new file mode 100644
index 00000000..3a434714
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/modules/audio_processing/logging/aec_logging_file_handling.cc
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/modules/audio_processing/logging/aec_logging_file_handling.h"
+
+#include <stdint.h>
+#include <stdio.h>
+
+#include "webrtc/base/checks.h"
+#include "webrtc/base/stringutils.h"
+#include "webrtc/common_audio/wav_file.h"
+#include "webrtc/typedefs.h"
+
+#ifdef WEBRTC_AEC_DEBUG_DUMP
+void WebRtcAec_ReopenWav(const char* name,
+ int instance_index,
+ int process_rate,
+ int sample_rate,
+ rtc_WavWriter** wav_file) {
+ // Reuse the writer that is already open if its sample rate still matches.
+ if (*wav_file && rtc_WavSampleRate(*wav_file) == sample_rate)
+ return;
+ if (*wav_file)
+ rtc_WavClose(*wav_file);
+
+ // File name: "<name><instance_index>-<process_rate>.wav".
+ char filename[64];
+ int written = rtc::sprintfn(filename, sizeof(filename), "%s%d-%d.wav", name,
+ instance_index, process_rate);
+
+ // Neither an output error nor a truncated file name is acceptable.
+ RTC_DCHECK_GE(written, 0);
+ RTC_DCHECK_LT(static_cast<size_t>(written), sizeof(filename));
+ *wav_file = rtc_WavOpen(filename, sample_rate, 1);
+}
+
+void WebRtcAec_RawFileOpen(const char* name, int instance_index, FILE** file) {
+ char filename[64]; // receives "<name>_<instance_index>.dat"
+ int written = rtc::sprintfn(filename, sizeof(filename), "%s_%d.dat", name,
+ instance_index);
+
+ // Ensure there was no buffer output error (a negative result).
+ RTC_DCHECK_GE(written, 0);
+ // Ensure that the buffer size was sufficient (the name was not truncated).
+ RTC_DCHECK_LT(static_cast<size_t>(written), sizeof(filename));
+
+ *file = fopen(filename, "wb"); // result is stored unchecked; callers see whatever fopen returned
+}
+
+#endif // WEBRTC_AEC_DEBUG_DUMP
diff --git a/third_party/webrtc/src/webrtc/modules/audio_processing/logging/aec_logging_file_handling.h b/third_party/webrtc/src/webrtc/modules/audio_processing/logging/aec_logging_file_handling.h
new file mode 100644
index 00000000..5ec83948
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/modules/audio_processing/logging/aec_logging_file_handling.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_LOGGING_FILE_HANDLING_
+#define WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_LOGGING_FILE_HANDLING_
+
+#include <stdio.h>
+
+#include "webrtc/common_audio/wav_file.h"
+#include "webrtc/typedefs.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifdef WEBRTC_AEC_DEBUG_DUMP
+// Opens a new Wav file for writing. If it was already open with a different
+// sample frequency, it closes it first.
+void WebRtcAec_ReopenWav(const char* name,
+ int instance_index,
+ int process_rate,
+ int sample_rate,
+ rtc_WavWriter** wav_file);
+
+// Opens dumpfile with instance-specific filename.
+void WebRtcAec_RawFileOpen(const char* name, int instance_index, FILE** file);
+
+#endif // WEBRTC_AEC_DEBUG_DUMP
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_LOGGING_FILE_HANDLING_
diff --git a/third_party/webrtc/src/webrtc/modules/audio_processing/ns/defines.h b/third_party/webrtc/src/webrtc/modules/audio_processing/ns/defines.h
new file mode 100644
index 00000000..8271332c
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/modules/audio_processing/ns/defines.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_NS_MAIN_SOURCE_DEFINES_H_
+#define WEBRTC_MODULES_AUDIO_PROCESSING_NS_MAIN_SOURCE_DEFINES_H_
+
+#define BLOCKL_MAX 160 // max processing block length: 160
+#define ANAL_BLOCKL_MAX 256 // max analysis block length: 256
+#define HALF_ANAL_BLOCKL 129 // half max analysis block length + 1
+#define NUM_HIGH_BANDS_MAX 2 // max number of high bands: 2
+
+#define QUANTILE (float)0.25
+
+#define SIMULT 3
+#define END_STARTUP_LONG 200
+#define END_STARTUP_SHORT 50
+#define FACTOR (float)40.0
+#define WIDTH (float)0.01
+
+// Length of fft work arrays.
+#define IP_LENGTH (ANAL_BLOCKL_MAX >> 1) // must be at least ceil(2 + sqrt(ANAL_BLOCKL_MAX/2))
+#define W_LENGTH (ANAL_BLOCKL_MAX >> 1)
+
+//PARAMETERS FOR NEW METHOD
+#define DD_PR_SNR (float)0.98 // DD update of prior SNR
+#define LRT_TAVG (float)0.50 // tavg parameter for LRT (previously 0.90)
+#define SPECT_FL_TAVG (float)0.30 // tavg parameter for spectral flatness measure
+#define SPECT_DIFF_TAVG (float)0.30 // tavg parameter for spectral difference measure
+#define PRIOR_UPDATE (float)0.10 // update parameter of prior model
+#define NOISE_UPDATE (float)0.90 // update parameter for noise
+#define SPEECH_UPDATE (float)0.99 // update parameter when likely speech
+#define WIDTH_PR_MAP (float)4.0 // width parameter in sigmoid map for prior model
+#define LRT_FEATURE_THR (float)0.5 // default threshold for LRT feature
+#define SF_FEATURE_THR (float)0.5 // default threshold for Spectral Flatness feature
+#define SD_FEATURE_THR (float)0.5 // default threshold for Spectral Difference feature
+#define PROB_RANGE (float)0.20 // probability threshold for noise state in
+ // speech/noise likelihood
+#define HIST_PAR_EST 1000 // histogram size for estimation of parameters
+#define GAMMA_PAUSE (float)0.05 // update for conservative noise estimate
+//
+#define B_LIM (float)0.5 // threshold in final energy gain factor calculation
+#endif // WEBRTC_MODULES_AUDIO_PROCESSING_NS_MAIN_SOURCE_DEFINES_H_
diff --git a/third_party/webrtc/src/webrtc/modules/audio_processing/ns/include/noise_suppression.h b/third_party/webrtc/src/webrtc/modules/audio_processing/ns/include/noise_suppression.h
new file mode 100644
index 00000000..9dac56bd
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/modules/audio_processing/ns/include/noise_suppression.h
@@ -0,0 +1,116 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_NS_INCLUDE_NOISE_SUPPRESSION_H_
+#define WEBRTC_MODULES_AUDIO_PROCESSING_NS_INCLUDE_NOISE_SUPPRESSION_H_
+
+#include <stddef.h>
+
+#include "webrtc/typedefs.h"
+
+typedef struct NsHandleT NsHandle;
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * This function creates an instance of the floating point Noise Suppression.
+ */
+NsHandle* WebRtcNs_Create();
+
+/*
+ * This function frees the dynamic memory of a specified noise suppression
+ * instance.
+ *
+ * Input:
+ * - NS_inst : Pointer to NS instance that should be freed
+ */
+void WebRtcNs_Free(NsHandle* NS_inst);
+
+/*
+ * This function initializes a NS instance and has to be called before any other
+ * processing is made.
+ *
+ * Input:
+ * - NS_inst : Instance that should be initialized
+ * - fs : sampling frequency
+ *
+ * Output:
+ * - NS_inst : Initialized instance
+ *
+ * Return value : 0 - Ok
+ * -1 - Error
+ */
+int WebRtcNs_Init(NsHandle* NS_inst, uint32_t fs);
+
+/*
+ * This changes the aggressiveness of the noise suppression method.
+ *
+ * Input:
+ * - NS_inst : Noise suppression instance.
+ * - mode : 0: Mild, 1: Medium , 2: Aggressive
+ *
+ * Output:
+ * - NS_inst : Updated instance.
+ *
+ * Return value : 0 - Ok
+ * -1 - Error
+ */
+int WebRtcNs_set_policy(NsHandle* NS_inst, int mode);
+
+/*
+ * This function estimates the background noise for the inserted speech frame.
+ * The input and output signals should always be 10ms (80 or 160 samples).
+ *
+ * Input
+ * - NS_inst : Noise suppression instance.
+ * - spframe : Pointer to speech frame buffer for L band
+ *
+ * Output:
+ * - NS_inst : Updated NS instance
+ */
+void WebRtcNs_Analyze(NsHandle* NS_inst, const float* spframe);
+
+/*
+ * This function does Noise Suppression for the inserted speech frame. The
+ * input and output signals should always be 10ms (80 or 160 samples).
+ *
+ * Input
+ * - NS_inst : Noise suppression instance.
+ * - spframe : Pointer to speech frame buffer for each band
+ * - num_bands : Number of bands
+ *
+ * Output:
+ * - NS_inst : Updated NS instance
+ * - outframe : Pointer to output frame for each band
+ */
+void WebRtcNs_Process(NsHandle* NS_inst,
+ const float* const* spframe,
+ size_t num_bands,
+ float* const* outframe);
+
+/* Returns the internally used prior speech probability of the current frame.
+ * There is a frequency bin based one as well, with which this should not be
+ * confused.
+ *
+ * Input
+ * - handle : Noise suppression instance.
+ *
+ * Return value : Prior speech probability in interval [0.0, 1.0].
+ * -1 - NULL pointer or uninitialized instance.
+ */
+float WebRtcNs_prior_speech_probability(NsHandle* handle);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // WEBRTC_MODULES_AUDIO_PROCESSING_NS_INCLUDE_NOISE_SUPPRESSION_H_
diff --git a/third_party/webrtc/src/webrtc/modules/audio_processing/ns/include/noise_suppression_x.h b/third_party/webrtc/src/webrtc/modules/audio_processing/ns/include/noise_suppression_x.h
new file mode 100644
index 00000000..88fe4cd6
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/modules/audio_processing/ns/include/noise_suppression_x.h
@@ -0,0 +1,88 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_NS_INCLUDE_NOISE_SUPPRESSION_X_H_
+#define WEBRTC_MODULES_AUDIO_PROCESSING_NS_INCLUDE_NOISE_SUPPRESSION_X_H_
+
+#include "webrtc/typedefs.h"
+
+typedef struct NsxHandleT NsxHandle;
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * This function creates an instance of the fixed point Noise Suppression.
+ */
+NsxHandle* WebRtcNsx_Create();
+
+/*
+ * This function frees the dynamic memory of a specified Noise Suppression
+ * instance.
+ *
+ * Input:
+ * - nsxInst : Pointer to NS instance that should be freed
+ */
+void WebRtcNsx_Free(NsxHandle* nsxInst);
+
+/*
+ * This function initializes a NS instance
+ *
+ * Input:
+ * - nsxInst : Instance that should be initialized
+ * - fs : sampling frequency
+ *
+ * Output:
+ * - nsxInst : Initialized instance
+ *
+ * Return value : 0 - Ok
+ * -1 - Error
+ */
+int WebRtcNsx_Init(NsxHandle* nsxInst, uint32_t fs);
+
+/*
+ * This changes the aggressiveness of the noise suppression method.
+ *
+ * Input:
+ * - nsxInst : Noise suppression instance.
+ * - mode : 0: Mild, 1: Medium , 2: Aggressive
+ *
+ * Output:
+ * - nsxInst : Updated instance.
+ *
+ * Return value : 0 - Ok
+ * -1 - Error
+ */
+int WebRtcNsx_set_policy(NsxHandle* nsxInst, int mode);
+
+/*
+ * This function does noise suppression for the inserted speech frame. The
+ * input and output signals should always be 10ms (80 or 160 samples).
+ *
+ * Input
+ * - nsxInst : NSx instance. Needs to be initiated before call.
+ * - speechFrame : Pointer to speech frame buffer for each band
+ * - num_bands : Number of bands
+ *
+ * Output:
+ * - nsxInst : Updated NSx instance
+ * - outFrame : Pointer to output frame for each band
+ */
+void WebRtcNsx_Process(NsxHandle* nsxInst,
+ const short* const* speechFrame,
+ int num_bands,
+ short* const* outFrame);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // WEBRTC_MODULES_AUDIO_PROCESSING_NS_INCLUDE_NOISE_SUPPRESSION_X_H_
diff --git a/third_party/webrtc/src/webrtc/modules/audio_processing/ns/noise_suppression.c b/third_party/webrtc/src/webrtc/modules/audio_processing/ns/noise_suppression.c
new file mode 100644
index 00000000..13f1b2d6
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/modules/audio_processing/ns/noise_suppression.c
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/modules/audio_processing/ns/include/noise_suppression.h"
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+#include "webrtc/modules/audio_processing/ns/defines.h"
+#include "webrtc/modules/audio_processing/ns/ns_core.h"
+
+NsHandle* WebRtcNs_Create() { // allocates an NS instance; returns NULL when malloc fails
+ NoiseSuppressionC* self = malloc(sizeof(NoiseSuppressionC));
+ if (self != NULL) { self->initFlag = 0; } // marks the instance as not yet initialized
+ return (NsHandle*)self;
+}
+
+void WebRtcNs_Free(NsHandle* NS_inst) {
+ free(NS_inst); // releases the block malloc'ed by WebRtcNs_Create(); free(NULL) is a no-op
+}
+
+int WebRtcNs_Init(NsHandle* NS_inst, uint32_t fs) {
+ return WebRtcNs_InitCore((NoiseSuppressionC*)NS_inst, fs); // core's status is returned unchanged
+}
+
+int WebRtcNs_set_policy(NsHandle* NS_inst, int mode) {
+ return WebRtcNs_set_policy_core((NoiseSuppressionC*)NS_inst, mode); // core's status is returned unchanged
+}
+
+void WebRtcNs_Analyze(NsHandle* NS_inst, const float* spframe) {
+ WebRtcNs_AnalyzeCore((NoiseSuppressionC*)NS_inst, spframe); // opaque handle cast back to the concrete type
+}
+
+void WebRtcNs_Process(NsHandle* NS_inst,
+ const float* const* spframe,
+ size_t num_bands,
+ float* const* outframe) {
+ WebRtcNs_ProcessCore((NoiseSuppressionC*)NS_inst, spframe, num_bands,
+ outframe); // all arguments are forwarded as given; only the handle is cast
+}
+
+float WebRtcNs_prior_speech_probability(NsHandle* handle) {
+ const NoiseSuppressionC* inst = (const NoiseSuppressionC*)handle;
+
+ // -1 means no probability is available: the handle is NULL or the
+ // instance has not been initialized yet.
+ if (inst == NULL || inst->initFlag == 0) {
+ return -1;
+ }
+ return inst->priorSpeechProb;
+}
diff --git a/third_party/webrtc/src/webrtc/modules/audio_processing/ns/noise_suppression_x.c b/third_party/webrtc/src/webrtc/modules/audio_processing/ns/noise_suppression_x.c
new file mode 100644
index 00000000..150fe608
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/modules/audio_processing/ns/noise_suppression_x.c
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/modules/audio_processing/ns/include/noise_suppression_x.h"
+
+#include <stdlib.h>
+
+#include "webrtc/common_audio/signal_processing/include/real_fft.h"
+#include "webrtc/modules/audio_processing/ns/nsx_core.h"
+#include "webrtc/modules/audio_processing/ns/nsx_defines.h"
+
+NsxHandle* WebRtcNsx_Create() { // allocates a fixed-point NS instance; NULL when malloc fails
+ NoiseSuppressionFixedC* self = malloc(sizeof(NoiseSuppressionFixedC));
+ WebRtcSpl_Init();
+ if (self != NULL) { self->real_fft = NULL; self->initFlag = 0; }
+ // On allocation failure no field is written and NULL is handed back.
+ return (NsxHandle*)self;
+}
+
+void WebRtcNsx_Free(NsxHandle* nsxInst) {
+ if (nsxInst) { WebRtcSpl_FreeRealFFT(((NoiseSuppressionFixedC*)nsxInst)->real_fft); }
+ free(nsxInst); // free(NULL) is a no-op, so a NULL handle is now tolerated like in WebRtcNs_Free()
+}
+
+int WebRtcNsx_Init(NsxHandle* nsxInst, uint32_t fs) {
+ return WebRtcNsx_InitCore((NoiseSuppressionFixedC*)nsxInst, fs); // core's status is returned unchanged
+}
+
+int WebRtcNsx_set_policy(NsxHandle* nsxInst, int mode) {
+ return WebRtcNsx_set_policy_core((NoiseSuppressionFixedC*)nsxInst, mode); // core's status is returned unchanged
+}
+
+void WebRtcNsx_Process(NsxHandle* nsxInst,
+ const short* const* speechFrame,
+ int num_bands,
+ short* const* outFrame) {
+ WebRtcNsx_ProcessCore((NoiseSuppressionFixedC*)nsxInst, speechFrame,
+ num_bands, outFrame); // all arguments are forwarded as given; only the handle is cast
+}
diff --git a/third_party/webrtc/src/webrtc/modules/audio_processing/ns/ns_core.c b/third_party/webrtc/src/webrtc/modules/audio_processing/ns/ns_core.c
new file mode 100644
index 00000000..1d609140
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/modules/audio_processing/ns/ns_core.c
@@ -0,0 +1,1416 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <assert.h>
+#include <math.h>
+#include <string.h>
+#include <stdlib.h>
+
+#include "webrtc/common_audio/fft4g.h"
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+#include "webrtc/modules/audio_processing/ns/include/noise_suppression.h"
+#include "webrtc/modules/audio_processing/ns/ns_core.h"
+#include "webrtc/modules/audio_processing/ns/windows_private.h"
+
+// Fills self->featureExtractionParams with the fixed settings used when
+// thresholds and weights are extracted from the feature histograms.
+// Reads self->modelUpdatePars[1], so that entry must be set before the call.
+static void set_feature_extraction_parameters(NoiseSuppressionC* self) {
+  // A histogram peak must reach this weight for its feature to be accepted;
+  // the same criterion is used for spectral flatness and spectral difference.
+  const int minPeakWeight = (int)(0.3 * (self->modelUpdatePars[1]));
+
+  // LRT feature: histogram bin size, range over which the LRT threshold is
+  // averaged, fluctuation limit, and bounds on the resulting threshold.
+  self->featureExtractionParams.binSizeLrt = 0.1f;
+  self->featureExtractionParams.rangeAvgHistLrt = 1.f;
+  self->featureExtractionParams.thresFluctLrt = 0.05f;
+  self->featureExtractionParams.maxLrt = 1.f;
+  self->featureExtractionParams.minLrt = 0.2f;
+
+  // Spectral flatness feature: bin size, peak-position limit (flatness lies
+  // between 0 and 1), peak-merging limits, threshold bounds, accept criterion.
+  self->featureExtractionParams.binSizeSpecFlat = 0.05f;
+  self->featureExtractionParams.thresPosSpecFlat = 0.6f;
+  // Two highest peaks count as "close" when within two bins of each other.
+  self->featureExtractionParams.limitPeakSpacingSpecFlat =
+      2 * self->featureExtractionParams.binSizeSpecFlat;
+  // Relevance limit for the second peak.
+  self->featureExtractionParams.limitPeakWeightsSpecFlat = 0.5f;
+  self->featureExtractionParams.maxSpecFlat = 0.95f;
+  self->featureExtractionParams.minSpecFlat = 0.1f;
+  self->featureExtractionParams.thresWeightSpecFlat = minPeakWeight;
+
+  // Spectral difference feature: same set of settings as for flatness.
+  self->featureExtractionParams.binSizeSpecDiff = 0.1f;
+  self->featureExtractionParams.limitPeakSpacingSpecDiff =
+      2 * self->featureExtractionParams.binSizeSpecDiff;
+  self->featureExtractionParams.limitPeakWeightsSpecDiff = 0.5f;
+  self->featureExtractionParams.maxSpecDiff = 1.f;
+  self->featureExtractionParams.minSpecDiff = 0.16f;
+  self->featureExtractionParams.thresWeightSpecDiff = minPeakWeight;
+
+  // Scale factors applied to the dominant histogram peaks to obtain the
+  // thresholds of the prior model:
+  // factor1 for LRT and spectral difference,
+  // factor2 for spectral flatness (used when noise is flatter than speech).
+  self->featureExtractionParams.factor1ModelPars = 1.2f;
+  self->featureExtractionParams.factor2ModelPars = 0.9f;
+}
+
+// Resets |self| to its start-up state for the sampling rate |fs|.
+// Returns 0 on success. Returns -1, leaving |self| unmodified, when |self| is
+// NULL or |fs| is not one of 8000, 16000, 32000 or 48000.
+int WebRtcNs_InitCore(NoiseSuppressionC* self, uint32_t fs) {
+  int i;
+
+  if (self == NULL) {
+    return -1;
+  }
+  if (fs != 8000 && fs != 16000 && fs != 32000 && fs != 48000) {
+    return -1;
+  }
+
+  self->fs = fs;
+  self->windShift = 0;
+
+  // Only 10ms frames are supported: 8000 gets the short block/window pair,
+  // every other accepted rate shares the long one.
+  if (fs != 8000) {
+    self->blockLen = 160;
+    self->anaLen = 256;
+    self->window = kBlocks160w256;
+  } else {
+    self->blockLen = 80;
+    self->anaLen = 128;
+    self->window = kBlocks80w128;
+  }
+  // Number of frequency bins.
+  self->magnLen = self->anaLen / 2 + 1;
+
+  // FFT work arrays. Clearing ip[0] triggers initialization inside the
+  // transform, which is run once here on an all-zero buffer.
+  self->ip[0] = 0;
+  memset(self->dataBuf, 0, sizeof(float) * ANAL_BLOCKL_MAX);
+  WebRtc_rdft(self->anaLen, 1, self->dataBuf, self->ip, self->wfft);
+
+  // Time-domain buffers, including the ones used for HB processing.
+  memset(self->analyzeBuf, 0, sizeof(float) * ANAL_BLOCKL_MAX);
+  memset(self->dataBuf, 0, sizeof(float) * ANAL_BLOCKL_MAX);
+  memset(self->syntBuf, 0, sizeof(float) * ANAL_BLOCKL_MAX);
+  memset(self->dataBufHB,
+         0,
+         sizeof(float) * NUM_HIGH_BANDS_MAX * ANAL_BLOCKL_MAX);
+
+  // Quantile noise estimation state.
+  memset(self->quantile, 0, sizeof(float) * HALF_ANAL_BLOCKL);
+  for (i = 0; i < SIMULT * HALF_ANAL_BLOCKL; i++) {
+    self->lquantile[i] = 8.f;
+    self->density[i] = 0.3f;
+  }
+  // Give each simultaneous estimate its own starting counter value.
+  for (i = 0; i < SIMULT; i++) {
+    self->counter[i] =
+        (int)floor((float)(END_STARTUP_LONG * (i + 1)) / (float)SIMULT);
+  }
+  self->updates = 0;
+
+  // Aggressiveness: default.
+  self->aggrMode = 0;
+
+  // Per-bin spectra used by the speech/noise model, all starting at zero:
+  // previous analyze/process magnitude, current and previous noise,
+  // conservative noise estimate, speech probability (for HB estimation in the
+  // second pass) and initial average magnitude.
+  memset(self->magnPrevAnalyze, 0, sizeof(float) * HALF_ANAL_BLOCKL);
+  memset(self->magnPrevProcess, 0, sizeof(float) * HALF_ANAL_BLOCKL);
+  memset(self->noise, 0, sizeof(float) * HALF_ANAL_BLOCKL);
+  memset(self->noisePrev, 0, sizeof(float) * HALF_ANAL_BLOCKL);
+  memset(self->magnAvgPause, 0, sizeof(float) * HALF_ANAL_BLOCKL);
+  memset(self->speechProb, 0, sizeof(float) * HALF_ANAL_BLOCKL);
+  memset(self->initMagnEst, 0, sizeof(float) * HALF_ANAL_BLOCKL);
+  for (i = 0; i < HALF_ANAL_BLOCKL; i++) {
+    // Wiener filter starts as all-pass.
+    self->smooth[i] = 1.f;
+    // Smoothed LR starts on its threshold.
+    self->logLrtTimeAvg[i] = LRT_FEATURE_THR;
+  }
+  // Prior probability for speech/noise.
+  self->priorSpeechProb = 0.5f;
+
+  // Feature quantities.
+  self->featureData[0] = SF_FEATURE_THR;   // Spectral flatness: on threshold.
+  self->featureData[1] = 0.f;  // Spectral entropy: not used in this version.
+  self->featureData[2] = 0.f;  // Spectral variance: not used in this version.
+  self->featureData[3] = LRT_FEATURE_THR;  // Average LRT: on threshold.
+  self->featureData[4] = SF_FEATURE_THR;   // Template diff: on threshold.
+  self->featureData[5] = 0.f;  // Normalization for spectral difference.
+  self->featureData[6] = 0.f;  // Window time-average of input magnitude.
+
+  // Histograms used to estimate/update the feature thresholds.
+  memset(self->histLrt, 0, sizeof(int) * HIST_PAR_EST);
+  memset(self->histSpecFlat, 0, sizeof(int) * HIST_PAR_EST);
+  memset(self->histSpecDiff, 0, sizeof(int) * HIST_PAR_EST);
+
+  // Frame counter.
+  self->blockInd = -1;
+
+  // Prior model parameters.
+  self->priorModelPars[0] = LRT_FEATURE_THR;  // Default LRT threshold.
+  self->priorModelPars[1] = 0.5f;  // Flatness threshold: determined on-line.
+  self->priorModelPars[2] = 1.f;   // sgn_map par: 1 for flatness measure.
+  self->priorModelPars[3] = 0.5f;  // Template-diff threshold: on-line.
+  self->priorModelPars[4] = 1.f;   // Default weight for LRT feature.
+  self->priorModelPars[5] = 0.f;   // Default weight for spectral flatness.
+  self->priorModelPars[6] = 0.f;   // Default weight for spectral difference.
+
+  // Model update control.
+  // [0]: 0 = no update, 1 = update once, 2 = update every window.
+  self->modelUpdatePars[0] = 2;
+  // [1]: window for update.
+  self->modelUpdatePars[1] = 500;
+  // [2]: counter for update of conservative noise spectrum.
+  self->modelUpdatePars[2] = 0;
+  // [3]: countdown until the feature thresholds are next updated.
+  self->modelUpdatePars[3] = self->modelUpdatePars[1];
+
+  self->signalEnergy = 0.0;
+  self->sumMagn = 0.0;
+  self->whiteNoiseLevel = 0.0;
+  self->pinkNoiseNumerator = 0.0;
+  self->pinkNoiseExp = 0.0;
+
+  // Must run after modelUpdatePars[1] has been set (it is read there).
+  set_feature_extraction_parameters(self);
+
+  // Default mode.
+  WebRtcNs_set_policy_core(self, 0);
+
+  self->initFlag = 1;
+  return 0;
+}
+
+// Quantile-based noise estimation.
+// Inputs:
+//   * |magn| is the magnitude spectrum of the current frame.
+// Output:
+//   * |noise| receives a copy of self->quantile after the update.
+// Updates the log-quantile, density and counter state of all SIMULT
+// simultaneous estimates held in |self|.
+static void NoiseEstimation(NoiseSuppressionC* self,
+                            float* magn,
+                            float* noise) {
+  size_t bin, est, offset;
+  float logMagn[HALF_ANAL_BLOCKL], step;
+
+  if (self->updates < END_STARTUP_LONG) {
+    self->updates++;
+  }
+
+  for (bin = 0; bin < self->magnLen; bin++) {
+    logMagn[bin] = (float)log(magn[bin]);
+  }
+
+  // One pass per simultaneous estimate; each owns a magnLen-sized slice of
+  // the lquantile/density arrays starting at |offset|.
+  for (est = 0; est < SIMULT; est++) {
+    // The counter is constant while the bins of this estimate are updated.
+    const float countPlusOne = (float)(self->counter[est] + 1);
+    offset = est * self->magnLen;
+
+    for (bin = 0; bin < self->magnLen; bin++) {
+      float* const logQuantile = &self->lquantile[offset + bin];
+      float* const density = &self->density[offset + bin];
+
+      // Step size shrinks once the density estimate exceeds one.
+      step = (*density > 1.0) ? FACTOR * 1.f / *density : FACTOR;
+
+      // Move the log quantile towards the observation.
+      if (logMagn[bin] > *logQuantile) {
+        *logQuantile += QUANTILE * step / countPlusOne;
+      } else {
+        *logQuantile -= (1.f - QUANTILE) * step / countPlusOne;
+      }
+
+      // Refresh the density only when the observation is close to the
+      // (already updated) quantile.
+      if (fabs(logMagn[bin] - *logQuantile) < WIDTH) {
+        *density =
+            ((float)self->counter[est] * *density + 1.f / (2.f * WIDTH)) /
+            countPlusOne;
+      }
+    }
+
+    // When this estimate's counter runs out, restart it and, once start-up is
+    // over, publish its quantile.
+    if (self->counter[est] >= END_STARTUP_LONG) {
+      self->counter[est] = 0;
+      if (self->updates >= END_STARTUP_LONG) {
+        for (bin = 0; bin < self->magnLen; bin++) {
+          self->quantile[bin] = (float)exp(self->lquantile[offset + bin]);
+        }
+      }
+    }
+
+    self->counter[est]++;
+  }
+
+  // During start-up publish the last estimate on every call (|offset| still
+  // refers to it) so the noise differs from zero.
+  if (self->updates < END_STARTUP_LONG) {
+    for (bin = 0; bin < self->magnLen; bin++) {
+      self->quantile[bin] = (float)exp(self->lquantile[offset + bin]);
+    }
+  }
+
+  for (bin = 0; bin < self->magnLen; bin++) {
+    noise[bin] = self->quantile[bin];
+  }
+}
+
+// Extract thresholds for feature parameters.
+// Histograms are computed over some window size (given by
+// self->modelUpdatePars[1]).
+// Thresholds and weights are extracted every window.
+// |flag| = 0 updates histogram only, |flag| = 1 computes the threshold/weights.
+// Any other |flag| value leaves |self| untouched.
+// Threshold and weights are returned in: self->priorModelPars
+// ([0] LRT threshold, [1] flatness threshold, [3] difference threshold,
+// [4]..[6] feature weights).
+static void FeatureParameterExtraction(NoiseSuppressionC* self, int flag) {
+ int i, useFeatureSpecFlat, useFeatureSpecDiff, numHistLrt;
+ int maxPeak1, maxPeak2;
+ int weightPeak1SpecFlat, weightPeak2SpecFlat, weightPeak1SpecDiff,
+ weightPeak2SpecDiff;
+
+ float binMid, featureSum;
+ float posPeak1SpecFlat, posPeak2SpecFlat, posPeak1SpecDiff, posPeak2SpecDiff;
+ float fluctLrt, avgHistLrt, avgSquareHistLrt, avgHistLrtCompl;
+
+ // 3 features: LRT, flatness, difference.
+ // lrt_feature = self->featureData[3];
+ // flat_feature = self->featureData[0];
+ // diff_feature = self->featureData[4];
+
+ // Update histograms.
+ // Each feature value is counted only if it lies in
+ // [0, HIST_PAR_EST * binSize); values outside that range are ignored.
+ if (flag == 0) {
+ // LRT
+ if ((self->featureData[3] <
+ HIST_PAR_EST * self->featureExtractionParams.binSizeLrt) &&
+ (self->featureData[3] >= 0.0)) {
+ i = (int)(self->featureData[3] /
+ self->featureExtractionParams.binSizeLrt);
+ self->histLrt[i]++;
+ }
+ // Spectral flatness.
+ if ((self->featureData[0] <
+ HIST_PAR_EST * self->featureExtractionParams.binSizeSpecFlat) &&
+ (self->featureData[0] >= 0.0)) {
+ i = (int)(self->featureData[0] /
+ self->featureExtractionParams.binSizeSpecFlat);
+ self->histSpecFlat[i]++;
+ }
+ // Spectral difference.
+ if ((self->featureData[4] <
+ HIST_PAR_EST * self->featureExtractionParams.binSizeSpecDiff) &&
+ (self->featureData[4] >= 0.0)) {
+ i = (int)(self->featureData[4] /
+ self->featureExtractionParams.binSizeSpecDiff);
+ self->histSpecDiff[i]++;
+ }
+ }
+
+ // Extract parameters for speech/noise probability.
+ if (flag == 1) {
+ // LRT feature: compute the average over
+ // self->featureExtractionParams.rangeAvgHistLrt.
+ // avgHistLrt only covers bins whose mid-point is within that range, while
+ // avgHistLrtCompl and avgSquareHistLrt cover the complete histogram.
+ avgHistLrt = 0.0;
+ avgHistLrtCompl = 0.0;
+ avgSquareHistLrt = 0.0;
+ numHistLrt = 0;
+ for (i = 0; i < HIST_PAR_EST; i++) {
+ binMid = ((float)i + 0.5f) * self->featureExtractionParams.binSizeLrt;
+ if (binMid <= self->featureExtractionParams.rangeAvgHistLrt) {
+ avgHistLrt += self->histLrt[i] * binMid;
+ numHistLrt += self->histLrt[i];
+ }
+ avgSquareHistLrt += self->histLrt[i] * binMid * binMid;
+ avgHistLrtCompl += self->histLrt[i] * binMid;
+ }
+ // The restricted average is normalized by the number of counted entries;
+ // the complete-histogram sums are normalized by the window length.
+ if (numHistLrt > 0) {
+ avgHistLrt = avgHistLrt / ((float)numHistLrt);
+ }
+ avgHistLrtCompl = avgHistLrtCompl / ((float)self->modelUpdatePars[1]);
+ avgSquareHistLrt = avgSquareHistLrt / ((float)self->modelUpdatePars[1]);
+ fluctLrt = avgSquareHistLrt - avgHistLrt * avgHistLrtCompl;
+ // Get threshold for LRT feature.
+ if (fluctLrt < self->featureExtractionParams.thresFluctLrt) {
+ // Very low fluctuation, so likely noise.
+ self->priorModelPars[0] = self->featureExtractionParams.maxLrt;
+ } else {
+ self->priorModelPars[0] =
+ self->featureExtractionParams.factor1ModelPars * avgHistLrt;
+ // Check if value is within min/max range.
+ if (self->priorModelPars[0] < self->featureExtractionParams.minLrt) {
+ self->priorModelPars[0] = self->featureExtractionParams.minLrt;
+ }
+ if (self->priorModelPars[0] > self->featureExtractionParams.maxLrt) {
+ self->priorModelPars[0] = self->featureExtractionParams.maxLrt;
+ }
+ }
+ // Done with LRT feature.
+
+ // For spectral flatness and spectral difference: compute the main peaks of
+ // histogram.
+ // "Peak" here means the two largest bin counts seen while scanning the
+ // histogram in increasing bin order; ties keep the earlier bin because the
+ // comparisons are strict.
+ maxPeak1 = 0;
+ maxPeak2 = 0;
+ posPeak1SpecFlat = 0.0;
+ posPeak2SpecFlat = 0.0;
+ weightPeak1SpecFlat = 0;
+ weightPeak2SpecFlat = 0;
+
+ // Peaks for flatness.
+ for (i = 0; i < HIST_PAR_EST; i++) {
+ binMid =
+ (i + 0.5f) * self->featureExtractionParams.binSizeSpecFlat;
+ if (self->histSpecFlat[i] > maxPeak1) {
+ // Found new "first" peak.
+ maxPeak2 = maxPeak1;
+ weightPeak2SpecFlat = weightPeak1SpecFlat;
+ posPeak2SpecFlat = posPeak1SpecFlat;
+
+ maxPeak1 = self->histSpecFlat[i];
+ weightPeak1SpecFlat = self->histSpecFlat[i];
+ posPeak1SpecFlat = binMid;
+ } else if (self->histSpecFlat[i] > maxPeak2) {
+ // Found new "second" peak.
+ maxPeak2 = self->histSpecFlat[i];
+ weightPeak2SpecFlat = self->histSpecFlat[i];
+ posPeak2SpecFlat = binMid;
+ }
+ }
+
+ // Compute two peaks for spectral difference.
+ // maxPeak1/maxPeak2 are reused as scratch; the flatness results live on in
+ // the weightPeak*SpecFlat / posPeak*SpecFlat variables.
+ maxPeak1 = 0;
+ maxPeak2 = 0;
+ posPeak1SpecDiff = 0.0;
+ posPeak2SpecDiff = 0.0;
+ weightPeak1SpecDiff = 0;
+ weightPeak2SpecDiff = 0;
+ // Peaks for spectral difference.
+ for (i = 0; i < HIST_PAR_EST; i++) {
+ binMid =
+ ((float)i + 0.5f) * self->featureExtractionParams.binSizeSpecDiff;
+ if (self->histSpecDiff[i] > maxPeak1) {
+ // Found new "first" peak.
+ maxPeak2 = maxPeak1;
+ weightPeak2SpecDiff = weightPeak1SpecDiff;
+ posPeak2SpecDiff = posPeak1SpecDiff;
+
+ maxPeak1 = self->histSpecDiff[i];
+ weightPeak1SpecDiff = self->histSpecDiff[i];
+ posPeak1SpecDiff = binMid;
+ } else if (self->histSpecDiff[i] > maxPeak2) {
+ // Found new "second" peak.
+ maxPeak2 = self->histSpecDiff[i];
+ weightPeak2SpecDiff = self->histSpecDiff[i];
+ posPeak2SpecDiff = binMid;
+ }
+ }
+
+ // For spectrum flatness feature.
+ useFeatureSpecFlat = 1;
+ // Merge the two peaks if they are close.
+ // Merging adds the weights and moves the position to the mid-point.
+ if ((fabs(posPeak2SpecFlat - posPeak1SpecFlat) <
+ self->featureExtractionParams.limitPeakSpacingSpecFlat) &&
+ (weightPeak2SpecFlat >
+ self->featureExtractionParams.limitPeakWeightsSpecFlat *
+ weightPeak1SpecFlat)) {
+ weightPeak1SpecFlat += weightPeak2SpecFlat;
+ posPeak1SpecFlat = 0.5f * (posPeak1SpecFlat + posPeak2SpecFlat);
+ }
+ // Reject if weight of peaks is not large enough, or peak value too small.
+ if (weightPeak1SpecFlat <
+ self->featureExtractionParams.thresWeightSpecFlat ||
+ posPeak1SpecFlat < self->featureExtractionParams.thresPosSpecFlat) {
+ useFeatureSpecFlat = 0;
+ }
+ // If selected, get the threshold.
+ // When rejected, priorModelPars[1] keeps its previous value.
+ if (useFeatureSpecFlat == 1) {
+ // Compute the threshold.
+ self->priorModelPars[1] =
+ self->featureExtractionParams.factor2ModelPars * posPeak1SpecFlat;
+ // Check if value is within min/max range.
+ if (self->priorModelPars[1] < self->featureExtractionParams.minSpecFlat) {
+ self->priorModelPars[1] = self->featureExtractionParams.minSpecFlat;
+ }
+ if (self->priorModelPars[1] > self->featureExtractionParams.maxSpecFlat) {
+ self->priorModelPars[1] = self->featureExtractionParams.maxSpecFlat;
+ }
+ }
+ // Done with flatness feature.
+
+ // For template feature.
+ useFeatureSpecDiff = 1;
+ // Merge the two peaks if they are close.
+ if ((fabs(posPeak2SpecDiff - posPeak1SpecDiff) <
+ self->featureExtractionParams.limitPeakSpacingSpecDiff) &&
+ (weightPeak2SpecDiff >
+ self->featureExtractionParams.limitPeakWeightsSpecDiff *
+ weightPeak1SpecDiff)) {
+ weightPeak1SpecDiff += weightPeak2SpecDiff;
+ posPeak1SpecDiff = 0.5f * (posPeak1SpecDiff + posPeak2SpecDiff);
+ }
+ // Get the threshold value.
+ // Unlike the flatness threshold, priorModelPars[3] is always rewritten,
+ // even when the feature itself ends up being rejected below.
+ self->priorModelPars[3] =
+ self->featureExtractionParams.factor1ModelPars * posPeak1SpecDiff;
+ // Reject if weight of peaks is not large enough.
+ if (weightPeak1SpecDiff <
+ self->featureExtractionParams.thresWeightSpecDiff) {
+ useFeatureSpecDiff = 0;
+ }
+ // Check if value is within min/max range.
+ if (self->priorModelPars[3] < self->featureExtractionParams.minSpecDiff) {
+ self->priorModelPars[3] = self->featureExtractionParams.minSpecDiff;
+ }
+ if (self->priorModelPars[3] > self->featureExtractionParams.maxSpecDiff) {
+ self->priorModelPars[3] = self->featureExtractionParams.maxSpecDiff;
+ }
+ // Done with spectral difference feature.
+
+ // Don't use template feature if fluctuation of LRT feature is very low:
+ // most likely just noise state.
+ if (fluctLrt < self->featureExtractionParams.thresFluctLrt) {
+ useFeatureSpecDiff = 0;
+ }
+
+ // Select the weights between the features.
+ // self->priorModelPars[4] is weight for LRT: always selected.
+ // self->priorModelPars[5] is weight for spectral flatness.
+ // self->priorModelPars[6] is weight for spectral difference.
+ // Every selected feature gets the same weight and the weights sum to one.
+ featureSum = (float)(1 + useFeatureSpecFlat + useFeatureSpecDiff);
+ self->priorModelPars[4] = 1.f / featureSum;
+ self->priorModelPars[5] = ((float)useFeatureSpecFlat) / featureSum;
+ self->priorModelPars[6] = ((float)useFeatureSpecDiff) / featureSum;
+
+ // Set hists to zero for next update.
+ if (self->modelUpdatePars[0] >= 1) {
+ for (i = 0; i < HIST_PAR_EST; i++) {
+ self->histLrt[i] = 0;
+ self->histSpecFlat[i] = 0;
+ self->histSpecDiff[i] = 0;
+ }
+ }
+ } // End of flag == 1.
+}
+
+// Updates the time-averaged spectral flatness in self->featureData[0].
+// |magnIn| is the magnitude spectrum; self->sumMagn supplies its sum.
+// Flatness is the ratio of the geometric to the arithmetic mean, with the
+// first |kSkippedBins| bin(s) excluded from both sums.
+static void ComputeSpectralFlatness(NoiseSuppressionC* self,
+                                    const float* magnIn) {
+  // Option to remove first bin(s) from spectral measures.
+  const size_t kSkippedBins = 1;
+  size_t bin;
+  float logSum = 0.0;
+  float magnSum = self->sumMagn;
+  float flatness;
+
+  for (bin = 0; bin < kSkippedBins; bin++) {
+    magnSum -= magnIn[bin];
+  }
+
+  // Sum of logs for the geometric mean. A bin that is not strictly positive
+  // would mean log(0): decay the feature towards zero and stop instead.
+  for (bin = kSkippedBins; bin < self->magnLen; bin++) {
+    if (!(magnIn[bin] > 0.0)) {
+      self->featureData[0] -= SPECT_FL_TAVG * self->featureData[0];
+      return;
+    }
+    logSum += (float)log(magnIn[bin]);
+  }
+
+  // Normalize both sums by the number of frequency bins.
+  magnSum = magnSum / self->magnLen;
+  logSum = logSum / self->magnLen;
+
+  // Inverse log of the numerator over the arithmetic mean.
+  flatness = (float)exp(logSum) / magnSum;
+
+  // Time-avg update of spectral flatness feature.
+  self->featureData[0] += SPECT_FL_TAVG * (flatness - self->featureData[0]);
+}
+
+// Computes the post SNR and the decision-directed (DD) prior SNR per bin.
+// Inputs:
+//   * |magn| is the signal magnitude spectrum estimate.
+//   * |noise| is the magnitude noise spectrum estimate.
+// Outputs:
+//   * |snrLocPrior| is the computed prior SNR.
+//   * |snrLocPost| is the computed post SNR.
+// Reads the previous-frame state (magnPrevAnalyze, noisePrev, smooth) from
+// |self| without modifying it.
+static void ComputeSnr(const NoiseSuppressionC* self,
+                       const float* magn,
+                       const float* noise,
+                       float* snrLocPrior,
+                       float* snrLocPost) {
+  size_t bin;
+
+  for (bin = 0; bin < self->magnLen; bin++) {
+    // Estimate carried over from the previous frame, gain filter applied.
+    const float prevEstimate = self->magnPrevAnalyze[bin] /
+                               (self->noisePrev[bin] + 0.0001f) *
+                               self->smooth[bin];
+
+    // Post SNR: zero unless the signal exceeds the noise estimate.
+    snrLocPost[bin] = (magn[bin] > noise[bin])
+                          ? magn[bin] / (noise[bin] + 0.0001f) - 1.f
+                          : 0.f;
+
+    // DD update: weighted sum of the previous and the current estimate.
+    snrLocPrior[bin] =
+        DD_PR_SNR * prevEstimate + (1.f - DD_PR_SNR) * snrLocPost[bin];
+  }
+}
+
+// Updates the spectral-difference feature between the input spectrum and the
+// learned noise template.
+// |magnIn| is the input spectrum; the template is self->magnAvgPause.
+// The measure is
+//   var(magnIn) - cov(magnIn, magnAvgPause)^2 / var(magnAvgPause),
+// normalized by self->featureData[5] and time-averaged into
+// self->featureData[4]. self->featureData[6] accumulates self->signalEnergy.
+static void ComputeSpectralDifference(NoiseSuppressionC* self,
+                                      const float* magnIn) {
+  size_t bin;
+  float meanPause = 0.0;
+  float meanMagn = self->sumMagn;
+  float cov = 0.0;
+  float varPause = 0.0;
+  float varMagn = 0.0;
+  float diff;
+
+  // Means of the template (conservative smooth noise spectrum from pause
+  // frames) and of the input.
+  for (bin = 0; bin < self->magnLen; bin++) {
+    meanPause += self->magnAvgPause[bin];
+  }
+  meanPause /= self->magnLen;
+  meanMagn /= self->magnLen;
+
+  // Variances and covariance around those means.
+  for (bin = 0; bin < self->magnLen; bin++) {
+    const float devMagn = magnIn[bin] - meanMagn;
+    const float devPause = self->magnAvgPause[bin] - meanPause;
+
+    cov += devMagn * devPause;
+    varPause += devPause * devPause;
+    varMagn += devMagn * devMagn;
+  }
+  cov /= self->magnLen;
+  varPause /= self->magnLen;
+  varMagn /= self->magnLen;
+
+  // Update of average magnitude spectrum.
+  self->featureData[6] += self->signalEnergy;
+
+  diff = varMagn - (cov * cov) / (varPause + 0.0001f);
+  // Normalize and compute time-avg update of difference feature.
+  diff = (float)(diff / (self->featureData[5] + 0.0001f));
+  self->featureData[4] += SPECT_DIFF_TAVG * (diff - self->featureData[4]);
+}
+
+// Compute speech/noise probability.
+// Inputs:
+//   * |snrLocPrior| is the prior SNR for each frequency.
+//   * |snrLocPost| is the post SNR for each frequency.
+// Output:
+//   * |probSpeechFinal| receives the speech probability for each frequency.
+// Also updates self->logLrtTimeAvg, self->featureData[3] and
+// self->priorSpeechProb; thresholds and weights come from
+// self->priorModelPars.
+static void SpeechNoiseProb(NoiseSuppressionC* self,
+                            float* probSpeechFinal,
+                            const float* snrLocPrior,
+                            const float* snrLocPost) {
+  // Widths of the tanh maps. Pause regions have a lower range, so they use a
+  // larger width; the spectral-difference measure has its own pause width.
+  const float widthDefault = WIDTH_PR_MAP;
+  const float widthPause = 2.f * WIDTH_PR_MAP;
+  const float widthPauseDiff = 2.f * WIDTH_PR_MAP;
+
+  // Thresholds for the LRT, flatness and difference features.
+  const float lrtThreshold = self->priorModelPars[0];
+  const float flatThreshold = self->priorModelPars[1];
+  const float diffThreshold = self->priorModelPars[3];
+
+  // Sign for flatness feature.
+  const int flatSign = (int)(self->priorModelPars[2]);
+
+  // Weights for the LRT, flatness and difference features.
+  const float lrtWeight = self->priorModelPars[4];
+  const float flatWeight = self->priorModelPars[5];
+  const float diffWeight = self->priorModelPars[6];
+
+  size_t bin;
+  float width, feature;
+  float lrtIndicator, flatIndicator, diffIndicator;
+  float combinedIndicator, priorGain;
+  float lrtAvg = 0.0;
+
+  // Average LR factor: mean over all frequencies of the smoothed log LRT,
+  // which is itself updated here.
+  for (bin = 0; bin < self->magnLen; bin++) {
+    const float onePlusTwoPrior = 1.f + 2.f * snrLocPrior[bin];
+    const float priorRatio =
+        2.f * snrLocPrior[bin] / (onePlusTwoPrior + 0.0001f);
+    const float bessel = (snrLocPost[bin] + 1.f) * priorRatio;
+
+    self->logLrtTimeAvg[bin] +=
+        LRT_TAVG *
+        (bessel - (float)log(onePlusTwoPrior) - self->logLrtTimeAvg[bin]);
+    lrtAvg += self->logLrtTimeAvg[bin];
+  }
+  lrtAvg = (float)lrtAvg / (self->magnLen);
+  self->featureData[3] = lrtAvg;
+
+  // Indicator functions: each feature goes through a sigmoid (tanh) map.
+
+  // Average LRT feature.
+  width = (lrtAvg < lrtThreshold) ? widthPause : widthDefault;
+  lrtIndicator =
+      0.5f * ((float)tanh(width * (lrtAvg - lrtThreshold)) + 1.f);
+
+  // Spectral flatness feature; which side of the threshold counts as the
+  // pause region depends on the sign.
+  feature = self->featureData[0];
+  width = widthDefault;
+  if ((flatSign == 1 && (feature > flatThreshold)) ||
+      (flatSign == -1 && (feature < flatThreshold))) {
+    width = widthPause;
+  }
+  flatIndicator =
+      0.5f *
+      ((float)tanh((float)flatSign * width * (flatThreshold - feature)) +
+       1.f);
+
+  // Template spectrum-difference feature.
+  feature = self->featureData[4];
+  width = (feature < diffThreshold) ? widthPauseDiff : widthDefault;
+  diffIndicator =
+      0.5f * ((float)tanh(width * (feature - diffThreshold)) + 1.f);
+
+  // Combine the indicator functions with the feature weights.
+  combinedIndicator = lrtWeight * lrtIndicator + flatWeight * flatIndicator +
+                      diffWeight * diffIndicator;
+
+  // Smooth the prior speech probability and keep it within [0.01, 1].
+  self->priorSpeechProb +=
+      PRIOR_UPDATE * (combinedIndicator - self->priorSpeechProb);
+  if (self->priorSpeechProb > 1.f) {
+    self->priorSpeechProb = 1.f;
+  } else if (self->priorSpeechProb < 0.01f) {
+    self->priorSpeechProb = 0.01f;
+  }
+
+  // Final speech probability: combine the prior model with the LR factor.
+  priorGain =
+      (1.f - self->priorSpeechProb) / (self->priorSpeechProb + 0.0001f);
+  for (bin = 0; bin < self->magnLen; bin++) {
+    float invLrt = (float)exp(-self->logLrtTimeAvg[bin]);
+    invLrt = (float)priorGain * invLrt;
+    probSpeechFinal[bin] = 1.f / (1.f + invLrt);
+  }
+}
+
+// Refreshes the noise features for the current frame and, when enabled,
+// drives the windowed re-estimation of the feature thresholds and weights.
+// Inputs:
+//   * |magn| is the signal magnitude spectrum estimate.
+//   * |updateParsFlag| is an update flag for parameters: below 1 disables the
+//     windowed update, 1 updates once, anything larger updates every window.
+static void FeatureUpdate(NoiseSuppressionC* self,
+                          const float* magn,
+                          int updateParsFlag) {
+  // Spectral flatness of the input spectrum.
+  ComputeSpectralFlatness(self, magn);
+  // Difference of the input spectrum to the learned/estimated noise spectrum.
+  ComputeSpectralDifference(self, magn);
+
+  if (updateParsFlag < 1) {
+    return;
+  }
+
+  // self->modelUpdatePars[3] counts down the frames left in the current
+  // window, whose length is self->modelUpdatePars[1].
+  self->modelUpdatePars[3]--;
+
+  if (self->modelUpdatePars[3] > 0) {
+    // Still inside the window: only feed the histograms.
+    FeatureParameterExtraction(self, 0);
+  } else if (self->modelUpdatePars[3] == 0) {
+    // Window complete: compute the model parameters and restart the window.
+    FeatureParameterExtraction(self, 1);
+    self->modelUpdatePars[3] = self->modelUpdatePars[1];
+
+    if (updateParsFlag == 1) {
+      // Update only once: clear the flag.
+      self->modelUpdatePars[0] = 0;
+    } else {
+      // Update every window: derive the spectral-difference normalization
+      // for the next window from the accumulated value, then reset it.
+      self->featureData[6] =
+          self->featureData[6] / ((float)self->modelUpdatePars[1]);
+      self->featureData[5] =
+          0.5f * (self->featureData[6] + self->featureData[5]);
+      self->featureData[6] = 0.f;
+    }
+  }
+}
+
+// Update the noise estimate.
+// Inputs:
+// * |magn| is the signal magnitude spectrum estimate.
+// * |snrLocPrior| is the prior SNR.
+// * |snrLocPost| is the post SNR.
+// Output:
+// * |noise| is the updated noise magnitude spectrum estimate.
+// The per-bin speech probability is taken from self->speechProb and the
+// previous noise estimate from self->noisePrev. self->magnAvgPause is updated
+// for bins that are likely noise. |snrLocPrior| and |snrLocPost| are not read
+// by this function.
+static void UpdateNoiseEstimate(NoiseSuppressionC* self,
+ const float* magn,
+ const float* snrLocPrior,
+ const float* snrLocPost,
+ float* noise) {
+ size_t i;
+ float probSpeech, probNonSpeech;
+ // Time-avg parameter for noise update.
+ // NOTE: this is only initialized once, before the loop, so inside the loop
+ // the temporary update of bin i uses the value selected for bin i - 1.
+ float gammaNoiseTmp = NOISE_UPDATE;
+ float gammaNoiseOld;
+ float noiseUpdateTmp;
+
+ for (i = 0; i < self->magnLen; i++) {
+ probSpeech = self->speechProb[i];
+ probNonSpeech = 1.f - probSpeech;
+ // Temporary noise update:
+ // Use it for speech frames if update value is less than previous.
+ noiseUpdateTmp = gammaNoiseTmp * self->noisePrev[i] +
+ (1.f - gammaNoiseTmp) * (probNonSpeech * magn[i] +
+ probSpeech * self->noisePrev[i]);
+ // Time-constant based on speech/noise state.
+ // Remember the time constant used above, then pick the one for this bin.
+ gammaNoiseOld = gammaNoiseTmp;
+ gammaNoiseTmp = NOISE_UPDATE;
+ // Increase gamma (i.e., less noise update) for frame likely to be speech.
+ if (probSpeech > PROB_RANGE) {
+ gammaNoiseTmp = SPEECH_UPDATE;
+ }
+ // Conservative noise update.
+ if (probSpeech < PROB_RANGE) {
+ self->magnAvgPause[i] += GAMMA_PAUSE * (magn[i] - self->magnAvgPause[i]);
+ }
+ // Noise update.
+ // If the time constant did not change, the temporary update is already the
+ // result; otherwise recompute with the new constant.
+ if (gammaNoiseTmp == gammaNoiseOld) {
+ noise[i] = noiseUpdateTmp;
+ } else {
+ noise[i] = gammaNoiseTmp * self->noisePrev[i] +
+ (1.f - gammaNoiseTmp) * (probNonSpeech * magn[i] +
+ probSpeech * self->noisePrev[i]);
+ // Allow for noise update downwards:
+ // If noise update decreases the noise, it is safe, so allow it to
+ // happen.
+ if (noiseUpdateTmp < noise[i]) {
+ noise[i] = noiseUpdateTmp;
+ }
+ }
+ } // End of freq loop.
+}
+
+// Updates |buffer| with a new |frame|.
+// The newest |buffer_length| - |frame_length| samples are shifted to the front
+// of |buffer| and its last |frame_length| samples are replaced by |frame|.
+// Inputs:
+//   * |frame| is a new speech frame or NULL for setting to zero.
+//   * |frame_length| is the length of the new frame; it must not exceed
+//     |buffer_length|.
+//   * |buffer_length| is the length of the buffer.
+// Output:
+//   * |buffer| is the updated buffer.
+static void UpdateBuffer(const float* frame,
+                         size_t frame_length,
+                         size_t buffer_length,
+                         float* buffer) {
+  size_t kept;
+
+  // A frame longer than the buffer would make the subtraction below wrap.
+  assert(frame_length <= buffer_length);
+  kept = buffer_length - frame_length;
+
+  // The kept samples overlap their destination whenever
+  // |buffer_length| > 2 * |frame_length|, so memmove() is required here;
+  // memcpy() on overlapping ranges is undefined behavior.
+  memmove(buffer, buffer + frame_length, sizeof(*buffer) * kept);
+
+  if (frame) {
+    memcpy(buffer + kept, frame, sizeof(*buffer) * frame_length);
+  } else {
+    memset(buffer + kept, 0, sizeof(*buffer) * frame_length);
+  }
+}
+
+// Forward transform: time domain to frequency domain.
+// Inputs:
+//   * |time_data| is the signal in the time domain.
+//   * |time_data_length| is the length of the analysis buffer.
+//   * |magnitude_length| is the length of the spectrum magnitude, which equals
+//     the length of both |real| and |imag| (time_data_length / 2 + 1).
+// Outputs:
+//   * |time_data| is overwritten in place with the transform output.
+//   * |real| is the real part of the frequency domain.
+//   * |imag| is the imaginary part of the frequency domain.
+//   * |magn| is the magnitude in the frequency domain, with 1 added to
+//     every bin.
+static void FFT(NoiseSuppressionC* self,
+                float* time_data,
+                size_t time_data_length,
+                size_t magnitude_length,
+                float* real,
+                float* imag,
+                float* magn) {
+  const size_t last = magnitude_length - 1;
+  size_t bin = 1;
+
+  assert(magnitude_length == time_data_length / 2 + 1);
+
+  WebRtc_rdft(time_data_length, 1, time_data, self->ip, self->wfft);
+
+  // The first and last bins have no imaginary part; their real parts are
+  // read from the first two slots of the transform output.
+  imag[0] = 0;
+  real[0] = time_data[0];
+  magn[0] = fabsf(real[0]) + 1.f;
+  imag[last] = 0;
+  real[last] = time_data[1];
+  magn[last] = fabsf(real[last]) + 1.f;
+
+  // Remaining bins are stored as interleaved (real, imaginary) pairs.
+  while (bin < last) {
+    real[bin] = time_data[2 * bin];
+    imag[bin] = time_data[2 * bin + 1];
+    // Magnitude spectrum.
+    magn[bin] = sqrtf(real[bin] * real[bin] + imag[bin] * imag[bin]) + 1.f;
+    ++bin;
+  }
+}
+
+// Inverse transform: frequency domain to time domain.
+// Inputs:
+//   * |real| is the real part of the frequency domain.
+//   * |imag| is the imaginary part of the frequency domain.
+//   * |magnitude_length| is the length of the spectrum magnitude, which equals
+//     the length of both |real| and |imag|.
+//   * |time_data_length| is the length of the analysis buffer
+//     (2 * (magnitude_length - 1)).
+// Output:
+//   * |time_data| is the signal in the time domain.
+static void IFFT(NoiseSuppressionC* self,
+                 const float* real,
+                 const float* imag,
+                 size_t magnitude_length,
+                 size_t time_data_length,
+                 float* time_data) {
+  const size_t last = magnitude_length - 1;
+  size_t bin = 1;
+  size_t n = 0;
+
+  assert(time_data_length == 2 * (magnitude_length - 1));
+
+  // Pack the spectrum into the layout the transform expects: the real parts
+  // of the first and last bins go in the first two slots, the other bins
+  // follow as interleaved (real, imaginary) pairs.
+  time_data[0] = real[0];
+  time_data[1] = real[last];
+  while (bin < last) {
+    time_data[2 * bin] = real[bin];
+    time_data[2 * bin + 1] = imag[bin];
+    ++bin;
+  }
+
+  WebRtc_rdft(time_data_length, -1, time_data, self->ip, self->wfft);
+
+  // FFT scaling.
+  while (n < time_data_length) {
+    time_data[n] *= 2.f / time_data_length;
+    ++n;
+  }
+}
+
+// Calculates the energy of a buffer as the sum of its squared samples.
+// Inputs:
+// * |buffer| is the buffer over which the energy is calculated.
+// * |length| is the length of the buffer.
+// Returns the calculated energy (0.f when |length| is 0); it is not normalized.
+static float Energy(const float* buffer, size_t length) {
+ size_t i;
+ float energy = 0.f;
+
+ for (i = 0; i < length; ++i) {
+ energy += buffer[i] * buffer[i];  // Accumulated in single precision.
+ }
+
+ return energy;
+}
+
+// Windows a buffer by element-wise multiplication with |window|.
+// Inputs:
+// * |window| is the window by which to multiply.
+// * |data| is the data without windowing.
+// * |length| is the length of the window and data.
+// Output:
+// * |data_windowed| is the windowed data; it may be the same array as |data|.
+static void Windowing(const float* window,
+ const float* data,
+ size_t length,
+ float* data_windowed) {
+ size_t i;
+
+ for (i = 0; i < length; ++i) {
+ data_windowed[i] = window[i] * data[i];  // Index i is read before it is written, so in-place use is safe.
+ }
+}
+
+// Estimate prior SNR decision-directed and compute DD based Wiener Filter.
+// Input:
+// * |magn| is the signal magnitude spectrum estimate (|self->magnLen| bins).
+// Output:
+// * |theFilter| is the computed Wiener filter gain per bin (not clamped here).
+static void ComputeDdBasedWienerFilter(const NoiseSuppressionC* self,
+ const float* magn,
+ float* theFilter) {
+ size_t i;
+ float snrPrior, previousEstimateStsa, currentEstimateStsa;
+
+ for (i = 0; i < self->magnLen; i++) {
+ // Previous estimate: based on previous frame with gain filter (|smooth|).
+ previousEstimateStsa = self->magnPrevProcess[i] /
+ (self->noisePrev[i] + 0.0001f) * self->smooth[i];  // 0.0001f keeps the divisor non-zero.
+ // Post and prior SNR.
+ currentEstimateStsa = 0.f;  // Stays 0 when the bin does not exceed the noise estimate.
+ if (magn[i] > self->noise[i]) {
+ currentEstimateStsa = magn[i] / (self->noise[i] + 0.0001f) - 1.f;
+ }
+ // DD estimate is sum of two terms: current estimate and previous estimate.
+ // Directed decision update of |snrPrior|, weighted by DD_PR_SNR.
+ snrPrior = DD_PR_SNR * previousEstimateStsa +
+ (1.f - DD_PR_SNR) * currentEstimateStsa;
+ // Gain filter; |overdrive| is set by WebRtcNs_set_policy_core().
+ theFilter[i] = snrPrior / (self->overdrive + snrPrior);
+ } // End of loop over frequencies.
+}
+
+// Changes the aggressiveness of the noise suppression method.
+// |mode| = 0 is mild (6dB), |mode| = 1 is medium (10dB), |mode| = 2 is
+// aggressive (15dB); |mode| = 3 is also accepted (largest overdrive, lowest floor).
+// Returns 0 on success and -1 if |mode| is outside [0, 3] (|self| is untouched).
+int WebRtcNs_set_policy_core(NoiseSuppressionC* self, int mode) {
+ // Allow for modes: 0, 1, 2, 3.
+ if (mode < 0 || mode > 3) {
+ return (-1);
+ }
+
+ self->aggrMode = mode;
+ if (mode == 0) {
+ self->overdrive = 1.f;  // Added to the prior SNR in the Wiener gain denominator.
+ self->denoiseBound = 0.5f;  // Lower bound applied to the filter gains in WebRtcNs_ProcessCore().
+ self->gainmap = 0;  // 0 disables the energy-based scale factor in WebRtcNs_ProcessCore().
+ } else if (mode == 1) {
+ // self->overdrive = 1.25f;
+ self->overdrive = 1.f;
+ self->denoiseBound = 0.25f;
+ self->gainmap = 1;
+ } else if (mode == 2) {
+ // self->overdrive = 1.25f;
+ self->overdrive = 1.1f;
+ self->denoiseBound = 0.125f;
+ self->gainmap = 1;
+ } else if (mode == 3) {
+ // self->overdrive = 1.3f;
+ self->overdrive = 1.25f;
+ self->denoiseBound = 0.09f;
+ self->gainmap = 1;
+ }
+ return 0;
+}
+
+void WebRtcNs_AnalyzeCore(NoiseSuppressionC* self, const float* speechFrame) {  // Updates the noise statistics in |self| from one low-band frame.
+ size_t i;
+ const size_t kStartBand = 5; // Skip first frequency bins during estimation.
+ int updateParsFlag;
+ float energy;
+ float signalEnergy = 0.f;
+ float sumMagn = 0.f;
+ float tmpFloat1, tmpFloat2, tmpFloat3;
+ float winData[ANAL_BLOCKL_MAX];
+ float magn[HALF_ANAL_BLOCKL], noise[HALF_ANAL_BLOCKL];
+ float snrLocPost[HALF_ANAL_BLOCKL], snrLocPrior[HALF_ANAL_BLOCKL];
+ float real[ANAL_BLOCKL_MAX], imag[HALF_ANAL_BLOCKL];
+ // Variables during startup.
+ float sum_log_i = 0.0;
+ float sum_log_i_square = 0.0;
+ float sum_log_magn = 0.0;
+ float sum_log_i_log_magn = 0.0;
+ float parametric_exp = 0.0;
+ float parametric_num = 0.0;
+
+ // Check that initiation has been done.
+ assert(self->initFlag == 1);
+ updateParsFlag = self->modelUpdatePars[0];  // Sampled before any update; passed to FeatureUpdate() below.
+
+ // Update analysis buffer for L band: shift out |blockLen| samples, append the frame.
+ UpdateBuffer(speechFrame, self->blockLen, self->anaLen, self->analyzeBuf);
+
+ Windowing(self->window, self->analyzeBuf, self->anaLen, winData);
+ energy = Energy(winData, self->anaLen);
+ if (energy == 0.0) {
+ // We want to avoid updating statistics in this case:
+ // Updating feature statistics when we have zeros only will cause
+ // thresholds to move towards zero signal situations. This in turn has the
+ // effect that once the signal is "turned on" (non-zero values) everything
+ // will be treated as speech and there is no noise suppression effect.
+ // Depending on the duration of the inactive signal it takes a
+ // considerable amount of time for the system to learn what is noise and
+ // what is speech.
+ return;  // |analyzeBuf| has already been updated above; |blockInd| is not advanced.
+ }
+
+ self->blockInd++; // Update the block index only when we process a block.
+
+ FFT(self, winData, self->anaLen, self->magnLen, real, imag, magn);  // |winData| is overwritten with the spectrum.
+
+ for (i = 0; i < self->magnLen; i++) {
+ signalEnergy += real[i] * real[i] + imag[i] * imag[i];
+ sumMagn += magn[i];
+ if (self->blockInd < END_STARTUP_SHORT) {  // Startup only: accumulate the sums used by the log-log fit below.
+ if (i >= kStartBand) {
+ tmpFloat2 = logf((float)i);
+ sum_log_i += tmpFloat2;
+ sum_log_i_square += tmpFloat2 * tmpFloat2;
+ tmpFloat1 = logf(magn[i]);
+ sum_log_magn += tmpFloat1;
+ sum_log_i_log_magn += tmpFloat2 * tmpFloat1;
+ }
+ }
+ }
+ signalEnergy /= self->magnLen;  // Mean over the |magnLen| bins.
+ self->signalEnergy = signalEnergy;
+ self->sumMagn = sumMagn;
+
+ // Quantile noise estimate.
+ NoiseEstimation(self, magn, noise);
+ // Compute simplified noise model during startup.
+ if (self->blockInd < END_STARTUP_SHORT) {
+ // Estimate White noise (accumulated across startup blocks).
+ self->whiteNoiseLevel += sumMagn / self->magnLen * self->overdrive;
+ // Estimate Pink noise parameters: least-squares line fit of log(magn) against log(i) for i >= kStartBand.
+ tmpFloat1 = sum_log_i_square * (self->magnLen - kStartBand);  // Fit denominator: N * sum(x^2) - sum(x)^2.
+ tmpFloat1 -= (sum_log_i * sum_log_i);
+ tmpFloat2 =
+ (sum_log_i_square * sum_log_magn - sum_log_i * sum_log_i_log_magn);
+ tmpFloat3 = tmpFloat2 / tmpFloat1;  // Intercept of the fit.
+ // Constrain the estimated spectrum to be positive.
+ if (tmpFloat3 < 0.f) {
+ tmpFloat3 = 0.f;
+ }
+ self->pinkNoiseNumerator += tmpFloat3;
+ tmpFloat2 = (sum_log_i * sum_log_magn);
+ tmpFloat2 -= (self->magnLen - kStartBand) * sum_log_i_log_magn;
+ tmpFloat3 = tmpFloat2 / tmpFloat1;  // Negated slope of the fit.
+ // Constrain the pink noise power to be in the interval [0, 1].
+ if (tmpFloat3 < 0.f) {
+ tmpFloat3 = 0.f;
+ }
+ if (tmpFloat3 > 1.f) {
+ tmpFloat3 = 1.f;
+ }
+ self->pinkNoiseExp += tmpFloat3;
+
+ // Calculate frequency independent parts of parametric noise estimate.
+ if (self->pinkNoiseExp > 0.f) {
+ // Use pink noise estimate.
+ parametric_num =
+ expf(self->pinkNoiseNumerator / (float)(self->blockInd + 1));
+ parametric_num *= (float)(self->blockInd + 1);
+ parametric_exp = self->pinkNoiseExp / (float)(self->blockInd + 1);
+ }
+ for (i = 0; i < self->magnLen; i++) {
+ // Estimate the background noise using the white and pink noise
+ // parameters.
+ if (self->pinkNoiseExp == 0.f) {
+ // Use white noise estimate.
+ self->parametricNoise[i] = self->whiteNoiseLevel;
+ } else {
+ // Use pink noise estimate; bins below kStartBand reuse the kStartBand value.
+ float use_band = (float)(i < kStartBand ? kStartBand : i);
+ self->parametricNoise[i] =
+ parametric_num / powf(use_band, parametric_exp);
+ }
+ // Weight quantile noise with modeled noise; the model's weight shrinks as |blockInd| grows.
+ noise[i] *= (self->blockInd);
+ tmpFloat2 =
+ self->parametricNoise[i] * (END_STARTUP_SHORT - self->blockInd);
+ noise[i] += (tmpFloat2 / (float)(self->blockInd + 1));
+ noise[i] /= END_STARTUP_SHORT;
+ }
+ }
+ // Compute average signal during END_STARTUP_LONG time:
+ // used to normalize spectral difference measure.
+ if (self->blockInd < END_STARTUP_LONG) {
+ self->featureData[5] *= self->blockInd;  // Running mean: x = (x * n + signalEnergy) / (n + 1).
+ self->featureData[5] += signalEnergy;
+ self->featureData[5] /= (self->blockInd + 1);
+ }
+
+ // Post and prior SNR needed for SpeechNoiseProb.
+ ComputeSnr(self, magn, noise, snrLocPrior, snrLocPost);
+
+ FeatureUpdate(self, magn, updateParsFlag);
+ SpeechNoiseProb(self, self->speechProb, snrLocPrior, snrLocPost);
+ UpdateNoiseEstimate(self, magn, snrLocPrior, snrLocPost, noise);
+
+ // Keep track of noise spectrum for next frame (also read by WebRtcNs_ProcessCore()).
+ memcpy(self->noise, noise, sizeof(*noise) * self->magnLen);
+ memcpy(self->magnPrevAnalyze, magn, sizeof(*magn) * self->magnLen);
+}
+
+void WebRtcNs_ProcessCore(NoiseSuppressionC* self,
+ const float* const* speechFrame,
+ size_t num_bands,
+ float* const* outFrame) {
+ // Main routine for noise reduction: band 0 is filtered in the frequency domain, higher bands get one time-domain gain.
+ int flagHB = 0;
+ size_t i, j;
+
+ float energy1, energy2, gain, factor, factor1, factor2;
+ float fout[BLOCKL_MAX];
+ float winData[ANAL_BLOCKL_MAX];
+ float magn[HALF_ANAL_BLOCKL];
+ float theFilter[HALF_ANAL_BLOCKL], theFilterTmp[HALF_ANAL_BLOCKL];
+ float real[ANAL_BLOCKL_MAX], imag[HALF_ANAL_BLOCKL];
+
+ // SWB variables.
+ int deltaBweHB = 1;
+ int deltaGainHB = 1;
+ float decayBweHB = 1.0;  // Never reassigned below, so the decay multiply is currently a no-op.
+ float gainMapParHB = 1.0;  // Never reassigned below; scales the tanh argument.
+ float gainTimeDomainHB = 1.0;
+ float avgProbSpeechHB, avgProbSpeechHBTmp, avgFilterGainHB, gainModHB;
+ float sumMagnAnalyze, sumMagnProcess;
+
+ // Check that initiation has been done.
+ assert(self->initFlag == 1);
+ assert((num_bands - 1) <= NUM_HIGH_BANDS_MAX);  // |dataBufHB| holds NUM_HIGH_BANDS_MAX bands.
+
+ const float* const* speechFrameHB = NULL;
+ float* const* outFrameHB = NULL;
+ size_t num_high_bands = 0;
+ if (num_bands > 1) {  // Bands 1..num_bands-1 are treated as high bands.
+ speechFrameHB = &speechFrame[1];
+ outFrameHB = &outFrame[1];
+ num_high_bands = num_bands - 1;
+ flagHB = 1;
+ // Range for averaging low band quantities for H band gain.
+ deltaBweHB = (int)self->magnLen / 4;
+ deltaGainHB = deltaBweHB;
+ }
+
+ // Update analysis buffer for L band.
+ UpdateBuffer(speechFrame[0], self->blockLen, self->anaLen, self->dataBuf);
+
+ if (flagHB == 1) {
+ // Update analysis buffer for H bands.
+ for (i = 0; i < num_high_bands; ++i) {
+ UpdateBuffer(speechFrameHB[i],
+ self->blockLen,
+ self->anaLen,
+ self->dataBufHB[i]);
+ }
+ }
+
+ Windowing(self->window, self->dataBuf, self->anaLen, winData);
+ energy1 = Energy(winData, self->anaLen);
+ if (energy1 == 0.0) {
+ // Synthesize the special case of zero input: skip the FFT and filtering, only flush the synthesis buffer.
+ // Read out fully processed segment.
+ for (i = self->windShift; i < self->blockLen + self->windShift; i++) {
+ fout[i - self->windShift] = self->syntBuf[i];
+ }
+ // Update synthesis buffer (NULL frame: the newly exposed tail is zero-filled).
+ UpdateBuffer(NULL, self->blockLen, self->anaLen, self->syntBuf);
+
+ for (i = 0; i < self->blockLen; ++i)
+ outFrame[0][i] =
+ WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX, fout[i], WEBRTC_SPL_WORD16_MIN);
+
+ // High bands: the buffered samples are passed through; no gain is applied on this path.
+ if (flagHB == 1) {
+ for (i = 0; i < num_high_bands; ++i) {
+ for (j = 0; j < self->blockLen; ++j) {
+ outFrameHB[i][j] = WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX,
+ self->dataBufHB[i][j],
+ WEBRTC_SPL_WORD16_MIN);
+ }
+ }
+ }
+
+ return;
+ }
+
+ FFT(self, winData, self->anaLen, self->magnLen, real, imag, magn);
+
+ if (self->blockInd < END_STARTUP_SHORT) {  // Startup: accumulate the magnitude spectrum.
+ for (i = 0; i < self->magnLen; i++) {
+ self->initMagnEst[i] += magn[i];
+ }
+ }
+
+ ComputeDdBasedWienerFilter(self, magn, theFilter);
+
+ for (i = 0; i < self->magnLen; i++) {
+ // Flooring bottom.
+ if (theFilter[i] < self->denoiseBound) {
+ theFilter[i] = self->denoiseBound;
+ }
+ // Flooring top.
+ if (theFilter[i] > 1.f) {
+ theFilter[i] = 1.f;
+ }
+ if (self->blockInd < END_STARTUP_SHORT) {  // Startup: blend with a filter derived from the parametric noise model.
+ theFilterTmp[i] =
+ (self->initMagnEst[i] - self->overdrive * self->parametricNoise[i]);
+ theFilterTmp[i] /= (self->initMagnEst[i] + 0.0001f);
+ // Flooring bottom.
+ if (theFilterTmp[i] < self->denoiseBound) {
+ theFilterTmp[i] = self->denoiseBound;
+ }
+ // Flooring top.
+ if (theFilterTmp[i] > 1.f) {
+ theFilterTmp[i] = 1.f;
+ }
+ // Weight the two suppression filters by |blockInd| and its complement.
+ theFilter[i] *= (self->blockInd);
+ theFilterTmp[i] *= (END_STARTUP_SHORT - self->blockInd);
+ theFilter[i] += theFilterTmp[i];
+ theFilter[i] /= (END_STARTUP_SHORT);
+ }
+
+ self->smooth[i] = theFilter[i];  // Kept in |self|; read back by ComputeDdBasedWienerFilter() on the next call.
+ real[i] *= self->smooth[i];
+ imag[i] *= self->smooth[i];
+ }
+ // Keep track of |magn| spectrum for next frame.
+ memcpy(self->magnPrevProcess, magn, sizeof(*magn) * self->magnLen);
+ memcpy(self->noisePrev, self->noise, sizeof(self->noise[0]) * self->magnLen);
+ // Back to time domain.
+ IFFT(self, real, imag, self->magnLen, self->anaLen, winData);
+
+ // Scale factor: only do it after END_STARTUP_LONG time.
+ factor = 1.f;
+ if (self->gainmap == 1 && self->blockInd > END_STARTUP_LONG) {
+ factor1 = 1.f;
+ factor2 = 1.f;
+
+ energy2 = Energy(winData, self->anaLen);
+ gain = (float)sqrt(energy2 / (energy1 + 1.f));  // Filtered-to-input energy ratio (square root).
+
+ // Scaling for new version.
+ if (gain > B_LIM) {
+ factor1 = 1.f + 1.3f * (gain - B_LIM);
+ if (gain * factor1 > 1.f) {
+ factor1 = 1.f / gain;
+ }
+ }
+ if (gain < B_LIM) {
+ // Don't reduce scale too much for pause regions:
+ // attenuation here should be controlled by flooring.
+ if (gain <= self->denoiseBound) {
+ gain = self->denoiseBound;
+ }
+ factor2 = 1.f - 0.3f * (B_LIM - gain);
+ }
+ // Combine both scales with speech/noise prob:
+ // note prior (priorSpeechProb) is not frequency dependent.
+ factor = self->priorSpeechProb * factor1 +
+ (1.f - self->priorSpeechProb) * factor2;
+ } // Out of self->gainmap == 1.
+
+ Windowing(self->window, winData, self->anaLen, winData);  // Applied in place.
+
+ // Synthesis: add the scaled, windowed block into the synthesis buffer.
+ for (i = 0; i < self->anaLen; i++) {
+ self->syntBuf[i] += factor * winData[i];
+ }
+ // Read out fully processed segment.
+ for (i = self->windShift; i < self->blockLen + self->windShift; i++) {
+ fout[i - self->windShift] = self->syntBuf[i];
+ }
+ // Update synthesis buffer.
+ UpdateBuffer(NULL, self->blockLen, self->anaLen, self->syntBuf);
+
+ for (i = 0; i < self->blockLen; ++i)
+ outFrame[0][i] =
+ WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX, fout[i], WEBRTC_SPL_WORD16_MIN);
+
+ // For time-domain gain of HB.
+ if (flagHB == 1) {
+ // Average speech prob from low band.
+ // Averages the |deltaBweHB| (= magnLen / 4) bins just below the last bin.
+ avgProbSpeechHB = 0.0;
+ for (i = self->magnLen - deltaBweHB - 1; i < self->magnLen - 1; i++) {
+ avgProbSpeechHB += self->speechProb[i];
+ }
+ avgProbSpeechHB = avgProbSpeechHB / ((float)deltaBweHB);
+ // If the speech was suppressed by a component between Analyze and
+ // Process, for example the AEC, then it should not be considered speech
+ // for high band suppression purposes.
+ sumMagnAnalyze = 0;
+ sumMagnProcess = 0;
+ for (i = 0; i < self->magnLen; ++i) {
+ sumMagnAnalyze += self->magnPrevAnalyze[i];
+ sumMagnProcess += self->magnPrevProcess[i];
+ }
+ avgProbSpeechHB *= sumMagnProcess / sumMagnAnalyze;  // NOTE(review): no guard for sumMagnAnalyze == 0; confirm |magnPrevAnalyze| cannot be all zeros.
+ // Average filter gain from low band.
+ // Averages the |deltaGainHB| (= magnLen / 4) bins just below the last bin.
+ avgFilterGainHB = 0.0;
+ for (i = self->magnLen - deltaGainHB - 1; i < self->magnLen - 1; i++) {
+ avgFilterGainHB += self->smooth[i];
+ }
+ avgFilterGainHB = avgFilterGainHB / ((float)(deltaGainHB));
+ avgProbSpeechHBTmp = 2.f * avgProbSpeechHB - 1.f;
+ // Gain based on speech probability.
+ gainModHB = 0.5f * (1.f + (float)tanh(gainMapParHB * avgProbSpeechHBTmp));
+ // Combine gain with low band gain; the filter gain is weighted more when speech is likely.
+ gainTimeDomainHB = 0.5f * gainModHB + 0.5f * avgFilterGainHB;
+ if (avgProbSpeechHB >= 0.5f) {
+ gainTimeDomainHB = 0.25f * gainModHB + 0.75f * avgFilterGainHB;
+ }
+ gainTimeDomainHB = gainTimeDomainHB * decayBweHB;
+ // Make sure gain is within flooring range.
+ // Flooring bottom.
+ if (gainTimeDomainHB < self->denoiseBound) {
+ gainTimeDomainHB = self->denoiseBound;
+ }
+ // Flooring top.
+ if (gainTimeDomainHB > 1.f) {
+ gainTimeDomainHB = 1.f;
+ }
+ // Apply gain: the same scalar is used for every high band.
+ for (i = 0; i < num_high_bands; ++i) {
+ for (j = 0; j < self->blockLen; j++) {
+ outFrameHB[i][j] =
+ WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX,
+ gainTimeDomainHB * self->dataBufHB[i][j],
+ WEBRTC_SPL_WORD16_MIN);
+ }
+ }
+ } // End of H band gain computation.
+}
diff --git a/third_party/webrtc/src/webrtc/modules/audio_processing/ns/ns_core.h b/third_party/webrtc/src/webrtc/modules/audio_processing/ns/ns_core.h
new file mode 100644
index 00000000..aba1c468
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/modules/audio_processing/ns/ns_core.h
@@ -0,0 +1,190 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_NS_NS_CORE_H_
+#define WEBRTC_MODULES_AUDIO_PROCESSING_NS_NS_CORE_H_
+
+#include "webrtc/modules/audio_processing/ns/defines.h"
+
+typedef struct NSParaExtract_ {  // Feature-extraction parameters; embedded in NoiseSuppressionC as |featureExtractionParams|.
+ // Bin size of histogram (one per feature: LRT, spectral flatness, spectral difference).
+ float binSizeLrt;
+ float binSizeSpecFlat;
+ float binSizeSpecDiff;
+ // Range of histogram over which LRT threshold is computed.
+ float rangeAvgHistLrt;
+ // Scale parameters: multiply dominant peaks of the histograms by scale factor
+ // to obtain thresholds for prior model.
+ float factor1ModelPars; // For LRT and spectral difference.
+ float factor2ModelPars; // For spectral_flatness: used when noise is flatter
+ // than speech.
+ // Peak limit for spectral flatness (varies between 0 and 1).
+ float thresPosSpecFlat;
+ // Limit on spacing of two highest peaks in histogram: spacing determined by
+ // bin size.
+ float limitPeakSpacingSpecFlat;
+ float limitPeakSpacingSpecDiff;
+ // Limit on relevance of second peak.
+ float limitPeakWeightsSpecFlat;
+ float limitPeakWeightsSpecDiff;
+ // Limit on fluctuation of LRT feature.
+ float thresFluctLrt;
+ // Limit on the max and min values for the feature thresholds.
+ float maxLrt;
+ float minLrt;
+ float maxSpecFlat;
+ float minSpecFlat;
+ float maxSpecDiff;
+ float minSpecDiff;
+ // Criteria of weight of histogram peak to accept/reject feature.
+ int thresWeightSpecFlat;
+ int thresWeightSpecDiff;
+
+} NSParaExtract;
+
+typedef struct NoiseSuppressionC_ {  // Complete state of one floating-point noise suppression instance.
+ uint32_t fs;
+ size_t blockLen;  // Samples per input/output frame (the shift used when updating the buffers).
+ size_t windShift;  // Offset into |syntBuf| at which the output frame is read.
+ size_t anaLen;  // Length of the analysis/synthesis buffers that are actually used.
+ size_t magnLen;  // Number of spectrum bins: anaLen / 2 + 1.
+ int aggrMode;  // Last mode accepted by WebRtcNs_set_policy_core().
+ const float* window;  // Window applied before the FFT and again after the IFFT.
+ float analyzeBuf[ANAL_BLOCKL_MAX];  // Low-band history used by WebRtcNs_AnalyzeCore().
+ float dataBuf[ANAL_BLOCKL_MAX];  // Low-band history used by WebRtcNs_ProcessCore().
+ float syntBuf[ANAL_BLOCKL_MAX];  // Accumulates the windowed output blocks.
+
+ int initFlag;  // Must be 1 before Analyze/Process are called (asserted there).
+ // Parameters for quantile noise estimation.
+ float density[SIMULT * HALF_ANAL_BLOCKL];
+ float lquantile[SIMULT * HALF_ANAL_BLOCKL];
+ float quantile[HALF_ANAL_BLOCKL];
+ int counter[SIMULT];
+ int updates;
+ // Parameters for Wiener filter.
+ float smooth[HALF_ANAL_BLOCKL];  // Filter gains applied to the previous processed frame.
+ float overdrive;  // Set by WebRtcNs_set_policy_core().
+ float denoiseBound;  // Lower bound on the filter gains; set by WebRtcNs_set_policy_core().
+ int gainmap;  // 1 enables the energy-based output scale factor in WebRtcNs_ProcessCore().
+ // FFT work arrays (passed to WebRtc_rdft).
+ size_t ip[IP_LENGTH];
+ float wfft[W_LENGTH];
+
+ // Parameters for new method: some not needed, will reduce/cleanup later.
+ int32_t blockInd; // Frame index counter; advanced only for analyzed blocks with non-zero energy.
+ int modelUpdatePars[4]; // Parameters for updating or estimating.
+ // Thresholds/weights for prior model.
+ float priorModelPars[7]; // Parameters for prior model.
+ float noise[HALF_ANAL_BLOCKL]; // Noise spectrum from current frame.
+ float noisePrev[HALF_ANAL_BLOCKL]; // Noise spectrum from previous frame.
+ // Magnitude spectrum of previous analyze frame.
+ float magnPrevAnalyze[HALF_ANAL_BLOCKL];
+ // Magnitude spectrum of previous process frame.
+ float magnPrevProcess[HALF_ANAL_BLOCKL];
+ float logLrtTimeAvg[HALF_ANAL_BLOCKL]; // Log LRT factor with time-smoothing.
+ float priorSpeechProb; // Prior speech/noise probability.
+ float featureData[7];  // Entry [5] holds a running mean of |signalEnergy| during startup.
+ // Conservative noise spectrum estimate.
+ float magnAvgPause[HALF_ANAL_BLOCKL];
+ float signalEnergy; // Mean of real^2 + imag^2 over the |magnLen| bins.
+ float sumMagn;  // Sum of the magnitude spectrum over all bins.
+ float whiteNoiseLevel; // Initial noise estimate.
+ float initMagnEst[HALF_ANAL_BLOCKL]; // Initial magnitude spectrum estimate.
+ float pinkNoiseNumerator; // Pink noise parameter: numerator.
+ float pinkNoiseExp; // Pink noise parameter: power of frequencies.
+ float parametricNoise[HALF_ANAL_BLOCKL];  // Startup noise model built from the white/pink parameters.
+ // Parameters for feature extraction.
+ NSParaExtract featureExtractionParams;
+ // Histograms for parameter estimation.
+ int histLrt[HIST_PAR_EST];
+ int histSpecFlat[HIST_PAR_EST];
+ int histSpecDiff[HIST_PAR_EST];
+ // Quantities for high band estimate.
+ float speechProb[HALF_ANAL_BLOCKL]; // Final speech/noise prob: prior + LRT.
+ // Buffering data for HB.
+ float dataBufHB[NUM_HIGH_BANDS_MAX][ANAL_BLOCKL_MAX];
+
+} NoiseSuppressionC;
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/****************************************************************************
+ * WebRtcNs_InitCore(...)
+ *
+ * This function initializes a noise suppression instance
+ *
+ * Input:
+ * - self : Instance that should be initialized
+ * - fs : Sampling frequency
+ *
+ * Output:
+ * - self : Initialized instance
+ *
+ * Return value : 0 - Ok
+ * -1 - Error
+ */
+int WebRtcNs_InitCore(NoiseSuppressionC* self, uint32_t fs);
+
+/****************************************************************************
+ * WebRtcNs_set_policy_core(...)
+ *
+ * This changes the aggressiveness of the noise suppression method.
+ *
+ * Input:
+ * - self : Instance whose policy should be changed
+ * - mode : 0: Mild (6dB), 1: Medium (10dB), 2: Aggressive (15dB), 3: also accepted
+ *
+ * Output:
+ * - self : Instance with updated policy parameters
+ *
+ * Return value : 0 - Ok
+ * -1 - Error (mode outside [0, 3])
+ */
+int WebRtcNs_set_policy_core(NoiseSuppressionC* self, int mode);
+
+/****************************************************************************
+ * WebRtcNs_AnalyzeCore
+ *
+ * Estimate the background noise.
+ *
+ * Input:
+ * - self : Initialized instance
+ * - speechFrame : Input speech frame for lower band
+ *
+ * Output:
+ * - self : Updated instance
+ */
+void WebRtcNs_AnalyzeCore(NoiseSuppressionC* self, const float* speechFrame);
+
+/****************************************************************************
+ * WebRtcNs_ProcessCore
+ *
+ * Do noise suppression.
+ *
+ * Input:
+ * - self : Initialized instance
+ * - inFrame : Input speech frame for each band
+ * - num_bands : Number of bands (at most NUM_HIGH_BANDS_MAX + 1)
+ *
+ * Output:
+ * - self : Updated instance
+ * - outFrame : Output speech frame for each band
+ */
+void WebRtcNs_ProcessCore(NoiseSuppressionC* self,
+ const float* const* inFrame,
+ size_t num_bands,
+ float* const* outFrame);
+
+#ifdef __cplusplus
+}
+#endif
+#endif // WEBRTC_MODULES_AUDIO_PROCESSING_NS_NS_CORE_H_
diff --git a/third_party/webrtc/src/webrtc/modules/audio_processing/ns/nsx_core.c b/third_party/webrtc/src/webrtc/modules/audio_processing/ns/nsx_core.c
new file mode 100644
index 00000000..ed6125aa
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/modules/audio_processing/ns/nsx_core.c
@@ -0,0 +1,2112 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/modules/audio_processing/ns/include/noise_suppression_x.h"
+
+#include <assert.h>
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "webrtc/common_audio/signal_processing/include/real_fft.h"
+#include "webrtc/modules/audio_processing/ns/nsx_core.h"
+#include "webrtc/system_wrappers/interface/cpu_features_wrapper.h"
+
+#if (defined WEBRTC_DETECT_NEON || defined WEBRTC_HAS_NEON)
+/* Tables are defined in ARM assembly files. */
+extern const int16_t WebRtcNsx_kLogTable[9];
+extern const int16_t WebRtcNsx_kCounterDiv[201];
+extern const int16_t WebRtcNsx_kLogTableFrac[256];
+#else
+static const int16_t WebRtcNsx_kLogTable[9] = {
+ 0, 177, 355, 532, 710, 887, 1065, 1242, 1420
+};
+
+static const int16_t WebRtcNsx_kCounterDiv[201] = {
+ 32767, 16384, 10923, 8192, 6554, 5461, 4681, 4096, 3641, 3277, 2979, 2731,
+ 2521, 2341, 2185, 2048, 1928, 1820, 1725, 1638, 1560, 1489, 1425, 1365, 1311,
+ 1260, 1214, 1170, 1130, 1092, 1057, 1024, 993, 964, 936, 910, 886, 862, 840,
+ 819, 799, 780, 762, 745, 728, 712, 697, 683, 669, 655, 643, 630, 618, 607,
+ 596, 585, 575, 565, 555, 546, 537, 529, 520, 512, 504, 496, 489, 482, 475,
+ 468, 462, 455, 449, 443, 437, 431, 426, 420, 415, 410, 405, 400, 395, 390,
+ 386, 381, 377, 372, 368, 364, 360, 356, 352, 349, 345, 341, 338, 334, 331,
+ 328, 324, 321, 318, 315, 312, 309, 306, 303, 301, 298, 295, 293, 290, 287,
+ 285, 282, 280, 278, 275, 273, 271, 269, 266, 264, 262, 260, 258, 256, 254,
+ 252, 250, 248, 246, 245, 243, 241, 239, 237, 236, 234, 232, 231, 229, 228,
+ 226, 224, 223, 221, 220, 218, 217, 216, 214, 213, 211, 210, 209, 207, 206,
+ 205, 204, 202, 201, 200, 199, 197, 196, 195, 194, 193, 192, 191, 189, 188,
+ 187, 186, 185, 184, 183, 182, 181, 180, 179, 178, 177, 176, 175, 174, 173,
+ 172, 172, 171, 170, 169, 168, 167, 166, 165, 165, 164, 163
+};
+
+static const int16_t WebRtcNsx_kLogTableFrac[256] = {
+ 0, 1, 3, 4, 6, 7, 9, 10, 11, 13, 14, 16, 17, 18, 20, 21,
+ 22, 24, 25, 26, 28, 29, 30, 32, 33, 34, 36, 37, 38, 40, 41, 42,
+ 44, 45, 46, 47, 49, 50, 51, 52, 54, 55, 56, 57, 59, 60, 61, 62,
+ 63, 65, 66, 67, 68, 69, 71, 72, 73, 74, 75, 77, 78, 79, 80, 81,
+ 82, 84, 85, 86, 87, 88, 89, 90, 92, 93, 94, 95, 96, 97, 98, 99,
+ 100, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 116,
+ 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131,
+ 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146,
+ 147, 148, 149, 150, 151, 152, 153, 154, 155, 155, 156, 157, 158, 159, 160,
+ 161, 162, 163, 164, 165, 166, 167, 168, 169, 169, 170, 171, 172, 173, 174,
+ 175, 176, 177, 178, 178, 179, 180, 181, 182, 183, 184, 185, 185, 186, 187,
+ 188, 189, 190, 191, 192, 192, 193, 194, 195, 196, 197, 198, 198, 199, 200,
+ 201, 202, 203, 203, 204, 205, 206, 207, 208, 208, 209, 210, 211, 212, 212,
+ 213, 214, 215, 216, 216, 217, 218, 219, 220, 220, 221, 222, 223, 224, 224,
+ 225, 226, 227, 228, 228, 229, 230, 231, 231, 232, 233, 234, 234, 235, 236,
+ 237, 238, 238, 239, 240, 241, 241, 242, 243, 244, 244, 245, 246, 247, 247,
+ 248, 249, 249, 250, 251, 252, 252, 253, 254, 255, 255
+};
+#endif // WEBRTC_DETECT_NEON || WEBRTC_HAS_NEON
+
+// Skip first frequency bins during estimation. (0 <= value < 64)
+static const size_t kStartBand = 5;
+
+// Hybrid Hanning & flat window
+static const int16_t kBlocks80w128x[128] = {
+ 0, 536, 1072, 1606, 2139, 2669, 3196, 3720, 4240, 4756, 5266,
+ 5771, 6270, 6762, 7246, 7723, 8192, 8652, 9102, 9543, 9974, 10394,
+ 10803, 11200, 11585, 11958, 12318, 12665, 12998, 13318, 13623, 13913, 14189,
+ 14449, 14694, 14924, 15137, 15334, 15515, 15679, 15826, 15956, 16069, 16165,
+ 16244, 16305, 16349, 16375, 16384, 16384, 16384, 16384, 16384, 16384, 16384,
+ 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384,
+ 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384,
+ 16384, 16384, 16384, 16384, 16375, 16349, 16305, 16244, 16165, 16069, 15956,
+ 15826, 15679, 15515, 15334, 15137, 14924, 14694, 14449, 14189, 13913, 13623,
+ 13318, 12998, 12665, 12318, 11958, 11585, 11200, 10803, 10394, 9974, 9543,
+ 9102, 8652, 8192, 7723, 7246, 6762, 6270, 5771, 5266, 4756, 4240,
+ 3720, 3196, 2669, 2139, 1606, 1072, 536
+};
+
+// Hybrid Hanning & flat window
+static const int16_t kBlocks160w256x[256] = {
+ 0, 268, 536, 804, 1072, 1339, 1606, 1872,
+ 2139, 2404, 2669, 2933, 3196, 3459, 3720, 3981,
+ 4240, 4499, 4756, 5012, 5266, 5520, 5771, 6021,
+ 6270, 6517, 6762, 7005, 7246, 7486, 7723, 7959,
+ 8192, 8423, 8652, 8878, 9102, 9324, 9543, 9760,
+ 9974, 10185, 10394, 10600, 10803, 11003, 11200, 11394,
+ 11585, 11773, 11958, 12140, 12318, 12493, 12665, 12833,
+ 12998, 13160, 13318, 13472, 13623, 13770, 13913, 14053,
+ 14189, 14321, 14449, 14574, 14694, 14811, 14924, 15032,
+ 15137, 15237, 15334, 15426, 15515, 15599, 15679, 15754,
+ 15826, 15893, 15956, 16015, 16069, 16119, 16165, 16207,
+ 16244, 16277, 16305, 16329, 16349, 16364, 16375, 16382,
+ 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384,
+ 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384,
+ 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384,
+ 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384,
+ 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384,
+ 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384,
+ 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384,
+ 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384,
+ 16384, 16382, 16375, 16364, 16349, 16329, 16305, 16277,
+ 16244, 16207, 16165, 16119, 16069, 16015, 15956, 15893,
+ 15826, 15754, 15679, 15599, 15515, 15426, 15334, 15237,
+ 15137, 15032, 14924, 14811, 14694, 14574, 14449, 14321,
+ 14189, 14053, 13913, 13770, 13623, 13472, 13318, 13160,
+ 12998, 12833, 12665, 12493, 12318, 12140, 11958, 11773,
+ 11585, 11394, 11200, 11003, 10803, 10600, 10394, 10185,
+ 9974, 9760, 9543, 9324, 9102, 8878, 8652, 8423,
+ 8192, 7959, 7723, 7486, 7246, 7005, 6762, 6517,
+ 6270, 6021, 5771, 5520, 5266, 5012, 4756, 4499,
+ 4240, 3981, 3720, 3459, 3196, 2933, 2669, 2404,
+ 2139, 1872, 1606, 1339, 1072, 804, 536, 268
+};
+
+// Gain factor1 table: Input value in Q8 and output value in Q13
+// original floating point code
+// if (gain > blim) {
+// factor1 = 1.0 + 1.3 * (gain - blim);
+// if (gain * factor1 > 1.0) {
+// factor1 = 1.0 / gain;
+// }
+// } else {
+// factor1 = 1.0;
+// }
+static const int16_t kFactor1Table[257] = {
+ 8192, 8192, 8192, 8192, 8192, 8192, 8192,
+ 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192,
+ 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192,
+ 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192,
+ 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192,
+ 8192, 8192, 8233, 8274, 8315, 8355, 8396, 8436, 8475, 8515, 8554, 8592, 8631, 8669,
+ 8707, 8745, 8783, 8820, 8857, 8894, 8931, 8967, 9003, 9039, 9075, 9111, 9146, 9181,
+ 9216, 9251, 9286, 9320, 9354, 9388, 9422, 9456, 9489, 9523, 9556, 9589, 9622, 9655,
+ 9687, 9719, 9752, 9784, 9816, 9848, 9879, 9911, 9942, 9973, 10004, 10035, 10066,
+ 10097, 10128, 10158, 10188, 10218, 10249, 10279, 10308, 10338, 10368, 10397, 10426,
+ 10456, 10485, 10514, 10543, 10572, 10600, 10629, 10657, 10686, 10714, 10742, 10770,
+ 10798, 10826, 10854, 10882, 10847, 10810, 10774, 10737, 10701, 10666, 10631, 10596,
+ 10562, 10527, 10494, 10460, 10427, 10394, 10362, 10329, 10297, 10266, 10235, 10203,
+ 10173, 10142, 10112, 10082, 10052, 10023, 9994, 9965, 9936, 9908, 9879, 9851, 9824,
+ 9796, 9769, 9742, 9715, 9689, 9662, 9636, 9610, 9584, 9559, 9534, 9508, 9484, 9459,
+ 9434, 9410, 9386, 9362, 9338, 9314, 9291, 9268, 9245, 9222, 9199, 9176, 9154, 9132,
+ 9110, 9088, 9066, 9044, 9023, 9002, 8980, 8959, 8939, 8918, 8897, 8877, 8857, 8836,
+ 8816, 8796, 8777, 8757, 8738, 8718, 8699, 8680, 8661, 8642, 8623, 8605, 8586, 8568,
+ 8550, 8532, 8514, 8496, 8478, 8460, 8443, 8425, 8408, 8391, 8373, 8356, 8339, 8323,
+ 8306, 8289, 8273, 8256, 8240, 8224, 8208, 8192
+};
+
+// For Factor2 tables
+// original floating point code
+// if (gain > blim) {
+// factor2 = 1.0;
+// } else {
+// factor2 = 1.0 - 0.3 * (blim - gain);
+// if (gain <= inst->denoiseBound) {
+// factor2 = 1.0 - 0.3 * (blim - inst->denoiseBound);
+// }
+// }
+//
+// Gain factor table: Input value in Q8 and output value in Q13
+static const int16_t kFactor2Aggressiveness1[257] = {
+ 7577, 7577, 7577, 7577, 7577, 7577,
+ 7577, 7577, 7577, 7577, 7577, 7577, 7577, 7577, 7577, 7577, 7577, 7596, 7614, 7632,
+ 7650, 7667, 7683, 7699, 7715, 7731, 7746, 7761, 7775, 7790, 7804, 7818, 7832, 7845,
+ 7858, 7871, 7884, 7897, 7910, 7922, 7934, 7946, 7958, 7970, 7982, 7993, 8004, 8016,
+ 8027, 8038, 8049, 8060, 8070, 8081, 8091, 8102, 8112, 8122, 8132, 8143, 8152, 8162,
+ 8172, 8182, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192,
+ 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192,
+ 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192,
+ 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192,
+ 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192,
+ 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192,
+ 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192,
+ 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192,
+ 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192,
+ 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192,
+ 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192,
+ 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192,
+ 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192,
+ 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192
+};
+
+// Gain factor table: Input value in Q8 and output value in Q13
+static const int16_t kFactor2Aggressiveness2[257] = {
+ 7270, 7270, 7270, 7270, 7270, 7306,
+ 7339, 7369, 7397, 7424, 7448, 7472, 7495, 7517, 7537, 7558, 7577, 7596, 7614, 7632,
+ 7650, 7667, 7683, 7699, 7715, 7731, 7746, 7761, 7775, 7790, 7804, 7818, 7832, 7845,
+ 7858, 7871, 7884, 7897, 7910, 7922, 7934, 7946, 7958, 7970, 7982, 7993, 8004, 8016,
+ 8027, 8038, 8049, 8060, 8070, 8081, 8091, 8102, 8112, 8122, 8132, 8143, 8152, 8162,
+ 8172, 8182, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192,
+ 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192,
+ 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192,
+ 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192,
+ 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192,
+ 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192,
+ 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192,
+ 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192,
+ 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192,
+ 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192,
+ 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192,
+ 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192,
+ 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192,
+ 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192
+};
+
+// Gain factor table: Input value in Q8 and output value in Q13
+static const int16_t kFactor2Aggressiveness3[257] = {
+ 7184, 7184, 7184, 7229, 7270, 7306,
+ 7339, 7369, 7397, 7424, 7448, 7472, 7495, 7517, 7537, 7558, 7577, 7596, 7614, 7632,
+ 7650, 7667, 7683, 7699, 7715, 7731, 7746, 7761, 7775, 7790, 7804, 7818, 7832, 7845,
+ 7858, 7871, 7884, 7897, 7910, 7922, 7934, 7946, 7958, 7970, 7982, 7993, 8004, 8016,
+ 8027, 8038, 8049, 8060, 8070, 8081, 8091, 8102, 8112, 8122, 8132, 8143, 8152, 8162,
+ 8172, 8182, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192,
+ 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192,
+ 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192,
+ 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192,
+ 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192,
+ 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192,
+ 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192,
+ 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192,
+ 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192,
+ 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192,
+ 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192,
+ 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192,
+ 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192,
+ 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192
+};
+
+// sum of log2(i) from table index to inst->anaLen2 in Q5
+// Note that the first table value is invalid, since log2(0) = -infinity
+static const int16_t kSumLogIndex[66] = {
+ 0, 22917, 22917, 22885, 22834, 22770, 22696, 22613,
+ 22524, 22428, 22326, 22220, 22109, 21994, 21876, 21754,
+ 21629, 21501, 21370, 21237, 21101, 20963, 20822, 20679,
+ 20535, 20388, 20239, 20089, 19937, 19783, 19628, 19470,
+ 19312, 19152, 18991, 18828, 18664, 18498, 18331, 18164,
+ 17994, 17824, 17653, 17480, 17306, 17132, 16956, 16779,
+ 16602, 16423, 16243, 16063, 15881, 15699, 15515, 15331,
+ 15146, 14960, 14774, 14586, 14398, 14209, 14019, 13829,
+ 13637, 13445
+};
+
+// sum of log2(i)^2 from table index to inst->anaLen2 in Q2
+// Note that the first table value is invalid, since log2(0) = -infinity
+static const int16_t kSumSquareLogIndex[66] = {
+ 0, 16959, 16959, 16955, 16945, 16929, 16908, 16881,
+ 16850, 16814, 16773, 16729, 16681, 16630, 16575, 16517,
+ 16456, 16392, 16325, 16256, 16184, 16109, 16032, 15952,
+ 15870, 15786, 15700, 15612, 15521, 15429, 15334, 15238,
+ 15140, 15040, 14938, 14834, 14729, 14622, 14514, 14404,
+ 14292, 14179, 14064, 13947, 13830, 13710, 13590, 13468,
+ 13344, 13220, 13094, 12966, 12837, 12707, 12576, 12444,
+ 12310, 12175, 12039, 11902, 11763, 11624, 11483, 11341,
+ 11198, 11054
+};
+
+// log2(table index) in Q12
+// Note that the first table value is invalid, since log2(0) = -infinity
+static const int16_t kLogIndex[129] = {
+ 0, 0, 4096, 6492, 8192, 9511, 10588, 11499,
+ 12288, 12984, 13607, 14170, 14684, 15157, 15595, 16003,
+ 16384, 16742, 17080, 17400, 17703, 17991, 18266, 18529,
+ 18780, 19021, 19253, 19476, 19691, 19898, 20099, 20292,
+ 20480, 20662, 20838, 21010, 21176, 21338, 21496, 21649,
+ 21799, 21945, 22087, 22226, 22362, 22495, 22625, 22752,
+ 22876, 22998, 23117, 23234, 23349, 23462, 23572, 23680,
+ 23787, 23892, 23994, 24095, 24195, 24292, 24388, 24483,
+ 24576, 24668, 24758, 24847, 24934, 25021, 25106, 25189,
+ 25272, 25354, 25434, 25513, 25592, 25669, 25745, 25820,
+ 25895, 25968, 26041, 26112, 26183, 26253, 26322, 26390,
+ 26458, 26525, 26591, 26656, 26721, 26784, 26848, 26910,
+ 26972, 27033, 27094, 27154, 27213, 27272, 27330, 27388,
+ 27445, 27502, 27558, 27613, 27668, 27722, 27776, 27830,
+ 27883, 27935, 27988, 28039, 28090, 28141, 28191, 28241,
+ 28291, 28340, 28388, 28437, 28484, 28532, 28579, 28626,
+ 28672
+};
+
+// determinant of estimation matrix in Q0 corresponding to the log2 tables above
+// Note that the first table value is invalid, since log2(0) = -infinity
+static const int16_t kDeterminantEstMatrix[66] = {
+ 0, 29814, 25574, 22640, 20351, 18469, 16873, 15491,
+ 14277, 13199, 12233, 11362, 10571, 9851, 9192, 8587,
+ 8030, 7515, 7038, 6596, 6186, 5804, 5448, 5115,
+ 4805, 4514, 4242, 3988, 3749, 3524, 3314, 3116,
+ 2930, 2755, 2590, 2435, 2289, 2152, 2022, 1900,
+ 1785, 1677, 1575, 1478, 1388, 1302, 1221, 1145,
+ 1073, 1005, 942, 881, 825, 771, 721, 674,
+ 629, 587, 547, 510, 475, 442, 411, 382,
+ 355, 330
+};
+
+// Update the noise estimation information: rebuild inst->noiseEstQuantile[0 .. magnLen) and inst->qNoise from the log quantiles starting at index |offset|.
+static void UpdateNoiseEstimate(NoiseSuppressionFixedC* inst, int offset) {
+ int32_t tmp32no1 = 0;
+ int32_t tmp32no2 = 0;
+ int16_t tmp16 = 0;
+ const int16_t kExp2Const = 11819; // Q13; 11819 / 2^13 ~= 1.4427 (~= log2(e)), so exp(x) is evaluated as 2^(x * kExp2Const)
+
+ size_t i = 0;
+
+ tmp16 = WebRtcSpl_MaxValueW16(inst->noiseEstLogQuantile + offset,
+ inst->magnLen); // largest log quantile among the magnLen entries at |offset|
+ // Guarantee a Q-domain as high as possible and still fit in int16
+ inst->qNoise = 14 - (int) WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(
+ kExp2Const, tmp16, 21);
+ for (i = 0; i < inst->magnLen; i++) {
+ // inst->quantile[i]=exp(inst->lquantile[offset+i]);
+ // in Q21
+ tmp32no2 = kExp2Const * inst->noiseEstLogQuantile[offset + i];
+ tmp32no1 = (0x00200000 | (tmp32no2 & 0x001FFFFF)); // 2^21 + frac, i.e. (1 + frac) is used as the mantissa
+ tmp16 = (int16_t)(tmp32no2 >> 21); // integer part of the base-2 exponent
+ tmp16 -= 21;// shift 21 to get result in Q0
+ tmp16 += (int16_t) inst->qNoise; //shift to get result in Q(qNoise)
+ if (tmp16 < 0) {
+ tmp32no1 >>= -tmp16;
+ } else {
+ tmp32no1 <<= tmp16;
+ }
+ inst->noiseEstQuantile[i] = WebRtcSpl_SatW32ToW16(tmp32no1); // stored as int16 in Q(qNoise)
+ }
+}
+
+// Noise Estimation: updates the SIMULT log-quantile estimates in |inst| from the magnitude spectrum |magn| and writes the current noise spectrum to |noise| and its Q-domain to |q_noise|.
+static void NoiseEstimationC(NoiseSuppressionFixedC* inst,
+ uint16_t* magn,
+ uint32_t* noise,
+ int16_t* q_noise) {
+ int16_t lmagn[HALF_ANAL_BLOCKL], counter, countDiv;
+ int16_t countProd, delta, zeros, frac;
+ int16_t log2, tabind, logval, tmp16, tmp16no1, tmp16no2;
+ const int16_t log2_const = 22713; // Q15; 22713 / 2^15 ~= 0.6931 (~= ln(2))
+ const int16_t width_factor = 21845;
+
+ size_t i, s, offset;
+
+ tabind = inst->stages - inst->normData;
+ assert(tabind < 9);
+ assert(tabind > -9);
+ if (tabind < 0) {
+ logval = -WebRtcNsx_kLogTable[-tabind];
+ } else {
+ logval = WebRtcNsx_kLogTable[tabind];
+ }
+
+ // lmagn(i)=log(magn(i))=log(2)*log2(magn(i))
+ // magn is in Q(-stages), and the real lmagn values are:
+ // real_lmagn(i)=log(magn(i)*2^stages)=log(magn(i))+log(2^stages)
+ // lmagn in Q8
+ for (i = 0; i < inst->magnLen; i++) {
+ if (magn[i]) {
+ zeros = WebRtcSpl_NormU32((uint32_t)magn[i]);
+ frac = (int16_t)((((uint32_t)magn[i] << zeros)
+ & 0x7FFFFFFF) >> 23); // the 8 bits below the leading one, used as table index
+ // log2(magn(i))
+ assert(frac < 256);
+ log2 = (int16_t)(((31 - zeros) << 8)
+ + WebRtcNsx_kLogTableFrac[frac]);
+ // log2(magn(i))*log(2)
+ lmagn[i] = (int16_t)((log2 * log2_const) >> 15);
+ // + log(2^stages)
+ lmagn[i] += logval;
+ } else {
+ lmagn[i] = logval;//0;
+ }
+ }
+
+ // loop over simultaneous estimates
+ for (s = 0; s < SIMULT; s++) {
+ offset = s * inst->magnLen; // start of estimate |s| in the noiseEst* arrays
+
+ // Get counter values from state
+ counter = inst->noiseEstCounter[s];
+ assert(counter < 201);
+ countDiv = WebRtcNsx_kCounterDiv[counter];
+ countProd = (int16_t)(counter * countDiv);
+
+ // quant_est(...)
+ for (i = 0; i < inst->magnLen; i++) {
+ // compute delta
+ if (inst->noiseEstDensity[offset + i] > 512) {
+ // Get the value for delta by shifting instead of dividing.
+ int factor = WebRtcSpl_NormW16(inst->noiseEstDensity[offset + i]);
+ delta = (int16_t)(FACTOR_Q16 >> (14 - factor));
+ } else {
+ delta = FACTOR_Q7;
+ if (inst->blockIndex < END_STARTUP_LONG) {
+ // Smaller step size during startup. This prevents from using
+ // unrealistic values causing overflow.
+ delta = FACTOR_Q7_STARTUP;
+ }
+ }
+
+ // update log quantile estimate
+ tmp16 = (int16_t)((delta * countDiv) >> 14);
+ if (lmagn[i] > inst->noiseEstLogQuantile[offset + i]) {
+ // +=QUANTILE*delta/(inst->counter[s]+1) QUANTILE=0.25, =1 in Q2
+ // CounterDiv=1/(inst->counter[s]+1) in Q15
+ tmp16 += 2;
+ inst->noiseEstLogQuantile[offset + i] += tmp16 / 4;
+ } else {
+ tmp16 += 1;
+ // *(1-QUANTILE), in Q2 QUANTILE=0.25, 1-0.25=0.75=3 in Q2
+ // TODO(bjornv): investigate why we need to truncate twice.
+ tmp16no2 = (int16_t)((tmp16 / 2) * 3 / 2);
+ inst->noiseEstLogQuantile[offset + i] -= tmp16no2;
+ if (inst->noiseEstLogQuantile[offset + i] < logval) {
+ // This is the smallest fixed point representation we can
+ // have, hence we limit the output.
+ inst->noiseEstLogQuantile[offset + i] = logval;
+ }
+ }
+
+ // update density estimate
+ if (WEBRTC_SPL_ABS_W16(lmagn[i] - inst->noiseEstLogQuantile[offset + i])
+ < WIDTH_Q8) {
+ tmp16no1 = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(
+ inst->noiseEstDensity[offset + i], countProd, 15);
+ tmp16no2 = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(
+ width_factor, countDiv, 15);
+ inst->noiseEstDensity[offset + i] = tmp16no1 + tmp16no2;
+ }
+ } // end loop over magnitude spectrum
+
+ if (counter >= END_STARTUP_LONG) {
+ inst->noiseEstCounter[s] = 0; // restarted; the increment below leaves it at 1
+ if (inst->blockIndex >= END_STARTUP_LONG) {
+ UpdateNoiseEstimate(inst, offset);
+ }
+ }
+ inst->noiseEstCounter[s]++;
+
+ } // end loop over simultaneous estimates
+
+ // Sequentially update the noise during startup
+ if (inst->blockIndex < END_STARTUP_LONG) {
+ UpdateNoiseEstimate(inst, offset); // |offset| still holds the value from the last loop iteration (s == SIMULT - 1)
+ }
+
+ for (i = 0; i < inst->magnLen; i++) {
+ noise[i] = (uint32_t)(inst->noiseEstQuantile[i]); // Q(qNoise)
+ }
+ (*q_noise) = (int16_t)inst->qNoise;
+}
+
+// Filter the data in the frequency domain, and create spectrum: |freq_buf| receives interleaved (real, -imag) pairs for bins 0..anaLen2, i.e. anaLen + 2 values.
+static void PrepareSpectrumC(NoiseSuppressionFixedC* inst, int16_t* freq_buf) {
+ size_t i = 0, j = 0;
+
+ for (i = 0; i < inst->magnLen; i++) { // scale each bin in place by noiseSupFilter[i]; the >> 14 removes the filter's scaling
+ inst->real[i] = (int16_t)((inst->real[i] *
+ (int16_t)(inst->noiseSupFilter[i])) >> 14); // Q(normData-stages)
+ inst->imag[i] = (int16_t)((inst->imag[i] *
+ (int16_t)(inst->noiseSupFilter[i])) >> 14); // Q(normData-stages)
+ }
+
+ freq_buf[0] = inst->real[0];
+ freq_buf[1] = -inst->imag[0]; // imaginary parts are stored negated throughout
+ for (i = 1, j = 2; i < inst->anaLen2; i += 1, j += 2) {
+ freq_buf[j] = inst->real[i];
+ freq_buf[j + 1] = -inst->imag[i];
+ }
+ freq_buf[inst->anaLen] = inst->real[inst->anaLen2]; // last bin (index anaLen2) is written outside the loop
+ freq_buf[inst->anaLen + 1] = -inst->imag[inst->anaLen2];
+}
+
+// Denormalize the real-valued signal |in|, the output from inverse FFT; the anaLen results are written to inst->real.
+static void DenormalizeC(NoiseSuppressionFixedC* inst,
+ int16_t* in,
+ int factor) {
+ size_t i = 0;
+ int32_t tmp32 = 0;
+ for (i = 0; i < inst->anaLen; i += 1) {
+ tmp32 = WEBRTC_SPL_SHIFT_W32((int32_t)in[i],
+ factor - inst->normData); // shift amount is |factor| minus the stored normData
+ inst->real[i] = WebRtcSpl_SatW32ToW16(tmp32); // Q0
+ }
+}
+
+// For the noise suppression process, synthesis, read out fully processed
+// segment, and update synthesis buffer. |out_frame| receives inst->blockLen10ms samples.
+static void SynthesisUpdateC(NoiseSuppressionFixedC* inst,
+ int16_t* out_frame,
+ int16_t gain_factor) {
+ size_t i = 0;
+ int16_t tmp16a = 0;
+ int16_t tmp16b = 0;
+ int32_t tmp32 = 0;
+
+ // synthesis
+ for (i = 0; i < inst->anaLen; i++) {
+ tmp16a = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(
+ inst->window[i], inst->real[i], 14); // Q0, window in Q14
+ tmp32 = WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(tmp16a, gain_factor, 13); // Q0
+ // Down shift with rounding
+ tmp16b = WebRtcSpl_SatW32ToW16(tmp32); // Q0
+ inst->synthesisBuffer[i] = WebRtcSpl_AddSatW16(inst->synthesisBuffer[i],
+ tmp16b); // Q0
+ }
+
+ // read out fully processed segment
+ for (i = 0; i < inst->blockLen10ms; i++) {
+ out_frame[i] = inst->synthesisBuffer[i]; // Q0
+ }
+
+ // update synthesis buffer
+ memcpy(inst->synthesisBuffer, inst->synthesisBuffer + inst->blockLen10ms,
+ (inst->anaLen - inst->blockLen10ms) * sizeof(*inst->synthesisBuffer)); // move the remaining tail to the front
+ WebRtcSpl_ZerosArrayW16(inst->synthesisBuffer
+ + inst->anaLen - inst->blockLen10ms, inst->blockLen10ms); // clear the last blockLen10ms samples
+}
+
+// Update analysis buffer for lower band, and window data before FFT. |new_speech| supplies blockLen10ms samples; |out| receives anaLen windowed samples.
+static void AnalysisUpdateC(NoiseSuppressionFixedC* inst,
+ int16_t* out,
+ int16_t* new_speech) {
+ size_t i = 0;
+
+ // For lower band update analysis buffer.
+ memcpy(inst->analysisBuffer, inst->analysisBuffer + inst->blockLen10ms,
+ (inst->anaLen - inst->blockLen10ms) * sizeof(*inst->analysisBuffer)); // drop the oldest blockLen10ms samples
+ memcpy(inst->analysisBuffer + inst->anaLen - inst->blockLen10ms, new_speech,
+ inst->blockLen10ms * sizeof(*inst->analysisBuffer)); // append the new frame at the end
+
+ // Window data before FFT.
+ for (i = 0; i < inst->anaLen; i++) {
+ out[i] = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(
+ inst->window[i], inst->analysisBuffer[i], 14); // Q0
+ }
+}
+
+// Normalize the real-valued signal |in|, the input to forward FFT: each of the inst->anaLen samples is scaled by 2^normData and written to |out|.
+static void NormalizeRealBufferC(NoiseSuppressionFixedC* inst,
+ const int16_t* in,
+ int16_t* out) {
+ size_t i = 0;
+ assert(inst->normData >= 0);
+ for (i = 0; i < inst->anaLen; ++i) {
+ out[i] = (int16_t)(in[i] * (1 << inst->normData)); // Q(normData); multiply rather than "<<" because left-shifting a negative value is undefined behavior
+ }
+}
+
+// Declare function pointers. WebRtcNsx_InitCore() points them at the C implementations and then lets the NEON/MIPS initializers override some of them.
+NoiseEstimation WebRtcNsx_NoiseEstimation;
+PrepareSpectrum WebRtcNsx_PrepareSpectrum;
+SynthesisUpdate WebRtcNsx_SynthesisUpdate;
+AnalysisUpdate WebRtcNsx_AnalysisUpdate;
+Denormalize WebRtcNsx_Denormalize;
+NormalizeRealBuffer WebRtcNsx_NormalizeRealBuffer;
+
+#if (defined WEBRTC_DETECT_NEON || defined WEBRTC_HAS_NEON)
+// Initialize function pointers for ARM Neon platform. Only the four pointers below are replaced; Denormalize and NormalizeRealBuffer are left as they were.
+static void WebRtcNsx_InitNeon(void) {
+ WebRtcNsx_NoiseEstimation = WebRtcNsx_NoiseEstimationNeon;
+ WebRtcNsx_PrepareSpectrum = WebRtcNsx_PrepareSpectrumNeon;
+ WebRtcNsx_SynthesisUpdate = WebRtcNsx_SynthesisUpdateNeon;
+ WebRtcNsx_AnalysisUpdate = WebRtcNsx_AnalysisUpdateNeon;
+}
+#endif
+
+#if defined(MIPS32_LE)
+// Initialize function pointers for MIPS platform. NoiseEstimation is not replaced here.
+static void WebRtcNsx_InitMips(void) {
+ WebRtcNsx_PrepareSpectrum = WebRtcNsx_PrepareSpectrum_mips;
+ WebRtcNsx_SynthesisUpdate = WebRtcNsx_SynthesisUpdate_mips;
+ WebRtcNsx_AnalysisUpdate = WebRtcNsx_AnalysisUpdate_mips;
+ WebRtcNsx_NormalizeRealBuffer = WebRtcNsx_NormalizeRealBuffer_mips;
+#if defined(MIPS_DSP_R1_LE)
+ WebRtcNsx_Denormalize = WebRtcNsx_Denormalize_mips; // replaced only when MIPS_DSP_R1_LE is defined
+#endif
+}
+#endif
+
+void WebRtcNsx_CalcParametricNoiseEstimate(NoiseSuppressionFixedC* inst, // parametric (pink) noise estimate for bin |freq_index|
+ int16_t pink_noise_exp_avg,
+ int32_t pink_noise_num_avg,
+ int freq_index,
+ uint32_t* noise_estimate,
+ uint32_t* noise_estimate_avg) { // both outputs are written only when the exponent computed below is > 0
+ int32_t tmp32no1 = 0;
+ int32_t tmp32no2 = 0;
+
+ int16_t int_part = 0;
+ int16_t frac_part = 0;
+
+ // Use pink noise estimate
+ // noise_estimate = 2^(pinkNoiseNumerator + pinkNoiseExp * log2(j))
+ assert(freq_index >= 0);
+ assert(freq_index < 129);
+ tmp32no2 = (pink_noise_exp_avg * kLogIndex[freq_index]) >> 15; // Q11
+ tmp32no1 = pink_noise_num_avg - tmp32no2; // Q11
+
+ // Calculate output: 2^tmp32no1
+ // Output in Q(minNorm-stages)
+ tmp32no1 += (inst->minNorm - inst->stages) << 11;
+ if (tmp32no1 > 0) {
+ int_part = (int16_t)(tmp32no1 >> 11);
+ frac_part = (int16_t)(tmp32no1 & 0x000007ff); // Q11
+ // Piecewise linear approximation of 'b' in
+ // 2^(int_part+frac_part) = 2^int_part * (1 + b)
+ // 'b' is given in Q11 and below stored in frac_part.
+ if (frac_part >> 10) {
+ // Upper fractional part
+ tmp32no2 = (2048 - frac_part) * 1244; // Q21
+ tmp32no2 = 2048 - (tmp32no2 >> 10);
+ } else {
+ // Lower fractional part
+ tmp32no2 = (frac_part * 804) >> 10;
+ }
+ // Shift fractional part to Q(minNorm-stages)
+ tmp32no2 = WEBRTC_SPL_SHIFT_W32(tmp32no2, int_part - 11);
+ *noise_estimate_avg = (1 << int_part) + (uint32_t)tmp32no2;
+ // Scale up to initMagnEst, which is not block averaged
+ *noise_estimate = (*noise_estimate_avg) * (uint32_t)(inst->blockIndex + 1);
+ }
+}
+
+// Initialize state for sampling rate |fs|. Returns 0 on success, or -1 if |inst| is NULL, |fs| is not 8000/16000/32000/48000, or the real FFT cannot be created.
+int32_t WebRtcNsx_InitCore(NoiseSuppressionFixedC* inst, uint32_t fs) {
+ int i;
+
+ //check for valid pointer
+ if (inst == NULL) {
+ return -1;
+ }
+ //
+
+ // Initialization of struct
+ if (fs == 8000 || fs == 16000 || fs == 32000 || fs == 48000) {
+ inst->fs = fs;
+ } else {
+ return -1;
+ }
+
+ if (fs == 8000) {
+ inst->blockLen10ms = 80;
+ inst->anaLen = 128;
+ inst->stages = 7;
+ inst->window = kBlocks80w128x;
+ inst->thresholdLogLrt = 131072; //default threshold for LRT feature
+ inst->maxLrt = 0x0040000;
+ inst->minLrt = 52429;
+ } else { // 16000, 32000 and 48000 all share this configuration
+ inst->blockLen10ms = 160;
+ inst->anaLen = 256;
+ inst->stages = 8;
+ inst->window = kBlocks160w256x;
+ inst->thresholdLogLrt = 212644; //default threshold for LRT feature
+ inst->maxLrt = 0x0080000;
+ inst->minLrt = 104858;
+ }
+ inst->anaLen2 = inst->anaLen / 2;
+ inst->magnLen = inst->anaLen2 + 1;
+
+ if (inst->real_fft != NULL) { // NOTE(review): read before this function assigns it; presumably the caller sets it to NULL or a valid handle before the first call -- confirm
+ WebRtcSpl_FreeRealFFT(inst->real_fft);
+ }
+ inst->real_fft = WebRtcSpl_CreateRealFFT(inst->stages);
+ if (inst->real_fft == NULL) {
+ return -1;
+ }
+
+ WebRtcSpl_ZerosArrayW16(inst->analysisBuffer, ANAL_BLOCKL_MAX);
+ WebRtcSpl_ZerosArrayW16(inst->synthesisBuffer, ANAL_BLOCKL_MAX);
+
+ // for HB processing
+ WebRtcSpl_ZerosArrayW16(inst->dataBufHBFX[0],
+ NUM_HIGH_BANDS_MAX * ANAL_BLOCKL_MAX);
+ // for quantile noise estimation
+ WebRtcSpl_ZerosArrayW16(inst->noiseEstQuantile, HALF_ANAL_BLOCKL);
+ for (i = 0; i < SIMULT * HALF_ANAL_BLOCKL; i++) {
+ inst->noiseEstLogQuantile[i] = 2048; // Q8
+ inst->noiseEstDensity[i] = 153; // Q9
+ }
+ for (i = 0; i < SIMULT; i++) { // give each simultaneous estimate a different starting counter
+ inst->noiseEstCounter[i] = (int16_t)(END_STARTUP_LONG * (i + 1)) / SIMULT;
+ }
+
+ // Initialize suppression filter with ones
+ WebRtcSpl_MemSetW16((int16_t*)inst->noiseSupFilter, 16384, HALF_ANAL_BLOCKL);
+
+ // Set the aggressiveness: default
+ inst->aggrMode = 0;
+
+ //initialize variables for new method
+ inst->priorNonSpeechProb = 8192; // Q14(0.5) prior probability for speech/noise
+ for (i = 0; i < HALF_ANAL_BLOCKL; i++) {
+ inst->prevMagnU16[i] = 0;
+ inst->prevNoiseU32[i] = 0; //previous noise-spectrum
+ inst->logLrtTimeAvgW32[i] = 0; //smooth LR ratio
+ inst->avgMagnPause[i] = 0; //conservative noise spectrum estimate
+ inst->initMagnEst[i] = 0; //initial average magnitude spectrum
+ }
+
+ //feature quantities
+ inst->thresholdSpecDiff = 50; //threshold for difference feature: determined on-line
+ inst->thresholdSpecFlat = 20480; //threshold for flatness: determined on-line
+ inst->featureLogLrt = inst->thresholdLogLrt; //average LRT factor (= threshold)
+ inst->featureSpecFlat = inst->thresholdSpecFlat; //spectral flatness (= threshold)
+ inst->featureSpecDiff = inst->thresholdSpecDiff; //spectral difference (= threshold)
+ inst->weightLogLrt = 6; //default weighting par for LRT feature
+ inst->weightSpecFlat = 0; //default weighting par for spectral flatness feature
+ inst->weightSpecDiff = 0; //default weighting par for spectral difference feature
+
+ inst->curAvgMagnEnergy = 0; //window time-average of input magnitude spectrum
+ inst->timeAvgMagnEnergy = 0; //normalization for spectral difference
+ inst->timeAvgMagnEnergyTmp = 0; //normalization for spectral difference
+
+ //histogram quantities: used to estimate/update thresholds for features
+ WebRtcSpl_ZerosArrayW16(inst->histLrt, HIST_PAR_EST);
+ WebRtcSpl_ZerosArrayW16(inst->histSpecDiff, HIST_PAR_EST);
+ WebRtcSpl_ZerosArrayW16(inst->histSpecFlat, HIST_PAR_EST);
+
+ inst->blockIndex = -1; //frame counter
+
+ //inst->modelUpdate = 500; //window for update
+ inst->modelUpdate = (1 << STAT_UPDATES); //window for update
+ inst->cntThresUpdate = 0; //counter feature thresholds updates
+
+ inst->sumMagn = 0;
+ inst->magnEnergy = 0;
+ inst->prevQMagn = 0;
+ inst->qNoise = 0;
+ inst->prevQNoise = 0;
+
+ inst->energyIn = 0;
+ inst->scaleEnergyIn = 0;
+
+ inst->whiteNoiseLevel = 0;
+ inst->pinkNoiseNumerator = 0;
+ inst->pinkNoiseExp = 0;
+ inst->minNorm = 15; // Start with full scale
+ inst->zeroInputSignal = 0;
+
+ //default mode
+ WebRtcNsx_set_policy_core(inst, 0);
+
+#ifdef NS_FILEDEBUG
+ inst->infile = fopen("indebug.pcm", "wb");
+ inst->outfile = fopen("outdebug.pcm", "wb");
+ inst->file1 = fopen("file1.pcm", "wb");
+ inst->file2 = fopen("file2.pcm", "wb");
+ inst->file3 = fopen("file3.pcm", "wb");
+ inst->file4 = fopen("file4.pcm", "wb");
+ inst->file5 = fopen("file5.pcm", "wb");
+#endif
+
+ // Initialize function pointers. These are file-level globals, not per-instance state.
+ WebRtcNsx_NoiseEstimation = NoiseEstimationC;
+ WebRtcNsx_PrepareSpectrum = PrepareSpectrumC;
+ WebRtcNsx_SynthesisUpdate = SynthesisUpdateC;
+ WebRtcNsx_AnalysisUpdate = AnalysisUpdateC;
+ WebRtcNsx_Denormalize = DenormalizeC;
+ WebRtcNsx_NormalizeRealBuffer = NormalizeRealBufferC;
+
+#ifdef WEBRTC_DETECT_NEON
+ uint64_t features = WebRtc_GetCPUFeaturesARM(); // run-time check: use NEON only if the CPU reports it
+ if ((features & kCPUFeatureNEON) != 0) {
+ WebRtcNsx_InitNeon();
+ }
+#elif defined(WEBRTC_HAS_NEON)
+ WebRtcNsx_InitNeon();
+#endif
+
+#if defined(MIPS32_LE)
+ WebRtcNsx_InitMips();
+#endif
+
+ inst->initFlag = 1;
+
+ return 0;
+}
+
+// Selects the suppression aggressiveness and loads the matching tuning
+// values (overdrive, denoise bound, gain-map settings) into |inst|.
+// Returns 0 for mode 0..3; for any other mode returns -1 and leaves |inst|
+// unmodified.
+int WebRtcNsx_set_policy_core(NoiseSuppressionFixedC* inst, int mode) {
+  switch (mode) {
+    case 0:
+      // factor2Table is not modified in this mode.
+      inst->overdrive = 256; // Q8(1.0)
+      inst->denoiseBound = 8192; // Q14(0.5)
+      inst->gainMap = 0; // No gain compensation
+      break;
+    case 1:
+      inst->overdrive = 256; // Q8(1.0)
+      inst->denoiseBound = 4096; // Q14(0.25)
+      inst->factor2Table = kFactor2Aggressiveness1;
+      inst->gainMap = 1;
+      break;
+    case 2:
+      inst->overdrive = 282; // ~= Q8(1.1)
+      inst->denoiseBound = 2048; // Q14(0.125)
+      inst->factor2Table = kFactor2Aggressiveness2;
+      inst->gainMap = 1;
+      break;
+    case 3:
+      inst->overdrive = 320; // Q8(1.25)
+      inst->denoiseBound = 1475; // ~= Q14(0.09)
+      inst->factor2Table = kFactor2Aggressiveness3;
+      inst->gainMap = 1;
+      break;
+    default:
+      // only modes 0, 1, 2 and 3 are accepted
+      return -1;
+  }
+
+  inst->aggrMode = mode;
+  return 0;
+}
+
+// Updates the feature histograms (flag == 0) or extracts new feature thresholds and weights from them (flag != 0)
+// flag == 0: bumps at most one bin in each of histLrt, histSpecFlat and histSpecDiff; out-of-range indices are dropped
+// flag != 0: derives thresholds and weights from the accumulated histograms, then zeroes all three histograms
+// how often each mode is used (the histogram window) is decided by the caller, not in this function
+// results go to inst->thresholdLogLrt/thresholdSpecFlat/thresholdSpecDiff and inst->weightLogLrt/weightSpecFlat/weightSpecDiff
+void WebRtcNsx_FeatureParameterExtraction(NoiseSuppressionFixedC* inst,
+ int flag) {
+ uint32_t tmpU32;
+ uint32_t histIndex;
+ uint32_t posPeak1SpecFlatFX, posPeak2SpecFlatFX;
+ uint32_t posPeak1SpecDiffFX, posPeak2SpecDiffFX;
+
+ int32_t tmp32;
+ int32_t fluctLrtFX, thresFluctLrtFX;
+ int32_t avgHistLrtFX, avgSquareHistLrtFX, avgHistLrtComplFX;
+
+ int16_t j;
+ int16_t numHistLrt;
+
+ int i;
+ int useFeatureSpecFlat, useFeatureSpecDiff, featureSum;
+ int maxPeak1, maxPeak2;
+ int weightPeak1SpecFlat, weightPeak2SpecFlat;
+ int weightPeak1SpecDiff, weightPeak2SpecDiff;
+
+ // update histograms only; thresholds and weights are left untouched in this mode
+ if (!flag) {
+ // LRT
+ // Casting to uint32_t is safe: a negative value would wrap to a large number, which then
+ // fails the histIndex < HIST_PAR_EST range check below and is simply not counted
+ histIndex = (uint32_t)(inst->featureLogLrt);
+ if (histIndex < HIST_PAR_EST) {
+ inst->histLrt[histIndex]++;
+ }
+ // Spectral flatness
+ // (inst->featureSpecFlat*20)>>10 = (inst->featureSpecFlat*5)>>8
+ histIndex = (inst->featureSpecFlat * 5) >> 8;
+ if (histIndex < HIST_PAR_EST) {
+ inst->histSpecFlat[histIndex]++;
+ }
+ // Spectral difference
+ histIndex = HIST_PAR_EST; // out-of-range default: the update below is skipped unless recomputed
+ if (inst->timeAvgMagnEnergy > 0) {
+ // Guard against division by zero
+ // If timeAvgMagnEnergy == 0 we have no normalizing statistics and
+ // therefore can't update the histogram
+ histIndex = ((inst->featureSpecDiff * 5) >> inst->stages) /
+ inst->timeAvgMagnEnergy;
+ }
+ if (histIndex < HIST_PAR_EST) {
+ inst->histSpecDiff[histIndex]++;
+ }
+ }
+
+ // extract parameters for speech/noise probability
+ if (flag) {
+ useFeatureSpecDiff = 1;
+ //for LRT feature:
+ // accumulate weighted sums of histLrt using 2*i+1 as the value of bin i; only the first BIN_SIZE_LRT bins feed avgHistLrtFX and numHistLrt
+ avgHistLrtFX = 0;
+ avgSquareHistLrtFX = 0;
+ numHistLrt = 0;
+ for (i = 0; i < BIN_SIZE_LRT; i++) {
+ j = (2 * i + 1);
+ tmp32 = inst->histLrt[i] * j;
+ avgHistLrtFX += tmp32;
+ numHistLrt += inst->histLrt[i];
+ avgSquareHistLrtFX += tmp32 * j;
+ }
+ avgHistLrtComplFX = avgHistLrtFX; // the loop below extends this sum (and the squared sum) over the remaining bins
+ for (; i < HIST_PAR_EST; i++) {
+ j = (2 * i + 1);
+ tmp32 = inst->histLrt[i] * j;
+ avgHistLrtComplFX += tmp32;
+ avgSquareHistLrtFX += tmp32 * j;
+ }
+ fluctLrtFX = avgSquareHistLrtFX * numHistLrt -
+ avgHistLrtFX * avgHistLrtComplFX;
+ thresFluctLrtFX = THRES_FLUCT_LRT * numHistLrt;
+ // get threshold for LRT feature:
+ tmpU32 = (FACTOR_1_LRT_DIFF * (uint32_t)avgHistLrtFX);
+ if ((fluctLrtFX < thresFluctLrtFX) || (numHistLrt == 0) ||
+ (tmpU32 > (uint32_t)(100 * numHistLrt))) {
+ //very low fluctuation, so likely noise (this branch also covers an empty histogram, avoiding the division below)
+ inst->thresholdLogLrt = inst->maxLrt;
+ } else {
+ tmp32 = (int32_t)((tmpU32 << (9 + inst->stages)) / numHistLrt /
+ 25);
+ // check if value is within min/max range
+ inst->thresholdLogLrt = WEBRTC_SPL_SAT(inst->maxLrt,
+ tmp32,
+ inst->minLrt);
+ }
+ if (fluctLrtFX < thresFluctLrtFX) {
+ // Do not use difference feature if fluctuation of LRT feature is very low:
+ // most likely just noise state
+ useFeatureSpecDiff = 0;
+ }
+
+ // for spectral flatness and spectral difference: compute the main peaks of histogram
+ maxPeak1 = 0;
+ maxPeak2 = 0;
+ posPeak1SpecFlatFX = 0;
+ posPeak2SpecFlatFX = 0;
+ weightPeak1SpecFlat = 0;
+ weightPeak2SpecFlat = 0;
+
+ // peaks for flatness
+ for (i = 0; i < HIST_PAR_EST; i++) {
+ if (inst->histSpecFlat[i] > maxPeak1) {
+ // Found new "first" peak
+ maxPeak2 = maxPeak1;
+ weightPeak2SpecFlat = weightPeak1SpecFlat;
+ posPeak2SpecFlatFX = posPeak1SpecFlatFX;
+
+ maxPeak1 = inst->histSpecFlat[i];
+ weightPeak1SpecFlat = inst->histSpecFlat[i];
+ posPeak1SpecFlatFX = (uint32_t)(2 * i + 1);
+ } else if (inst->histSpecFlat[i] > maxPeak2) {
+ // Found new "second" peak
+ maxPeak2 = inst->histSpecFlat[i];
+ weightPeak2SpecFlat = inst->histSpecFlat[i];
+ posPeak2SpecFlatFX = (uint32_t)(2 * i + 1);
+ }
+ }
+
+ // for spectral flatness feature
+ useFeatureSpecFlat = 1;
+ // merge the two peaks if they are close; NOTE(review): positions are uint32_t, so the difference wraps to a large value when peak 2 lies above peak 1
+ if ((posPeak1SpecFlatFX - posPeak2SpecFlatFX < LIM_PEAK_SPACE_FLAT_DIFF)
+ && (weightPeak2SpecFlat * LIM_PEAK_WEIGHT_FLAT_DIFF > weightPeak1SpecFlat)) {
+ weightPeak1SpecFlat += weightPeak2SpecFlat;
+ posPeak1SpecFlatFX = (posPeak1SpecFlatFX + posPeak2SpecFlatFX) >> 1;
+ }
+ //reject if weight of peaks is not large enough, or peak value too small
+ if (weightPeak1SpecFlat < THRES_WEIGHT_FLAT_DIFF || posPeak1SpecFlatFX
+ < THRES_PEAK_FLAT) {
+ useFeatureSpecFlat = 0;
+ } else { // if selected, get the threshold
+ // compute the threshold and check if value is within min/max range
+ inst->thresholdSpecFlat = WEBRTC_SPL_SAT(MAX_FLAT_Q10, FACTOR_2_FLAT_Q10
+ * posPeak1SpecFlatFX, MIN_FLAT_Q10); //Q10
+ }
+ // done with flatness feature
+
+ if (useFeatureSpecDiff) {
+ //compute two peaks for spectral difference
+ maxPeak1 = 0;
+ maxPeak2 = 0;
+ posPeak1SpecDiffFX = 0;
+ posPeak2SpecDiffFX = 0;
+ weightPeak1SpecDiff = 0;
+ weightPeak2SpecDiff = 0;
+ // peaks for spectral difference
+ for (i = 0; i < HIST_PAR_EST; i++) {
+ if (inst->histSpecDiff[i] > maxPeak1) {
+ // Found new "first" peak
+ maxPeak2 = maxPeak1;
+ weightPeak2SpecDiff = weightPeak1SpecDiff;
+ posPeak2SpecDiffFX = posPeak1SpecDiffFX;
+
+ maxPeak1 = inst->histSpecDiff[i];
+ weightPeak1SpecDiff = inst->histSpecDiff[i];
+ posPeak1SpecDiffFX = (uint32_t)(2 * i + 1);
+ } else if (inst->histSpecDiff[i] > maxPeak2) {
+ // Found new "second" peak
+ maxPeak2 = inst->histSpecDiff[i];
+ weightPeak2SpecDiff = inst->histSpecDiff[i];
+ posPeak2SpecDiffFX = (uint32_t)(2 * i + 1);
+ }
+ }
+
+ // merge the two peaks if they are close (same unsigned-difference caveat as for the flatness peaks)
+ if ((posPeak1SpecDiffFX - posPeak2SpecDiffFX < LIM_PEAK_SPACE_FLAT_DIFF)
+ && (weightPeak2SpecDiff * LIM_PEAK_WEIGHT_FLAT_DIFF > weightPeak1SpecDiff)) {
+ weightPeak1SpecDiff += weightPeak2SpecDiff;
+ posPeak1SpecDiffFX = (posPeak1SpecDiffFX + posPeak2SpecDiffFX) >> 1;
+ }
+ // get the threshold value and check if value is within min/max range
+ inst->thresholdSpecDiff = WEBRTC_SPL_SAT(MAX_DIFF, FACTOR_1_LRT_DIFF
+ * posPeak1SpecDiffFX, MIN_DIFF); //5x bigger
+ //reject if weight of peaks is not large enough (the threshold above has already been stored)
+ if (weightPeak1SpecDiff < THRES_WEIGHT_FLAT_DIFF) {
+ useFeatureSpecDiff = 0;
+ }
+ // done with spectral difference feature
+ }
+
+ // select the weights between the features
+ // inst->weightLogLrt is the weight for LRT: always selected
+ featureSum = 6 / (1 + useFeatureSpecFlat + useFeatureSpecDiff); // 6, 3 or 2 depending on how many features are active
+ inst->weightLogLrt = featureSum;
+ inst->weightSpecFlat = useFeatureSpecFlat * featureSum;
+ inst->weightSpecDiff = useFeatureSpecDiff * featureSum;
+
+ // set histograms to zero for next update
+ WebRtcSpl_ZerosArrayW16(inst->histLrt, HIST_PAR_EST);
+ WebRtcSpl_ZerosArrayW16(inst->histSpecDiff, HIST_PAR_EST);
+ WebRtcSpl_ZerosArrayW16(inst->histSpecFlat, HIST_PAR_EST);
+ } // end of flag == 1
+}
+
+
+// Compute spectral flatness on input spectrum and fold it into the running average inst->featureSpecFlat (Q10)
+// magn is the magnitude spectrum; bin 0 is excluded and inst->sumMagn is read as the sum over all bins
+// if any bin in 1..magnLen-1 is zero, inst->featureSpecFlat is only decayed and the function returns early
+void WebRtcNsx_ComputeSpectralFlatness(NoiseSuppressionFixedC* inst,
+ uint16_t* magn) {
+ uint32_t tmpU32;
+ uint32_t avgSpectralFlatnessNum, avgSpectralFlatnessDen;
+
+ int32_t tmp32;
+ int32_t currentSpectralFlatness, logCurSpectralFlatness;
+
+ int16_t zeros, frac, intPart;
+
+ size_t i;
+
+ // for flatness
+ avgSpectralFlatnessNum = 0;
+ avgSpectralFlatnessDen = inst->sumMagn - (uint32_t)magn[0]; // Q(normData-stages)
+
+ // compute log of ratio of the geometric to arithmetic mean: check for log(0) case
+ // flatness = exp( sum(log(magn[i]))/N - log(sum(magn[i])/N) )
+ // = exp( sum(log(magn[i]))/N ) * N / sum(magn[i])
+ // = 2^( sum(log2(magn[i]))/N - (log2(sum(magn[i])) - log2(N)) ) [This is used]
+ for (i = 1; i < inst->magnLen; i++) {
+ // First bin is excluded from spectrum measures. Number of bins is now a power of 2
+ if (magn[i]) {
+ zeros = WebRtcSpl_NormU32((uint32_t)magn[i]);
+ frac = (int16_t)(((uint32_t)((uint32_t)(magn[i]) << zeros)
+ & 0x7FFFFFFF) >> 23);
+ // log2(magn(i)): integer part from the leading-zero count, fractional part from the 8-bit table lookup
+ assert(frac < 256);
+ tmpU32 = (uint32_t)(((31 - zeros) << 8)
+ + WebRtcNsx_kLogTableFrac[frac]); // Q8
+ avgSpectralFlatnessNum += tmpU32; // Q8
+ } else {
+ //if at least one frequency component is zero, treat separately: decay the feature by SPECT_FLAT_TAVG_Q14 and stop
+ tmpU32 = WEBRTC_SPL_UMUL_32_16(inst->featureSpecFlat, SPECT_FLAT_TAVG_Q14); // Q24
+ inst->featureSpecFlat -= tmpU32 >> 14; // Q10
+ return;
+ }
+ }
+ //ratio and inverse log: check for case of log(0)
+ zeros = WebRtcSpl_NormU32(avgSpectralFlatnessDen);
+ frac = (int16_t)(((avgSpectralFlatnessDen << zeros) & 0x7FFFFFFF) >> 23);
+ // log2(avgSpectralFlatnessDen)
+ assert(frac < 256);
+ tmp32 = (int32_t)(((31 - zeros) << 8) + WebRtcNsx_kLogTableFrac[frac]); // Q8
+ logCurSpectralFlatness = (int32_t)avgSpectralFlatnessNum;
+ logCurSpectralFlatness += ((int32_t)(inst->stages - 1) << (inst->stages + 7)); // Q(8+stages-1)
+ logCurSpectralFlatness -= (tmp32 << (inst->stages - 1));
+ logCurSpectralFlatness <<= (10 - inst->stages); // Q17
+ tmp32 = (int32_t)(0x00020000 | (WEBRTC_SPL_ABS_W32(logCurSpectralFlatness)
+ & 0x0001FFFF)); //Q17
+ intPart = 7 - (logCurSpectralFlatness >> 17); // Add 7 for output in Q10.
+ if (intPart > 0) {
+ currentSpectralFlatness = tmp32 >> intPart;
+ } else {
+ currentSpectralFlatness = tmp32 << -intPart;
+ }
+
+ //time average update of spectral flatness feature
+ tmp32 = currentSpectralFlatness - (int32_t)inst->featureSpecFlat; // Q10
+ tmp32 *= SPECT_FLAT_TAVG_Q14; // Q24
+ inst->featureSpecFlat += tmp32 >> 14; // Q10
+ // done with flatness feature
+}
+
+
+// Compute the difference measure between input spectrum and a template/learned noise spectrum
+// magnIn is the input magnitude spectrum
+// the reference/template spectrum is inst->avgMagnPause[i]
+// the (normalized) spectral difference is time-averaged into inst->featureSpecDiff; inst->curAvgMagnEnergy is also accumulated
+void WebRtcNsx_ComputeSpectralDifference(NoiseSuppressionFixedC* inst,
+ uint16_t* magnIn) {
+ // This is to be calculated:
+ // avgDiffNormMagn = var(magnIn) - cov(magnIn, magnAvgPause)^2 / var(magnAvgPause)
+
+ uint32_t tmpU32no1, tmpU32no2;
+ uint32_t varMagnUFX, varPauseUFX, avgDiffNormMagnUFX;
+
+ int32_t tmp32no1, tmp32no2;
+ int32_t avgPauseFX, avgMagnFX, covMagnPauseFX;
+ int32_t maxPause, minPause;
+
+ int16_t tmp16no1;
+
+ size_t i;
+ int norm32, nShifts;
+
+ avgPauseFX = 0;
+ maxPause = 0;
+ minPause = inst->avgMagnPause[0]; // Q(prevQMagn)
+ // compute average quantities
+ for (i = 0; i < inst->magnLen; i++) {
+ // Compute mean of magn_pause
+ avgPauseFX += inst->avgMagnPause[i]; // in Q(prevQMagn)
+ maxPause = WEBRTC_SPL_MAX(maxPause, inst->avgMagnPause[i]);
+ minPause = WEBRTC_SPL_MIN(minPause, inst->avgMagnPause[i]);
+ }
+ // normalize by replacing div of "inst->magnLen" with "inst->stages-1" shifts
+ avgPauseFX >>= inst->stages - 1;
+ avgMagnFX = inst->sumMagn >> (inst->stages - 1);
+ // Largest possible deviation in magnPause for (co)var calculations
+ tmp32no1 = WEBRTC_SPL_MAX(maxPause - avgPauseFX, avgPauseFX - minPause);
+ // Get number of shifts to make sure we don't get wrap around in varPause
+ nShifts = WEBRTC_SPL_MAX(0, 10 + inst->stages - WebRtcSpl_NormW32(tmp32no1));
+
+ varMagnUFX = 0;
+ varPauseUFX = 0;
+ covMagnPauseFX = 0;
+ for (i = 0; i < inst->magnLen; i++) {
+ // Compute var and cov of magn and magn_pause
+ tmp16no1 = (int16_t)((int32_t)magnIn[i] - avgMagnFX);
+ tmp32no2 = inst->avgMagnPause[i] - avgPauseFX;
+ varMagnUFX += (uint32_t)(tmp16no1 * tmp16no1); // Q(2*qMagn)
+ tmp32no1 = tmp32no2 * tmp16no1; // Q(prevQMagn+qMagn)
+ covMagnPauseFX += tmp32no1; // Q(prevQMagn+qMagn)
+ tmp32no1 = tmp32no2 >> nShifts; // Q(prevQMagn-minPause).
+ varPauseUFX += tmp32no1 * tmp32no1; // Q(2*(prevQMagn-minPause))
+ }
+ //update of average magnitude energy (inst->curAvgMagnEnergy): Q(-2*stages) and averaging replaced by shifts
+ inst->curAvgMagnEnergy +=
+ inst->magnEnergy >> (2 * inst->normData + inst->stages - 1);
+
+ avgDiffNormMagnUFX = varMagnUFX; // Q(2*qMagn)
+ if ((varPauseUFX) && (covMagnPauseFX)) { // the cov^2/var correction is applied only when both terms are non-zero
+ tmpU32no1 = (uint32_t)WEBRTC_SPL_ABS_W32(covMagnPauseFX); // Q(prevQMagn+qMagn)
+ norm32 = WebRtcSpl_NormU32(tmpU32no1) - 16;
+ if (norm32 > 0) {
+ tmpU32no1 <<= norm32; // Q(prevQMagn+qMagn+norm32)
+ } else {
+ tmpU32no1 >>= -norm32; // Q(prevQMagn+qMagn+norm32)
+ }
+ tmpU32no2 = WEBRTC_SPL_UMUL(tmpU32no1, tmpU32no1); // Q(2*(prevQMagn+qMagn-norm32))
+
+ nShifts += norm32;
+ nShifts <<= 1;
+ if (nShifts < 0) {
+ varPauseUFX >>= (-nShifts); // Q(2*(qMagn+norm32+minPause))
+ nShifts = 0;
+ }
+ if (varPauseUFX > 0) {
+ // Q(2*(qMagn+norm32-16+minPause))
+ tmpU32no1 = tmpU32no2 / varPauseUFX;
+ tmpU32no1 >>= nShifts;
+
+ // Q(2*qMagn)
+ avgDiffNormMagnUFX -= WEBRTC_SPL_MIN(avgDiffNormMagnUFX, tmpU32no1); // clamped so the unsigned result cannot wrap below zero
+ } else {
+ avgDiffNormMagnUFX = 0;
+ }
+ }
+ //normalize and compute time average update of difference feature; the two branches keep each unsigned subtraction non-negative
+ tmpU32no1 = avgDiffNormMagnUFX >> (2 * inst->normData);
+ if (inst->featureSpecDiff > tmpU32no1) {
+ tmpU32no2 = WEBRTC_SPL_UMUL_32_16(inst->featureSpecDiff - tmpU32no1,
+ SPECT_DIFF_TAVG_Q8); // Q(8-2*stages)
+ inst->featureSpecDiff -= tmpU32no2 >> 8; // Q(-2*stages)
+ } else {
+ tmpU32no2 = WEBRTC_SPL_UMUL_32_16(tmpU32no1 - inst->featureSpecDiff,
+ SPECT_DIFF_TAVG_Q8); // Q(8-2*stages)
+ inst->featureSpecDiff += tmpU32no2 >> 8; // Q(-2*stages)
+ }
+}
+
+// Transform input (speechFrame) to frequency domain magnitude (magnU16); also updates inst->real/imag, magnEnergy, sumMagn and, during startup, the white/pink noise model accumulators
+void WebRtcNsx_DataAnalysis(NoiseSuppressionFixedC* inst,
+ short* speechFrame,
+ uint16_t* magnU16) {
+ uint32_t tmpU32no1;
+
+ int32_t tmp_1_w32 = 0;
+ int32_t tmp_2_w32 = 0;
+ int32_t sum_log_magn = 0;
+ int32_t sum_log_i_log_magn = 0;
+
+ uint16_t sum_log_magn_u16 = 0;
+ uint16_t tmp_u16 = 0;
+
+ int16_t sum_log_i = 0;
+ int16_t sum_log_i_square = 0;
+ int16_t frac = 0;
+ int16_t log2 = 0;
+ int16_t matrix_determinant = 0;
+ int16_t maxWinData;
+
+ size_t i, j;
+ int zeros;
+ int net_norm = 0;
+ int right_shifts_in_magnU16 = 0;
+ int right_shifts_in_initMagnEst = 0;
+
+ int16_t winData_buff[ANAL_BLOCKL_MAX * 2 + 16];
+ int16_t realImag_buff[ANAL_BLOCKL_MAX * 2 + 16];
+
+ // Align the structures to 32-byte boundary for the FFT function (the 16 extra int16_t in each buffer leave room for the round-up).
+ int16_t* winData = (int16_t*) (((uintptr_t)winData_buff + 31) & ~31);
+ int16_t* realImag = (int16_t*) (((uintptr_t) realImag_buff + 31) & ~31);
+
+ // Update analysis buffer for lower band, and window data before FFT.
+ WebRtcNsx_AnalysisUpdate(inst, winData, speechFrame);
+
+ // Get input energy
+ inst->energyIn =
+ WebRtcSpl_Energy(winData, inst->anaLen, &inst->scaleEnergyIn);
+
+ // Reset zero input flag
+ inst->zeroInputSignal = 0;
+ // Acquire norm for winData
+ maxWinData = WebRtcSpl_MaxAbsValueW16(winData, inst->anaLen);
+ inst->normData = WebRtcSpl_NormW16(maxWinData);
+ if (maxWinData == 0) {
+ // Treat zero input separately: flag it and return before any spectrum (magnU16 included) is written.
+ inst->zeroInputSignal = 1;
+ return;
+ }
+
+ // Determine the net normalization in the frequency domain
+ net_norm = inst->stages - inst->normData;
+ // Track lowest normalization factor and use it to prevent wrap around in shifting
+ right_shifts_in_magnU16 = inst->normData - inst->minNorm;
+ right_shifts_in_initMagnEst = WEBRTC_SPL_MAX(-right_shifts_in_magnU16, 0);
+ inst->minNorm -= right_shifts_in_initMagnEst;
+ right_shifts_in_magnU16 = WEBRTC_SPL_MAX(right_shifts_in_magnU16, 0);
+
+ // create realImag as winData interleaved with zeros (= imag. part), normalize it
+ WebRtcNsx_NormalizeRealBuffer(inst, winData, realImag);
+
+ // FFT output will be in winData[].
+ WebRtcSpl_RealForwardFFT(inst->real_fft, realImag, winData);
+
+ inst->imag[0] = 0; // Q(normData-stages)
+ inst->imag[inst->anaLen2] = 0;
+ inst->real[0] = winData[0]; // Q(normData-stages)
+ inst->real[inst->anaLen2] = winData[inst->anaLen];
+ // Q(2*(normData-stages))
+ inst->magnEnergy = (uint32_t)(inst->real[0] * inst->real[0]);
+ inst->magnEnergy += (uint32_t)(inst->real[inst->anaLen2] *
+ inst->real[inst->anaLen2]);
+ magnU16[0] = (uint16_t)WEBRTC_SPL_ABS_W16(inst->real[0]); // Q(normData-stages)
+ magnU16[inst->anaLen2] = (uint16_t)WEBRTC_SPL_ABS_W16(inst->real[inst->anaLen2]);
+ inst->sumMagn = (uint32_t)magnU16[0]; // Q(normData-stages)
+ inst->sumMagn += (uint32_t)magnU16[inst->anaLen2];
+
+ if (inst->blockIndex >= END_STARTUP_SHORT) { // past startup: only the spectrum, energy and magnitude sums are computed
+ for (i = 1, j = 2; i < inst->anaLen2; i += 1, j += 2) {
+ inst->real[i] = winData[j];
+ inst->imag[i] = -winData[j + 1];
+ // magnitude spectrum
+ // energy in Q(2*(normData-stages))
+ tmpU32no1 = (uint32_t)(winData[j] * winData[j]);
+ tmpU32no1 += (uint32_t)(winData[j + 1] * winData[j + 1]);
+ inst->magnEnergy += tmpU32no1; // Q(2*(normData-stages))
+
+ magnU16[i] = (uint16_t)WebRtcSpl_SqrtFloor(tmpU32no1); // Q(normData-stages)
+ inst->sumMagn += (uint32_t)magnU16[i]; // Q(normData-stages)
+ }
+ } else {
+ //
+ // Gather information during startup for noise parameter estimation
+ //
+
+ // Switch initMagnEst to Q(minNorm-stages)
+ inst->initMagnEst[0] >>= right_shifts_in_initMagnEst;
+ inst->initMagnEst[inst->anaLen2] >>= right_shifts_in_initMagnEst;
+
+ // Update initMagnEst with magnU16 in Q(minNorm-stages).
+ inst->initMagnEst[0] += magnU16[0] >> right_shifts_in_magnU16;
+ inst->initMagnEst[inst->anaLen2] +=
+ magnU16[inst->anaLen2] >> right_shifts_in_magnU16;
+
+ log2 = 0;
+ if (magnU16[inst->anaLen2]) {
+ // Calculate log2(magnU16[inst->anaLen2])
+ zeros = WebRtcSpl_NormU32((uint32_t)magnU16[inst->anaLen2]);
+ frac = (int16_t)((((uint32_t)magnU16[inst->anaLen2] << zeros) &
+ 0x7FFFFFFF) >> 23); // Q8
+ // log2(magnU16(i)) in Q8
+ assert(frac < 256);
+ log2 = (int16_t)(((31 - zeros) << 8) + WebRtcNsx_kLogTableFrac[frac]);
+ }
+
+ sum_log_magn = (int32_t)log2; // Q8
+ // sum_log_i_log_magn in Q17
+ sum_log_i_log_magn = (kLogIndex[inst->anaLen2] * log2) >> 3;
+
+ for (i = 1, j = 2; i < inst->anaLen2; i += 1, j += 2) {
+ inst->real[i] = winData[j];
+ inst->imag[i] = -winData[j + 1];
+ // magnitude spectrum
+ // energy in Q(2*(normData-stages))
+ tmpU32no1 = (uint32_t)(winData[j] * winData[j]);
+ tmpU32no1 += (uint32_t)(winData[j + 1] * winData[j + 1]);
+ inst->magnEnergy += tmpU32no1; // Q(2*(normData-stages))
+
+ magnU16[i] = (uint16_t)WebRtcSpl_SqrtFloor(tmpU32no1); // Q(normData-stages)
+ inst->sumMagn += (uint32_t)magnU16[i]; // Q(normData-stages)
+
+ // Switch initMagnEst to Q(minNorm-stages)
+ inst->initMagnEst[i] >>= right_shifts_in_initMagnEst;
+
+ // Update initMagnEst with magnU16 in Q(minNorm-stages).
+ inst->initMagnEst[i] += magnU16[i] >> right_shifts_in_magnU16;
+
+ if (i >= kStartBand) {
+ // For pink noise estimation. Collect data neglecting lower frequency band
+ log2 = 0;
+ if (magnU16[i]) {
+ zeros = WebRtcSpl_NormU32((uint32_t)magnU16[i]);
+ frac = (int16_t)((((uint32_t)magnU16[i] << zeros) &
+ 0x7FFFFFFF) >> 23);
+ // log2(magnU16(i)) in Q8
+ assert(frac < 256);
+ log2 = (int16_t)(((31 - zeros) << 8)
+ + WebRtcNsx_kLogTableFrac[frac]);
+ }
+ sum_log_magn += (int32_t)log2; // Q8
+ // sum_log_i_log_magn in Q17
+ sum_log_i_log_magn += (kLogIndex[i] * log2) >> 3;
+ }
+ }
+
+ //
+ //compute simplified noise model during startup
+ //
+
+ // Estimate White noise
+
+ // Switch whiteNoiseLevel to Q(minNorm-stages)
+ inst->whiteNoiseLevel >>= right_shifts_in_initMagnEst;
+
+ // Update the average magnitude spectrum, used as noise estimate.
+ tmpU32no1 = WEBRTC_SPL_UMUL_32_16(inst->sumMagn, inst->overdrive);
+ tmpU32no1 >>= inst->stages + 8;
+
+ // The 'stages + 8' shift above replaces the division
+ // Shift to same Q-domain as whiteNoiseLevel
+ tmpU32no1 >>= right_shifts_in_magnU16;
+ // This operation is safe from wrap around as long as END_STARTUP_SHORT < 128
+ assert(END_STARTUP_SHORT < 128);
+ inst->whiteNoiseLevel += tmpU32no1; // Q(minNorm-stages)
+
+ // Estimate Pink noise parameters
+ // Denominator used in both parameter estimates.
+ // The value is only dependent on the size of the frequency band (kStartBand)
+ // and to reduce computational complexity stored in a table (kDeterminantEstMatrix[])
+ assert(kStartBand < 66);
+ matrix_determinant = kDeterminantEstMatrix[kStartBand]; // Q0
+ sum_log_i = kSumLogIndex[kStartBand]; // Q5
+ sum_log_i_square = kSumSquareLogIndex[kStartBand]; // Q2
+ if (inst->fs == 8000) {
+ // Adjust values to shorter blocks in narrow band.
+ tmp_1_w32 = (int32_t)matrix_determinant;
+ tmp_1_w32 += (kSumLogIndex[65] * sum_log_i) >> 9;
+ tmp_1_w32 -= (kSumLogIndex[65] * kSumLogIndex[65]) >> 10;
+ tmp_1_w32 -= (int32_t)sum_log_i_square << 4;
+ tmp_1_w32 -= ((inst->magnLen - kStartBand) * kSumSquareLogIndex[65]) >> 2;
+ matrix_determinant = (int16_t)tmp_1_w32;
+ sum_log_i -= kSumLogIndex[65]; // Q5
+ sum_log_i_square -= kSumSquareLogIndex[65]; // Q2
+ }
+
+ // Necessary number of shifts to fit sum_log_magn in a word16
+ zeros = 16 - WebRtcSpl_NormW32(sum_log_magn);
+ if (zeros < 0) {
+ zeros = 0;
+ }
+ tmp_1_w32 = sum_log_magn << 1; // Q9
+ sum_log_magn_u16 = (uint16_t)(tmp_1_w32 >> zeros); // Q(9-zeros).
+
+ // Calculate and update pinkNoiseNumerator. Result in Q11.
+ tmp_2_w32 = WEBRTC_SPL_MUL_16_U16(sum_log_i_square, sum_log_magn_u16); // Q(11-zeros)
+ tmpU32no1 = sum_log_i_log_magn >> 12; // Q5
+
+ // Shift the largest value of sum_log_i and tmpU32no1 before multiplication
+ tmp_u16 = ((uint16_t)sum_log_i << 1); // Q6
+ if ((uint32_t)sum_log_i > tmpU32no1) {
+ tmp_u16 >>= zeros;
+ } else {
+ tmpU32no1 >>= zeros;
+ }
+ tmp_2_w32 -= (int32_t)WEBRTC_SPL_UMUL_32_16(tmpU32no1, tmp_u16); // Q(11-zeros)
+ matrix_determinant >>= zeros; // Q(-zeros)
+ tmp_2_w32 = WebRtcSpl_DivW32W16(tmp_2_w32, matrix_determinant); // Q11
+ tmp_2_w32 += (int32_t)net_norm << 11; // Q11
+ if (tmp_2_w32 < 0) {
+ tmp_2_w32 = 0;
+ }
+ inst->pinkNoiseNumerator += tmp_2_w32; // Q11
+
+ // Calculate and update pinkNoiseExp. Result in Q14.
+ tmp_2_w32 = WEBRTC_SPL_MUL_16_U16(sum_log_i, sum_log_magn_u16); // Q(14-zeros)
+ tmp_1_w32 = sum_log_i_log_magn >> (3 + zeros);
+ tmp_1_w32 *= inst->magnLen - kStartBand;
+ tmp_2_w32 -= tmp_1_w32; // Q(14-zeros)
+ if (tmp_2_w32 > 0) {
+ // Only a positive value is accumulated (saturated to [0, 16384]); a non-positive
+ // one adds nothing, i.e. it is treated as zero, which means a flat spectrum.
+ tmp_1_w32 = WebRtcSpl_DivW32W16(tmp_2_w32, matrix_determinant); // Q14
+ inst->pinkNoiseExp += WEBRTC_SPL_SAT(16384, tmp_1_w32, 0); // Q14
+ }
+ }
+}
+
+// Synthesizes one output frame into |outFrame|.
+//
+// If inst->zeroInputSignal is set, the first inst->blockLen10ms samples of
+// inst->synthesisBuffer are copied out, the buffer is shifted down by that
+// amount and its tail is zeroed; no spectral processing takes place.
+//
+// Otherwise the spectrum is prepared, inverse transformed and denormalized
+// through the WebRtcNsx_* function pointers, a gain factor (Q13, 8192 = 1.0)
+// is optionally derived from the output/input energy ratio, and
+// WebRtcNsx_SynthesisUpdate produces |outFrame|.
+//
+// Side effect: inst->energyIn may be rescaled while the gain is computed.
+void WebRtcNsx_DataSynthesis(NoiseSuppressionFixedC* inst, short* outFrame) {
+  int64_t energyRatio64;
+  int32_t energyOut;
+
+  int16_t realImag_buff[ANAL_BLOCKL_MAX * 2 + 16];
+  int16_t rfft_out_buff[ANAL_BLOCKL_MAX * 2 + 16];
+
+  // Align the structures to 32-byte boundary for the FFT function.
+  int16_t* realImag = (int16_t*) (((uintptr_t)realImag_buff + 31) & ~31);
+  int16_t* rfft_out = (int16_t*) (((uintptr_t) rfft_out_buff + 31) & ~31);
+
+  int16_t tmp16no1, tmp16no2;
+  int16_t energyRatio;
+  int16_t gainFactor, gainFactor1, gainFactor2;
+
+  size_t i;
+  int outCIFFT;
+  int scaleEnergyOut = 0;
+
+  if (inst->zeroInputSignal) {
+    // synthesize the special case of zero input
+    // read out fully processed segment
+    for (i = 0; i < inst->blockLen10ms; i++) {
+      outFrame[i] = inst->synthesisBuffer[i]; // Q0
+    }
+    // update synthesis buffer
+    memcpy(inst->synthesisBuffer, inst->synthesisBuffer + inst->blockLen10ms,
+           (inst->anaLen - inst->blockLen10ms) * sizeof(*inst->synthesisBuffer));
+    WebRtcSpl_ZerosArrayW16(inst->synthesisBuffer + inst->anaLen - inst->blockLen10ms,
+                            inst->blockLen10ms);
+    return;
+  }
+
+  // Filter the data in the frequency domain, and create spectrum.
+  WebRtcNsx_PrepareSpectrum(inst, realImag);
+
+  // Inverse FFT output will be in rfft_out[].
+  outCIFFT = WebRtcSpl_RealInverseFFT(inst->real_fft, realImag, rfft_out);
+
+  WebRtcNsx_Denormalize(inst, rfft_out, outCIFFT);
+
+  //scale factor: only do it after END_STARTUP_LONG time
+  gainFactor = 8192; // 8192 = Q13(1.0)
+  if (inst->gainMap == 1 &&
+      inst->blockIndex > END_STARTUP_LONG &&
+      inst->energyIn > 0) {
+    // Q(-scaleEnergyOut)
+    energyOut = WebRtcSpl_Energy(inst->real, inst->anaLen, &scaleEnergyOut);
+    if (scaleEnergyOut == 0 && !(energyOut & 0x7f800000)) {
+      energyOut = WEBRTC_SPL_SHIFT_W32(energyOut, 8 + scaleEnergyOut
+                                       - inst->scaleEnergyIn);
+    } else {
+      // |energyIn| is currently in Q(|scaleEnergyIn|), but to later on end up
+      // with an |energyRatio| in Q8 we need to change the Q-domain to
+      // Q(-8-scaleEnergyOut).
+      // NOTE(review): this assumes 8 + scaleEnergyOut >= scaleEnergyIn; a
+      // negative shift count would be undefined - confirm the value ranges.
+      inst->energyIn >>= 8 + scaleEnergyOut - inst->scaleEnergyIn;
+    }
+
+    assert(inst->energyIn > 0);
+    // The rescaling above can shift energyIn down to zero. The assert only
+    // catches that in debug builds, so guard the division explicitly and keep
+    // the unity gain when no ratio can be formed.
+    if (inst->energyIn > 0) {
+      // Form the rounded ratio in a wide type and clamp it *before* narrowing
+      // to int16_t; narrowing first could wrap a large ratio into range and
+      // defeat the saturation.
+      energyRatio64 = ((int64_t)energyOut + inst->energyIn / 2) /
+                      inst->energyIn; // Q8
+      // Limit the ratio to [0, 1] in Q8, i.e., [0, 256]
+      if (energyRatio64 > 256) {
+        energyRatio64 = 256;
+      } else if (energyRatio64 < 0) {
+        energyRatio64 = 0;
+      }
+      energyRatio = (int16_t)energyRatio64;
+
+      // all done in lookup tables now, indexed by the Q8 ratio
+      assert(energyRatio < 257);
+      gainFactor1 = kFactor1Table[energyRatio];
+      gainFactor2 = inst->factor2Table[energyRatio];
+
+      // combine both scales with speech/noise prob: note the prior is not
+      // frequency dependent
+      // factor = priorSpeechProb * factor1 + (1.0 - priorSpeechProb) * factor2,
+      // expressed here through priorNonSpeechProb, where 16384 = Q14(1.0)
+      tmp16no1 = (int16_t)(((16384 - inst->priorNonSpeechProb) * gainFactor1) >>
+                           14); // in Q13
+      tmp16no2 = (int16_t)((inst->priorNonSpeechProb * gainFactor2) >> 14);
+      gainFactor = tmp16no1 + tmp16no2; // Q13
+    }
+  } // out of flag_gain_map==1
+
+  // Synthesis, read out fully processed segment, and update synthesis buffer.
+  WebRtcNsx_SynthesisUpdate(inst, outFrame, gainFactor);
+}
+
+void WebRtcNsx_ProcessCore(NoiseSuppressionFixedC* inst,
+ const short* const* speechFrame,
+ int num_bands,
+ short* const* outFrame) {
+ // main routine for noise suppression
+
+ uint32_t tmpU32no1, tmpU32no2, tmpU32no3;
+ uint32_t satMax, maxNoiseU32;
+ uint32_t tmpMagnU32, tmpNoiseU32;
+ uint32_t nearMagnEst;
+ uint32_t noiseUpdateU32;
+ uint32_t noiseU32[HALF_ANAL_BLOCKL];
+ uint32_t postLocSnr[HALF_ANAL_BLOCKL];
+ uint32_t priorLocSnr[HALF_ANAL_BLOCKL];
+ uint32_t prevNearSnr[HALF_ANAL_BLOCKL];
+ uint32_t curNearSnr;
+ uint32_t priorSnr;
+ uint32_t noise_estimate = 0;
+ uint32_t noise_estimate_avg = 0;
+ uint32_t numerator = 0;
+
+ int32_t tmp32no1, tmp32no2;
+ int32_t pink_noise_num_avg = 0;
+
+ uint16_t tmpU16no1;
+ uint16_t magnU16[HALF_ANAL_BLOCKL];
+ uint16_t prevNoiseU16[HALF_ANAL_BLOCKL];
+ uint16_t nonSpeechProbFinal[HALF_ANAL_BLOCKL];
+ uint16_t gammaNoise, prevGammaNoise;
+ uint16_t noiseSupFilterTmp[HALF_ANAL_BLOCKL];
+
+ int16_t qMagn, qNoise;
+ int16_t avgProbSpeechHB, gainModHB, avgFilterGainHB, gainTimeDomainHB;
+ int16_t pink_noise_exp_avg = 0;
+
+ size_t i, j;
+ int nShifts, postShifts;
+ int norm32no1, norm32no2;
+ int flag, sign;
+ int q_domain_to_use = 0;
+
+ // Code for ARMv7-Neon platform assumes the following:
+ assert(inst->anaLen > 0);
+ assert(inst->anaLen2 > 0);
+ assert(inst->anaLen % 16 == 0);
+ assert(inst->anaLen2 % 8 == 0);
+ assert(inst->blockLen10ms > 0);
+ assert(inst->blockLen10ms % 16 == 0);
+ assert(inst->magnLen == inst->anaLen2 + 1);
+
+#ifdef NS_FILEDEBUG
+ if (fwrite(spframe, sizeof(short),
+ inst->blockLen10ms, inst->infile) != inst->blockLen10ms) {
+ assert(false);
+ }
+#endif
+
+ // Check that initialization has been done
+ assert(inst->initFlag == 1);
+ assert((num_bands - 1) <= NUM_HIGH_BANDS_MAX);
+
+ const short* const* speechFrameHB = NULL;
+ short* const* outFrameHB = NULL;
+ size_t num_high_bands = 0;
+ if (num_bands > 1) {
+ speechFrameHB = &speechFrame[1];
+ outFrameHB = &outFrame[1];
+ num_high_bands = (size_t)(num_bands - 1);
+ }
+
+ // Store speechFrame and transform to frequency domain
+ WebRtcNsx_DataAnalysis(inst, (short*)speechFrame[0], magnU16);
+
+ if (inst->zeroInputSignal) {
+ WebRtcNsx_DataSynthesis(inst, outFrame[0]);
+
+ if (num_bands > 1) {
+ // update analysis buffer for H band
+ // append new data to buffer FX
+ for (i = 0; i < num_high_bands; ++i) {
+ int block_shift = inst->anaLen - inst->blockLen10ms;
+ memcpy(inst->dataBufHBFX[i], inst->dataBufHBFX[i] + inst->blockLen10ms,
+ block_shift * sizeof(*inst->dataBufHBFX[i]));
+ memcpy(inst->dataBufHBFX[i] + block_shift, speechFrameHB[i],
+ inst->blockLen10ms * sizeof(*inst->dataBufHBFX[i]));
+ for (j = 0; j < inst->blockLen10ms; j++) {
+ outFrameHB[i][j] = inst->dataBufHBFX[i][j]; // Q0
+ }
+ }
+ } // end of H band gain computation
+ return;
+ }
+
+ // Update block index when we have something to process
+ inst->blockIndex++;
+ //
+
+ // Norm of magn
+ qMagn = inst->normData - inst->stages;
+
+ // Compute spectral flatness on input spectrum
+ WebRtcNsx_ComputeSpectralFlatness(inst, magnU16);
+
+ // quantile noise estimate
+ WebRtcNsx_NoiseEstimation(inst, magnU16, noiseU32, &qNoise);
+
+ //noise estimate from previous frame
+ for (i = 0; i < inst->magnLen; i++) {
+ prevNoiseU16[i] = (uint16_t)(inst->prevNoiseU32[i] >> 11); // Q(prevQNoise)
+ }
+
+ if (inst->blockIndex < END_STARTUP_SHORT) {
+ // Noise Q-domain to be used later; see description at end of section.
+ q_domain_to_use = WEBRTC_SPL_MIN((int)qNoise, inst->minNorm - inst->stages);
+
+ // Calculate frequency independent parts in parametric noise estimate and calculate
+ // the estimate for the lower frequency band (same values for all frequency bins)
+ if (inst->pinkNoiseExp) {
+ pink_noise_exp_avg = (int16_t)WebRtcSpl_DivW32W16(inst->pinkNoiseExp,
+ (int16_t)(inst->blockIndex + 1)); // Q14
+ pink_noise_num_avg = WebRtcSpl_DivW32W16(inst->pinkNoiseNumerator,
+ (int16_t)(inst->blockIndex + 1)); // Q11
+ WebRtcNsx_CalcParametricNoiseEstimate(inst,
+ pink_noise_exp_avg,
+ pink_noise_num_avg,
+ kStartBand,
+ &noise_estimate,
+ &noise_estimate_avg);
+ } else {
+ // Use white noise estimate if we have poor pink noise parameter estimates
+ noise_estimate = inst->whiteNoiseLevel; // Q(minNorm-stages)
+ noise_estimate_avg = noise_estimate / (inst->blockIndex + 1); // Q(minNorm-stages)
+ }
+ for (i = 0; i < inst->magnLen; i++) {
+ // Estimate the background noise using the pink noise parameters if permitted
+ if ((inst->pinkNoiseExp) && (i >= kStartBand)) {
+ // Reset noise_estimate
+ noise_estimate = 0;
+ noise_estimate_avg = 0;
+ // Calculate the parametric noise estimate for current frequency bin
+ WebRtcNsx_CalcParametricNoiseEstimate(inst,
+ pink_noise_exp_avg,
+ pink_noise_num_avg,
+ i,
+ &noise_estimate,
+ &noise_estimate_avg);
+ }
+ // Calculate parametric Wiener filter
+ noiseSupFilterTmp[i] = inst->denoiseBound;
+ if (inst->initMagnEst[i]) {
+ // numerator = (initMagnEst - noise_estimate * overdrive)
+ // Result in Q(8+minNorm-stages)
+ tmpU32no1 = WEBRTC_SPL_UMUL_32_16(noise_estimate, inst->overdrive);
+ numerator = inst->initMagnEst[i] << 8;
+ if (numerator > tmpU32no1) {
+ // Suppression filter coefficient larger than zero, so calculate.
+ numerator -= tmpU32no1;
+
+ // Determine number of left shifts in numerator for best accuracy after
+ // division
+ nShifts = WebRtcSpl_NormU32(numerator);
+ nShifts = WEBRTC_SPL_SAT(6, nShifts, 0);
+
+ // Shift numerator to Q(nShifts+8+minNorm-stages)
+ numerator <<= nShifts;
+
+ // Shift denominator to Q(nShifts-6+minNorm-stages)
+ tmpU32no1 = inst->initMagnEst[i] >> (6 - nShifts);
+ if (tmpU32no1 == 0) {
+ // This is only possible if numerator = 0, in which case
+ // we don't need any division.
+ tmpU32no1 = 1;
+ }
+ tmpU32no2 = numerator / tmpU32no1; // Q14
+ noiseSupFilterTmp[i] = (uint16_t)WEBRTC_SPL_SAT(16384, tmpU32no2,
+ (uint32_t)(inst->denoiseBound)); // Q14
+ }
+ }
+ // Weight quantile noise 'noiseU32' with modeled noise 'noise_estimate_avg'
+ // 'noiseU32 is in Q(qNoise) and 'noise_estimate' in Q(minNorm-stages)
+ // To guarantee that we do not get wrap around when shifting to the same domain
+ // we use the lowest one. Furthermore, we need to save 6 bits for the weighting.
+ // 'noise_estimate_avg' can handle this operation by construction, but 'noiseU32'
+ // may not.
+
+ // Shift 'noiseU32' to 'q_domain_to_use'
+ tmpU32no1 = noiseU32[i] >> (qNoise - q_domain_to_use);
+ // Shift 'noise_estimate_avg' to 'q_domain_to_use'
+ tmpU32no2 = noise_estimate_avg >>
+ (inst->minNorm - inst->stages - q_domain_to_use);
+ // Make a simple check to see if we have enough room for weighting 'tmpU32no1'
+ // without wrap around
+ nShifts = 0;
+ if (tmpU32no1 & 0xfc000000) {
+ tmpU32no1 >>= 6;
+ tmpU32no2 >>= 6;
+ nShifts = 6;
+ }
+ tmpU32no1 *= inst->blockIndex;
+ tmpU32no2 *= (END_STARTUP_SHORT - inst->blockIndex);
+ // Add them together and divide by startup length
+ noiseU32[i] = WebRtcSpl_DivU32U16(tmpU32no1 + tmpU32no2, END_STARTUP_SHORT);
+ // Shift back if necessary
+ noiseU32[i] <<= nShifts;
+ }
+ // Update new Q-domain for 'noiseU32'
+ qNoise = q_domain_to_use;
+ }
+ // compute average signal during END_STARTUP_LONG time:
+ // used to normalize spectral difference measure
+ if (inst->blockIndex < END_STARTUP_LONG) {
+ // substituting division with shift ending up in Q(-2*stages)
+ inst->timeAvgMagnEnergyTmp +=
+ inst->magnEnergy >> (2 * inst->normData + inst->stages - 1);
+ inst->timeAvgMagnEnergy = WebRtcSpl_DivU32U16(inst->timeAvgMagnEnergyTmp,
+ inst->blockIndex + 1);
+ }
+
+ //start processing at frames == converged+1
+ // STEP 1: compute prior and post SNR based on quantile noise estimates
+
+ // compute direct decision (DD) estimate of prior SNR: needed for new method
+ satMax = (uint32_t)1048575;// Largest possible value without getting overflow despite shifting 12 steps
+ postShifts = 6 + qMagn - qNoise;
+ nShifts = 5 - inst->prevQMagn + inst->prevQNoise;
+ for (i = 0; i < inst->magnLen; i++) {
+ // FLOAT:
+ // post SNR
+ // postLocSnr[i] = 0.0;
+ // if (magn[i] > noise[i])
+ // {
+ // postLocSnr[i] = magn[i] / (noise[i] + 0.0001);
+ // }
+ // // previous post SNR
+ // // previous estimate: based on previous frame with gain filter (smooth is previous filter)
+ //
+ // prevNearSnr[i] = inst->prevMagnU16[i] / (inst->noisePrev[i] + 0.0001) * (inst->smooth[i]);
+ //
+ // // DD estimate is sum of two terms: current estimate and previous estimate
+ // // directed decision update of priorSnr (or we actually store [2*priorSnr+1])
+ //
+ // priorLocSnr[i] = DD_PR_SNR * prevNearSnr[i] + (1.0 - DD_PR_SNR) * (postLocSnr[i] - 1.0);
+
+ // calculate post SNR: output in Q11
+ postLocSnr[i] = 2048; // 1.0 in Q11
+ tmpU32no1 = (uint32_t)magnU16[i] << 6; // Q(6+qMagn)
+ if (postShifts < 0) {
+ tmpU32no2 = noiseU32[i] >> -postShifts; // Q(6+qMagn)
+ } else {
+ tmpU32no2 = noiseU32[i] << postShifts; // Q(6+qMagn)
+ }
+ if (tmpU32no1 > tmpU32no2) {
+ // Current magnitude larger than noise
+ tmpU32no1 <<= 11; // Q(17+qMagn)
+ if (tmpU32no2 > 0) {
+ tmpU32no1 /= tmpU32no2; // Q11
+ postLocSnr[i] = WEBRTC_SPL_MIN(satMax, tmpU32no1); // Q11
+ } else {
+ postLocSnr[i] = satMax;
+ }
+ }
+
+ // calculate prevNearSnr[i] and save for later instead of recalculating it later
+ // |nearMagnEst| in Q(prevQMagn + 14)
+ nearMagnEst = inst->prevMagnU16[i] * inst->noiseSupFilter[i];
+ tmpU32no1 = nearMagnEst << 3; // Q(prevQMagn+17)
+ tmpU32no2 = inst->prevNoiseU32[i] >> nShifts; // Q(prevQMagn+6)
+
+ if (tmpU32no2 > 0) {
+ tmpU32no1 /= tmpU32no2; // Q11
+ tmpU32no1 = WEBRTC_SPL_MIN(satMax, tmpU32no1); // Q11
+ } else {
+ tmpU32no1 = satMax; // Q11
+ }
+ prevNearSnr[i] = tmpU32no1; // Q11
+
+ //directed decision update of priorSnr
+ tmpU32no1 = WEBRTC_SPL_UMUL_32_16(prevNearSnr[i], DD_PR_SNR_Q11); // Q22
+ tmpU32no2 = WEBRTC_SPL_UMUL_32_16(postLocSnr[i] - 2048, ONE_MINUS_DD_PR_SNR_Q11); // Q22
+ priorSnr = tmpU32no1 + tmpU32no2 + 512; // Q22 (added 512 for rounding)
+ // priorLocSnr = 1 + 2*priorSnr
+ priorLocSnr[i] = 2048 + (priorSnr >> 10); // Q11
+ } // end of loop over frequencies
+ // done with step 1: DD computation of prior and post SNR
+
+ // STEP 2: compute speech/noise likelihood
+
+ //compute difference of input spectrum with learned/estimated noise spectrum
+ WebRtcNsx_ComputeSpectralDifference(inst, magnU16);
+ //compute histograms for determination of parameters (thresholds and weights for features)
+ //parameters are extracted once every window time (=inst->modelUpdate)
+ //counter update
+ inst->cntThresUpdate++;
+ flag = (int)(inst->cntThresUpdate == inst->modelUpdate);
+ //update histogram
+ WebRtcNsx_FeatureParameterExtraction(inst, flag);
+ //compute model parameters
+ if (flag) {
+ inst->cntThresUpdate = 0; // Reset counter
+ //update every window:
+ // get normalization for spectral difference for next window estimate
+
+ // Shift to Q(-2*stages)
+ inst->curAvgMagnEnergy >>= STAT_UPDATES;
+
+ tmpU32no1 = (inst->curAvgMagnEnergy + inst->timeAvgMagnEnergy + 1) >> 1; //Q(-2*stages)
+ // Update featureSpecDiff
+ if ((tmpU32no1 != inst->timeAvgMagnEnergy) && (inst->featureSpecDiff) &&
+ (inst->timeAvgMagnEnergy > 0)) {
+ norm32no1 = 0;
+ tmpU32no3 = tmpU32no1;
+ while (0xFFFF0000 & tmpU32no3) {
+ tmpU32no3 >>= 1;
+ norm32no1++;
+ }
+ tmpU32no2 = inst->featureSpecDiff;
+ while (0xFFFF0000 & tmpU32no2) {
+ tmpU32no2 >>= 1;
+ norm32no1++;
+ }
+ tmpU32no3 = WEBRTC_SPL_UMUL(tmpU32no3, tmpU32no2);
+ tmpU32no3 /= inst->timeAvgMagnEnergy;
+ if (WebRtcSpl_NormU32(tmpU32no3) < norm32no1) {
+ inst->featureSpecDiff = 0x007FFFFF;
+ } else {
+ inst->featureSpecDiff = WEBRTC_SPL_MIN(0x007FFFFF,
+ tmpU32no3 << norm32no1);
+ }
+ }
+
+ inst->timeAvgMagnEnergy = tmpU32no1; // Q(-2*stages)
+ inst->curAvgMagnEnergy = 0;
+ }
+
+ //compute speech/noise probability
+ WebRtcNsx_SpeechNoiseProb(inst, nonSpeechProbFinal, priorLocSnr, postLocSnr);
+
+ //time-avg parameter for noise update
+ gammaNoise = NOISE_UPDATE_Q8; // Q8
+
+ maxNoiseU32 = 0;
+ postShifts = inst->prevQNoise - qMagn;
+ nShifts = inst->prevQMagn - qMagn;
+ for (i = 0; i < inst->magnLen; i++) {
+ // temporary noise update: use it for speech frames if update value is less than previous
+ // the formula has been rewritten into:
+ // noiseUpdate = noisePrev[i] + (1 - gammaNoise) * nonSpeechProb * (magn[i] - noisePrev[i])
+
+ if (postShifts < 0) {
+ tmpU32no2 = magnU16[i] >> -postShifts; // Q(prevQNoise)
+ } else {
+ tmpU32no2 = (uint32_t)magnU16[i] << postShifts; // Q(prevQNoise)
+ }
+ if (prevNoiseU16[i] > tmpU32no2) {
+ sign = -1;
+ tmpU32no1 = prevNoiseU16[i] - tmpU32no2;
+ } else {
+ sign = 1;
+ tmpU32no1 = tmpU32no2 - prevNoiseU16[i];
+ }
+ noiseUpdateU32 = inst->prevNoiseU32[i]; // Q(prevQNoise+11)
+ tmpU32no3 = 0;
+ if ((tmpU32no1) && (nonSpeechProbFinal[i])) {
+ // This value will be used later, if gammaNoise changes
+ tmpU32no3 = WEBRTC_SPL_UMUL_32_16(tmpU32no1, nonSpeechProbFinal[i]); // Q(prevQNoise+8)
+ if (0x7c000000 & tmpU32no3) {
+ // Shifting required before multiplication
+ tmpU32no2 = (tmpU32no3 >> 5) * gammaNoise; // Q(prevQNoise+11)
+ } else {
+ // We can do shifting after multiplication
+ tmpU32no2 = (tmpU32no3 * gammaNoise) >> 5; // Q(prevQNoise+11)
+ }
+ if (sign > 0) {
+ noiseUpdateU32 += tmpU32no2; // Q(prevQNoise+11)
+ } else {
+ // This operation is safe. We can never get wrap around, since worst
+ // case scenario means magnU16 = 0
+ noiseUpdateU32 -= tmpU32no2; // Q(prevQNoise+11)
+ }
+ }
+
+ //increase gamma (i.e., less noise update) for frame likely to be speech
+ prevGammaNoise = gammaNoise;
+ gammaNoise = NOISE_UPDATE_Q8;
+ //time-constant based on speech/noise state
+ //increase gamma (i.e., less noise update) for frames likely to be speech
+ if (nonSpeechProbFinal[i] < ONE_MINUS_PROB_RANGE_Q8) {
+ gammaNoise = GAMMA_NOISE_TRANS_AND_SPEECH_Q8;
+ }
+
+ if (prevGammaNoise != gammaNoise) {
+ // new noise update
+ // this line is the same as above, only that the result is stored in a different variable and the gammaNoise
+ // has changed
+ //
+ // noiseUpdate = noisePrev[i] + (1 - gammaNoise) * nonSpeechProb * (magn[i] - noisePrev[i])
+
+ if (0x7c000000 & tmpU32no3) {
+ // Shifting required before multiplication
+ tmpU32no2 = (tmpU32no3 >> 5) * gammaNoise; // Q(prevQNoise+11)
+ } else {
+ // We can do shifting after multiplication
+ tmpU32no2 = (tmpU32no3 * gammaNoise) >> 5; // Q(prevQNoise+11)
+ }
+ if (sign > 0) {
+ tmpU32no1 = inst->prevNoiseU32[i] + tmpU32no2; // Q(prevQNoise+11)
+ } else {
+ tmpU32no1 = inst->prevNoiseU32[i] - tmpU32no2; // Q(prevQNoise+11)
+ }
+ if (noiseUpdateU32 > tmpU32no1) {
+ noiseUpdateU32 = tmpU32no1; // Q(prevQNoise+11)
+ }
+ }
+ noiseU32[i] = noiseUpdateU32; // Q(prevQNoise+11)
+ if (noiseUpdateU32 > maxNoiseU32) {
+ maxNoiseU32 = noiseUpdateU32;
+ }
+
+ // conservative noise update
+ // // original FLOAT code
+ // if (prob_speech < PROB_RANGE) {
+ // inst->avgMagnPause[i] = inst->avgMagnPause[i] + (1.0 - gamma_pause)*(magn[i] - inst->avgMagnPause[i]);
+ // }
+
+ tmp32no2 = WEBRTC_SPL_SHIFT_W32(inst->avgMagnPause[i], -nShifts);
+ if (nonSpeechProbFinal[i] > ONE_MINUS_PROB_RANGE_Q8) {
+ if (nShifts < 0) {
+ tmp32no1 = (int32_t)magnU16[i] - tmp32no2; // Q(qMagn)
+ tmp32no1 *= ONE_MINUS_GAMMA_PAUSE_Q8; // Q(8+prevQMagn+nShifts)
+ tmp32no1 = (tmp32no1 + 128) >> 8; // Q(qMagn).
+ } else {
+ // In Q(qMagn+nShifts)
+ tmp32no1 = ((int32_t)magnU16[i] << nShifts) - inst->avgMagnPause[i];
+ tmp32no1 *= ONE_MINUS_GAMMA_PAUSE_Q8; // Q(8+prevQMagn+nShifts)
+ tmp32no1 = (tmp32no1 + (128 << nShifts)) >> (8 + nShifts); // Q(qMagn).
+ }
+ tmp32no2 += tmp32no1; // Q(qMagn)
+ }
+ inst->avgMagnPause[i] = tmp32no2;
+ } // end of frequency loop
+
+ norm32no1 = WebRtcSpl_NormU32(maxNoiseU32);
+ qNoise = inst->prevQNoise + norm32no1 - 5;
+ // done with step 2: noise update
+
+ // STEP 3: compute dd update of prior snr and post snr based on new noise estimate
+ nShifts = inst->prevQNoise + 11 - qMagn;
+ for (i = 0; i < inst->magnLen; i++) {
+ // FLOAT code
+ // // post and prior SNR
+ // curNearSnr = 0.0;
+ // if (magn[i] > noise[i])
+ // {
+ // curNearSnr = magn[i] / (noise[i] + 0.0001) - 1.0;
+ // }
+ // // DD estimate is sum of two terms: current estimate and previous estimate
+ // // directed decision update of snrPrior
+ // snrPrior = DD_PR_SNR * prevNearSnr[i] + (1.0 - DD_PR_SNR) * curNearSnr;
+ // // gain filter
+ // tmpFloat1 = inst->overdrive + snrPrior;
+ // tmpFloat2 = snrPrior / tmpFloat1;
+ // theFilter[i] = tmpFloat2;
+
+ // calculate curNearSnr again, this is necessary because a new noise estimate has been made since then. for the original
+ curNearSnr = 0; // Q11
+ if (nShifts < 0) {
+ // This case is equivalent with magn < noise which implies curNearSnr = 0;
+ tmpMagnU32 = (uint32_t)magnU16[i]; // Q(qMagn)
+ tmpNoiseU32 = noiseU32[i] << -nShifts; // Q(qMagn)
+ } else if (nShifts > 17) {
+ tmpMagnU32 = (uint32_t)magnU16[i] << 17; // Q(qMagn+17)
+ tmpNoiseU32 = noiseU32[i] >> (nShifts - 17); // Q(qMagn+17)
+ } else {
+ tmpMagnU32 = (uint32_t)magnU16[i] << nShifts; // Q(qNoise_prev+11)
+ tmpNoiseU32 = noiseU32[i]; // Q(qNoise_prev+11)
+ }
+ if (tmpMagnU32 > tmpNoiseU32) {
+ tmpU32no1 = tmpMagnU32 - tmpNoiseU32; // Q(qCur)
+ norm32no2 = WEBRTC_SPL_MIN(11, WebRtcSpl_NormU32(tmpU32no1));
+ tmpU32no1 <<= norm32no2; // Q(qCur+norm32no2)
+ tmpU32no2 = tmpNoiseU32 >> (11 - norm32no2); // Q(qCur+norm32no2-11)
+ if (tmpU32no2 > 0) {
+ tmpU32no1 /= tmpU32no2; // Q11
+ }
+ curNearSnr = WEBRTC_SPL_MIN(satMax, tmpU32no1); // Q11
+ }
+
+ //directed decision update of priorSnr
+ // FLOAT
+ // priorSnr = DD_PR_SNR * prevNearSnr + (1.0-DD_PR_SNR) * curNearSnr;
+
+ tmpU32no1 = WEBRTC_SPL_UMUL_32_16(prevNearSnr[i], DD_PR_SNR_Q11); // Q22
+ tmpU32no2 = WEBRTC_SPL_UMUL_32_16(curNearSnr, ONE_MINUS_DD_PR_SNR_Q11); // Q22
+ priorSnr = tmpU32no1 + tmpU32no2; // Q22
+
+ //gain filter
+ tmpU32no1 = inst->overdrive + ((priorSnr + 8192) >> 14); // Q8
+ assert(inst->overdrive > 0);
+ tmpU16no1 = (priorSnr + tmpU32no1 / 2) / tmpU32no1; // Q14
+ inst->noiseSupFilter[i] = WEBRTC_SPL_SAT(16384, tmpU16no1, inst->denoiseBound); // 16384 = Q14(1.0) // Q14
+
+ // Weight in the parametric Wiener filter during startup
+ if (inst->blockIndex < END_STARTUP_SHORT) {
+ // Weight the two suppression filters
+ tmpU32no1 = inst->noiseSupFilter[i] * inst->blockIndex;
+ tmpU32no2 = noiseSupFilterTmp[i] *
+ (END_STARTUP_SHORT - inst->blockIndex);
+ tmpU32no1 += tmpU32no2;
+ inst->noiseSupFilter[i] = (uint16_t)WebRtcSpl_DivU32U16(tmpU32no1,
+ END_STARTUP_SHORT);
+ }
+ } // end of loop over frequencies
+ //done with step3
+
+ // save noise and magnitude spectrum for next frame
+ inst->prevQNoise = qNoise;
+ inst->prevQMagn = qMagn;
+ if (norm32no1 > 5) {
+ for (i = 0; i < inst->magnLen; i++) {
+ inst->prevNoiseU32[i] = noiseU32[i] << (norm32no1 - 5); // Q(qNoise+11)
+ inst->prevMagnU16[i] = magnU16[i]; // Q(qMagn)
+ }
+ } else {
+ for (i = 0; i < inst->magnLen; i++) {
+ inst->prevNoiseU32[i] = noiseU32[i] >> (5 - norm32no1); // Q(qNoise+11)
+ inst->prevMagnU16[i] = magnU16[i]; // Q(qMagn)
+ }
+ }
+
+ WebRtcNsx_DataSynthesis(inst, outFrame[0]);
+#ifdef NS_FILEDEBUG
+ if (fwrite(outframe, sizeof(short),
+ inst->blockLen10ms, inst->outfile) != inst->blockLen10ms) {
+ assert(false);
+ }
+#endif
+
+ //for H band:
+ // only update data buffer, then apply time-domain gain derived from L band
+ if (num_bands > 1) {
+ // update analysis buffer for H band
+ // append new data to buffer FX
+ for (i = 0; i < num_high_bands; ++i) {
+ memcpy(inst->dataBufHBFX[i], inst->dataBufHBFX[i] + inst->blockLen10ms,
+ (inst->anaLen - inst->blockLen10ms) * sizeof(*inst->dataBufHBFX[i]));
+ memcpy(inst->dataBufHBFX[i] + inst->anaLen - inst->blockLen10ms,
+ speechFrameHB[i], inst->blockLen10ms * sizeof(*inst->dataBufHBFX[i]));
+ }
+ // range for averaging low band quantities for H band gain
+
+ gainTimeDomainHB = 16384; // 16384 = Q14(1.0)
+ //average speech prob from low band
+ //average filter gain from low band
+ //avg over second half (i.e., 4->8kHz) of freq. spectrum
+ tmpU32no1 = 0; // Q12
+ tmpU16no1 = 0; // Q8
+ for (i = inst->anaLen2 - (inst->anaLen2 >> 2); i < inst->anaLen2; i++) {
+ tmpU16no1 += nonSpeechProbFinal[i]; // Q8
+ tmpU32no1 += (uint32_t)(inst->noiseSupFilter[i]); // Q14
+ }
+ assert(inst->stages >= 7);
+ avgProbSpeechHB = (4096 - (tmpU16no1 >> (inst->stages - 7))); // Q12
+ avgFilterGainHB = (int16_t)(tmpU32no1 >> (inst->stages - 3)); // Q14
+
+ // // original FLOAT code
+ // // gain based on speech probability:
+ // avg_prob_speech_tt=(float)2.0*avg_prob_speech-(float)1.0;
+ // gain_mod=(float)0.5*((float)1.0+(float)tanh(avg_prob_speech_tt)); // between 0 and 1
+
+ // gain based on speech probability:
+ // original expression: "0.5 * (1 + tanh(2x-1))"
+ // avgProbSpeechHB has been anyway saturated to a value between 0 and 1 so the other cases don't have to be dealt with
+ // avgProbSpeechHB and gainModHB are in Q12, 3607 = Q12(0.880615234375) which is a zero point of
+ // |0.5 * (1 + tanh(2x-1)) - x| - |0.5 * (1 + tanh(2x-1)) - 0.880615234375| meaning that from that point the error of approximating
+ // the expression with f(x) = x would be greater than the error of approximating the expression with f(x) = 0.880615234375
+ // error: "|0.5 * (1 + tanh(2x-1)) - x| from x=0 to 0.880615234375" -> http://www.wolframalpha.com/input/?i=|0.5+*+(1+%2B+tanh(2x-1))+-+x|+from+x%3D0+to+0.880615234375
+ // and: "|0.5 * (1 + tanh(2x-1)) - 0.880615234375| from x=0.880615234375 to 1" -> http://www.wolframalpha.com/input/?i=+|0.5+*+(1+%2B+tanh(2x-1))+-+0.880615234375|+from+x%3D0.880615234375+to+1
+ gainModHB = WEBRTC_SPL_MIN(avgProbSpeechHB, 3607);
+
+ // // original FLOAT code
+ // //combine gain with low band gain
+ // if (avg_prob_speech < (float)0.5) {
+ // gain_time_domain_HB=(float)0.5*gain_mod+(float)0.5*avg_filter_gain;
+ // }
+ // else {
+ // gain_time_domain_HB=(float)0.25*gain_mod+(float)0.75*avg_filter_gain;
+ // }
+
+
+ //combine gain with low band gain
+ if (avgProbSpeechHB < 2048) {
+ // 2048 = Q12(0.5)
+ // the next two lines in float are "gain_time_domain = 0.5 * gain_mod + 0.5 * avg_filter_gain"; Q2(0.5) = 2 equals one left shift
+ gainTimeDomainHB = (gainModHB << 1) + (avgFilterGainHB >> 1); // Q14
+ } else {
+ // "gain_time_domain = 0.25 * gain_mod + 0.75 * avg_filter_gain;"
+ gainTimeDomainHB = (int16_t)((3 * avgFilterGainHB) >> 2); // 3 = Q2(0.75)
+ gainTimeDomainHB += gainModHB; // Q14
+ }
+ //make sure gain is within flooring range
+ gainTimeDomainHB
+ = WEBRTC_SPL_SAT(16384, gainTimeDomainHB, (int16_t)(inst->denoiseBound)); // 16384 = Q14(1.0)
+
+
+ //apply gain
+ for (i = 0; i < num_high_bands; ++i) {
+ for (j = 0; j < inst->blockLen10ms; j++) {
+ outFrameHB[i][j] = (int16_t)((gainTimeDomainHB *
+ inst->dataBufHBFX[i][j]) >> 14); // Q0
+ }
+ }
+ } // end of H band gain computation
+}
diff --git a/third_party/webrtc/src/webrtc/modules/audio_processing/ns/nsx_core.h b/third_party/webrtc/src/webrtc/modules/audio_processing/ns/nsx_core.h
new file mode 100644
index 00000000..f463dbbe
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/modules/audio_processing/ns/nsx_core.h
@@ -0,0 +1,263 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_NS_MAIN_SOURCE_NSX_CORE_H_
+#define WEBRTC_MODULES_AUDIO_PROCESSING_NS_MAIN_SOURCE_NSX_CORE_H_
+
+#ifdef NS_FILEDEBUG
+#include <stdio.h>
+#endif
+
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+#include "webrtc/modules/audio_processing/ns/nsx_defines.h"
+#include "webrtc/typedefs.h"
+
+ typedef struct NoiseSuppressionFixedC_ { // State of one fixed-point noise suppression instance.
+ uint32_t fs;
+
+ const int16_t* window;
+ int16_t analysisBuffer[ANAL_BLOCKL_MAX];
+ int16_t synthesisBuffer[ANAL_BLOCKL_MAX];
+ uint16_t noiseSupFilter[HALF_ANAL_BLOCKL]; // Suppression gain per bin, Q14.
+ uint16_t overdrive; /* Q8 */
+ uint16_t denoiseBound; /* Q14 */
+ const int16_t* factor2Table;
+ int16_t noiseEstLogQuantile[SIMULT* HALF_ANAL_BLOCKL];
+ int16_t noiseEstDensity[SIMULT* HALF_ANAL_BLOCKL];
+ int16_t noiseEstCounter[SIMULT];
+ int16_t noiseEstQuantile[HALF_ANAL_BLOCKL];
+
+ size_t anaLen; // Used length of the analysis buffers, in samples.
+ size_t anaLen2; // NOTE(review): presumably anaLen / 2 - confirm in the init code.
+ size_t magnLen; // Number of frequency bins looped over per frame.
+ int aggrMode;
+ int stages; // Used as a shift count; the multi-band path of WebRtcNsx_ProcessCore asserts >= 7.
+ int initFlag;
+ int gainMap;
+
+ int32_t maxLrt;
+ int32_t minLrt;
+ // Log LRT factor with time-smoothing in Q8. NOTE(review): WebRtcNsx_SpeechNoiseProb annotates it as Q12 - confirm.
+ int32_t logLrtTimeAvgW32[HALF_ANAL_BLOCKL];
+ int32_t featureLogLrt;
+ int32_t thresholdLogLrt;
+ int16_t weightLogLrt;
+
+ uint32_t featureSpecDiff; // Capped at 0x007FFFFF when updated in WebRtcNsx_ProcessCore.
+ uint32_t thresholdSpecDiff;
+ int16_t weightSpecDiff;
+
+ uint32_t featureSpecFlat;
+ uint32_t thresholdSpecFlat;
+ int16_t weightSpecFlat;
+
+ // Conservative estimate of noise spectrum.
+ int32_t avgMagnPause[HALF_ANAL_BLOCKL];
+ uint32_t magnEnergy;
+ uint32_t sumMagn;
+ uint32_t curAvgMagnEnergy;
+ uint32_t timeAvgMagnEnergy; // Q(-2*stages)
+ uint32_t timeAvgMagnEnergyTmp;
+
+ uint32_t whiteNoiseLevel; // Initial noise estimate, Q(minNorm-stages).
+ // Initial magnitude spectrum estimate.
+ uint32_t initMagnEst[HALF_ANAL_BLOCKL];
+ // Pink noise parameters:
+ int32_t pinkNoiseNumerator; // Numerator.
+ int32_t pinkNoiseExp; // Power of freq.
+ int minNorm; // Smallest normalization factor.
+ int zeroInputSignal; // Zero input signal flag.
+
+ // Noise spectrum from previous frame, Q(prevQNoise+11).
+ uint32_t prevNoiseU32[HALF_ANAL_BLOCKL];
+ // Magnitude spectrum from previous frame, Q(prevQMagn).
+ uint16_t prevMagnU16[HALF_ANAL_BLOCKL];
+ // Prior speech/noise probability in Q14.
+ int16_t priorNonSpeechProb;
+
+ int blockIndex; // Frame index counter.
+ // Parameter for updating or estimating thresholds/weights for prior model.
+ int modelUpdate; // Window length, in frames, between parameter updates.
+ int cntThresUpdate; // Incremented every frame; reset to 0 when it reaches modelUpdate.
+
+ // Histograms for parameter estimation.
+ int16_t histLrt[HIST_PAR_EST];
+ int16_t histSpecFlat[HIST_PAR_EST];
+ int16_t histSpecDiff[HIST_PAR_EST];
+
+ // Quantities for high band estimate.
+ int16_t dataBufHBFX[NUM_HIGH_BANDS_MAX][ANAL_BLOCKL_MAX];
+
+ int qNoise;
+ int prevQNoise; // qNoise saved at the end of the previous frame.
+ int prevQMagn; // qMagn saved at the end of the previous frame.
+ size_t blockLen10ms; // Samples per band in one processed frame.
+
+ int16_t real[ANAL_BLOCKL_MAX];
+ int16_t imag[ANAL_BLOCKL_MAX];
+ int32_t energyIn;
+ int scaleEnergyIn;
+ int normData;
+
+ struct RealFFT* real_fft;
+ } NoiseSuppressionFixedC;
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+/****************************************************************************
+ * WebRtcNsx_InitCore(...)
+ *
+ * This function initializes a noise suppression instance
+ *
+ * Input:
+ * - inst : Instance that should be initialized
+ * - fs : Sampling frequency
+ *
+ * Output:
+ * - inst : Initialized instance
+ *
+ * Return value : 0 - Ok
+ * -1 - Error
+ */
+int32_t WebRtcNsx_InitCore(NoiseSuppressionFixedC* inst, uint32_t fs);
+
+/****************************************************************************
+ * WebRtcNsx_set_policy_core(...)
+ *
+ * This changes the aggressiveness of the noise suppression method.
+ *
+ * Input:
+ * - inst : Instance that should be initialized
+ * - mode : 0: Mild (6 dB), 1: Medium (10 dB), 2: Aggressive (15 dB)
+ *
+ * Output:
+ * - inst : Initialized instance
+ *
+ * Return value : 0 - Ok
+ * -1 - Error
+ */
+int WebRtcNsx_set_policy_core(NoiseSuppressionFixedC* inst, int mode);
+
+/****************************************************************************
+ * WebRtcNsx_ProcessCore
+ *
+ * Do noise suppression.
+ *
+ * Input:
+ * - inst : Instance that should be initialized
+ * - inFrame : Input speech frame for each band
+ * - num_bands : Number of bands
+ *
+ * Output:
+ * - inst : Updated instance
+ * - outFrame : Output speech frame for each band
+ */
+void WebRtcNsx_ProcessCore(NoiseSuppressionFixedC* inst,
+ const short* const* inFrame,
+ int num_bands,
+ short* const* outFrame);
+
+/****************************************************************************
+ * Some function pointers, for internal functions shared by ARM NEON and
+ * generic C code.
+ */
+// Noise Estimation.
+typedef void (*NoiseEstimation)(NoiseSuppressionFixedC* inst,
+ uint16_t* magn,
+ uint32_t* noise,
+ int16_t* q_noise);
+extern NoiseEstimation WebRtcNsx_NoiseEstimation;
+
+// Filter the data in the frequency domain, and create spectrum.
+typedef void (*PrepareSpectrum)(NoiseSuppressionFixedC* inst,
+ int16_t* freq_buff);
+extern PrepareSpectrum WebRtcNsx_PrepareSpectrum;
+
+// For the noise supression process, synthesis, read out fully processed
+// segment, and update synthesis buffer.
+typedef void (*SynthesisUpdate)(NoiseSuppressionFixedC* inst,
+ int16_t* out_frame,
+ int16_t gain_factor);
+extern SynthesisUpdate WebRtcNsx_SynthesisUpdate;
+
+// Update analysis buffer for lower band, and window data before FFT.
+typedef void (*AnalysisUpdate)(NoiseSuppressionFixedC* inst,
+ int16_t* out,
+ int16_t* new_speech);
+extern AnalysisUpdate WebRtcNsx_AnalysisUpdate;
+
+// Denormalize the real-valued signal |in|, the output from inverse FFT.
+typedef void (*Denormalize)(NoiseSuppressionFixedC* inst,
+ int16_t* in,
+ int factor);
+extern Denormalize WebRtcNsx_Denormalize;
+
+// Normalize the real-valued signal |in|, the input to forward FFT.
+typedef void (*NormalizeRealBuffer)(NoiseSuppressionFixedC* inst,
+ const int16_t* in,
+ int16_t* out);
+extern NormalizeRealBuffer WebRtcNsx_NormalizeRealBuffer;
+
+// Compute speech/noise probability.
+// Intended to be private.
+void WebRtcNsx_SpeechNoiseProb(NoiseSuppressionFixedC* inst,
+ uint16_t* nonSpeechProbFinal,
+ uint32_t* priorLocSnr,
+ uint32_t* postLocSnr);
+
+#if (defined WEBRTC_DETECT_NEON || defined WEBRTC_HAS_NEON)
+// For the above function pointers, functions for generic platforms are declared
+// and defined as static in file nsx_core.c, while those for ARM Neon platforms
+// are declared below and defined in file nsx_core_neon.c.
+void WebRtcNsx_NoiseEstimationNeon(NoiseSuppressionFixedC* inst,
+ uint16_t* magn,
+ uint32_t* noise,
+ int16_t* q_noise);
+void WebRtcNsx_SynthesisUpdateNeon(NoiseSuppressionFixedC* inst,
+ int16_t* out_frame,
+ int16_t gain_factor);
+void WebRtcNsx_AnalysisUpdateNeon(NoiseSuppressionFixedC* inst,
+ int16_t* out,
+ int16_t* new_speech);
+void WebRtcNsx_PrepareSpectrumNeon(NoiseSuppressionFixedC* inst,
+ int16_t* freq_buff);
+#endif
+
+#if defined(MIPS32_LE)
+// For the above function pointers, functions for generic platforms are declared
+// and defined as static in file nsx_core.c, while those for MIPS platforms
+// are declared below and defined in file nsx_core_mips.c.
+void WebRtcNsx_SynthesisUpdate_mips(NoiseSuppressionFixedC* inst,
+ int16_t* out_frame,
+ int16_t gain_factor);
+void WebRtcNsx_AnalysisUpdate_mips(NoiseSuppressionFixedC* inst,
+ int16_t* out,
+ int16_t* new_speech);
+void WebRtcNsx_PrepareSpectrum_mips(NoiseSuppressionFixedC* inst,
+ int16_t* freq_buff);
+void WebRtcNsx_NormalizeRealBuffer_mips(NoiseSuppressionFixedC* inst,
+ const int16_t* in,
+ int16_t* out);
+#if defined(MIPS_DSP_R1_LE)
+void WebRtcNsx_Denormalize_mips(NoiseSuppressionFixedC* inst,
+ int16_t* in,
+ int factor);
+#endif
+
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // WEBRTC_MODULES_AUDIO_PROCESSING_NS_MAIN_SOURCE_NSX_CORE_H_
diff --git a/third_party/webrtc/src/webrtc/modules/audio_processing/ns/nsx_core_c.c b/third_party/webrtc/src/webrtc/modules/audio_processing/ns/nsx_core_c.c
new file mode 100644
index 00000000..14322d38
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/modules/audio_processing/ns/nsx_core_c.c
@@ -0,0 +1,261 @@
+/*
+ * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <assert.h>
+
+#include "webrtc/modules/audio_processing/ns/include/noise_suppression_x.h"
+#include "webrtc/modules/audio_processing/ns/nsx_core.h"
+#include "webrtc/modules/audio_processing/ns/nsx_defines.h"
+
+ static const int16_t kIndicatorTable[17] = { // Sigmoid-map lookup for WebRtcNsx_SpeechNoiseProb: adjacent entries are linearly interpolated and the result is added to or subtracted from 8192 (Q14).
+ 0, 2017, 3809, 5227, 6258, 6963, 7424, 7718,
+ 7901, 8014, 8084, 8126, 8152, 8168, 8177, 8183, 8187
+ };
+
+ // Computes the per-bin non-speech probability (Q8) into |nonSpeechProbFinal|
+ // from the prior SNR |priorLocSnr| and the post SNR |postLocSnr| (both Q11,
+ // one entry per frequency bin). Also updates inst->logLrtTimeAvgW32,
+ // inst->featureLogLrt and inst->priorNonSpeechProb.
+ void WebRtcNsx_SpeechNoiseProb(NoiseSuppressionFixedC* inst,
+ uint16_t* nonSpeechProbFinal,
+ uint32_t* priorLocSnr,
+ uint32_t* postLocSnr) {
+ uint32_t zeros, num, den, tmpU32no1, tmpU32no2, tmpU32no3;
+ int32_t invLrtFX, indPriorFX, tmp32, tmp32no1, tmp32no2, besselTmpFX32;
+ int32_t frac32, logTmp;
+ int32_t logLrtTimeAvgKsumFX;
+ int16_t indPriorFX16;
+ int16_t tmp16, tmp16no1, tmp16no2, tmpIndFX, tableIndex, frac, intPart;
+ size_t i;
+ int normTmp, normTmp2, nShifts;
+
+ // compute feature based on average LR factor
+ // this is the average over all frequencies of the smooth log LRT
+ logLrtTimeAvgKsumFX = 0;
+ for (i = 0; i < inst->magnLen; i++) {
+ besselTmpFX32 = (int32_t)postLocSnr[i]; // Q11
+ normTmp = WebRtcSpl_NormU32(postLocSnr[i]);
+ num = postLocSnr[i] << normTmp; // Q(11+normTmp)
+ if (normTmp > 10) {
+ den = priorLocSnr[i] << (normTmp - 11); // Q(normTmp)
+ } else {
+ den = priorLocSnr[i] >> (11 - normTmp); // Q(normTmp)
+ }
+ if (den > 0) {
+ besselTmpFX32 -= num / den; // Q11
+ } else {
+ besselTmpFX32 = 0;
+ }
+
+ // inst->logLrtTimeAvg[i] += LRT_TAVG * (besselTmp - log(snrLocPrior)
+ // - inst->logLrtTimeAvg[i]);
+ // Here, LRT_TAVG = 0.5
+ zeros = WebRtcSpl_NormU32(priorLocSnr[i]);
+ frac32 = (int32_t)(((priorLocSnr[i] << zeros) & 0x7FFFFFFF) >> 19);
+ tmp32 = (frac32 * frac32 * -43) >> 19;
+ tmp32 += ((int16_t)frac32 * 5412) >> 12;
+ frac32 = tmp32 + 37;
+ // tmp32 = log2(priorLocSnr[i])
+ tmp32 = (int32_t)(((31 - zeros) << 12) + frac32) - (11 << 12); // Q12
+ logTmp = (tmp32 * 178) >> 8; // log2(priorLocSnr[i])*log(2)
+ // tmp32no1 = LRT_TAVG * (log(snrLocPrior) + inst->logLrtTimeAvg[i]) in Q12.
+ tmp32no1 = (logTmp + inst->logLrtTimeAvgW32[i]) / 2;
+ inst->logLrtTimeAvgW32[i] += (besselTmpFX32 - tmp32no1); // Q12
+
+ logLrtTimeAvgKsumFX += inst->logLrtTimeAvgW32[i]; // Q12
+ }
+ inst->featureLogLrt = (logLrtTimeAvgKsumFX * BIN_SIZE_LRT) >>
+ (inst->stages + 11);
+
+ // done with computation of LR factor
+
+ //
+ //compute the indicator functions
+ //
+
+ // average LRT feature
+ // FLOAT code
+ // indicator0 = 0.5 * (tanh(widthPrior *
+ // (logLrtTimeAvgKsum - threshPrior0)) + 1.0);
+ tmpIndFX = 16384; // Q14(1.0)
+ tmp32no1 = logLrtTimeAvgKsumFX - inst->thresholdLogLrt; // Q12
+ nShifts = 7 - inst->stages; // WIDTH_PR_MAP_SHIFT - inst->stages + 5;
+ //use larger width in tanh map for pause regions
+ if (tmp32no1 < 0) {
+ tmpIndFX = 0;
+ tmp32no1 = -tmp32no1;
+ //widthPrior = widthPrior * 2.0;
+ nShifts++;
+ }
+ tmp32no1 = WEBRTC_SPL_SHIFT_W32(tmp32no1, nShifts); // Q14
+ // compute indicator function: sigmoid map (range-checked before narrowing)
+ tableIndex = (int16_t)(tmp32no1 >> 14);
+ if ((tmp32no1 >= 0) && ((tmp32no1 >> 14) < 16)) {
+ tmp16no2 = kIndicatorTable[tableIndex];
+ tmp16no1 = kIndicatorTable[tableIndex + 1] - kIndicatorTable[tableIndex];
+ frac = (int16_t)(tmp32no1 & 0x00003fff); // Q14
+ tmp16no2 += (int16_t)((tmp16no1 * frac) >> 14);
+ if (tmpIndFX == 0) {
+ tmpIndFX = 8192 - tmp16no2; // Q14
+ } else {
+ tmpIndFX = 8192 + tmp16no2; // Q14
+ }
+ }
+ indPriorFX = inst->weightLogLrt * tmpIndFX; // 6*Q14
+
+ //spectral flatness feature
+ if (inst->weightSpecFlat) {
+ tmpU32no1 = WEBRTC_SPL_UMUL(inst->featureSpecFlat, 400); // Q10
+ tmpIndFX = 16384; // Q14(1.0)
+ //use larger width in tanh map for pause regions
+ tmpU32no2 = inst->thresholdSpecFlat - tmpU32no1; //Q10
+ nShifts = 4;
+ if (inst->thresholdSpecFlat < tmpU32no1) {
+ tmpIndFX = 0;
+ tmpU32no2 = tmpU32no1 - inst->thresholdSpecFlat;
+ //widthPrior = widthPrior * 2.0;
+ nShifts++;
+ }
+ tmpU32no1 = WebRtcSpl_DivU32U16(tmpU32no2 << nShifts, 25); // Q14
+ // compute indicator function: sigmoid map; the range is checked on the
+ // unsigned value because narrowing to int16_t first can wrap negative.
+ // FLOAT code: indicator1 = 0.5 * (tanh(sgnMap * widthPrior *
+ // (threshPrior1 - tmpFloat1)) + 1.0);
+ tableIndex = (int16_t)(tmpU32no1 >> 14);
+ if ((tmpU32no1 >> 14) < 16) {
+ tmp16no2 = kIndicatorTable[tableIndex];
+ tmp16no1 = kIndicatorTable[tableIndex + 1] - kIndicatorTable[tableIndex];
+ frac = (int16_t)(tmpU32no1 & 0x00003fff); // Q14
+ tmp16no2 += (int16_t)((tmp16no1 * frac) >> 14);
+ if (tmpIndFX) {
+ tmpIndFX = 8192 + tmp16no2; // Q14
+ } else {
+ tmpIndFX = 8192 - tmp16no2; // Q14
+ }
+ }
+ indPriorFX += inst->weightSpecFlat * tmpIndFX; // 6*Q14
+ }
+
+ //for template spectral-difference
+ if (inst->weightSpecDiff) {
+ tmpU32no1 = 0;
+ if (inst->featureSpecDiff) {
+ normTmp = WEBRTC_SPL_MIN(20 - inst->stages,
+ WebRtcSpl_NormU32(inst->featureSpecDiff));
+ assert(normTmp >= 0);
+ tmpU32no1 = inst->featureSpecDiff << normTmp; // Q(normTmp-2*stages)
+ tmpU32no2 = inst->timeAvgMagnEnergy >> (20 - inst->stages - normTmp);
+ if (tmpU32no2 > 0) {
+ // Q(20 - inst->stages)
+ tmpU32no1 /= tmpU32no2;
+ } else {
+ tmpU32no1 = (uint32_t)(0x7fffffff);
+ }
+ }
+ tmpU32no3 = (inst->thresholdSpecDiff << 17) / 25;
+ tmpU32no2 = tmpU32no1 - tmpU32no3;
+ nShifts = 1;
+ tmpIndFX = 16384; // Q14(1.0)
+ //use larger width in tanh map for pause regions
+ if (tmpU32no2 & 0x80000000) {
+ tmpIndFX = 0;
+ tmpU32no2 = tmpU32no3 - tmpU32no1;
+ //widthPrior = widthPrior * 2.0;
+ nShifts--;
+ }
+ tmpU32no1 = tmpU32no2 >> nShifts;
+ // compute indicator function: sigmoid map; range-checked on the unsigned
+ /* value, e.g. 0x7fffffff above would otherwise narrow to index -1. FLOAT code
+ indicator2 = 0.5 * (tanh(widthPrior * (tmpFloat1 - threshPrior2)) + 1.0);
+ */
+ tableIndex = (int16_t)(tmpU32no1 >> 14);
+ if ((tmpU32no1 >> 14) < 16) {
+ tmp16no2 = kIndicatorTable[tableIndex];
+ tmp16no1 = kIndicatorTable[tableIndex + 1] - kIndicatorTable[tableIndex];
+ frac = (int16_t)(tmpU32no1 & 0x00003fff); // Q14
+ tmp16no2 += (int16_t)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(
+ tmp16no1, frac, 14);
+ if (tmpIndFX) {
+ tmpIndFX = 8192 + tmp16no2;
+ } else {
+ tmpIndFX = 8192 - tmp16no2;
+ }
+ }
+ indPriorFX += inst->weightSpecDiff * tmpIndFX; // 6*Q14
+ }
+
+ //combine the indicator function with the feature weights
+ // FLOAT code
+ // indPrior = 1 - (weightIndPrior0 * indicator0 + weightIndPrior1 *
+ // indicator1 + weightIndPrior2 * indicator2);
+ indPriorFX16 = WebRtcSpl_DivW32W16ResW16(98307 - indPriorFX, 6); // Q14
+ // done with computing indicator function
+
+ //compute the prior probability
+ // FLOAT code
+ // inst->priorNonSpeechProb += PRIOR_UPDATE *
+ // (indPriorNonSpeech - inst->priorNonSpeechProb);
+ tmp16 = indPriorFX16 - inst->priorNonSpeechProb; // Q14
+ inst->priorNonSpeechProb += (int16_t)((PRIOR_UPDATE_Q14 * tmp16) >> 14);
+
+ //final speech probability: combine prior model with LR factor:
+
+ memset(nonSpeechProbFinal, 0, sizeof(uint16_t) * inst->magnLen);
+
+ if (inst->priorNonSpeechProb > 0) {
+ for (i = 0; i < inst->magnLen; i++) {
+ // FLOAT code
+ // invLrt = exp(inst->logLrtTimeAvg[i]);
+ // invLrt = inst->priorSpeechProb * invLrt;
+ // nonSpeechProbFinal[i] = (1.0 - inst->priorSpeechProb) /
+ // (1.0 - inst->priorSpeechProb + invLrt);
+ // invLrt = (1.0 - inst->priorNonSpeechProb) * invLrt;
+ // nonSpeechProbFinal[i] = inst->priorNonSpeechProb /
+ // (inst->priorNonSpeechProb + invLrt);
+ if (inst->logLrtTimeAvgW32[i] < 65300) {
+ tmp32no1 = (inst->logLrtTimeAvgW32[i] * 23637) >> 14; // Q12
+ intPart = (int16_t)(tmp32no1 >> 12);
+ if (intPart < -8) {
+ intPart = -8;
+ }
+ frac = (int16_t)(tmp32no1 & 0x00000fff); // Q12
+
+ // Quadratic approximation of 2^frac
+ tmp32no2 = (frac * frac * 44) >> 19; // Q12.
+ tmp32no2 += (frac * 84) >> 7; // Q12
+ invLrtFX = (1 << (8 + intPart)) +
+ WEBRTC_SPL_SHIFT_W32(tmp32no2, intPart - 4); // Q8
+
+ normTmp = WebRtcSpl_NormW32(invLrtFX);
+ normTmp2 = WebRtcSpl_NormW16((16384 - inst->priorNonSpeechProb));
+ if (normTmp + normTmp2 >= 7) {
+ if (normTmp + normTmp2 < 15) {
+ invLrtFX >>= 15 - normTmp2 - normTmp;
+ // Q(normTmp+normTmp2-7)
+ tmp32no1 = invLrtFX * (16384 - inst->priorNonSpeechProb);
+ // Q(normTmp+normTmp2+7)
+ invLrtFX = WEBRTC_SPL_SHIFT_W32(tmp32no1, 7 - normTmp - normTmp2);
+ // Q14
+ } else {
+ tmp32no1 = invLrtFX * (16384 - inst->priorNonSpeechProb);
+ // Q22
+ invLrtFX = tmp32no1 >> 8; // Q14.
+ }
+
+ tmp32no1 = (int32_t)inst->priorNonSpeechProb << 8; // Q22
+
+ nonSpeechProbFinal[i] = tmp32no1 /
+ (inst->priorNonSpeechProb + invLrtFX); // Q8
+ }
+ }
+ }
+ }
+ }
+
diff --git a/third_party/webrtc/src/webrtc/modules/audio_processing/ns/nsx_core_mips.c b/third_party/webrtc/src/webrtc/modules/audio_processing/ns/nsx_core_mips.c
new file mode 100644
index 00000000..d99be872
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/modules/audio_processing/ns/nsx_core_mips.c
@@ -0,0 +1,1002 @@
+/*
+ * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <assert.h>
+#include <string.h>
+
+#include "webrtc/modules/audio_processing/ns/include/noise_suppression_x.h"
+#include "webrtc/modules/audio_processing/ns/nsx_core.h"
+
+static const int16_t kIndicatorTable[17] = {  // Sigmoid-map lookup for the indicator functions below; 17 entries so that index i and i + 1 can be interpolated for i in 0..15.
+ 0, 2017, 3809, 5227, 6258, 6963, 7424, 7718,
+ 7901, 8014, 8084, 8126, 8152, 8168, 8177, 8183, 8187
+};
+
+// Compute speech/noise probability (MIPS inline-assembly version).
+// The per-frequency non-speech probability is returned in: nonSpeechProbFinal. Also updates inst->logLrtTimeAvgW32, inst->featureLogLrt and inst->priorNonSpeechProb.
+// priorLocSnr is the prior SNR for each frequency (in Q11)
+// postLocSnr is the post SNR for each frequency (in Q11)
+void WebRtcNsx_SpeechNoiseProb(NoiseSuppressionFixedC* inst,
+ uint16_t* nonSpeechProbFinal,
+ uint32_t* priorLocSnr,
+ uint32_t* postLocSnr) {
+ uint32_t tmpU32no1, tmpU32no2, tmpU32no3;
+ int32_t indPriorFX, tmp32no1;
+ int32_t logLrtTimeAvgKsumFX;
+ int16_t indPriorFX16;
+ int16_t tmp16, tmp16no1, tmp16no2, tmpIndFX, tableIndex, frac;
+ size_t i;
+ int normTmp, nShifts;
+
+ int32_t r0, r1, r2, r3, r4, r5, r6, r7, r8, r9;  // scratch registers for the inline assembly
+ int32_t const_max = 0x7fffffff;  // the const_* values are passed to the assembly as register operands
+ int32_t const_neg43 = -43;
+ int32_t const_5412 = 5412;
+ int32_t const_11rsh12 = (11 << 12);  // note: despite the "rsh" in the name this is 11 shifted LEFT by 12
+ int32_t const_178 = 178;
+
+
+ // compute feature based on average LR factor
+ // this is the average over all frequencies of the smooth log LRT
+ logLrtTimeAvgKsumFX = 0;
+ for (i = 0; i < inst->magnLen; i++) {
+ r0 = postLocSnr[i]; // Q11
+ r1 = priorLocSnr[i];
+ r2 = inst->logLrtTimeAvgW32[i];
+
+ __asm __volatile(
+ ".set push \n\t"
+ ".set noreorder \n\t"
+ "clz %[r3], %[r0] \n\t"  // r3 = leading zeros of postLocSnr[i]
+ "clz %[r5], %[r1] \n\t"  // r5 = leading zeros of priorLocSnr[i]
+ "slti %[r4], %[r3], 32 \n\t"
+ "slti %[r6], %[r5], 32 \n\t"
+ "movz %[r3], $0, %[r4] \n\t"  // a count of 32 (zero input) is replaced by 0
+ "movz %[r5], $0, %[r6] \n\t"
+ "slti %[r4], %[r3], 11 \n\t"
+ "addiu %[r6], %[r3], -11 \n\t"
+ "neg %[r7], %[r6] \n\t"
+ "sllv %[r6], %[r1], %[r6] \n\t"
+ "srav %[r7], %[r1], %[r7] \n\t"
+ "movn %[r6], %[r7], %[r4] \n\t"  // r6 = prior shifted by (r3 - 11): left if r3 >= 11, right otherwise
+ "sllv %[r1], %[r1], %[r5] \n\t"
+ "and %[r1], %[r1], %[const_max] \n\t"
+ "sra %[r1], %[r1], 19 \n\t"
+ "mul %[r7], %[r1], %[r1] \n\t"
+ "sllv %[r3], %[r0], %[r3] \n\t"
+ "divu %[r8], %[r3], %[r6] \n\t"
+ "slti %[r6], %[r6], 1 \n\t"  // r6 = 1 when the divisor was <= 0
+ "mul %[r7], %[r7], %[const_neg43] \n\t"
+ "sra %[r7], %[r7], 19 \n\t"
+ "movz %[r3], %[r8], %[r6] \n\t"  // take the quotient only when the divisor was positive
+ "subu %[r0], %[r0], %[r3] \n\t"
+ "movn %[r0], $0, %[r6] \n\t"  // otherwise this term is forced to 0
+ "mul %[r1], %[r1], %[const_5412] \n\t"
+ "sra %[r1], %[r1], 12 \n\t"
+ "addu %[r7], %[r7], %[r1] \n\t"
+ "addiu %[r1], %[r7], 37 \n\t"
+ "addiu %[r5], %[r5], -31 \n\t"
+ "neg %[r5], %[r5] \n\t"
+ "sll %[r5], %[r5], 12 \n\t"
+ "addu %[r5], %[r5], %[r1] \n\t"
+ "subu %[r7], %[r5], %[const_11rsh12] \n\t"
+ "mul %[r7], %[r7], %[const_178] \n\t"
+ "sra %[r7], %[r7], 8 \n\t"
+ "addu %[r7], %[r7], %[r2] \n\t"
+ "sra %[r7], %[r7], 1 \n\t"
+ "subu %[r2], %[r2], %[r7] \n\t"
+ "addu %[r2], %[r2], %[r0] \n\t"  // r2 = updated logLrtTimeAvgW32[i]
+ ".set pop \n\t"
+ : [r0] "+r" (r0), [r1] "+r" (r1), [r2] "+r" (r2),
+ [r3] "=&r" (r3), [r4] "=&r" (r4), [r5] "=&r" (r5),
+ [r6] "=&r" (r6), [r7] "=&r" (r7), [r8] "=&r" (r8)
+ : [const_max] "r" (const_max), [const_neg43] "r" (const_neg43),
+ [const_5412] "r" (const_5412), [const_11rsh12] "r" (const_11rsh12),
+ [const_178] "r" (const_178)
+ : "hi", "lo"
+ );
+ inst->logLrtTimeAvgW32[i] = r2;  // write back the value computed by the assembly
+ logLrtTimeAvgKsumFX += r2;
+ }
+
+ inst->featureLogLrt = (logLrtTimeAvgKsumFX * BIN_SIZE_LRT) >>
+ (inst->stages + 11);
+
+ // done with computation of LR factor
+
+ //
+ // compute the indicator functions
+ //
+
+ // average LRT feature
+ // FLOAT code
+ // indicator0 = 0.5 * (tanh(widthPrior *
+ // (logLrtTimeAvgKsum - threshPrior0)) + 1.0);
+ tmpIndFX = 16384; // Q14(1.0)
+ tmp32no1 = logLrtTimeAvgKsumFX - inst->thresholdLogLrt; // Q12
+ nShifts = 7 - inst->stages; // WIDTH_PR_MAP_SHIFT - inst->stages + 5;
+ //use larger width in tanh map for pause regions
+ if (tmp32no1 < 0) {
+ tmpIndFX = 0;
+ tmp32no1 = -tmp32no1;
+ //widthPrior = widthPrior * 2.0;
+ nShifts++;
+ }
+ tmp32no1 = WEBRTC_SPL_SHIFT_W32(tmp32no1, nShifts); // Q14
+ // compute indicator function: sigmoid map
+ tableIndex = (int16_t)(tmp32no1 >> 14);
+ if ((tableIndex < 16) && (tableIndex >= 0)) {
+ tmp16no2 = kIndicatorTable[tableIndex];
+ tmp16no1 = kIndicatorTable[tableIndex + 1] - kIndicatorTable[tableIndex];
+ frac = (int16_t)(tmp32no1 & 0x00003fff); // Q14
+ tmp16no2 += (int16_t)((tmp16no1 * frac) >> 14);  // linear interpolation between adjacent table entries
+ if (tmpIndFX == 0) {
+ tmpIndFX = 8192 - tmp16no2; // Q14
+ } else {
+ tmpIndFX = 8192 + tmp16no2; // Q14
+ }
+ }
+ indPriorFX = inst->weightLogLrt * tmpIndFX; // 6*Q14
+
+ //spectral flatness feature
+ if (inst->weightSpecFlat) {
+ tmpU32no1 = WEBRTC_SPL_UMUL(inst->featureSpecFlat, 400); // Q10
+ tmpIndFX = 16384; // Q14(1.0)
+ //use larger width in tanh map for pause regions
+ tmpU32no2 = inst->thresholdSpecFlat - tmpU32no1; //Q10
+ nShifts = 4;
+ if (inst->thresholdSpecFlat < tmpU32no1) {
+ tmpIndFX = 0;
+ tmpU32no2 = tmpU32no1 - inst->thresholdSpecFlat;
+ //widthPrior = widthPrior * 2.0;
+ nShifts++;
+ }
+ tmpU32no1 = WebRtcSpl_DivU32U16(tmpU32no2 << nShifts, 25); //Q14
+ // compute indicator function: sigmoid map
+ // FLOAT code
+ // indicator1 = 0.5 * (tanh(sgnMap * widthPrior *
+ // (threshPrior1 - tmpFloat1)) + 1.0);
+ tableIndex = (int16_t)(tmpU32no1 >> 14);
+ if (tableIndex < 16) {
+ tmp16no2 = kIndicatorTable[tableIndex];
+ tmp16no1 = kIndicatorTable[tableIndex + 1] - kIndicatorTable[tableIndex];
+ frac = (int16_t)(tmpU32no1 & 0x00003fff); // Q14
+ tmp16no2 += (int16_t)((tmp16no1 * frac) >> 14);
+ if (tmpIndFX) {
+ tmpIndFX = 8192 + tmp16no2; // Q14
+ } else {
+ tmpIndFX = 8192 - tmp16no2; // Q14
+ }
+ }
+ indPriorFX += inst->weightSpecFlat * tmpIndFX; // 6*Q14
+ }
+
+ //for template spectral-difference
+ if (inst->weightSpecDiff) {
+ tmpU32no1 = 0;
+ if (inst->featureSpecDiff) {
+ normTmp = WEBRTC_SPL_MIN(20 - inst->stages,
+ WebRtcSpl_NormU32(inst->featureSpecDiff));
+ assert(normTmp >= 0);
+ tmpU32no1 = inst->featureSpecDiff << normTmp; // Q(normTmp-2*stages)
+ tmpU32no2 = inst->timeAvgMagnEnergy >> (20 - inst->stages - normTmp);
+ if (tmpU32no2 > 0) {
+ // Q(20 - inst->stages)
+ tmpU32no1 /= tmpU32no2;
+ } else {
+ tmpU32no1 = (uint32_t)(0x7fffffff);  // avoid dividing by zero: use the largest positive value instead
+ }
+ }
+ tmpU32no3 = (inst->thresholdSpecDiff << 17) / 25;
+ tmpU32no2 = tmpU32no1 - tmpU32no3;
+ nShifts = 1;
+ tmpIndFX = 16384; // Q14(1.0)
+ //use larger width in tanh map for pause regions
+ if (tmpU32no2 & 0x80000000) {  // top bit set: the unsigned subtraction above wrapped (tmpU32no1 < tmpU32no3)
+ tmpIndFX = 0;
+ tmpU32no2 = tmpU32no3 - tmpU32no1;
+ //widthPrior = widthPrior * 2.0;
+ nShifts--;
+ }
+ tmpU32no1 = tmpU32no2 >> nShifts;
+ // compute indicator function: sigmoid map
+ /* FLOAT code
+ indicator2 = 0.5 * (tanh(widthPrior * (tmpFloat1 - threshPrior2)) + 1.0);
+ */
+ tableIndex = (int16_t)(tmpU32no1 >> 14);
+ if (tableIndex < 16) {
+ tmp16no2 = kIndicatorTable[tableIndex];
+ tmp16no1 = kIndicatorTable[tableIndex + 1] - kIndicatorTable[tableIndex];
+ frac = (int16_t)(tmpU32no1 & 0x00003fff); // Q14
+ tmp16no2 += (int16_t)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(
+ tmp16no1, frac, 14);
+ if (tmpIndFX) {
+ tmpIndFX = 8192 + tmp16no2;
+ } else {
+ tmpIndFX = 8192 - tmp16no2;
+ }
+ }
+ indPriorFX += inst->weightSpecDiff * tmpIndFX; // 6*Q14
+ }
+
+ //combine the indicator function with the feature weights
+ // FLOAT code
+ // indPrior = 1 - (weightIndPrior0 * indicator0 + weightIndPrior1 *
+ // indicator1 + weightIndPrior2 * indicator2);
+ indPriorFX16 = WebRtcSpl_DivW32W16ResW16(98307 - indPriorFX, 6); // Q14
+ // done with computing indicator function
+
+ //compute the prior probability
+ // FLOAT code
+ // inst->priorNonSpeechProb += PRIOR_UPDATE *
+ // (indPriorNonSpeech - inst->priorNonSpeechProb);
+ tmp16 = indPriorFX16 - inst->priorNonSpeechProb; // Q14
+ inst->priorNonSpeechProb += (int16_t)((PRIOR_UPDATE_Q14 * tmp16) >> 14);
+
+ //final speech probability: combine prior model with LR factor:
+
+ memset(nonSpeechProbFinal, 0, sizeof(uint16_t) * inst->magnLen);  // bins not written by the loop below stay 0
+
+ if (inst->priorNonSpeechProb > 0) {
+ r0 = inst->priorNonSpeechProb;
+ r1 = 16384 - r0;
+ int32_t const_23637 = 23637;
+ int32_t const_44 = 44;
+ int32_t const_84 = 84;
+ int32_t const_1 = 1;
+ int32_t const_neg8 = -8;
+ for (i = 0; i < inst->magnLen; i++) {
+ r2 = inst->logLrtTimeAvgW32[i];
+ if (r2 < 65300) {
+ __asm __volatile(
+ ".set push \n\t"
+ ".set noreorder \n\t"
+ "mul %[r2], %[r2], %[const_23637] \n\t"
+ "sll %[r6], %[r1], 16 \n\t"
+ "clz %[r7], %[r6] \n\t"
+ "clo %[r8], %[r6] \n\t"
+ "slt %[r9], %[r6], $0 \n\t"
+ "movn %[r7], %[r8], %[r9] \n\t"  // r7 = leading sign bits of (16384 - priorNonSpeechProb) << 16
+ "sra %[r2], %[r2], 14 \n\t"
+ "andi %[r3], %[r2], 0xfff \n\t"  // r3 = fractional part (low 12 bits)
+ "mul %[r4], %[r3], %[r3] \n\t"
+ "mul %[r3], %[r3], %[const_84] \n\t"
+ "sra %[r2], %[r2], 12 \n\t"  // r2 = integer part
+ "slt %[r5], %[r2], %[const_neg8] \n\t"
+ "movn %[r2], %[const_neg8], %[r5] \n\t"  // clamp the integer part to >= -8
+ "mul %[r4], %[r4], %[const_44] \n\t"
+ "sra %[r3], %[r3], 7 \n\t"
+ "addiu %[r7], %[r7], -1 \n\t"
+ "slti %[r9], %[r7], 31 \n\t"
+ "movz %[r7], $0, %[r9] \n\t"
+ "sra %[r4], %[r4], 19 \n\t"
+ "addu %[r4], %[r4], %[r3] \n\t"
+ "addiu %[r3], %[r2], 8 \n\t"
+ "addiu %[r2], %[r2], -4 \n\t"
+ "neg %[r5], %[r2] \n\t"
+ "sllv %[r6], %[r4], %[r2] \n\t"
+ "srav %[r5], %[r4], %[r5] \n\t"
+ "slt %[r2], %[r2], $0 \n\t"
+ "movn %[r6], %[r5], %[r2] \n\t"
+ "sllv %[r3], %[const_1], %[r3] \n\t"  // r3 = 1 << (integer part + 8)
+ "addu %[r2], %[r3], %[r6] \n\t"
+ "clz %[r4], %[r2] \n\t"
+ "clo %[r5], %[r2] \n\t"
+ "slt %[r8], %[r2], $0 \n\t"
+ "movn %[r4], %[r5], %[r8] \n\t"
+ "addiu %[r4], %[r4], -1 \n\t"
+ "slt %[r5], $0, %[r2] \n\t"
+ "or %[r5], %[r5], %[r7] \n\t"
+ "movz %[r4], $0, %[r5] \n\t"
+ "addiu %[r6], %[r7], -7 \n\t"
+ "addu %[r6], %[r6], %[r4] \n\t"
+ "bltz %[r6], 1f \n\t"  // skip the multiply/divide below when r7 + r4 < 7
+ " nop \n\t"
+ "addiu %[r4], %[r6], -8 \n\t"
+ "neg %[r3], %[r4] \n\t"
+ "srav %[r5], %[r2], %[r3] \n\t"
+ "mul %[r5], %[r5], %[r1] \n\t"
+ "mul %[r2], %[r2], %[r1] \n\t"
+ "slt %[r4], %[r4], $0 \n\t"
+ "srav %[r5], %[r5], %[r6] \n\t"
+ "sra %[r2], %[r2], 8 \n\t"
+ "movn %[r2], %[r5], %[r4] \n\t"
+ "sll %[r3], %[r0], 8 \n\t"
+ "addu %[r2], %[r0], %[r2] \n\t"
+ "divu %[r3], %[r3], %[r2] \n\t"  // r3 = (priorNonSpeechProb << 8) / (priorNonSpeechProb + r2)
+ "1: \n\t"
+ ".set pop \n\t"
+ : [r2] "+r" (r2), [r3] "=&r" (r3), [r4] "=&r" (r4),
+ [r5] "=&r" (r5), [r6] "=&r" (r6), [r7] "=&r" (r7),
+ [r8] "=&r" (r8), [r9] "=&r" (r9)
+ : [r0] "r" (r0), [r1] "r" (r1), [const_23637] "r" (const_23637),
+ [const_neg8] "r" (const_neg8), [const_84] "r" (const_84),
+ [const_1] "r" (const_1), [const_44] "r" (const_44)
+ : "hi", "lo"
+ );
+ nonSpeechProbFinal[i] = r3;  // NOTE(review): on the early branch to 1: r3 still holds 1 << (integer part + 8), not a probability; confirm the uint16_t truncation always yields 0 there.
+ }
+ }
+ }
+}
+
+// Update analysis buffer for lower band, and window data before FFT: drops the oldest blockLen10ms samples of inst->analysisBuffer, appends new_speech, then writes window[i] * analysisBuffer[i] (rounded, >> 14) to out for anaLen samples.
+void WebRtcNsx_AnalysisUpdate_mips(NoiseSuppressionFixedC* inst,
+ int16_t* out,
+ int16_t* new_speech) {
+ int iters, after;
+ int anaLen = (int)inst->anaLen;
+ int *window = (int*)inst->window;  // int* views: the DSP path below loads two 16-bit samples per lw; assumes 4-byte aligned buffers — TODO confirm
+ int *anaBuf = (int*)inst->analysisBuffer;
+ int *outBuf = (int*)out;
+ int r0, r1, r2, r3, r4, r5, r6, r7;
+#if defined(MIPS_DSP_R1_LE)
+ int r8;
+#endif
+
+ // For lower band update analysis buffer.
+ memcpy(inst->analysisBuffer, inst->analysisBuffer + inst->blockLen10ms,  // NOTE(review): source and destination are in the same buffer; memcpy is only valid if anaLen - blockLen10ms <= blockLen10ms — confirm
+ (inst->anaLen - inst->blockLen10ms) * sizeof(*inst->analysisBuffer));
+ memcpy(inst->analysisBuffer + inst->anaLen - inst->blockLen10ms, new_speech,
+ inst->blockLen10ms * sizeof(*inst->analysisBuffer));
+
+ // Window data before FFT.
+#if defined(MIPS_DSP_R1_LE)
+ __asm __volatile(
+ ".set push \n\t"
+ ".set noreorder \n\t"
+ "sra %[iters], %[anaLen], 3 \n\t"  // main loop handles 8 samples per iteration
+ "1: \n\t"
+ "blez %[iters], 2f \n\t"
+ " nop \n\t"
+ "lw %[r0], 0(%[window]) \n\t"
+ "lw %[r1], 0(%[anaBuf]) \n\t"
+ "lw %[r2], 4(%[window]) \n\t"
+ "lw %[r3], 4(%[anaBuf]) \n\t"
+ "lw %[r4], 8(%[window]) \n\t"
+ "lw %[r5], 8(%[anaBuf]) \n\t"
+ "lw %[r6], 12(%[window]) \n\t"
+ "lw %[r7], 12(%[anaBuf]) \n\t"
+ "muleq_s.w.phl %[r8], %[r0], %[r1] \n\t"
+ "muleq_s.w.phr %[r0], %[r0], %[r1] \n\t"
+ "muleq_s.w.phl %[r1], %[r2], %[r3] \n\t"
+ "muleq_s.w.phr %[r2], %[r2], %[r3] \n\t"
+ "muleq_s.w.phl %[r3], %[r4], %[r5] \n\t"
+ "muleq_s.w.phr %[r4], %[r4], %[r5] \n\t"
+ "muleq_s.w.phl %[r5], %[r6], %[r7] \n\t"
+ "muleq_s.w.phr %[r6], %[r6], %[r7] \n\t"
+#if defined(MIPS_DSP_R2_LE)
+ "precr_sra_r.ph.w %[r8], %[r0], 15 \n\t"
+ "precr_sra_r.ph.w %[r1], %[r2], 15 \n\t"
+ "precr_sra_r.ph.w %[r3], %[r4], 15 \n\t"
+ "precr_sra_r.ph.w %[r5], %[r6], 15 \n\t"
+ "sw %[r8], 0(%[outBuf]) \n\t"
+ "sw %[r1], 4(%[outBuf]) \n\t"
+ "sw %[r3], 8(%[outBuf]) \n\t"
+ "sw %[r5], 12(%[outBuf]) \n\t"
+#else
+ "shra_r.w %[r8], %[r8], 15 \n\t"
+ "shra_r.w %[r0], %[r0], 15 \n\t"
+ "shra_r.w %[r1], %[r1], 15 \n\t"
+ "shra_r.w %[r2], %[r2], 15 \n\t"
+ "shra_r.w %[r3], %[r3], 15 \n\t"
+ "shra_r.w %[r4], %[r4], 15 \n\t"
+ "shra_r.w %[r5], %[r5], 15 \n\t"
+ "shra_r.w %[r6], %[r6], 15 \n\t"
+ "sll %[r0], %[r0], 16 \n\t"
+ "sll %[r2], %[r2], 16 \n\t"
+ "sll %[r4], %[r4], 16 \n\t"
+ "sll %[r6], %[r6], 16 \n\t"
+ "packrl.ph %[r0], %[r8], %[r0] \n\t"
+ "packrl.ph %[r2], %[r1], %[r2] \n\t"
+ "packrl.ph %[r4], %[r3], %[r4] \n\t"
+ "packrl.ph %[r6], %[r5], %[r6] \n\t"
+ "sw %[r0], 0(%[outBuf]) \n\t"
+ "sw %[r2], 4(%[outBuf]) \n\t"
+ "sw %[r4], 8(%[outBuf]) \n\t"
+ "sw %[r6], 12(%[outBuf]) \n\t"
+#endif
+ "addiu %[window], %[window], 16 \n\t"
+ "addiu %[anaBuf], %[anaBuf], 16 \n\t"
+ "addiu %[outBuf], %[outBuf], 16 \n\t"
+ "b 1b \n\t"
+ " addiu %[iters], %[iters], -1 \n\t"  // executes in the branch delay slot
+ "2: \n\t"
+ "andi %[after], %[anaLen], 7 \n\t"  // remaining anaLen % 8 samples, one per iteration
+ "3: \n\t"
+ "blez %[after], 4f \n\t"
+ " nop \n\t"
+ "lh %[r0], 0(%[window]) \n\t"
+ "lh %[r1], 0(%[anaBuf]) \n\t"
+ "mul %[r0], %[r0], %[r1] \n\t"
+ "addiu %[window], %[window], 2 \n\t"
+ "addiu %[anaBuf], %[anaBuf], 2 \n\t"
+ "addiu %[outBuf], %[outBuf], 2 \n\t"
+ "shra_r.w %[r0], %[r0], 14 \n\t"
+ "sh %[r0], -2(%[outBuf]) \n\t"
+ "b 3b \n\t"
+ " addiu %[after], %[after], -1 \n\t"
+ "4: \n\t"
+ ".set pop \n\t"
+ : [r0] "=&r" (r0), [r1] "=&r" (r1), [r2] "=&r" (r2),
+ [r3] "=&r" (r3), [r4] "=&r" (r4), [r5] "=&r" (r5),
+ [r6] "=&r" (r6), [r7] "=&r" (r7), [r8] "=&r" (r8),
+ [iters] "=&r" (iters), [after] "=&r" (after),
+ [window] "+r" (window),[anaBuf] "+r" (anaBuf),
+ [outBuf] "+r" (outBuf)
+ : [anaLen] "r" (anaLen)
+ : "memory", "hi", "lo"
+ );
+#else
+ __asm __volatile(
+ ".set push \n\t"
+ ".set noreorder \n\t"
+ "sra %[iters], %[anaLen], 2 \n\t"  // main loop handles 4 samples per iteration
+ "1: \n\t"
+ "blez %[iters], 2f \n\t"
+ " nop \n\t"
+ "lh %[r0], 0(%[window]) \n\t"
+ "lh %[r1], 0(%[anaBuf]) \n\t"
+ "lh %[r2], 2(%[window]) \n\t"
+ "lh %[r3], 2(%[anaBuf]) \n\t"
+ "lh %[r4], 4(%[window]) \n\t"
+ "lh %[r5], 4(%[anaBuf]) \n\t"
+ "lh %[r6], 6(%[window]) \n\t"
+ "lh %[r7], 6(%[anaBuf]) \n\t"
+ "mul %[r0], %[r0], %[r1] \n\t"
+ "mul %[r2], %[r2], %[r3] \n\t"
+ "mul %[r4], %[r4], %[r5] \n\t"
+ "mul %[r6], %[r6], %[r7] \n\t"
+ "addiu %[window], %[window], 8 \n\t"
+ "addiu %[anaBuf], %[anaBuf], 8 \n\t"
+ "addiu %[r0], %[r0], 0x2000 \n\t"  // add 2^13 so the >> 14 below rounds
+ "addiu %[r2], %[r2], 0x2000 \n\t"
+ "addiu %[r4], %[r4], 0x2000 \n\t"
+ "addiu %[r6], %[r6], 0x2000 \n\t"
+ "sra %[r0], %[r0], 14 \n\t"
+ "sra %[r2], %[r2], 14 \n\t"
+ "sra %[r4], %[r4], 14 \n\t"
+ "sra %[r6], %[r6], 14 \n\t"
+ "sh %[r0], 0(%[outBuf]) \n\t"
+ "sh %[r2], 2(%[outBuf]) \n\t"
+ "sh %[r4], 4(%[outBuf]) \n\t"
+ "sh %[r6], 6(%[outBuf]) \n\t"
+ "addiu %[outBuf], %[outBuf], 8 \n\t"
+ "b 1b \n\t"
+ " addiu %[iters], %[iters], -1 \n\t"  // executes in the branch delay slot
+ "2: \n\t"
+ "andi %[after], %[anaLen], 3 \n\t"  // remaining anaLen % 4 samples, one per iteration
+ "3: \n\t"
+ "blez %[after], 4f \n\t"
+ " nop \n\t"
+ "lh %[r0], 0(%[window]) \n\t"
+ "lh %[r1], 0(%[anaBuf]) \n\t"
+ "mul %[r0], %[r0], %[r1] \n\t"
+ "addiu %[window], %[window], 2 \n\t"
+ "addiu %[anaBuf], %[anaBuf], 2 \n\t"
+ "addiu %[outBuf], %[outBuf], 2 \n\t"
+ "addiu %[r0], %[r0], 0x2000 \n\t"
+ "sra %[r0], %[r0], 14 \n\t"
+ "sh %[r0], -2(%[outBuf]) \n\t"
+ "b 3b \n\t"
+ " addiu %[after], %[after], -1 \n\t"
+ "4: \n\t"
+ ".set pop \n\t"
+ : [r0] "=&r" (r0), [r1] "=&r" (r1), [r2] "=&r" (r2),
+ [r3] "=&r" (r3), [r4] "=&r" (r4), [r5] "=&r" (r5),
+ [r6] "=&r" (r6), [r7] "=&r" (r7), [iters] "=&r" (iters),
+ [after] "=&r" (after), [window] "+r" (window),
+ [anaBuf] "+r" (anaBuf), [outBuf] "+r" (outBuf)
+ : [anaLen] "r" (anaLen)
+ : "memory", "hi", "lo"
+ );
+#endif
+}
+
+// For the noise suppression process, synthesis, read out fully processed
+// segment, and update synthesis buffer.
+//
+// For every i in [0, anaLen) the windowed sample
+// ((window[i] * real[i] + 0x2000) >> 14) is scaled by gain_factor
+// (rounded, >> 13), clamped, and added with saturation to
+// inst->synthesisBuffer[i]. The first blockLen10ms results are also copied
+// to out_frame. Afterwards the synthesis buffer is shifted left by
+// blockLen10ms samples and its tail is zeroed.
+//
+// inst - noise suppression instance (reads window, real, anaLen,
+// blockLen10ms; updates synthesisBuffer).
+// out_frame - receives blockLen10ms output samples.
+// gain_factor - gain applied to each windowed sample before overlap-add.
+void WebRtcNsx_SynthesisUpdate_mips(NoiseSuppressionFixedC* inst,
+ int16_t* out_frame,
+ int16_t gain_factor) {
+ int iters = (int)inst->blockLen10ms >> 2;
+ int after = inst->blockLen10ms & 3;
+ int r0, r1, r2, r3, r4, r5, r6, r7;
+ int16_t *window = (int16_t*)inst->window;
+ int16_t *real = inst->real;
+ int16_t *synthBuf = inst->synthesisBuffer;
+ int16_t *out = out_frame;
+ int sat_pos = 0x7fff;
+ int sat_neg = 0xffff8000;
+ int block10 = (int)inst->blockLen10ms;
+ int anaLen = (int)inst->anaLen;
+
+ __asm __volatile(
+ ".set push \n\t"
+ ".set noreorder \n\t"
+ // Stage 1a: first blockLen10ms samples, four per iteration; results go to
+ // both synthBuf and out.
+ "1: \n\t"
+ "blez %[iters], 2f \n\t"
+ " nop \n\t"
+ "lh %[r0], 0(%[window]) \n\t"
+ "lh %[r1], 0(%[real]) \n\t"
+ "lh %[r2], 2(%[window]) \n\t"
+ "lh %[r3], 2(%[real]) \n\t"
+ "lh %[r4], 4(%[window]) \n\t"
+ "lh %[r5], 4(%[real]) \n\t"
+ "lh %[r6], 6(%[window]) \n\t"
+ "lh %[r7], 6(%[real]) \n\t"
+ "mul %[r0], %[r0], %[r1] \n\t"
+ "mul %[r2], %[r2], %[r3] \n\t"
+ "mul %[r4], %[r4], %[r5] \n\t"
+ "mul %[r6], %[r6], %[r7] \n\t"
+ "addiu %[r0], %[r0], 0x2000 \n\t"
+ "addiu %[r2], %[r2], 0x2000 \n\t"
+ "addiu %[r4], %[r4], 0x2000 \n\t"
+ "addiu %[r6], %[r6], 0x2000 \n\t"
+ "sra %[r0], %[r0], 14 \n\t"
+ "sra %[r2], %[r2], 14 \n\t"
+ "sra %[r4], %[r4], 14 \n\t"
+ "sra %[r6], %[r6], 14 \n\t"
+ "mul %[r0], %[r0], %[gain_factor] \n\t"
+ "mul %[r2], %[r2], %[gain_factor] \n\t"
+ "mul %[r4], %[r4], %[gain_factor] \n\t"
+ "mul %[r6], %[r6], %[gain_factor] \n\t"
+ "addiu %[r0], %[r0], 0x1000 \n\t"
+ "addiu %[r2], %[r2], 0x1000 \n\t"
+ "addiu %[r4], %[r4], 0x1000 \n\t"
+ "addiu %[r6], %[r6], 0x1000 \n\t"
+ "sra %[r0], %[r0], 13 \n\t"
+ "sra %[r2], %[r2], 13 \n\t"
+ "sra %[r4], %[r4], 13 \n\t"
+ "sra %[r6], %[r6], 13 \n\t"
+ // Clamp the scaled sample to at most sat_pos.
+ "slt %[r1], %[r0], %[sat_pos] \n\t"
+ "slt %[r3], %[r2], %[sat_pos] \n\t"
+ "slt %[r5], %[r4], %[sat_pos] \n\t"
+ "slt %[r7], %[r6], %[sat_pos] \n\t"
+ "movz %[r0], %[sat_pos], %[r1] \n\t"
+ "movz %[r2], %[sat_pos], %[r3] \n\t"
+ "movz %[r4], %[sat_pos], %[r5] \n\t"
+ "movz %[r6], %[sat_pos], %[r7] \n\t"
+ "lh %[r1], 0(%[synthBuf]) \n\t"
+ "lh %[r3], 2(%[synthBuf]) \n\t"
+ "lh %[r5], 4(%[synthBuf]) \n\t"
+ "lh %[r7], 6(%[synthBuf]) \n\t"
+ "addu %[r0], %[r0], %[r1] \n\t"
+ "addu %[r2], %[r2], %[r3] \n\t"
+ "addu %[r4], %[r4], %[r5] \n\t"
+ "addu %[r6], %[r6], %[r7] \n\t"
+ // Saturate the overlap-add sum to [sat_neg, sat_pos].
+ "slt %[r1], %[r0], %[sat_pos] \n\t"
+ "slt %[r3], %[r2], %[sat_pos] \n\t"
+ "slt %[r5], %[r4], %[sat_pos] \n\t"
+ "slt %[r7], %[r6], %[sat_pos] \n\t"
+ "movz %[r0], %[sat_pos], %[r1] \n\t"
+ "movz %[r2], %[sat_pos], %[r3] \n\t"
+ "movz %[r4], %[sat_pos], %[r5] \n\t"
+ "movz %[r6], %[sat_pos], %[r7] \n\t"
+ "slt %[r1], %[r0], %[sat_neg] \n\t"
+ "slt %[r3], %[r2], %[sat_neg] \n\t"
+ "slt %[r5], %[r4], %[sat_neg] \n\t"
+ "slt %[r7], %[r6], %[sat_neg] \n\t"
+ "movn %[r0], %[sat_neg], %[r1] \n\t"
+ "movn %[r2], %[sat_neg], %[r3] \n\t"
+ "movn %[r4], %[sat_neg], %[r5] \n\t"
+ "movn %[r6], %[sat_neg], %[r7] \n\t"
+ "sh %[r0], 0(%[synthBuf]) \n\t"
+ "sh %[r2], 2(%[synthBuf]) \n\t"
+ "sh %[r4], 4(%[synthBuf]) \n\t"
+ "sh %[r6], 6(%[synthBuf]) \n\t"
+ "sh %[r0], 0(%[out]) \n\t"
+ "sh %[r2], 2(%[out]) \n\t"
+ "sh %[r4], 4(%[out]) \n\t"
+ "sh %[r6], 6(%[out]) \n\t"
+ "addiu %[window], %[window], 8 \n\t"
+ "addiu %[real], %[real], 8 \n\t"
+ "addiu %[synthBuf],%[synthBuf], 8 \n\t"
+ "addiu %[out], %[out], 8 \n\t"
+ "b 1b \n\t"
+ " addiu %[iters], %[iters], -1 \n\t"
+ // Stage 1b: remaining (blockLen10ms & 3) samples, one per iteration.
+ // The delay slot is a plain nop: the remaining-length computation used to
+ // sit here and was re-executed (toggling block10) on every loop iteration.
+ "2: \n\t"
+ "blez %[after], 3f \n\t"
+ " nop \n\t"
+ "lh %[r0], 0(%[window]) \n\t"
+ "lh %[r1], 0(%[real]) \n\t"
+ "mul %[r0], %[r0], %[r1] \n\t"
+ "addiu %[window], %[window], 2 \n\t"
+ "addiu %[real], %[real], 2 \n\t"
+ "addiu %[r0], %[r0], 0x2000 \n\t"
+ "sra %[r0], %[r0], 14 \n\t"
+ "mul %[r0], %[r0], %[gain_factor] \n\t"
+ "addiu %[r0], %[r0], 0x1000 \n\t"
+ "sra %[r0], %[r0], 13 \n\t"
+ "slt %[r1], %[r0], %[sat_pos] \n\t"
+ "movz %[r0], %[sat_pos], %[r1] \n\t"
+ "lh %[r1], 0(%[synthBuf]) \n\t"
+ "addu %[r0], %[r0], %[r1] \n\t"
+ "slt %[r1], %[r0], %[sat_pos] \n\t"
+ "movz %[r0], %[sat_pos], %[r1] \n\t"
+ "slt %[r1], %[r0], %[sat_neg] \n\t"
+ "movn %[r0], %[sat_neg], %[r1] \n\t"
+ "sh %[r0], 0(%[synthBuf]) \n\t"
+ "sh %[r0], 0(%[out]) \n\t"
+ "addiu %[synthBuf],%[synthBuf], 2 \n\t"
+ "addiu %[out], %[out], 2 \n\t"
+ "b 2b \n\t"
+ " addiu %[after], %[after], -1 \n\t"
+ // Stage 2 setup, executed exactly once:
+ // block10 = anaLen - blockLen10ms = number of samples still to process.
+ "3: \n\t"
+ "subu %[block10], %[anaLen], %[block10] \n\t"
+ "sra %[iters], %[block10], 2 \n\t"
+ // Stage 2a: four samples per iteration, synthBuf only (no output frame).
+ "4: \n\t"
+ "blez %[iters], 5f \n\t"
+ " andi %[after], %[block10], 3 \n\t"
+ "lh %[r0], 0(%[window]) \n\t"
+ "lh %[r1], 0(%[real]) \n\t"
+ "lh %[r2], 2(%[window]) \n\t"
+ "lh %[r3], 2(%[real]) \n\t"
+ "lh %[r4], 4(%[window]) \n\t"
+ "lh %[r5], 4(%[real]) \n\t"
+ "lh %[r6], 6(%[window]) \n\t"
+ "lh %[r7], 6(%[real]) \n\t"
+ "mul %[r0], %[r0], %[r1] \n\t"
+ "mul %[r2], %[r2], %[r3] \n\t"
+ "mul %[r4], %[r4], %[r5] \n\t"
+ "mul %[r6], %[r6], %[r7] \n\t"
+ "addiu %[r0], %[r0], 0x2000 \n\t"
+ "addiu %[r2], %[r2], 0x2000 \n\t"
+ "addiu %[r4], %[r4], 0x2000 \n\t"
+ "addiu %[r6], %[r6], 0x2000 \n\t"
+ "sra %[r0], %[r0], 14 \n\t"
+ "sra %[r2], %[r2], 14 \n\t"
+ "sra %[r4], %[r4], 14 \n\t"
+ "sra %[r6], %[r6], 14 \n\t"
+ "mul %[r0], %[r0], %[gain_factor] \n\t"
+ "mul %[r2], %[r2], %[gain_factor] \n\t"
+ "mul %[r4], %[r4], %[gain_factor] \n\t"
+ "mul %[r6], %[r6], %[gain_factor] \n\t"
+ "addiu %[r0], %[r0], 0x1000 \n\t"
+ "addiu %[r2], %[r2], 0x1000 \n\t"
+ "addiu %[r4], %[r4], 0x1000 \n\t"
+ "addiu %[r6], %[r6], 0x1000 \n\t"
+ "sra %[r0], %[r0], 13 \n\t"
+ "sra %[r2], %[r2], 13 \n\t"
+ "sra %[r4], %[r4], 13 \n\t"
+ "sra %[r6], %[r6], 13 \n\t"
+ "slt %[r1], %[r0], %[sat_pos] \n\t"
+ "slt %[r3], %[r2], %[sat_pos] \n\t"
+ "slt %[r5], %[r4], %[sat_pos] \n\t"
+ "slt %[r7], %[r6], %[sat_pos] \n\t"
+ "movz %[r0], %[sat_pos], %[r1] \n\t"
+ "movz %[r2], %[sat_pos], %[r3] \n\t"
+ "movz %[r4], %[sat_pos], %[r5] \n\t"
+ "movz %[r6], %[sat_pos], %[r7] \n\t"
+ "lh %[r1], 0(%[synthBuf]) \n\t"
+ "lh %[r3], 2(%[synthBuf]) \n\t"
+ "lh %[r5], 4(%[synthBuf]) \n\t"
+ "lh %[r7], 6(%[synthBuf]) \n\t"
+ "addu %[r0], %[r0], %[r1] \n\t"
+ "addu %[r2], %[r2], %[r3] \n\t"
+ "addu %[r4], %[r4], %[r5] \n\t"
+ "addu %[r6], %[r6], %[r7] \n\t"
+ "slt %[r1], %[r0], %[sat_pos] \n\t"
+ "slt %[r3], %[r2], %[sat_pos] \n\t"
+ "slt %[r5], %[r4], %[sat_pos] \n\t"
+ "slt %[r7], %[r6], %[sat_pos] \n\t"
+ "movz %[r0], %[sat_pos], %[r1] \n\t"
+ "movz %[r2], %[sat_pos], %[r3] \n\t"
+ "movz %[r4], %[sat_pos], %[r5] \n\t"
+ "movz %[r6], %[sat_pos], %[r7] \n\t"
+ "slt %[r1], %[r0], %[sat_neg] \n\t"
+ "slt %[r3], %[r2], %[sat_neg] \n\t"
+ "slt %[r5], %[r4], %[sat_neg] \n\t"
+ "slt %[r7], %[r6], %[sat_neg] \n\t"
+ "movn %[r0], %[sat_neg], %[r1] \n\t"
+ "movn %[r2], %[sat_neg], %[r3] \n\t"
+ "movn %[r4], %[sat_neg], %[r5] \n\t"
+ "movn %[r6], %[sat_neg], %[r7] \n\t"
+ "sh %[r0], 0(%[synthBuf]) \n\t"
+ "sh %[r2], 2(%[synthBuf]) \n\t"
+ "sh %[r4], 4(%[synthBuf]) \n\t"
+ "sh %[r6], 6(%[synthBuf]) \n\t"
+ "addiu %[window], %[window], 8 \n\t"
+ "addiu %[real], %[real], 8 \n\t"
+ "addiu %[synthBuf],%[synthBuf], 8 \n\t"
+ "b 4b \n\t"
+ " addiu %[iters], %[iters], -1 \n\t"
+ // Stage 2b: remaining ((anaLen - blockLen10ms) & 3) samples, synthBuf only.
+ "5: \n\t"
+ "blez %[after], 6f \n\t"
+ " nop \n\t"
+ "lh %[r0], 0(%[window]) \n\t"
+ "lh %[r1], 0(%[real]) \n\t"
+ "mul %[r0], %[r0], %[r1] \n\t"
+ "addiu %[window], %[window], 2 \n\t"
+ "addiu %[real], %[real], 2 \n\t"
+ "addiu %[r0], %[r0], 0x2000 \n\t"
+ "sra %[r0], %[r0], 14 \n\t"
+ "mul %[r0], %[r0], %[gain_factor] \n\t"
+ "addiu %[r0], %[r0], 0x1000 \n\t"
+ "sra %[r0], %[r0], 13 \n\t"
+ "slt %[r1], %[r0], %[sat_pos] \n\t"
+ "movz %[r0], %[sat_pos], %[r1] \n\t"
+ "lh %[r1], 0(%[synthBuf]) \n\t"
+ "addu %[r0], %[r0], %[r1] \n\t"
+ "slt %[r1], %[r0], %[sat_pos] \n\t"
+ "movz %[r0], %[sat_pos], %[r1] \n\t"
+ "slt %[r1], %[r0], %[sat_neg] \n\t"
+ "movn %[r0], %[sat_neg], %[r1] \n\t"
+ "sh %[r0], 0(%[synthBuf]) \n\t"
+ "addiu %[synthBuf],%[synthBuf], 2 \n\t"
+ // Loop back to this stage's own label (5), not to stage 1b (2), which
+ // would write past the output frame.
+ "b 5b \n\t"
+ " addiu %[after], %[after], -1 \n\t"
+ "6: \n\t"
+ ".set pop \n\t"
+ : [r0] "=&r" (r0), [r1] "=&r" (r1), [r2] "=&r" (r2),
+ [r3] "=&r" (r3), [r4] "=&r" (r4), [r5] "=&r" (r5),
+ [r6] "=&r" (r6), [r7] "=&r" (r7), [iters] "+r" (iters),
+ [after] "+r" (after), [block10] "+r" (block10),
+ [window] "+r" (window), [real] "+r" (real),
+ [synthBuf] "+r" (synthBuf), [out] "+r" (out)
+ : [gain_factor] "r" (gain_factor), [sat_pos] "r" (sat_pos),
+ [sat_neg] "r" (sat_neg), [anaLen] "r" (anaLen)
+ : "memory", "hi", "lo"
+ );
+
+ // update synthesis buffer
+ memcpy(inst->synthesisBuffer, inst->synthesisBuffer + inst->blockLen10ms,
+ (inst->anaLen - inst->blockLen10ms) * sizeof(*inst->synthesisBuffer));
+ WebRtcSpl_ZerosArrayW16(inst->synthesisBuffer
+ + inst->anaLen - inst->blockLen10ms, inst->blockLen10ms);
+}
+
+// Filter the data in the frequency domain, and create spectrum: scales inst->real / inst->imag in place by inst->noiseSupFilter (product >> 14) and writes interleaved (real, -imag) pairs to the front of freq_buf plus (real, imag) pairs mirrored from its end.
+void WebRtcNsx_PrepareSpectrum_mips(NoiseSuppressionFixedC* inst,
+ int16_t* freq_buf) {
+ uint16_t *noiseSupFilter = inst->noiseSupFilter;
+ int16_t *real = inst->real;
+ int16_t *imag = inst->imag;
+ int32_t loop_count = 2;  // advanced by 2 per loop iteration and compared against inst->anaLen2
+ int16_t tmp_1, tmp_2, tmp_3, tmp_4, tmp_5, tmp_6;  // NOTE(review): int16_t variables bound to registers that hold 32-bit products inside the asm — confirm this is intended
+ int16_t tmp16 = (int16_t)(inst->anaLen << 1) - 4;
+ int16_t* freq_buf_f = freq_buf;  // forward write pointer
+ int16_t* freq_buf_s = &freq_buf[tmp16];  // backward write pointer, starts at index 2 * anaLen - 4
+
+ __asm __volatile (
+ ".set push \n\t"
+ ".set noreorder \n\t"
+ //first sample
+ "lh %[tmp_1], 0(%[noiseSupFilter]) \n\t"
+ "lh %[tmp_2], 0(%[real]) \n\t"
+ "lh %[tmp_3], 0(%[imag]) \n\t"
+ "mul %[tmp_2], %[tmp_2], %[tmp_1] \n\t"
+ "mul %[tmp_3], %[tmp_3], %[tmp_1] \n\t"
+ "sra %[tmp_2], %[tmp_2], 14 \n\t"
+ "sra %[tmp_3], %[tmp_3], 14 \n\t"
+ "sh %[tmp_2], 0(%[real]) \n\t"
+ "sh %[tmp_3], 0(%[imag]) \n\t"
+ "negu %[tmp_3], %[tmp_3] \n\t"  // the forward half stores the negated imaginary part
+ "sh %[tmp_2], 0(%[freq_buf_f]) \n\t"
+ "sh %[tmp_3], 2(%[freq_buf_f]) \n\t"
+ "addiu %[real], %[real], 2 \n\t"
+ "addiu %[imag], %[imag], 2 \n\t"
+ "addiu %[noiseSupFilter], %[noiseSupFilter], 2 \n\t"
+ "addiu %[freq_buf_f], %[freq_buf_f], 4 \n\t"
+ "1: \n\t"  // main loop: two bins per iteration
+ "lh %[tmp_1], 0(%[noiseSupFilter]) \n\t"
+ "lh %[tmp_2], 0(%[real]) \n\t"
+ "lh %[tmp_3], 0(%[imag]) \n\t"
+ "lh %[tmp_4], 2(%[noiseSupFilter]) \n\t"
+ "lh %[tmp_5], 2(%[real]) \n\t"
+ "lh %[tmp_6], 2(%[imag]) \n\t"
+ "mul %[tmp_2], %[tmp_2], %[tmp_1] \n\t"
+ "mul %[tmp_3], %[tmp_3], %[tmp_1] \n\t"
+ "mul %[tmp_5], %[tmp_5], %[tmp_4] \n\t"
+ "mul %[tmp_6], %[tmp_6], %[tmp_4] \n\t"
+ "addiu %[loop_count], %[loop_count], 2 \n\t"
+ "sra %[tmp_2], %[tmp_2], 14 \n\t"
+ "sra %[tmp_3], %[tmp_3], 14 \n\t"
+ "sra %[tmp_5], %[tmp_5], 14 \n\t"
+ "sra %[tmp_6], %[tmp_6], 14 \n\t"
+ "addiu %[noiseSupFilter], %[noiseSupFilter], 4 \n\t"
+ "sh %[tmp_2], 0(%[real]) \n\t"
+ "sh %[tmp_2], 4(%[freq_buf_s]) \n\t"
+ "sh %[tmp_3], 0(%[imag]) \n\t"
+ "sh %[tmp_3], 6(%[freq_buf_s]) \n\t"
+ "negu %[tmp_3], %[tmp_3] \n\t"
+ "sh %[tmp_5], 2(%[real]) \n\t"
+ "sh %[tmp_5], 0(%[freq_buf_s]) \n\t"
+ "sh %[tmp_6], 2(%[imag]) \n\t"
+ "sh %[tmp_6], 2(%[freq_buf_s]) \n\t"
+ "negu %[tmp_6], %[tmp_6] \n\t"
+ "addiu %[freq_buf_s], %[freq_buf_s], -8 \n\t"  // mirrored half moves backwards by four int16 values
+ "addiu %[real], %[real], 4 \n\t"
+ "addiu %[imag], %[imag], 4 \n\t"
+ "sh %[tmp_2], 0(%[freq_buf_f]) \n\t"
+ "sh %[tmp_3], 2(%[freq_buf_f]) \n\t"
+ "sh %[tmp_5], 4(%[freq_buf_f]) \n\t"
+ "sh %[tmp_6], 6(%[freq_buf_f]) \n\t"
+ "blt %[loop_count], %[loop_size], 1b \n\t"
+ " addiu %[freq_buf_f], %[freq_buf_f], 8 \n\t"  // executes in the branch delay slot
+ //last two samples:
+ "lh %[tmp_1], 0(%[noiseSupFilter]) \n\t"
+ "lh %[tmp_2], 0(%[real]) \n\t"
+ "lh %[tmp_3], 0(%[imag]) \n\t"
+ "lh %[tmp_4], 2(%[noiseSupFilter]) \n\t"
+ "lh %[tmp_5], 2(%[real]) \n\t"
+ "lh %[tmp_6], 2(%[imag]) \n\t"
+ "mul %[tmp_2], %[tmp_2], %[tmp_1] \n\t"
+ "mul %[tmp_3], %[tmp_3], %[tmp_1] \n\t"
+ "mul %[tmp_5], %[tmp_5], %[tmp_4] \n\t"
+ "mul %[tmp_6], %[tmp_6], %[tmp_4] \n\t"
+ "sra %[tmp_2], %[tmp_2], 14 \n\t"
+ "sra %[tmp_3], %[tmp_3], 14 \n\t"
+ "sra %[tmp_5], %[tmp_5], 14 \n\t"
+ "sra %[tmp_6], %[tmp_6], 14 \n\t"
+ "sh %[tmp_2], 0(%[real]) \n\t"
+ "sh %[tmp_2], 4(%[freq_buf_s]) \n\t"
+ "sh %[tmp_3], 0(%[imag]) \n\t"
+ "sh %[tmp_3], 6(%[freq_buf_s]) \n\t"
+ "negu %[tmp_3], %[tmp_3] \n\t"
+ "sh %[tmp_2], 0(%[freq_buf_f]) \n\t"
+ "sh %[tmp_3], 2(%[freq_buf_f]) \n\t"
+ "sh %[tmp_5], 4(%[freq_buf_f]) \n\t"
+ "sh %[tmp_6], 6(%[freq_buf_f]) \n\t"  // unlike the earlier bins, the final imaginary value is stored without negation
+ "sh %[tmp_5], 2(%[real]) \n\t"
+ "sh %[tmp_6], 2(%[imag]) \n\t"
+ ".set pop \n\t"
+ : [real] "+r" (real), [imag] "+r" (imag),
+ [freq_buf_f] "+r" (freq_buf_f), [freq_buf_s] "+r" (freq_buf_s),
+ [loop_count] "+r" (loop_count), [noiseSupFilter] "+r" (noiseSupFilter),
+ [tmp_1] "=&r" (tmp_1), [tmp_2] "=&r" (tmp_2), [tmp_3] "=&r" (tmp_3),
+ [tmp_4] "=&r" (tmp_4), [tmp_5] "=&r" (tmp_5), [tmp_6] "=&r" (tmp_6)
+ : [loop_size] "r" (inst->anaLen2)
+ : "memory", "hi", "lo"
+ );
+}
+
+#if defined(MIPS_DSP_R1_LE)
+// Denormalize the real-valued signal |in|, the output from inverse FFT.
+//
+// Each of the inst->anaLen samples of |in| is shifted by
+// (factor - inst->normData) and stored to inst->real. A non-negative shift
+// uses the saturating DSP left shift (shllv_s.ph); a negative shift becomes
+// an arithmetic right shift by the negated amount.
+//
+// inst - noise suppression instance (reads anaLen and normData, writes real).
+// in - input samples, at least inst->anaLen of them.
+// factor - scaling factor the shift amount is derived from.
+void WebRtcNsx_Denormalize_mips(NoiseSuppressionFixedC* inst,
+ int16_t* in,
+ int factor) {
+ int32_t r0, r1, r2, r3, t0;
+ int len = (int)inst->anaLen;
+ int16_t *out = &inst->real[0];
+ int shift = factor - inst->normData;
+
+ __asm __volatile (
+ ".set push \n\t"
+ ".set noreorder \n\t"
+ "beqz %[len], 8f \n\t"
+ " nop \n\t"
+ "bltz %[shift], 4f \n\t"
+ " sra %[t0], %[len], 2 \n\t"
+ "beqz %[t0], 2f \n\t"
+ " andi %[len], %[len], 3 \n\t"
+ // Left shift, four samples per iteration.
+ "1: \n\t"
+ "lh %[r0], 0(%[in]) \n\t"
+ "lh %[r1], 2(%[in]) \n\t"
+ "lh %[r2], 4(%[in]) \n\t"
+ "lh %[r3], 6(%[in]) \n\t"
+ "shllv_s.ph %[r0], %[r0], %[shift] \n\t"
+ "shllv_s.ph %[r1], %[r1], %[shift] \n\t"
+ "shllv_s.ph %[r2], %[r2], %[shift] \n\t"
+ "shllv_s.ph %[r3], %[r3], %[shift] \n\t"
+ "addiu %[in], %[in], 8 \n\t"
+ "addiu %[t0], %[t0], -1 \n\t"
+ "sh %[r0], 0(%[out]) \n\t"
+ "sh %[r1], 2(%[out]) \n\t"
+ "sh %[r2], 4(%[out]) \n\t"
+ "sh %[r3], 6(%[out]) \n\t"
+ "bgtz %[t0], 1b \n\t"
+ " addiu %[out], %[out], 8 \n\t"
+ "2: \n\t"
+ "beqz %[len], 8f \n\t"
+ " nop \n\t"
+ // Left shift, remaining (anaLen & 3) samples.
+ "3: \n\t"
+ "lh %[r0], 0(%[in]) \n\t"
+ "addiu %[in], %[in], 2 \n\t"
+ "addiu %[len], %[len], -1 \n\t"
+ "shllv_s.ph %[r0], %[r0], %[shift] \n\t"
+ "addiu %[out], %[out], 2 \n\t"
+ "bgtz %[len], 3b \n\t"
+ " sh %[r0], -2(%[out]) \n\t"
+ "b 8f \n\t"
+ // Explicit delay slot, so the negu below is not executed on this path.
+ " nop \n\t"
+ // Negative shift: negate it and shift right instead.
+ "4: \n\t"
+ "negu %[shift], %[shift] \n\t"
+ "beqz %[t0], 6f \n\t"
+ " andi %[len], %[len], 3 \n\t"
+ "5: \n\t"
+ "lh %[r0], 0(%[in]) \n\t"
+ "lh %[r1], 2(%[in]) \n\t"
+ "lh %[r2], 4(%[in]) \n\t"
+ "lh %[r3], 6(%[in]) \n\t"
+ "srav %[r0], %[r0], %[shift] \n\t"
+ "srav %[r1], %[r1], %[shift] \n\t"
+ "srav %[r2], %[r2], %[shift] \n\t"
+ "srav %[r3], %[r3], %[shift] \n\t"
+ "addiu %[in], %[in], 8 \n\t"
+ "addiu %[t0], %[t0], -1 \n\t"
+ "sh %[r0], 0(%[out]) \n\t"
+ "sh %[r1], 2(%[out]) \n\t"
+ "sh %[r2], 4(%[out]) \n\t"
+ "sh %[r3], 6(%[out]) \n\t"
+ "bgtz %[t0], 5b \n\t"
+ " addiu %[out], %[out], 8 \n\t"
+ "6: \n\t"
+ "beqz %[len], 8f \n\t"
+ " nop \n\t"
+ "7: \n\t"
+ "lh %[r0], 0(%[in]) \n\t"
+ "addiu %[in], %[in], 2 \n\t"
+ "addiu %[len], %[len], -1 \n\t"
+ "srav %[r0], %[r0], %[shift] \n\t"
+ "addiu %[out], %[out], 2 \n\t"
+ "bgtz %[len], 7b \n\t"
+ " sh %[r0], -2(%[out]) \n\t"
+ "8: \n\t"
+ ".set pop \n\t"
+ // len, shift, in and out are all modified by the assembly above, so they
+ // are declared as read-write ("+r") operands rather than input-only ones.
+ : [t0] "=&r" (t0), [r0] "=&r" (r0), [r1] "=&r" (r1),
+ [r2] "=&r" (r2), [r3] "=&r" (r3), [len] "+r" (len),
+ [shift] "+r" (shift), [in] "+r" (in), [out] "+r" (out)
+ :
+ : "memory"
+ );
+}
+#endif
+
+// Normalize the real-valued signal |in|, the input to forward FFT.
+//
+// Each of the inst->anaLen samples of |in| is shifted left by inst->normData
+// and its low 16 bits are stored to |out|.
+//
+// inst - noise suppression instance (reads anaLen and normData).
+// in - input samples, at least inst->anaLen of them.
+// out - output buffer for inst->anaLen samples.
+void WebRtcNsx_NormalizeRealBuffer_mips(NoiseSuppressionFixedC* inst,
+ const int16_t* in,
+ int16_t* out) {
+ int32_t r0, r1, r2, r3, t0;
+ int len = (int)inst->anaLen;
+ int shift = inst->normData;
+
+ __asm __volatile (
+ ".set push \n\t"
+ ".set noreorder \n\t"
+ "beqz %[len], 4f \n\t"
+ " sra %[t0], %[len], 2 \n\t"
+ "beqz %[t0], 2f \n\t"
+ " andi %[len], %[len], 3 \n\t"
+ // Four samples per iteration.
+ "1: \n\t"
+ "lh %[r0], 0(%[in]) \n\t"
+ "lh %[r1], 2(%[in]) \n\t"
+ "lh %[r2], 4(%[in]) \n\t"
+ "lh %[r3], 6(%[in]) \n\t"
+ "sllv %[r0], %[r0], %[shift] \n\t"
+ "sllv %[r1], %[r1], %[shift] \n\t"
+ "sllv %[r2], %[r2], %[shift] \n\t"
+ "sllv %[r3], %[r3], %[shift] \n\t"
+ "addiu %[in], %[in], 8 \n\t"
+ "addiu %[t0], %[t0], -1 \n\t"
+ "sh %[r0], 0(%[out]) \n\t"
+ "sh %[r1], 2(%[out]) \n\t"
+ "sh %[r2], 4(%[out]) \n\t"
+ "sh %[r3], 6(%[out]) \n\t"
+ "bgtz %[t0], 1b \n\t"
+ " addiu %[out], %[out], 8 \n\t"
+ "2: \n\t"
+ "beqz %[len], 4f \n\t"
+ " nop \n\t"
+ // Remaining (anaLen & 3) samples, one per iteration.
+ "3: \n\t"
+ "lh %[r0], 0(%[in]) \n\t"
+ "addiu %[in], %[in], 2 \n\t"
+ "addiu %[len], %[len], -1 \n\t"
+ "sllv %[r0], %[r0], %[shift] \n\t"
+ "addiu %[out], %[out], 2 \n\t"
+ "bgtz %[len], 3b \n\t"
+ " sh %[r0], -2(%[out]) \n\t"
+ "4: \n\t"
+ ".set pop \n\t"
+ // len, in and out are modified by the assembly above, so they are declared
+ // as read-write ("+r") operands; shift is only read.
+ : [t0] "=&r" (t0), [r0] "=&r" (r0), [r1] "=&r" (r1),
+ [r2] "=&r" (r2), [r3] "=&r" (r3), [len] "+r" (len),
+ [in] "+r" (in), [out] "+r" (out)
+ : [shift] "r" (shift)
+ : "memory"
+ );
+}
+
diff --git a/third_party/webrtc/src/webrtc/modules/audio_processing/ns/nsx_core_neon.c b/third_party/webrtc/src/webrtc/modules/audio_processing/ns/nsx_core_neon.c
new file mode 100644
index 00000000..65788ae2
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/modules/audio_processing/ns/nsx_core_neon.c
@@ -0,0 +1,598 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/modules/audio_processing/ns/nsx_core.h"
+
+#include <arm_neon.h>
+#include <assert.h>
+
+// Constants to compensate for shifting signal: log(2^shifts) in Q8, for shifts = 0..8.
+const int16_t WebRtcNsx_kLogTable[9] = {
+ 0, 177, 355, 532, 710, 887, 1065, 1242, 1420
+};
+// 1 / (counter + 1) in Q15, indexed by counter = 0..200 (first entry capped at 32767).
+const int16_t WebRtcNsx_kCounterDiv[201] = {
+ 32767, 16384, 10923, 8192, 6554, 5461, 4681, 4096, 3641, 3277, 2979, 2731,
+ 2521, 2341, 2185, 2048, 1928, 1820, 1725, 1638, 1560, 1489, 1425, 1365, 1311,
+ 1260, 1214, 1170, 1130, 1092, 1057, 1024, 993, 964, 936, 910, 886, 862, 840,
+ 819, 799, 780, 762, 745, 728, 712, 697, 683, 669, 655, 643, 630, 618, 607,
+ 596, 585, 575, 565, 555, 546, 537, 529, 520, 512, 504, 496, 489, 482, 475,
+ 468, 462, 455, 449, 443, 437, 431, 426, 420, 415, 410, 405, 400, 395, 390,
+ 386, 381, 377, 372, 368, 364, 360, 356, 352, 349, 345, 341, 338, 334, 331,
+ 328, 324, 321, 318, 315, 312, 309, 306, 303, 301, 298, 295, 293, 290, 287,
+ 285, 282, 280, 278, 275, 273, 271, 269, 266, 264, 262, 260, 258, 256, 254,
+ 252, 250, 248, 246, 245, 243, 241, 239, 237, 236, 234, 232, 231, 229, 228,
+ 226, 224, 223, 221, 220, 218, 217, 216, 214, 213, 211, 210, 209, 207, 206,
+ 205, 204, 202, 201, 200, 199, 197, 196, 195, 194, 193, 192, 191, 189, 188,
+ 187, 186, 185, 184, 183, 182, 181, 180, 179, 178, 177, 176, 175, 174, 173,
+ 172, 172, 171, 170, 169, 168, 167, 166, 165, 165, 164, 163
+};
+// Fractional part of log2() in Q8, indexed by the 8 bits below the leading one bit.
+const int16_t WebRtcNsx_kLogTableFrac[256] = {
+ 0, 1, 3, 4, 6, 7, 9, 10, 11, 13, 14, 16, 17, 18, 20, 21,
+ 22, 24, 25, 26, 28, 29, 30, 32, 33, 34, 36, 37, 38, 40, 41, 42,
+ 44, 45, 46, 47, 49, 50, 51, 52, 54, 55, 56, 57, 59, 60, 61, 62,
+ 63, 65, 66, 67, 68, 69, 71, 72, 73, 74, 75, 77, 78, 79, 80, 81,
+ 82, 84, 85, 86, 87, 88, 89, 90, 92, 93, 94, 95, 96, 97, 98, 99,
+ 100, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 116,
+ 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131,
+ 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146,
+ 147, 148, 149, 150, 151, 152, 153, 154, 155, 155, 156, 157, 158, 159, 160,
+ 161, 162, 163, 164, 165, 166, 167, 168, 169, 169, 170, 171, 172, 173, 174,
+ 175, 176, 177, 178, 178, 179, 180, 181, 182, 183, 184, 185, 185, 186, 187,
+ 188, 189, 190, 191, 192, 192, 193, 194, 195, 196, 197, 198, 198, 199, 200,
+ 201, 202, 203, 203, 204, 205, 206, 207, 208, 208, 209, 210, 211, 212, 212,
+ 213, 214, 215, 216, 216, 217, 218, 219, 220, 220, 221, 222, 223, 224, 224,
+ 225, 226, 227, 228, 228, 229, 230, 231, 231, 232, 233, 234, 234, 235, 236,
+ 237, 238, 238, 239, 240, 241, 241, 242, 243, 244, 244, 245, 246, 247, 247,
+ 248, 249, 249, 250, 251, 252, 252, 253, 254, 255, 255
+};
+
+// Update the noise estimate: recompute inst->qNoise and fill inst->noiseEstQuantile[] from noiseEstLogQuantile[offset..].
+static void UpdateNoiseEstimateNeon(NoiseSuppressionFixedC* inst, int offset) {
+ const int16_t kExp2Const = 11819; // Q13
+ int16_t* ptr_noiseEstLogQuantile = NULL;
+ int16_t* ptr_noiseEstQuantile = NULL;
+ int16x4_t kExp2Const16x4 = vdup_n_s16(kExp2Const);
+ int32x4_t twentyOne32x4 = vdupq_n_s32(21);
+ int32x4_t constA32x4 = vdupq_n_s32(0x1fffff);
+ int32x4_t constB32x4 = vdupq_n_s32(0x200000);
+
+ int16_t tmp16 = WebRtcSpl_MaxValueW16(inst->noiseEstLogQuantile + offset,
+ inst->magnLen);
+
+ // Guarantee a Q-domain as high as possible and still fit in int16
+ inst->qNoise = 14 - (int) WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(kExp2Const,
+ tmp16,
+ 21);
+
+ int32x4_t qNoise32x4 = vdupq_n_s32(inst->qNoise);
+
+ for (ptr_noiseEstLogQuantile = &inst->noiseEstLogQuantile[offset],
+ ptr_noiseEstQuantile = &inst->noiseEstQuantile[0];
+ ptr_noiseEstQuantile < &inst->noiseEstQuantile[inst->magnLen - 3];
+ ptr_noiseEstQuantile += 4, ptr_noiseEstLogQuantile += 4) {
+
+ // tmp32no2 = kExp2Const * inst->noiseEstLogQuantile[offset + i];
+ int16x4_t v16x4 = vld1_s16(ptr_noiseEstLogQuantile);
+ int32x4_t v32x4B = vmull_s16(v16x4, kExp2Const16x4);
+
+ // tmp32no1 = (0x00200000 | (tmp32no2 & 0x001FFFFF)); // 2^21 + frac
+ int32x4_t v32x4A = vandq_s32(v32x4B, constA32x4);
+ v32x4A = vorrq_s32(v32x4A, constB32x4);
+
+ // tmp16 = (int16_t)(tmp32no2 >> 21);
+ v32x4B = vshrq_n_s32(v32x4B, 21);
+
+ // tmp16 -= 21;// shift 21 to get result in Q0
+ v32x4B = vsubq_s32(v32x4B, twentyOne32x4);
+
+ // tmp16 += (int16_t) inst->qNoise;
+ // shift to get result in Q(qNoise)
+ v32x4B = vaddq_s32(v32x4B, qNoise32x4);
+
+ // if (tmp16 < 0) {
+ // tmp32no1 >>= -tmp16;
+ // } else {
+ // tmp32no1 <<= tmp16;
+ // }
+ v32x4B = vshlq_s32(v32x4A, v32x4B);
+
+ // tmp16 = WebRtcSpl_SatW32ToW16(tmp32no1);
+ v16x4 = vqmovn_s32(v32x4B);
+
+ //inst->noiseEstQuantile[i] = tmp16;
+ vst1_s16(ptr_noiseEstQuantile, v16x4);
+ }
+
+ // Last iteration (scalar): the element the 4-wide loop above stopped on.
+ // NOTE(review): assumes magnLen % 4 == 1 so exactly one element remains -- confirm.
+ // inst->quantile[i]=exp(inst->lquantile[offset+i]);
+ // in Q21
+ int32_t tmp32no2 = kExp2Const * *ptr_noiseEstLogQuantile;
+ int32_t tmp32no1 = (0x00200000 | (tmp32no2 & 0x001FFFFF)); // 2^21 + frac
+
+ tmp16 = (int16_t)(tmp32no2 >> 21);
+ tmp16 -= 21;// shift 21 to get result in Q0
+ tmp16 += (int16_t) inst->qNoise; //shift to get result in Q(qNoise)
+ if (tmp16 < 0) {
+ tmp32no1 >>= -tmp16;
+ } else {
+ tmp32no1 <<= tmp16;
+ }
+ *ptr_noiseEstQuantile = WebRtcSpl_SatW32ToW16(tmp32no1);
+}
+
+// Noise estimation: update the log-quantile/density state from |magn|, then copy noiseEstQuantile[] to |noise| and qNoise to |q_noise|.
+void WebRtcNsx_NoiseEstimationNeon(NoiseSuppressionFixedC* inst,
+ uint16_t* magn,
+ uint32_t* noise,
+ int16_t* q_noise) {
+ int16_t lmagn[HALF_ANAL_BLOCKL], counter, countDiv;
+ int16_t countProd, delta, zeros, frac;
+ int16_t log2, tabind, logval, tmp16, tmp16no1, tmp16no2;
+ const int16_t log2_const = 22713;
+ const int16_t width_factor = 21845;
+
+ size_t i, s, offset;
+
+ tabind = inst->stages - inst->normData;
+ assert(tabind < 9);
+ assert(tabind > -9);
+ if (tabind < 0) {
+ logval = -WebRtcNsx_kLogTable[-tabind];
+ } else {
+ logval = WebRtcNsx_kLogTable[tabind];
+ }
+
+ int16x8_t logval_16x8 = vdupq_n_s16(logval);
+
+ // lmagn(i)=log(magn(i))=log(2)*log2(magn(i))
+ // magn is in Q(-stages), and the real lmagn values are:
+ // real_lmagn(i)=log(magn(i)*2^stages)=log(magn(i))+log(2^stages)
+ // lmagn in Q8
+ for (i = 0; i < inst->magnLen; i++) {
+ if (magn[i]) {
+ zeros = WebRtcSpl_NormU32((uint32_t)magn[i]);
+ frac = (int16_t)((((uint32_t)magn[i] << zeros)
+ & 0x7FFFFFFF) >> 23);
+ assert(frac < 256);
+ // log2(magn(i))
+ log2 = (int16_t)(((31 - zeros) << 8)
+ + WebRtcNsx_kLogTableFrac[frac]);
+ // log2(magn(i))*log(2)
+ lmagn[i] = (int16_t)((log2 * log2_const) >> 15);
+ // + log(2^stages)
+ lmagn[i] += logval;
+ } else {
+ lmagn[i] = logval;
+ }
+ }
+
+ int16x4_t Q3_16x4 = vdup_n_s16(3);
+ int16x8_t WIDTHQ8_16x8 = vdupq_n_s16(WIDTH_Q8);
+ int16x8_t WIDTHFACTOR_16x8 = vdupq_n_s16(width_factor);
+
+ int16_t factor = FACTOR_Q7;
+ if (inst->blockIndex < END_STARTUP_LONG)
+ factor = FACTOR_Q7_STARTUP;
+
+ // Loop over simultaneous estimates
+ for (s = 0; s < SIMULT; s++) {
+ offset = s * inst->magnLen;
+
+ // Get counter values from state
+ counter = inst->noiseEstCounter[s];
+ assert(counter < 201);
+ countDiv = WebRtcNsx_kCounterDiv[counter];
+ countProd = (int16_t)(counter * countDiv);
+
+ // quant_est(...)
+ int16_t deltaBuff[8];
+ int16x4_t tmp16x4_0;
+ int16x4_t tmp16x4_1;
+ int16x4_t countDiv_16x4 = vdup_n_s16(countDiv);
+ int16x8_t countProd_16x8 = vdupq_n_s16(countProd);
+ int16x8_t tmp16x8_0 = vdupq_n_s16(countDiv);
+ int16x8_t prod16x8 = vqrdmulhq_s16(WIDTHFACTOR_16x8, tmp16x8_0);
+ int16x8_t tmp16x8_1;
+ int16x8_t tmp16x8_2;
+ int16x8_t tmp16x8_3;
+ uint16x8_t tmp16x8_4;
+ int32x4_t tmp32x4;
+
+ for (i = 0; i + 7 < inst->magnLen; i += 8) {
+ // Compute delta.
+ // Smaller step size during startup. This prevents from using
+ // unrealistic values causing overflow.
+ tmp16x8_0 = vdupq_n_s16(factor);
+ vst1q_s16(deltaBuff, tmp16x8_0);
+
+ int j;
+ for (j = 0; j < 8; j++) {
+ if (inst->noiseEstDensity[offset + i + j] > 512) {
+ // Get values for deltaBuff by shifting instead of dividing.
+ int factor = WebRtcSpl_NormW16(inst->noiseEstDensity[offset + i + j]);
+ deltaBuff[j] = (int16_t)(FACTOR_Q16 >> (14 - factor));
+ }
+ }
+
+ // Update log quantile estimate
+
+ // tmp16 = (int16_t)((delta * countDiv) >> 14);
+ tmp32x4 = vmull_s16(vld1_s16(&deltaBuff[0]), countDiv_16x4);
+ tmp16x4_1 = vshrn_n_s32(tmp32x4, 14);
+ tmp32x4 = vmull_s16(vld1_s16(&deltaBuff[4]), countDiv_16x4);
+ tmp16x4_0 = vshrn_n_s32(tmp32x4, 14);
+ tmp16x8_0 = vcombine_s16(tmp16x4_1, tmp16x4_0); // Keep for several lines.
+
+ // prepare for the "if" branch
+ // tmp16 += 2;
+ // tmp16_1 = (Word16)(tmp16>>2);
+ tmp16x8_1 = vrshrq_n_s16(tmp16x8_0, 2);
+
+ // inst->noiseEstLogQuantile[offset+i] + tmp16_1;
+ tmp16x8_2 = vld1q_s16(&inst->noiseEstLogQuantile[offset + i]); // Keep
+ tmp16x8_1 = vaddq_s16(tmp16x8_2, tmp16x8_1); // Keep for several lines
+
+ // Prepare for the "else" branch
+ // tmp16 += 1;
+ // tmp16_1 = (Word16)(tmp16>>1);
+ tmp16x8_0 = vrshrq_n_s16(tmp16x8_0, 1);
+
+ // tmp16_2 = (int16_t)((tmp16_1 * 3) >> 1);
+ tmp32x4 = vmull_s16(vget_low_s16(tmp16x8_0), Q3_16x4);
+ tmp16x4_1 = vshrn_n_s32(tmp32x4, 1);
+
+ // tmp16_2 = (int16_t)((tmp16_1 * 3) >> 1);
+ tmp32x4 = vmull_s16(vget_high_s16(tmp16x8_0), Q3_16x4);
+ tmp16x4_0 = vshrn_n_s32(tmp32x4, 1);
+
+ // inst->noiseEstLogQuantile[offset + i] - tmp16_2;
+ tmp16x8_0 = vcombine_s16(tmp16x4_1, tmp16x4_0); // keep
+ tmp16x8_0 = vsubq_s16(tmp16x8_2, tmp16x8_0);
+
+ // logval is the smallest fixed point representation we can have. Values
+ // below that will correspond to values in the interval [0, 1], which
+ // can't possibly occur.
+ tmp16x8_0 = vmaxq_s16(tmp16x8_0, logval_16x8);
+
+ // Do the if-else branches:
+ tmp16x8_3 = vld1q_s16(&lmagn[i]); // keep for several lines
+ tmp16x8_4 = vcgtq_s16(tmp16x8_3, tmp16x8_2);
+ tmp16x8_2 = vbslq_s16(tmp16x8_4, tmp16x8_1, tmp16x8_0);
+ vst1q_s16(&inst->noiseEstLogQuantile[offset + i], tmp16x8_2);
+
+ // Update density estimate
+ // tmp16_1 + tmp16_2
+ tmp16x8_1 = vld1q_s16(&inst->noiseEstDensity[offset + i]);
+ tmp16x8_0 = vqrdmulhq_s16(tmp16x8_1, countProd_16x8);
+ tmp16x8_0 = vaddq_s16(tmp16x8_0, prod16x8);
+
+ // lmagn[i] - inst->noiseEstLogQuantile[offset + i]
+ tmp16x8_3 = vsubq_s16(tmp16x8_3, tmp16x8_2);
+ tmp16x8_3 = vabsq_s16(tmp16x8_3);
+ tmp16x8_4 = vcgtq_s16(WIDTHQ8_16x8, tmp16x8_3);
+ tmp16x8_1 = vbslq_s16(tmp16x8_4, tmp16x8_0, tmp16x8_1);
+ vst1q_s16(&inst->noiseEstDensity[offset + i], tmp16x8_1);
+ } // End loop over magnitude spectrum
+
+ // Last iteration over magnitude spectrum:
+ // compute delta
+ if (inst->noiseEstDensity[offset + i] > 512) {
+ // Get values for deltaBuff by shifting instead of dividing.
+ int factor = WebRtcSpl_NormW16(inst->noiseEstDensity[offset + i]);
+ delta = (int16_t)(FACTOR_Q16 >> (14 - factor));
+ } else {
+ delta = FACTOR_Q7;
+ if (inst->blockIndex < END_STARTUP_LONG) {
+ // Smaller step size during startup. This prevents from using
+ // unrealistic values causing overflow.
+ delta = FACTOR_Q7_STARTUP;
+ }
+ }
+ // update log quantile estimate
+ tmp16 = (int16_t)((delta * countDiv) >> 14);
+ if (lmagn[i] > inst->noiseEstLogQuantile[offset + i]) {
+ // +=QUANTILE*delta/(inst->counter[s]+1) QUANTILE=0.25, =1 in Q2
+ // CounterDiv=1/(inst->counter[s]+1) in Q15
+ tmp16 += 2;
+ inst->noiseEstLogQuantile[offset + i] += tmp16 / 4;
+ } else {
+ tmp16 += 1;
+ // *(1-QUANTILE), in Q2 QUANTILE=0.25, 1-0.25=0.75=3 in Q2
+ // TODO(bjornv): investigate why we need to truncate twice.
+ tmp16no2 = (int16_t)((tmp16 / 2) * 3 / 2);
+ inst->noiseEstLogQuantile[offset + i] -= tmp16no2;
+ if (inst->noiseEstLogQuantile[offset + i] < logval) {
+ // logval is the smallest fixed point representation we can have.
+ // Values below that will correspond to values in the interval
+ // [0, 1], which can't possibly occur.
+ inst->noiseEstLogQuantile[offset + i] = logval;
+ }
+ }
+
+ // update density estimate
+ if (WEBRTC_SPL_ABS_W16(lmagn[i] - inst->noiseEstLogQuantile[offset + i])
+ < WIDTH_Q8) {
+ tmp16no1 = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(
+ inst->noiseEstDensity[offset + i], countProd, 15);
+ tmp16no2 = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(
+ width_factor, countDiv, 15);
+ inst->noiseEstDensity[offset + i] = tmp16no1 + tmp16no2;
+ }
+
+
+ if (counter >= END_STARTUP_LONG) {
+ inst->noiseEstCounter[s] = 0;
+ if (inst->blockIndex >= END_STARTUP_LONG) {
+ UpdateNoiseEstimateNeon(inst, offset);
+ }
+ }
+ inst->noiseEstCounter[s]++;
+
+ } // end loop over simultaneous estimates
+
+ // Sequentially update the noise during startup
+ if (inst->blockIndex < END_STARTUP_LONG) {
+ UpdateNoiseEstimateNeon(inst, offset); // |offset| is left over from the final loop pass (s == SIMULT - 1)
+ }
+
+ for (i = 0; i < inst->magnLen; i++) {
+ noise[i] = (uint32_t)(inst->noiseEstQuantile[i]); // Q(qNoise)
+ }
+ (*q_noise) = (int16_t)inst->qNoise;
+}
+
+// Filter the data in the frequency domain, and create spectrum (interleaved real, -imag pairs in |freq_buf|).
+void WebRtcNsx_PrepareSpectrumNeon(NoiseSuppressionFixedC* inst,
+ int16_t* freq_buf) {
+ assert(inst->magnLen % 8 == 1);
+ assert(inst->anaLen2 % 16 == 0);
+
+ // (1) Filtering.
+
+ // Fixed point C code for the next block is as follows:
+ // for (i = 0; i < inst->magnLen; i++) {
+ // inst->real[i] = (int16_t)((inst->real[i] *
+ // (int16_t)(inst->noiseSupFilter[i])) >> 14); // Q(normData-stages)
+ // inst->imag[i] = (int16_t)((inst->imag[i] *
+ // (int16_t)(inst->noiseSupFilter[i])) >> 14); // Q(normData-stages)
+ // }
+
+ int16_t* preal = &inst->real[0];
+ int16_t* pimag = &inst->imag[0];
+ int16_t* pns_filter = (int16_t*)&inst->noiseSupFilter[0];
+ int16_t* pimag_end = pimag + inst->magnLen - 4;
+
+ while (pimag < pimag_end) {
+ int16x8_t real = vld1q_s16(preal);
+ int16x8_t imag = vld1q_s16(pimag);
+ int16x8_t ns_filter = vld1q_s16(pns_filter);
+
+ int32x4_t tmp_r_0 = vmull_s16(vget_low_s16(real), vget_low_s16(ns_filter));
+ int32x4_t tmp_i_0 = vmull_s16(vget_low_s16(imag), vget_low_s16(ns_filter));
+ int32x4_t tmp_r_1 = vmull_s16(vget_high_s16(real),
+ vget_high_s16(ns_filter));
+ int32x4_t tmp_i_1 = vmull_s16(vget_high_s16(imag),
+ vget_high_s16(ns_filter));
+
+ int16x4_t result_r_0 = vshrn_n_s32(tmp_r_0, 14);
+ int16x4_t result_i_0 = vshrn_n_s32(tmp_i_0, 14);
+ int16x4_t result_r_1 = vshrn_n_s32(tmp_r_1, 14);
+ int16x4_t result_i_1 = vshrn_n_s32(tmp_i_1, 14);
+
+ vst1q_s16(preal, vcombine_s16(result_r_0, result_r_1));
+ vst1q_s16(pimag, vcombine_s16(result_i_0, result_i_1));
+ preal += 8;
+ pimag += 8;
+ pns_filter += 8;
+ }
+
+ // Filter the last element (index magnLen - 1; the loop above covered the first magnLen - 1, see the assert).
+ *preal = (int16_t)((*preal * *pns_filter) >> 14);
+ *pimag = (int16_t)((*pimag * *pns_filter) >> 14);
+
+ // (2) Create spectrum.
+
+ // Fixed point C code for the rest of the function is as follows:
+ // freq_buf[0] = inst->real[0];
+ // freq_buf[1] = -inst->imag[0];
+ // for (i = 1, j = 2; i < inst->anaLen2; i += 1, j += 2) {
+ // freq_buf[j] = inst->real[i];
+ // freq_buf[j + 1] = -inst->imag[i];
+ // }
+ // freq_buf[inst->anaLen] = inst->real[inst->anaLen2];
+ // freq_buf[inst->anaLen + 1] = -inst->imag[inst->anaLen2];
+
+ preal = &inst->real[0];
+ pimag = &inst->imag[0];
+ pimag_end = pimag + inst->anaLen2;
+ int16_t * freq_buf_start = freq_buf;
+ while (pimag < pimag_end) {
+ // Loop unrolled twice: 16 (real, -imag) pairs are interleaved per pass.
+ int16x8x2_t real_imag_0;
+ int16x8x2_t real_imag_1;
+ real_imag_0.val[1] = vld1q_s16(pimag);
+ real_imag_0.val[0] = vld1q_s16(preal);
+ preal += 8;
+ pimag += 8;
+ real_imag_1.val[1] = vld1q_s16(pimag);
+ real_imag_1.val[0] = vld1q_s16(preal);
+ preal += 8;
+ pimag += 8;
+
+ real_imag_0.val[1] = vnegq_s16(real_imag_0.val[1]);
+ real_imag_1.val[1] = vnegq_s16(real_imag_1.val[1]);
+ vst2q_s16(freq_buf_start, real_imag_0);
+ freq_buf_start += 16;
+ vst2q_s16(freq_buf_start, real_imag_1);
+ freq_buf_start += 16;
+ }
+ freq_buf[inst->anaLen] = inst->real[inst->anaLen2];
+ freq_buf[inst->anaLen + 1] = -inst->imag[inst->anaLen2];
+}
+
+// For the noise suppression process: synthesis (window, gain, saturating add
+// into the synthesis buffer), read out fully processed segment, update buffer.
+void WebRtcNsx_SynthesisUpdateNeon(NoiseSuppressionFixedC* inst,
+ int16_t* out_frame,
+ int16_t gain_factor) {
+ assert(inst->anaLen % 16 == 0);
+ assert(inst->blockLen10ms % 16 == 0);
+
+ int16_t* preal_start = inst->real;
+ const int16_t* pwindow = inst->window;
+ int16_t* preal_end = preal_start + inst->anaLen;
+ int16_t* psynthesis_buffer = inst->synthesisBuffer;
+
+ while (preal_start < preal_end) {
+ // Loop unrolled twice: 16 samples per pass.
+ int16x8_t window_0 = vld1q_s16(pwindow);
+ int16x8_t real_0 = vld1q_s16(preal_start);
+ int16x8_t synthesis_buffer_0 = vld1q_s16(psynthesis_buffer);
+
+ int16x8_t window_1 = vld1q_s16(pwindow + 8);
+ int16x8_t real_1 = vld1q_s16(preal_start + 8);
+ int16x8_t synthesis_buffer_1 = vld1q_s16(psynthesis_buffer + 8);
+
+ int32x4_t tmp32a_0_low = vmull_s16(vget_low_s16(real_0),
+ vget_low_s16(window_0));
+ int32x4_t tmp32a_0_high = vmull_s16(vget_high_s16(real_0),
+ vget_high_s16(window_0));
+
+ int32x4_t tmp32a_1_low = vmull_s16(vget_low_s16(real_1),
+ vget_low_s16(window_1));
+ int32x4_t tmp32a_1_high = vmull_s16(vget_high_s16(real_1),
+ vget_high_s16(window_1));
+
+ int16x4_t tmp16a_0_low = vqrshrn_n_s32(tmp32a_0_low, 14);
+ int16x4_t tmp16a_0_high = vqrshrn_n_s32(tmp32a_0_high, 14);
+
+ int16x4_t tmp16a_1_low = vqrshrn_n_s32(tmp32a_1_low, 14);
+ int16x4_t tmp16a_1_high = vqrshrn_n_s32(tmp32a_1_high, 14);
+
+ int32x4_t tmp32b_0_low = vmull_n_s16(tmp16a_0_low, gain_factor);
+ int32x4_t tmp32b_0_high = vmull_n_s16(tmp16a_0_high, gain_factor);
+
+ int32x4_t tmp32b_1_low = vmull_n_s16(tmp16a_1_low, gain_factor);
+ int32x4_t tmp32b_1_high = vmull_n_s16(tmp16a_1_high, gain_factor);
+
+ int16x4_t tmp16b_0_low = vqrshrn_n_s32(tmp32b_0_low, 13);
+ int16x4_t tmp16b_0_high = vqrshrn_n_s32(tmp32b_0_high, 13);
+
+ int16x4_t tmp16b_1_low = vqrshrn_n_s32(tmp32b_1_low, 13);
+ int16x4_t tmp16b_1_high = vqrshrn_n_s32(tmp32b_1_high, 13);
+
+ synthesis_buffer_0 = vqaddq_s16(vcombine_s16(tmp16b_0_low, tmp16b_0_high),
+ synthesis_buffer_0);
+ synthesis_buffer_1 = vqaddq_s16(vcombine_s16(tmp16b_1_low, tmp16b_1_high),
+ synthesis_buffer_1);
+ vst1q_s16(psynthesis_buffer, synthesis_buffer_0);
+ vst1q_s16(psynthesis_buffer + 8, synthesis_buffer_1);
+
+ pwindow += 16;
+ preal_start += 16;
+ psynthesis_buffer += 16;
+ }
+
+ // Read out fully processed segment.
+ int16_t * p_start = inst->synthesisBuffer;
+ int16_t * p_end = inst->synthesisBuffer + inst->blockLen10ms;
+ int16_t * p_frame = out_frame;
+ while (p_start < p_end) {
+ int16x8_t frame_0 = vld1q_s16(p_start);
+ vst1q_s16(p_frame, frame_0);
+ p_start += 8;
+ p_frame += 8;
+ }
+
+ // Update synthesis buffer: move the samples after blockLen10ms to the front...
+ int16_t* p_start_src = inst->synthesisBuffer + inst->blockLen10ms;
+ int16_t* p_end_src = inst->synthesisBuffer + inst->anaLen;
+ int16_t* p_start_dst = inst->synthesisBuffer;
+ while (p_start_src < p_end_src) {
+ int16x8_t frame = vld1q_s16(p_start_src);
+ vst1q_s16(p_start_dst, frame);
+ p_start_src += 8;
+ p_start_dst += 8;
+ }
+ // ...then zero the last blockLen10ms samples.
+ p_start = inst->synthesisBuffer + inst->anaLen - inst->blockLen10ms;
+ p_end = p_start + inst->blockLen10ms;
+ int16x8_t zero = vdupq_n_s16(0);
+ for (;p_start < p_end; p_start += 8) {
+ vst1q_s16(p_start, zero);
+ }
+}
+
+// Update analysis buffer for lower band, and window data before FFT.
+void WebRtcNsx_AnalysisUpdateNeon(NoiseSuppressionFixedC* inst,
+ int16_t* out,
+ int16_t* new_speech) {
+ assert(inst->blockLen10ms % 16 == 0);
+ assert(inst->anaLen % 16 == 0);
+
+ // For lower band update analysis buffer.
+ // memcpy(inst->analysisBuffer, inst->analysisBuffer + inst->blockLen10ms,
+ // (inst->anaLen - inst->blockLen10ms) * sizeof(*inst->analysisBuffer));
+ int16_t* p_start_src = inst->analysisBuffer + inst->blockLen10ms;
+ int16_t* p_end_src = inst->analysisBuffer + inst->anaLen;
+ int16_t* p_start_dst = inst->analysisBuffer;
+ while (p_start_src < p_end_src) {
+ int16x8_t frame = vld1q_s16(p_start_src);
+ vst1q_s16(p_start_dst, frame);
+
+ p_start_src += 8;
+ p_start_dst += 8;
+ }
+
+ // memcpy(inst->analysisBuffer + inst->anaLen - inst->blockLen10ms,
+ // new_speech, inst->blockLen10ms * sizeof(*inst->analysisBuffer));
+ p_start_src = new_speech;
+ p_end_src = new_speech + inst->blockLen10ms;
+ p_start_dst = inst->analysisBuffer + inst->anaLen - inst->blockLen10ms;
+ while (p_start_src < p_end_src) {
+ int16x8_t frame = vld1q_s16(p_start_src);
+ vst1q_s16(p_start_dst, frame);
+
+ p_start_src += 8;
+ p_start_dst += 8;
+ }
+
+ // Window data before FFT.
+ // out[i] = (window[i] * analysisBuffer[i] + (1 << 13)) >> 14 for i < anaLen.
+ const int16_t* p_start_window = inst->window;
+ const int16_t* p_start_buffer = inst->analysisBuffer;
+ int16_t* p_start_out = out;
+ const int16_t* p_end_out = out + inst->anaLen;
+
+ // Load inside the loop so that every pass reads only the 8 samples it
+ // multiplies. Preloading the vectors for the next pass would make the final
+ // pass read indices anaLen..anaLen+7 of |window| and |analysisBuffer|, which
+ // are never used and may lie beyond the end of those arrays.
+ while (p_start_out < p_end_out) {
+ int16x8_t window = vld1q_s16(p_start_window);
+ int16x8_t buffer = vld1q_s16(p_start_buffer);
+
+ // Widening multiply, then rounding shift by 14 back down to 16 bits.
+ int32x4_t tmp32_low = vmull_s16(vget_low_s16(window), vget_low_s16(buffer));
+ int32x4_t tmp32_high = vmull_s16(vget_high_s16(window),
+ vget_high_s16(buffer));
+
+ int16x4_t result_low = vrshrn_n_s32(tmp32_low, 14);
+ int16x4_t result_high = vrshrn_n_s32(tmp32_high, 14);
+ vst1q_s16(p_start_out, vcombine_s16(result_low, result_high));
+
+ p_start_buffer += 8;
+ p_start_window += 8;
+ p_start_out += 8;
+ }
+}
diff --git a/third_party/webrtc/src/webrtc/modules/audio_processing/ns/nsx_defines.h b/third_party/webrtc/src/webrtc/modules/audio_processing/ns/nsx_defines.h
new file mode 100644
index 00000000..862dc3ca
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/modules/audio_processing/ns/nsx_defines.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_NS_MAIN_SOURCE_NSX_DEFINES_H_
+#define WEBRTC_MODULES_AUDIO_PROCESSING_NS_MAIN_SOURCE_NSX_DEFINES_H_
+
+#define ANAL_BLOCKL_MAX 256 /* Max analysis block length */
+#define HALF_ANAL_BLOCKL 129 /* Half max analysis block length + 1 */
+#define NUM_HIGH_BANDS_MAX 2 /* Max number of high bands */
+#define SIMULT 3
+#define END_STARTUP_LONG 200
+#define END_STARTUP_SHORT 50
+#define FACTOR_Q16 2621440 /* 40 in Q16 */
+#define FACTOR_Q7 5120 /* 40 in Q7 */
+#define FACTOR_Q7_STARTUP 1024 /* 8 in Q7 */
+#define WIDTH_Q8 3 /* 0.01 in Q8 (or 25 ) */
+
+/* PARAMETERS FOR NEW METHOD */
+#define DD_PR_SNR_Q11 2007 /* ~= Q11(0.98) DD update of prior SNR */
+#define ONE_MINUS_DD_PR_SNR_Q11 41 /* DD update of prior SNR */
+#define SPECT_FLAT_TAVG_Q14 4915 /* (0.30) tavg parameter for spectral flatness measure */
+#define SPECT_DIFF_TAVG_Q8 77 /* (0.30) tavg parameter; NOTE(review): presumably for the spectral difference measure (name says DIFF) -- confirm */
+#define PRIOR_UPDATE_Q14 1638 /* Q14(0.1) Update parameter of prior model */
+#define NOISE_UPDATE_Q8 26 /* 26 ~= Q8(0.1) Update parameter for noise */
+
+/* Probability threshold for noise state in speech/noise likelihood. */
+#define ONE_MINUS_PROB_RANGE_Q8 205 /* 205 ~= Q8(0.8) */
+#define HIST_PAR_EST 1000 /* Histogram size for estimation of parameters */
+
+/* FEATURE EXTRACTION CONFIG */
+/* Bin size of histogram */
+#define BIN_SIZE_LRT 10
+/* Scale parameters: multiply dominant peaks of the histograms by scale factor to obtain. */
+/* Thresholds for prior model */
+#define FACTOR_1_LRT_DIFF 6 /* For LRT and spectral difference (5 times bigger) */
+/* For spectral_flatness: used when noise is flatter than speech (10 times bigger). */
+#define FACTOR_2_FLAT_Q10 922
+/* Peak limit for spectral flatness (varies between 0 and 1) */
+#define THRES_PEAK_FLAT 24 /* * 2 * BIN_SIZE_FLAT_FX */
+/* Limit on spacing of two highest peaks in histogram: spacing determined by bin size. */
+#define LIM_PEAK_SPACE_FLAT_DIFF 4 /* * 2 * BIN_SIZE_DIFF_FX */
+/* Limit on relevance of second peak */
+#define LIM_PEAK_WEIGHT_FLAT_DIFF 2
+#define THRES_FLUCT_LRT 10240 /* = 20 * inst->modelUpdate; fluctuation limit of LRT feat. */
+/* Limit on the max and min values for the feature thresholds */
+#define MAX_FLAT_Q10 38912 /* * 2 * BIN_SIZE_FLAT_FX */
+#define MIN_FLAT_Q10 4096 /* * 2 * BIN_SIZE_FLAT_FX */
+#define MAX_DIFF 100 /* * 2 * BIN_SIZE_DIFF_FX */
+#define MIN_DIFF 16 /* * 2 * BIN_SIZE_DIFF_FX */
+/* Criteria of weight of histogram peak to accept/reject feature */
+#define THRES_WEIGHT_FLAT_DIFF 154 /*(int)(0.3*(inst->modelUpdate)) for flatness and difference */
+
+#define STAT_UPDATES 9 /* Update every 512 = 1 << 9 block */
+#define ONE_MINUS_GAMMA_PAUSE_Q8 13 /* ~= Q8(0.05) Update for conservative noise estimate */
+#define GAMMA_NOISE_TRANS_AND_SPEECH_Q8 3 /* ~= Q8(0.01) Update for transition and noise region */
+
+#endif /* WEBRTC_MODULES_AUDIO_PROCESSING_NS_MAIN_SOURCE_NSX_DEFINES_H_ */
diff --git a/third_party/webrtc/src/webrtc/modules/audio_processing/ns/windows_private.h b/third_party/webrtc/src/webrtc/modules/audio_processing/ns/windows_private.h
new file mode 100644
index 00000000..44c2e846
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/modules/audio_processing/ns/windows_private.h
@@ -0,0 +1,574 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_NS_MAIN_SOURCE_WINDOWS_PRIVATE_H_
+#define WEBRTC_MODULES_AUDIO_PROCESSING_NS_MAIN_SOURCE_WINDOWS_PRIVATE_H_
+
+// Hanning window for 4ms at 16kHz: 128 taps, symmetric about the 1.0 peak at index 64
+static const float kHanning64w128[128] = {
+ 0.00000000000000f, 0.02454122852291f, 0.04906767432742f,
+ 0.07356456359967f, 0.09801714032956f, 0.12241067519922f,
+ 0.14673047445536f, 0.17096188876030f, 0.19509032201613f,
+ 0.21910124015687f, 0.24298017990326f, 0.26671275747490f,
+ 0.29028467725446f, 0.31368174039889f, 0.33688985339222f,
+ 0.35989503653499f, 0.38268343236509f, 0.40524131400499f,
+ 0.42755509343028f, 0.44961132965461f, 0.47139673682600f,
+ 0.49289819222978f, 0.51410274419322f, 0.53499761988710f,
+ 0.55557023301960f, 0.57580819141785f, 0.59569930449243f,
+ 0.61523159058063f, 0.63439328416365f, 0.65317284295378f,
+ 0.67155895484702f, 0.68954054473707f, 0.70710678118655f,
+ 0.72424708295147f, 0.74095112535496f, 0.75720884650648f,
+ 0.77301045336274f, 0.78834642762661f, 0.80320753148064f,
+ 0.81758481315158f, 0.83146961230255f, 0.84485356524971f,
+ 0.85772861000027f, 0.87008699110871f, 0.88192126434835f,
+ 0.89322430119552f, 0.90398929312344f, 0.91420975570353f,
+ 0.92387953251129f, 0.93299279883474f, 0.94154406518302f,
+ 0.94952818059304f, 0.95694033573221f, 0.96377606579544f,
+ 0.97003125319454f, 0.97570213003853f, 0.98078528040323f,
+ 0.98527764238894f, 0.98917650996478f, 0.99247953459871f,
+ 0.99518472667220f, 0.99729045667869f, 0.99879545620517f,
+ 0.99969881869620f, 1.00000000000000f,
+ 0.99969881869620f, 0.99879545620517f, 0.99729045667869f,
+ 0.99518472667220f, 0.99247953459871f, 0.98917650996478f,
+ 0.98527764238894f, 0.98078528040323f, 0.97570213003853f,
+ 0.97003125319454f, 0.96377606579544f, 0.95694033573221f,
+ 0.94952818059304f, 0.94154406518302f, 0.93299279883474f,
+ 0.92387953251129f, 0.91420975570353f, 0.90398929312344f,
+ 0.89322430119552f, 0.88192126434835f, 0.87008699110871f,
+ 0.85772861000027f, 0.84485356524971f, 0.83146961230255f,
+ 0.81758481315158f, 0.80320753148064f, 0.78834642762661f,
+ 0.77301045336274f, 0.75720884650648f, 0.74095112535496f,
+ 0.72424708295147f, 0.70710678118655f, 0.68954054473707f,
+ 0.67155895484702f, 0.65317284295378f, 0.63439328416365f,
+ 0.61523159058063f, 0.59569930449243f, 0.57580819141785f,
+ 0.55557023301960f, 0.53499761988710f, 0.51410274419322f,
+ 0.49289819222978f, 0.47139673682600f, 0.44961132965461f,
+ 0.42755509343028f, 0.40524131400499f, 0.38268343236509f,
+ 0.35989503653499f, 0.33688985339222f, 0.31368174039889f,
+ 0.29028467725446f, 0.26671275747490f, 0.24298017990326f,
+ 0.21910124015687f, 0.19509032201613f, 0.17096188876030f,
+ 0.14673047445536f, 0.12241067519922f, 0.09801714032956f,
+ 0.07356456359967f, 0.04906767432742f, 0.02454122852291f
+};
+
+
+
+// Hybrid Hanning & flat window: 128 taps, flat at 1.0 for indices 48..80
+static const float kBlocks80w128[128] = {
+ (float)0.00000000, (float)0.03271908, (float)0.06540313, (float)0.09801714, (float)0.13052619,
+ (float)0.16289547, (float)0.19509032, (float)0.22707626, (float)0.25881905, (float)0.29028468,
+ (float)0.32143947, (float)0.35225005, (float)0.38268343, (float)0.41270703, (float)0.44228869,
+ (float)0.47139674, (float)0.50000000, (float)0.52806785, (float)0.55557023, (float)0.58247770,
+ (float)0.60876143, (float)0.63439328, (float)0.65934582, (float)0.68359230, (float)0.70710678,
+ (float)0.72986407, (float)0.75183981, (float)0.77301045, (float)0.79335334, (float)0.81284668,
+ (float)0.83146961, (float)0.84920218, (float)0.86602540, (float)0.88192126, (float)0.89687274,
+ (float)0.91086382, (float)0.92387953, (float)0.93590593, (float)0.94693013, (float)0.95694034,
+ (float)0.96592583, (float)0.97387698, (float)0.98078528, (float)0.98664333, (float)0.99144486,
+ (float)0.99518473, (float)0.99785892, (float)0.99946459, (float)1.00000000, (float)1.00000000,
+ (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000,
+ (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000,
+ (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000,
+ (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000,
+ (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000,
+ (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000,
+ (float)1.00000000, (float)0.99946459, (float)0.99785892, (float)0.99518473, (float)0.99144486,
+ (float)0.98664333, (float)0.98078528, (float)0.97387698, (float)0.96592583, (float)0.95694034,
+ (float)0.94693013, (float)0.93590593, (float)0.92387953, (float)0.91086382, (float)0.89687274,
+ (float)0.88192126, (float)0.86602540, (float)0.84920218, (float)0.83146961, (float)0.81284668,
+ (float)0.79335334, (float)0.77301045, (float)0.75183981, (float)0.72986407, (float)0.70710678,
+ (float)0.68359230, (float)0.65934582, (float)0.63439328, (float)0.60876143, (float)0.58247770,
+ (float)0.55557023, (float)0.52806785, (float)0.50000000, (float)0.47139674, (float)0.44228869,
+ (float)0.41270703, (float)0.38268343, (float)0.35225005, (float)0.32143947, (float)0.29028468,
+ (float)0.25881905, (float)0.22707626, (float)0.19509032, (float)0.16289547, (float)0.13052619,
+ (float)0.09801714, (float)0.06540313, (float)0.03271908
+};
+
+// Hybrid Hanning & flat window: 256 taps, flat at 1.0 for indices 96..160
+static const float kBlocks160w256[256] = {
+ (float)0.00000000, (float)0.01636173, (float)0.03271908, (float)0.04906767, (float)0.06540313,
+ (float)0.08172107, (float)0.09801714, (float)0.11428696, (float)0.13052619, (float)0.14673047,
+ (float)0.16289547, (float)0.17901686, (float)0.19509032, (float)0.21111155, (float)0.22707626,
+ (float)0.24298018, (float)0.25881905, (float)0.27458862, (float)0.29028468, (float)0.30590302,
+ (float)0.32143947, (float)0.33688985, (float)0.35225005, (float)0.36751594, (float)0.38268343,
+ (float)0.39774847, (float)0.41270703, (float)0.42755509, (float)0.44228869, (float)0.45690388,
+ (float)0.47139674, (float)0.48576339, (float)0.50000000, (float)0.51410274, (float)0.52806785,
+ (float)0.54189158, (float)0.55557023, (float)0.56910015, (float)0.58247770, (float)0.59569930,
+ (float)0.60876143, (float)0.62166057, (float)0.63439328, (float)0.64695615, (float)0.65934582,
+ (float)0.67155895, (float)0.68359230, (float)0.69544264, (float)0.70710678, (float)0.71858162,
+ (float)0.72986407, (float)0.74095113, (float)0.75183981, (float)0.76252720, (float)0.77301045,
+ (float)0.78328675, (float)0.79335334, (float)0.80320753, (float)0.81284668, (float)0.82226822,
+ (float)0.83146961, (float)0.84044840, (float)0.84920218, (float)0.85772861, (float)0.86602540,
+ (float)0.87409034, (float)0.88192126, (float)0.88951608, (float)0.89687274, (float)0.90398929,
+ (float)0.91086382, (float)0.91749450, (float)0.92387953, (float)0.93001722, (float)0.93590593,
+ (float)0.94154407, (float)0.94693013, (float)0.95206268, (float)0.95694034, (float)0.96156180,
+ (float)0.96592583, (float)0.97003125, (float)0.97387698, (float)0.97746197, (float)0.98078528,
+ (float)0.98384601, (float)0.98664333, (float)0.98917651, (float)0.99144486, (float)0.99344778,
+ (float)0.99518473, (float)0.99665524, (float)0.99785892, (float)0.99879546, (float)0.99946459,
+ (float)0.99986614, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000,
+ (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000,
+ (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000,
+ (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000,
+ (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000,
+ (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000,
+ (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000,
+ (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000,
+ (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000,
+ (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000,
+ (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000,
+ (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000,
+ (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000,
+ (float)1.00000000, (float)0.99986614, (float)0.99946459, (float)0.99879546, (float)0.99785892,
+ (float)0.99665524, (float)0.99518473, (float)0.99344778, (float)0.99144486, (float)0.98917651,
+ (float)0.98664333, (float)0.98384601, (float)0.98078528, (float)0.97746197, (float)0.97387698,
+ (float)0.97003125, (float)0.96592583, (float)0.96156180, (float)0.95694034, (float)0.95206268,
+ (float)0.94693013, (float)0.94154407, (float)0.93590593, (float)0.93001722, (float)0.92387953,
+ (float)0.91749450, (float)0.91086382, (float)0.90398929, (float)0.89687274, (float)0.88951608,
+ (float)0.88192126, (float)0.87409034, (float)0.86602540, (float)0.85772861, (float)0.84920218,
+ (float)0.84044840, (float)0.83146961, (float)0.82226822, (float)0.81284668, (float)0.80320753,
+ (float)0.79335334, (float)0.78328675, (float)0.77301045, (float)0.76252720, (float)0.75183981,
+ (float)0.74095113, (float)0.72986407, (float)0.71858162, (float)0.70710678, (float)0.69544264,
+ (float)0.68359230, (float)0.67155895, (float)0.65934582, (float)0.64695615, (float)0.63439328,
+ (float)0.62166057, (float)0.60876143, (float)0.59569930, (float)0.58247770, (float)0.56910015,
+ (float)0.55557023, (float)0.54189158, (float)0.52806785, (float)0.51410274, (float)0.50000000,
+ (float)0.48576339, (float)0.47139674, (float)0.45690388, (float)0.44228869, (float)0.42755509,
+ (float)0.41270703, (float)0.39774847, (float)0.38268343, (float)0.36751594, (float)0.35225005,
+ (float)0.33688985, (float)0.32143947, (float)0.30590302, (float)0.29028468, (float)0.27458862,
+ (float)0.25881905, (float)0.24298018, (float)0.22707626, (float)0.21111155, (float)0.19509032,
+ (float)0.17901686, (float)0.16289547, (float)0.14673047, (float)0.13052619, (float)0.11428696,
+ (float)0.09801714, (float)0.08172107, (float)0.06540313, (float)0.04906767, (float)0.03271908,
+ (float)0.01636173
+};
+
+// Hybrid Hanning & flat window for 20ms: 512 taps, flat at 1.0 for indices 192..320
+static const float kBlocks320w512[512] = {
+ (float)0.00000000, (float)0.00818114, (float)0.01636173, (float)0.02454123, (float)0.03271908,
+ (float)0.04089475, (float)0.04906767, (float)0.05723732, (float)0.06540313, (float)0.07356456,
+ (float)0.08172107, (float)0.08987211, (float)0.09801714, (float)0.10615561, (float)0.11428696,
+ (float)0.12241068, (float)0.13052619, (float)0.13863297, (float)0.14673047, (float)0.15481816,
+ (float)0.16289547, (float)0.17096189, (float)0.17901686, (float)0.18705985, (float)0.19509032,
+ (float)0.20310773, (float)0.21111155, (float)0.21910124, (float)0.22707626, (float)0.23503609,
+ (float)0.24298018, (float)0.25090801, (float)0.25881905, (float)0.26671276, (float)0.27458862,
+ (float)0.28244610, (float)0.29028468, (float)0.29810383, (float)0.30590302, (float)0.31368174,
+ (float)0.32143947, (float)0.32917568, (float)0.33688985, (float)0.34458148, (float)0.35225005,
+ (float)0.35989504, (float)0.36751594, (float)0.37511224, (float)0.38268343, (float)0.39022901,
+ (float)0.39774847, (float)0.40524131, (float)0.41270703, (float)0.42014512, (float)0.42755509,
+ (float)0.43493645, (float)0.44228869, (float)0.44961133, (float)0.45690388, (float)0.46416584,
+ (float)0.47139674, (float)0.47859608, (float)0.48576339, (float)0.49289819, (float)0.50000000,
+ (float)0.50706834, (float)0.51410274, (float)0.52110274, (float)0.52806785, (float)0.53499762,
+ (float)0.54189158, (float)0.54874927, (float)0.55557023, (float)0.56235401, (float)0.56910015,
+ (float)0.57580819, (float)0.58247770, (float)0.58910822, (float)0.59569930, (float)0.60225052,
+ (float)0.60876143, (float)0.61523159, (float)0.62166057, (float)0.62804795, (float)0.63439328,
+ (float)0.64069616, (float)0.64695615, (float)0.65317284, (float)0.65934582, (float)0.66547466,
+ (float)0.67155895, (float)0.67759830, (float)0.68359230, (float)0.68954054, (float)0.69544264,
+ (float)0.70129818, (float)0.70710678, (float)0.71286806, (float)0.71858162, (float)0.72424708,
+ (float)0.72986407, (float)0.73543221, (float)0.74095113, (float)0.74642045, (float)0.75183981,
+ (float)0.75720885, (float)0.76252720, (float)0.76779452, (float)0.77301045, (float)0.77817464,
+ (float)0.78328675, (float)0.78834643, (float)0.79335334, (float)0.79830715, (float)0.80320753,
+ (float)0.80805415, (float)0.81284668, (float)0.81758481, (float)0.82226822, (float)0.82689659,
+ (float)0.83146961, (float)0.83598698, (float)0.84044840, (float)0.84485357, (float)0.84920218,
+ (float)0.85349396, (float)0.85772861, (float)0.86190585, (float)0.86602540, (float)0.87008699,
+ (float)0.87409034, (float)0.87803519, (float)0.88192126, (float)0.88574831, (float)0.88951608,
+ (float)0.89322430, (float)0.89687274, (float)0.90046115, (float)0.90398929, (float)0.90745693,
+ (float)0.91086382, (float)0.91420976, (float)0.91749450, (float)0.92071783, (float)0.92387953,
+ (float)0.92697940, (float)0.93001722, (float)0.93299280, (float)0.93590593, (float)0.93875641,
+ (float)0.94154407, (float)0.94426870, (float)0.94693013, (float)0.94952818, (float)0.95206268,
+ (float)0.95453345, (float)0.95694034, (float)0.95928317, (float)0.96156180, (float)0.96377607,
+ (float)0.96592583, (float)0.96801094, (float)0.97003125, (float)0.97198664, (float)0.97387698,
+ (float)0.97570213, (float)0.97746197, (float)0.97915640, (float)0.98078528, (float)0.98234852,
+ (float)0.98384601, (float)0.98527764, (float)0.98664333, (float)0.98794298, (float)0.98917651,
+ (float)0.99034383, (float)0.99144486, (float)0.99247953, (float)0.99344778, (float)0.99434953,
+ (float)0.99518473, (float)0.99595331, (float)0.99665524, (float)0.99729046, (float)0.99785892,
+ (float)0.99836060, (float)0.99879546, (float)0.99916346, (float)0.99946459, (float)0.99969882,
+ (float)0.99986614, (float)0.99996653, (float)1.00000000, (float)1.00000000, (float)1.00000000,
+ (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000,
+ (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000,
+ (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000,
+ (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000,
+ (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000,
+ (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000,
+ (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000,
+ (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000,
+ (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000,
+ (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000,
+ (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000,
+ (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000,
+ (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000,
+ (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000,
+ (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000,
+ (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000,
+ (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000,
+ (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000,
+ (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000,
+ (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000,
+ (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000,
+ (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000,
+ (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000,
+ (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000,
+ (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000,
+ (float)1.00000000, (float)0.99996653, (float)0.99986614, (float)0.99969882, (float)0.99946459,
+ (float)0.99916346, (float)0.99879546, (float)0.99836060, (float)0.99785892, (float)0.99729046,
+ (float)0.99665524, (float)0.99595331, (float)0.99518473, (float)0.99434953, (float)0.99344778,
+ (float)0.99247953, (float)0.99144486, (float)0.99034383, (float)0.98917651, (float)0.98794298,
+ (float)0.98664333, (float)0.98527764, (float)0.98384601, (float)0.98234852, (float)0.98078528,
+ (float)0.97915640, (float)0.97746197, (float)0.97570213, (float)0.97387698, (float)0.97198664,
+ (float)0.97003125, (float)0.96801094, (float)0.96592583, (float)0.96377607, (float)0.96156180,
+ (float)0.95928317, (float)0.95694034, (float)0.95453345, (float)0.95206268, (float)0.94952818,
+ (float)0.94693013, (float)0.94426870, (float)0.94154407, (float)0.93875641, (float)0.93590593,
+ (float)0.93299280, (float)0.93001722, (float)0.92697940, (float)0.92387953, (float)0.92071783,
+ (float)0.91749450, (float)0.91420976, (float)0.91086382, (float)0.90745693, (float)0.90398929,
+ (float)0.90046115, (float)0.89687274, (float)0.89322430, (float)0.88951608, (float)0.88574831,
+ (float)0.88192126, (float)0.87803519, (float)0.87409034, (float)0.87008699, (float)0.86602540,
+ (float)0.86190585, (float)0.85772861, (float)0.85349396, (float)0.84920218, (float)0.84485357,
+ (float)0.84044840, (float)0.83598698, (float)0.83146961, (float)0.82689659, (float)0.82226822,
+ (float)0.81758481, (float)0.81284668, (float)0.80805415, (float)0.80320753, (float)0.79830715,
+ (float)0.79335334, (float)0.78834643, (float)0.78328675, (float)0.77817464, (float)0.77301045,
+ (float)0.76779452, (float)0.76252720, (float)0.75720885, (float)0.75183981, (float)0.74642045,
+ (float)0.74095113, (float)0.73543221, (float)0.72986407, (float)0.72424708, (float)0.71858162,
+ (float)0.71286806, (float)0.70710678, (float)0.70129818, (float)0.69544264, (float)0.68954054,
+ (float)0.68359230, (float)0.67759830, (float)0.67155895, (float)0.66547466, (float)0.65934582,
+ (float)0.65317284, (float)0.64695615, (float)0.64069616, (float)0.63439328, (float)0.62804795,
+ (float)0.62166057, (float)0.61523159, (float)0.60876143, (float)0.60225052, (float)0.59569930,
+ (float)0.58910822, (float)0.58247770, (float)0.57580819, (float)0.56910015, (float)0.56235401,
+ (float)0.55557023, (float)0.54874927, (float)0.54189158, (float)0.53499762, (float)0.52806785,
+ (float)0.52110274, (float)0.51410274, (float)0.50706834, (float)0.50000000, (float)0.49289819,
+ (float)0.48576339, (float)0.47859608, (float)0.47139674, (float)0.46416584, (float)0.45690388,
+ (float)0.44961133, (float)0.44228869, (float)0.43493645, (float)0.42755509, (float)0.42014512,
+ (float)0.41270703, (float)0.40524131, (float)0.39774847, (float)0.39022901, (float)0.38268343,
+ (float)0.37511224, (float)0.36751594, (float)0.35989504, (float)0.35225005, (float)0.34458148,
+ (float)0.33688985, (float)0.32917568, (float)0.32143947, (float)0.31368174, (float)0.30590302,
+ (float)0.29810383, (float)0.29028468, (float)0.28244610, (float)0.27458862, (float)0.26671276,
+ (float)0.25881905, (float)0.25090801, (float)0.24298018, (float)0.23503609, (float)0.22707626,
+ (float)0.21910124, (float)0.21111155, (float)0.20310773, (float)0.19509032, (float)0.18705985,
+ (float)0.17901686, (float)0.17096189, (float)0.16289547, (float)0.15481816, (float)0.14673047,
+ (float)0.13863297, (float)0.13052619, (float)0.12241068, (float)0.11428696, (float)0.10615561,
+ (float)0.09801714, (float)0.08987211, (float)0.08172107, (float)0.07356456, (float)0.06540313,
+ (float)0.05723732, (float)0.04906767, (float)0.04089475, (float)0.03271908, (float)0.02454123,
+ (float)0.01636173, (float)0.00818114
+};
+
+
+// Hanning window for 15ms at 16kHz, zero-padded at both ends to 512 taps (peak 1.0 at index 256)
+static const float kBlocks240w512[512] = {
+ (float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000,
+ (float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000,
+ (float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000,
+ (float)0.00000000, (float)0.00000000, (float)0.00654494, (float)0.01308960, (float)0.01963369,
+ (float)0.02617695, (float)0.03271908, (float)0.03925982, (float)0.04579887, (float)0.05233596,
+ (float)0.05887080, (float)0.06540313, (float)0.07193266, (float)0.07845910, (float)0.08498218,
+ (float)0.09150162, (float)0.09801714, (float)0.10452846, (float)0.11103531, (float)0.11753740,
+ (float)0.12403446, (float)0.13052620, (float)0.13701233, (float)0.14349262, (float)0.14996676,
+ (float)0.15643448, (float)0.16289547, (float)0.16934951, (float)0.17579629, (float)0.18223552,
+ (float)0.18866697, (float)0.19509032, (float)0.20150533, (float)0.20791170, (float)0.21430916,
+ (float)0.22069745, (float)0.22707628, (float)0.23344538, (float)0.23980446, (float)0.24615330,
+ (float)0.25249159, (float)0.25881904, (float)0.26513544, (float)0.27144045, (float)0.27773386,
+ (float)0.28401536, (float)0.29028466, (float)0.29654160, (float)0.30278578, (float)0.30901700,
+ (float)0.31523499, (float)0.32143945, (float)0.32763019, (float)0.33380687, (float)0.33996925,
+ (float)0.34611708, (float)0.35225007, (float)0.35836795, (float)0.36447051, (float)0.37055743,
+ (float)0.37662852, (float)0.38268346, (float)0.38872197, (float)0.39474389, (float)0.40074885,
+ (float)0.40673664, (float)0.41270703, (float)0.41865975, (float)0.42459452, (float)0.43051112,
+ (float)0.43640924, (float)0.44228873, (float)0.44814920, (float)0.45399052, (float)0.45981237,
+ (float)0.46561453, (float)0.47139674, (float)0.47715878, (float)0.48290035, (float)0.48862126,
+ (float)0.49432120, (float)0.50000000, (float)0.50565743, (float)0.51129311, (float)0.51690692,
+ (float)0.52249855, (float)0.52806789, (float)0.53361452, (float)0.53913832, (float)0.54463905,
+ (float)0.55011642, (float)0.55557024, (float)0.56100029, (float)0.56640625, (float)0.57178795,
+ (float)0.57714522, (float)0.58247769, (float)0.58778524, (float)0.59306765, (float)0.59832460,
+ (float)0.60355598, (float)0.60876143, (float)0.61394083, (float)0.61909395, (float)0.62422055,
+ (float)0.62932038, (float)0.63439333, (float)0.63943899, (float)0.64445734, (float)0.64944810,
+ (float)0.65441096, (float)0.65934587, (float)0.66425246, (float)0.66913062, (float)0.67398012,
+ (float)0.67880076, (float)0.68359232, (float)0.68835455, (float)0.69308740, (float)0.69779050,
+ (float)0.70246369, (float)0.70710677, (float)0.71171963, (float)0.71630198, (float)0.72085363,
+ (float)0.72537440, (float)0.72986406, (float)0.73432255, (float)0.73874950, (float)0.74314487,
+ (float)0.74750835, (float)0.75183982, (float)0.75613910, (float)0.76040596, (float)0.76464027,
+ (float)0.76884186, (float)0.77301043, (float)0.77714598, (float)0.78124821, (float)0.78531694,
+ (float)0.78935206, (float)0.79335338, (float)0.79732066, (float)0.80125386, (float)0.80515265,
+ (float)0.80901700, (float)0.81284672, (float)0.81664157, (float)0.82040149, (float)0.82412618,
+ (float)0.82781565, (float)0.83146966, (float)0.83508795, (float)0.83867061, (float)0.84221727,
+ (float)0.84572780, (float)0.84920216, (float)0.85264021, (float)0.85604161, (float)0.85940641,
+ (float)0.86273444, (float)0.86602545, (float)0.86927933, (float)0.87249607, (float)0.87567532,
+ (float)0.87881714, (float)0.88192129, (float)0.88498765, (float)0.88801610, (float)0.89100653,
+ (float)0.89395881, (float)0.89687276, (float)0.89974827, (float)0.90258533, (float)0.90538365,
+ (float)0.90814316, (float)0.91086388, (float)0.91354549, (float)0.91618794, (float)0.91879123,
+ (float)0.92135513, (float)0.92387950, (float)0.92636442, (float)0.92880958, (float)0.93121493,
+ (float)0.93358046, (float)0.93590593, (float)0.93819135, (float)0.94043654, (float)0.94264150,
+ (float)0.94480604, (float)0.94693011, (float)0.94901365, (float)0.95105654, (float)0.95305866,
+ (float)0.95501995, (float)0.95694035, (float)0.95881975, (float)0.96065807, (float)0.96245527,
+ (float)0.96421117, (float)0.96592581, (float)0.96759909, (float)0.96923089, (float)0.97082120,
+ (float)0.97236991, (float)0.97387701, (float)0.97534233, (float)0.97676587, (float)0.97814763,
+ (float)0.97948742, (float)0.98078531, (float)0.98204112, (float)0.98325491, (float)0.98442656,
+ (float)0.98555607, (float)0.98664331, (float)0.98768836, (float)0.98869103, (float)0.98965138,
+ (float)0.99056935, (float)0.99144489, (float)0.99227792, (float)0.99306846, (float)0.99381649,
+ (float)0.99452192, (float)0.99518472, (float)0.99580491, (float)0.99638247, (float)0.99691731,
+ (float)0.99740952, (float)0.99785894, (float)0.99826562, (float)0.99862951, (float)0.99895066,
+ (float)0.99922901, (float)0.99946457, (float)0.99965733, (float)0.99980724, (float)0.99991435,
+ (float)0.99997860, (float)1.00000000, (float)0.99997860, (float)0.99991435, (float)0.99980724,
+ (float)0.99965733, (float)0.99946457, (float)0.99922901, (float)0.99895066, (float)0.99862951,
+ (float)0.99826562, (float)0.99785894, (float)0.99740946, (float)0.99691731, (float)0.99638247,
+ (float)0.99580491, (float)0.99518472, (float)0.99452192, (float)0.99381644, (float)0.99306846,
+ (float)0.99227792, (float)0.99144489, (float)0.99056935, (float)0.98965138, (float)0.98869103,
+ (float)0.98768836, (float)0.98664331, (float)0.98555607, (float)0.98442656, (float)0.98325491,
+ (float)0.98204112, (float)0.98078525, (float)0.97948742, (float)0.97814757, (float)0.97676587,
+ (float)0.97534227, (float)0.97387695, (float)0.97236991, (float)0.97082120, (float)0.96923089,
+ (float)0.96759909, (float)0.96592581, (float)0.96421117, (float)0.96245521, (float)0.96065807,
+ (float)0.95881969, (float)0.95694029, (float)0.95501995, (float)0.95305860, (float)0.95105648,
+ (float)0.94901365, (float)0.94693011, (float)0.94480604, (float)0.94264150, (float)0.94043654,
+ (float)0.93819129, (float)0.93590593, (float)0.93358046, (float)0.93121493, (float)0.92880952,
+ (float)0.92636436, (float)0.92387950, (float)0.92135507, (float)0.91879123, (float)0.91618794,
+ (float)0.91354543, (float)0.91086382, (float)0.90814310, (float)0.90538365, (float)0.90258527,
+ (float)0.89974827, (float)0.89687276, (float)0.89395875, (float)0.89100647, (float)0.88801610,
+ (float)0.88498759, (float)0.88192123, (float)0.87881714, (float)0.87567532, (float)0.87249595,
+ (float)0.86927933, (float)0.86602539, (float)0.86273432, (float)0.85940641, (float)0.85604161,
+ (float)0.85264009, (float)0.84920216, (float)0.84572780, (float)0.84221715, (float)0.83867055,
+ (float)0.83508795, (float)0.83146954, (float)0.82781565, (float)0.82412612, (float)0.82040137,
+ (float)0.81664157, (float)0.81284660, (float)0.80901700, (float)0.80515265, (float)0.80125374,
+ (float)0.79732066, (float)0.79335332, (float)0.78935200, (float)0.78531694, (float)0.78124815,
+ (float)0.77714586, (float)0.77301049, (float)0.76884180, (float)0.76464021, (float)0.76040596,
+ (float)0.75613904, (float)0.75183970, (float)0.74750835, (float)0.74314481, (float)0.73874938,
+ (float)0.73432249, (float)0.72986400, (float)0.72537428, (float)0.72085363, (float)0.71630186,
+ (float)0.71171951, (float)0.70710677, (float)0.70246363, (float)0.69779032, (float)0.69308734,
+ (float)0.68835449, (float)0.68359220, (float)0.67880070, (float)0.67398006, (float)0.66913044,
+ (float)0.66425240, (float)0.65934575, (float)0.65441096, (float)0.64944804, (float)0.64445722,
+ (float)0.63943905, (float)0.63439327, (float)0.62932026, (float)0.62422055, (float)0.61909389,
+ (float)0.61394072, (float)0.60876143, (float)0.60355592, (float)0.59832448, (float)0.59306765,
+ (float)0.58778518, (float)0.58247757, (float)0.57714522, (float)0.57178789, (float)0.56640613,
+ (float)0.56100023, (float)0.55557019, (float)0.55011630, (float)0.54463905, (float)0.53913826,
+ (float)0.53361434, (float)0.52806783, (float)0.52249849, (float)0.51690674, (float)0.51129305,
+ (float)0.50565726, (float)0.50000006, (float)0.49432117, (float)0.48862115, (float)0.48290038,
+ (float)0.47715873, (float)0.47139663, (float)0.46561456, (float)0.45981231, (float)0.45399037,
+ (float)0.44814920, (float)0.44228864, (float)0.43640912, (float)0.43051112, (float)0.42459446,
+ (float)0.41865960, (float)0.41270703, (float)0.40673658, (float)0.40074870, (float)0.39474386,
+ (float)0.38872188, (float)0.38268328, (float)0.37662849, (float)0.37055734, (float)0.36447033,
+ (float)0.35836792, (float)0.35224995, (float)0.34611690, (float)0.33996922, (float)0.33380675,
+ (float)0.32763001, (float)0.32143945, (float)0.31523487, (float)0.30901679, (float)0.30278572,
+ (float)0.29654145, (float)0.29028472, (float)0.28401530, (float)0.27773371, (float)0.27144048,
+ (float)0.26513538, (float)0.25881892, (float)0.25249159, (float)0.24615324, (float)0.23980433,
+ (float)0.23344538, (float)0.22707619, (float)0.22069728, (float)0.21430916, (float)0.20791161,
+ (float)0.20150517, (float)0.19509031, (float)0.18866688, (float)0.18223536, (float)0.17579627,
+ (float)0.16934940, (float)0.16289529, (float)0.15643445, (float)0.14996666, (float)0.14349243,
+ (float)0.13701232, (float)0.13052608, (float)0.12403426, (float)0.11753736, (float)0.11103519,
+ (float)0.10452849, (float)0.09801710, (float)0.09150149, (float)0.08498220, (float)0.07845904,
+ (float)0.07193252, (float)0.06540315, (float)0.05887074, (float)0.05233581, (float)0.04579888,
+ (float)0.03925974, (float)0.03271893, (float)0.02617695, (float)0.01963361, (float)0.01308943,
+ (float)0.00654493, (float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000,
+ (float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000,
+ (float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000,
+ (float)0.00000000, (float)0.00000000
+};
+
+
+// Hanning window: for 30ms with 1024 fft with symmetric zeros at 16kHz
+static const float kBlocks480w1024[1024] = {
+ (float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000,
+ (float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000,
+ (float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000,
+ (float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000,
+ (float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000,
+ (float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000,
+ (float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00327249, (float)0.00654494,
+ (float)0.00981732, (float)0.01308960, (float)0.01636173, (float)0.01963369, (float)0.02290544,
+ (float)0.02617695, (float)0.02944817, (float)0.03271908, (float)0.03598964, (float)0.03925982,
+ (float)0.04252957, (float)0.04579887, (float)0.04906768, (float)0.05233596, (float)0.05560368,
+ (float)0.05887080, (float)0.06213730, (float)0.06540313, (float)0.06866825, (float)0.07193266,
+ (float)0.07519628, (float)0.07845910, (float)0.08172107, (float)0.08498218, (float)0.08824237,
+ (float)0.09150162, (float)0.09475989, (float)0.09801714, (float)0.10127335, (float)0.10452846,
+ (float)0.10778246, (float)0.11103531, (float)0.11428697, (float)0.11753740, (float)0.12078657,
+ (float)0.12403446, (float)0.12728101, (float)0.13052620, (float)0.13376999, (float)0.13701233,
+ (float)0.14025325, (float)0.14349262, (float)0.14673047, (float)0.14996676, (float)0.15320145,
+ (float)0.15643448, (float)0.15966582, (float)0.16289547, (float)0.16612339, (float)0.16934951,
+ (float)0.17257382, (float)0.17579629, (float)0.17901687, (float)0.18223552, (float)0.18545224,
+ (float)0.18866697, (float)0.19187967, (float)0.19509032, (float)0.19829889, (float)0.20150533,
+ (float)0.20470962, (float)0.20791170, (float)0.21111156, (float)0.21430916, (float)0.21750447,
+ (float)0.22069745, (float)0.22388805, (float)0.22707628, (float)0.23026206, (float)0.23344538,
+ (float)0.23662618, (float)0.23980446, (float)0.24298020, (float)0.24615330, (float)0.24932377,
+ (float)0.25249159, (float)0.25565669, (float)0.25881904, (float)0.26197866, (float)0.26513544,
+ (float)0.26828939, (float)0.27144045, (float)0.27458861, (float)0.27773386, (float)0.28087610,
+ (float)0.28401536, (float)0.28715158, (float)0.29028466, (float)0.29341471, (float)0.29654160,
+ (float)0.29966527, (float)0.30278578, (float)0.30590302, (float)0.30901700, (float)0.31212768,
+ (float)0.31523499, (float)0.31833893, (float)0.32143945, (float)0.32453656, (float)0.32763019,
+ (float)0.33072028, (float)0.33380687, (float)0.33688986, (float)0.33996925, (float)0.34304500,
+ (float)0.34611708, (float)0.34918544, (float)0.35225007, (float)0.35531089, (float)0.35836795,
+ (float)0.36142117, (float)0.36447051, (float)0.36751595, (float)0.37055743, (float)0.37359497,
+ (float)0.37662852, (float)0.37965801, (float)0.38268346, (float)0.38570479, (float)0.38872197,
+ (float)0.39173502, (float)0.39474389, (float)0.39774847, (float)0.40074885, (float)0.40374491,
+ (float)0.40673664, (float)0.40972406, (float)0.41270703, (float)0.41568562, (float)0.41865975,
+ (float)0.42162940, (float)0.42459452, (float)0.42755508, (float)0.43051112, (float)0.43346250,
+ (float)0.43640924, (float)0.43935132, (float)0.44228873, (float)0.44522133, (float)0.44814920,
+ (float)0.45107228, (float)0.45399052, (float)0.45690390, (float)0.45981237, (float)0.46271592,
+ (float)0.46561453, (float)0.46850815, (float)0.47139674, (float)0.47428030, (float)0.47715878,
+ (float)0.48003215, (float)0.48290035, (float)0.48576337, (float)0.48862126, (float)0.49147385,
+ (float)0.49432120, (float)0.49716330, (float)0.50000000, (float)0.50283140, (float)0.50565743,
+ (float)0.50847799, (float)0.51129311, (float)0.51410276, (float)0.51690692, (float)0.51970553,
+ (float)0.52249855, (float)0.52528602, (float)0.52806789, (float)0.53084403, (float)0.53361452,
+ (float)0.53637928, (float)0.53913832, (float)0.54189163, (float)0.54463905, (float)0.54738063,
+ (float)0.55011642, (float)0.55284631, (float)0.55557024, (float)0.55828828, (float)0.56100029,
+ (float)0.56370628, (float)0.56640625, (float)0.56910014, (float)0.57178795, (float)0.57446963,
+ (float)0.57714522, (float)0.57981455, (float)0.58247769, (float)0.58513463, (float)0.58778524,
+ (float)0.59042960, (float)0.59306765, (float)0.59569931, (float)0.59832460, (float)0.60094351,
+ (float)0.60355598, (float)0.60616195, (float)0.60876143, (float)0.61135441, (float)0.61394083,
+ (float)0.61652070, (float)0.61909395, (float)0.62166059, (float)0.62422055, (float)0.62677383,
+ (float)0.62932038, (float)0.63186020, (float)0.63439333, (float)0.63691956, (float)0.63943899,
+ (float)0.64195162, (float)0.64445734, (float)0.64695615, (float)0.64944810, (float)0.65193301,
+ (float)0.65441096, (float)0.65688187, (float)0.65934587, (float)0.66180271, (float)0.66425246,
+ (float)0.66669512, (float)0.66913062, (float)0.67155898, (float)0.67398012, (float)0.67639405,
+ (float)0.67880076, (float)0.68120021, (float)0.68359232, (float)0.68597710, (float)0.68835455,
+ (float)0.69072467, (float)0.69308740, (float)0.69544262, (float)0.69779050, (float)0.70013082,
+ (float)0.70246369, (float)0.70478904, (float)0.70710677, (float)0.70941699, (float)0.71171963,
+ (float)0.71401459, (float)0.71630198, (float)0.71858168, (float)0.72085363, (float)0.72311789,
+ (float)0.72537440, (float)0.72762316, (float)0.72986406, (float)0.73209721, (float)0.73432255,
+ (float)0.73653996, (float)0.73874950, (float)0.74095118, (float)0.74314487, (float)0.74533057,
+ (float)0.74750835, (float)0.74967808, (float)0.75183982, (float)0.75399351, (float)0.75613910,
+ (float)0.75827658, (float)0.76040596, (float)0.76252723, (float)0.76464027, (float)0.76674515,
+ (float)0.76884186, (float)0.77093029, (float)0.77301043, (float)0.77508241, (float)0.77714598,
+ (float)0.77920127, (float)0.78124821, (float)0.78328675, (float)0.78531694, (float)0.78733873,
+ (float)0.78935206, (float)0.79135692, (float)0.79335338, (float)0.79534125, (float)0.79732066,
+ (float)0.79929149, (float)0.80125386, (float)0.80320752, (float)0.80515265, (float)0.80708915,
+ (float)0.80901700, (float)0.81093621, (float)0.81284672, (float)0.81474853, (float)0.81664157,
+ (float)0.81852591, (float)0.82040149, (float)0.82226825, (float)0.82412618, (float)0.82597536,
+ (float)0.82781565, (float)0.82964706, (float)0.83146966, (float)0.83328325, (float)0.83508795,
+ (float)0.83688378, (float)0.83867061, (float)0.84044838, (float)0.84221727, (float)0.84397703,
+ (float)0.84572780, (float)0.84746957, (float)0.84920216, (float)0.85092574, (float)0.85264021,
+ (float)0.85434544, (float)0.85604161, (float)0.85772866, (float)0.85940641, (float)0.86107504,
+ (float)0.86273444, (float)0.86438453, (float)0.86602545, (float)0.86765707, (float)0.86927933,
+ (float)0.87089235, (float)0.87249607, (float)0.87409031, (float)0.87567532, (float)0.87725097,
+ (float)0.87881714, (float)0.88037390, (float)0.88192129, (float)0.88345921, (float)0.88498765,
+ (float)0.88650668, (float)0.88801610, (float)0.88951612, (float)0.89100653, (float)0.89248741,
+ (float)0.89395881, (float)0.89542055, (float)0.89687276, (float)0.89831537, (float)0.89974827,
+ (float)0.90117162, (float)0.90258533, (float)0.90398932, (float)0.90538365, (float)0.90676826,
+ (float)0.90814316, (float)0.90950841, (float)0.91086388, (float)0.91220951, (float)0.91354549,
+ (float)0.91487163, (float)0.91618794, (float)0.91749454, (float)0.91879123, (float)0.92007810,
+ (float)0.92135513, (float)0.92262226, (float)0.92387950, (float)0.92512691, (float)0.92636442,
+ (float)0.92759192, (float)0.92880958, (float)0.93001723, (float)0.93121493, (float)0.93240267,
+ (float)0.93358046, (float)0.93474817, (float)0.93590593, (float)0.93705362, (float)0.93819135,
+ (float)0.93931901, (float)0.94043654, (float)0.94154406, (float)0.94264150, (float)0.94372880,
+ (float)0.94480604, (float)0.94587320, (float)0.94693011, (float)0.94797695, (float)0.94901365,
+ (float)0.95004016, (float)0.95105654, (float)0.95206273, (float)0.95305866, (float)0.95404440,
+ (float)0.95501995, (float)0.95598525, (float)0.95694035, (float)0.95788521, (float)0.95881975,
+ (float)0.95974404, (float)0.96065807, (float)0.96156180, (float)0.96245527, (float)0.96333838,
+ (float)0.96421117, (float)0.96507370, (float)0.96592581, (float)0.96676767, (float)0.96759909,
+ (float)0.96842021, (float)0.96923089, (float)0.97003126, (float)0.97082120, (float)0.97160077,
+ (float)0.97236991, (float)0.97312868, (float)0.97387701, (float)0.97461486, (float)0.97534233,
+ (float)0.97605932, (float)0.97676587, (float)0.97746199, (float)0.97814763, (float)0.97882277,
+ (float)0.97948742, (float)0.98014158, (float)0.98078531, (float)0.98141843, (float)0.98204112,
+ (float)0.98265332, (float)0.98325491, (float)0.98384601, (float)0.98442656, (float)0.98499662,
+ (float)0.98555607, (float)0.98610497, (float)0.98664331, (float)0.98717111, (float)0.98768836,
+ (float)0.98819500, (float)0.98869103, (float)0.98917651, (float)0.98965138, (float)0.99011570,
+ (float)0.99056935, (float)0.99101239, (float)0.99144489, (float)0.99186671, (float)0.99227792,
+ (float)0.99267852, (float)0.99306846, (float)0.99344778, (float)0.99381649, (float)0.99417448,
+ (float)0.99452192, (float)0.99485862, (float)0.99518472, (float)0.99550015, (float)0.99580491,
+ (float)0.99609905, (float)0.99638247, (float)0.99665523, (float)0.99691731, (float)0.99716878,
+ (float)0.99740952, (float)0.99763954, (float)0.99785894, (float)0.99806762, (float)0.99826562,
+ (float)0.99845290, (float)0.99862951, (float)0.99879545, (float)0.99895066, (float)0.99909520,
+ (float)0.99922901, (float)0.99935216, (float)0.99946457, (float)0.99956632, (float)0.99965733,
+ (float)0.99973762, (float)0.99980724, (float)0.99986613, (float)0.99991435, (float)0.99995178,
+ (float)0.99997860, (float)0.99999464, (float)1.00000000, (float)0.99999464, (float)0.99997860,
+ (float)0.99995178, (float)0.99991435, (float)0.99986613, (float)0.99980724, (float)0.99973762,
+ (float)0.99965733, (float)0.99956632, (float)0.99946457, (float)0.99935216, (float)0.99922901,
+ (float)0.99909520, (float)0.99895066, (float)0.99879545, (float)0.99862951, (float)0.99845290,
+ (float)0.99826562, (float)0.99806762, (float)0.99785894, (float)0.99763954, (float)0.99740946,
+ (float)0.99716872, (float)0.99691731, (float)0.99665523, (float)0.99638247, (float)0.99609905,
+ (float)0.99580491, (float)0.99550015, (float)0.99518472, (float)0.99485862, (float)0.99452192,
+ (float)0.99417448, (float)0.99381644, (float)0.99344778, (float)0.99306846, (float)0.99267852,
+ (float)0.99227792, (float)0.99186671, (float)0.99144489, (float)0.99101239, (float)0.99056935,
+ (float)0.99011564, (float)0.98965138, (float)0.98917651, (float)0.98869103, (float)0.98819494,
+ (float)0.98768836, (float)0.98717111, (float)0.98664331, (float)0.98610497, (float)0.98555607,
+ (float)0.98499656, (float)0.98442656, (float)0.98384601, (float)0.98325491, (float)0.98265326,
+ (float)0.98204112, (float)0.98141843, (float)0.98078525, (float)0.98014158, (float)0.97948742,
+ (float)0.97882277, (float)0.97814757, (float)0.97746193, (float)0.97676587, (float)0.97605932,
+ (float)0.97534227, (float)0.97461486, (float)0.97387695, (float)0.97312862, (float)0.97236991,
+ (float)0.97160077, (float)0.97082120, (float)0.97003126, (float)0.96923089, (float)0.96842015,
+ (float)0.96759909, (float)0.96676761, (float)0.96592581, (float)0.96507365, (float)0.96421117,
+ (float)0.96333838, (float)0.96245521, (float)0.96156180, (float)0.96065807, (float)0.95974404,
+ (float)0.95881969, (float)0.95788515, (float)0.95694029, (float)0.95598525, (float)0.95501995,
+ (float)0.95404440, (float)0.95305860, (float)0.95206267, (float)0.95105648, (float)0.95004016,
+ (float)0.94901365, (float)0.94797695, (float)0.94693011, (float)0.94587314, (float)0.94480604,
+ (float)0.94372880, (float)0.94264150, (float)0.94154406, (float)0.94043654, (float)0.93931895,
+ (float)0.93819129, (float)0.93705362, (float)0.93590593, (float)0.93474817, (float)0.93358046,
+ (float)0.93240267, (float)0.93121493, (float)0.93001723, (float)0.92880952, (float)0.92759192,
+ (float)0.92636436, (float)0.92512691, (float)0.92387950, (float)0.92262226, (float)0.92135507,
+ (float)0.92007804, (float)0.91879123, (float)0.91749448, (float)0.91618794, (float)0.91487157,
+ (float)0.91354543, (float)0.91220951, (float)0.91086382, (float)0.90950835, (float)0.90814310,
+ (float)0.90676820, (float)0.90538365, (float)0.90398932, (float)0.90258527, (float)0.90117157,
+ (float)0.89974827, (float)0.89831525, (float)0.89687276, (float)0.89542055, (float)0.89395875,
+ (float)0.89248741, (float)0.89100647, (float)0.88951600, (float)0.88801610, (float)0.88650662,
+ (float)0.88498759, (float)0.88345915, (float)0.88192123, (float)0.88037384, (float)0.87881714,
+ (float)0.87725091, (float)0.87567532, (float)0.87409031, (float)0.87249595, (float)0.87089223,
+ (float)0.86927933, (float)0.86765701, (float)0.86602539, (float)0.86438447, (float)0.86273432,
+ (float)0.86107504, (float)0.85940641, (float)0.85772860, (float)0.85604161, (float)0.85434544,
+ (float)0.85264009, (float)0.85092574, (float)0.84920216, (float)0.84746951, (float)0.84572780,
+ (float)0.84397697, (float)0.84221715, (float)0.84044844, (float)0.83867055, (float)0.83688372,
+ (float)0.83508795, (float)0.83328319, (float)0.83146954, (float)0.82964706, (float)0.82781565,
+ (float)0.82597530, (float)0.82412612, (float)0.82226813, (float)0.82040137, (float)0.81852591,
+ (float)0.81664157, (float)0.81474847, (float)0.81284660, (float)0.81093609, (float)0.80901700,
+ (float)0.80708915, (float)0.80515265, (float)0.80320752, (float)0.80125374, (float)0.79929143,
+ (float)0.79732066, (float)0.79534125, (float)0.79335332, (float)0.79135686, (float)0.78935200,
+ (float)0.78733861, (float)0.78531694, (float)0.78328675, (float)0.78124815, (float)0.77920121,
+ (float)0.77714586, (float)0.77508223, (float)0.77301049, (float)0.77093029, (float)0.76884180,
+ (float)0.76674509, (float)0.76464021, (float)0.76252711, (float)0.76040596, (float)0.75827658,
+ (float)0.75613904, (float)0.75399339, (float)0.75183970, (float)0.74967796, (float)0.74750835,
+ (float)0.74533057, (float)0.74314481, (float)0.74095106, (float)0.73874938, (float)0.73653996,
+ (float)0.73432249, (float)0.73209721, (float)0.72986400, (float)0.72762305, (float)0.72537428,
+ (float)0.72311789, (float)0.72085363, (float)0.71858162, (float)0.71630186, (float)0.71401453,
+ (float)0.71171951, (float)0.70941705, (float)0.70710677, (float)0.70478898, (float)0.70246363,
+ (float)0.70013070, (float)0.69779032, (float)0.69544268, (float)0.69308734, (float)0.69072461,
+ (float)0.68835449, (float)0.68597704, (float)0.68359220, (float)0.68120021, (float)0.67880070,
+ (float)0.67639399, (float)0.67398006, (float)0.67155886, (float)0.66913044, (float)0.66669512,
+ (float)0.66425240, (float)0.66180259, (float)0.65934575, (float)0.65688181, (float)0.65441096,
+ (float)0.65193301, (float)0.64944804, (float)0.64695609, (float)0.64445722, (float)0.64195150,
+ (float)0.63943905, (float)0.63691956, (float)0.63439327, (float)0.63186014, (float)0.62932026,
+ (float)0.62677372, (float)0.62422055, (float)0.62166059, (float)0.61909389, (float)0.61652064,
+ (float)0.61394072, (float)0.61135429, (float)0.60876143, (float)0.60616189, (float)0.60355592,
+ (float)0.60094339, (float)0.59832448, (float)0.59569913, (float)0.59306765, (float)0.59042960,
+ (float)0.58778518, (float)0.58513451, (float)0.58247757, (float)0.57981461, (float)0.57714522,
+ (float)0.57446963, (float)0.57178789, (float)0.56910002, (float)0.56640613, (float)0.56370628,
+ (float)0.56100023, (float)0.55828822, (float)0.55557019, (float)0.55284619, (float)0.55011630,
+ (float)0.54738069, (float)0.54463905, (float)0.54189152, (float)0.53913826, (float)0.53637916,
+ (float)0.53361434, (float)0.53084403, (float)0.52806783, (float)0.52528596, (float)0.52249849,
+ (float)0.51970541, (float)0.51690674, (float)0.51410276, (float)0.51129305, (float)0.50847787,
+ (float)0.50565726, (float)0.50283122, (float)0.50000006, (float)0.49716327, (float)0.49432117,
+ (float)0.49147379, (float)0.48862115, (float)0.48576325, (float)0.48290038, (float)0.48003212,
+ (float)0.47715873, (float)0.47428021, (float)0.47139663, (float)0.46850798, (float)0.46561456,
+ (float)0.46271589, (float)0.45981231, (float)0.45690379, (float)0.45399037, (float)0.45107210,
+ (float)0.44814920, (float)0.44522130, (float)0.44228864, (float)0.43935123, (float)0.43640912,
+ (float)0.43346232, (float)0.43051112, (float)0.42755505, (float)0.42459446, (float)0.42162928,
+ (float)0.41865960, (float)0.41568545, (float)0.41270703, (float)0.40972400, (float)0.40673658,
+ (float)0.40374479, (float)0.40074870, (float)0.39774850, (float)0.39474386, (float)0.39173496,
+ (float)0.38872188, (float)0.38570464, (float)0.38268328, (float)0.37965804, (float)0.37662849,
+ (float)0.37359491, (float)0.37055734, (float)0.36751580, (float)0.36447033, (float)0.36142117,
+ (float)0.35836792, (float)0.35531086, (float)0.35224995, (float)0.34918529, (float)0.34611690,
+ (float)0.34304500, (float)0.33996922, (float)0.33688980, (float)0.33380675, (float)0.33072016,
+ (float)0.32763001, (float)0.32453656, (float)0.32143945, (float)0.31833887, (float)0.31523487,
+ (float)0.31212750, (float)0.30901679, (float)0.30590302, (float)0.30278572, (float)0.29966521,
+ (float)0.29654145, (float)0.29341453, (float)0.29028472, (float)0.28715155, (float)0.28401530,
+ (float)0.28087601, (float)0.27773371, (float)0.27458847, (float)0.27144048, (float)0.26828936,
+ (float)0.26513538, (float)0.26197854, (float)0.25881892, (float)0.25565651, (float)0.25249159,
+ (float)0.24932374, (float)0.24615324, (float)0.24298008, (float)0.23980433, (float)0.23662600,
+ (float)0.23344538, (float)0.23026201, (float)0.22707619, (float)0.22388794, (float)0.22069728,
+ (float)0.21750426, (float)0.21430916, (float)0.21111152, (float)0.20791161, (float)0.20470949,
+ (float)0.20150517, (float)0.19829892, (float)0.19509031, (float)0.19187963, (float)0.18866688,
+ (float)0.18545210, (float)0.18223536, (float)0.17901689, (float)0.17579627, (float)0.17257376,
+ (float)0.16934940, (float)0.16612324, (float)0.16289529, (float)0.15966584, (float)0.15643445,
+ (float)0.15320137, (float)0.14996666, (float)0.14673033, (float)0.14349243, (float)0.14025325,
+ (float)0.13701232, (float)0.13376991, (float)0.13052608, (float)0.12728085, (float)0.12403426,
+ (float)0.12078657, (float)0.11753736, (float)0.11428688, (float)0.11103519, (float)0.10778230,
+ (float)0.10452849, (float)0.10127334, (float)0.09801710, (float)0.09475980, (float)0.09150149,
+ (float)0.08824220, (float)0.08498220, (float)0.08172106, (float)0.07845904, (float)0.07519618,
+ (float)0.07193252, (float)0.06866808, (float)0.06540315, (float)0.06213728, (float)0.05887074,
+ (float)0.05560357, (float)0.05233581, (float)0.04906749, (float)0.04579888, (float)0.04252954,
+ (float)0.03925974, (float)0.03598953, (float)0.03271893, (float)0.02944798, (float)0.02617695,
+ (float)0.02290541, (float)0.01963361, (float)0.01636161, (float)0.01308943, (float)0.00981712,
+ (float)0.00654493, (float)0.00327244, (float)0.00000000, (float)0.00000000, (float)0.00000000,
+ (float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000,
+ (float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000,
+ (float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000,
+ (float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000,
+ (float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000,
+ (float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000
+};
+
+#endif // WEBRTC_MODULES_AUDIO_PROCESSING_NS_MAIN_SOURCE_WINDOWS_PRIVATE_H_
diff --git a/third_party/webrtc/src/webrtc/modules/audio_processing/utility/delay_estimator.c b/third_party/webrtc/src/webrtc/modules/audio_processing/utility/delay_estimator.c
new file mode 100644
index 00000000..f9f3dc24
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/modules/audio_processing/utility/delay_estimator.c
@@ -0,0 +1,684 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/modules/audio_processing/utility/delay_estimator.h"
+
+#include <assert.h>
+#include <stdlib.h>
+#include <string.h>
+
+// Number of right shifts for scaling is linearly depending on number of bits in
+// the far-end binary spectrum.
+static const int kShiftsAtZero = 13; // Right shifts at zero binary spectrum.
+static const int kShiftsLinearSlope = 3; // NOTE(review): presumably the slope of that linear dependency - confirm at its use site.
+
+static const int32_t kProbabilityOffset = 1024; // 2 in Q9.
+static const int32_t kProbabilityLowerLimit = 8704; // 17 in Q9.
+static const int32_t kProbabilityMinSpread = 2816; // 5.5 in Q9.
+
+// Robust validation settings
+static const float kHistogramMax = 3000.f; // Saturation level of a histogram bin.
+static const float kLastHistogramMax = 250.f;
+static const float kMinHistogramThreshold = 1.5f; // Floor of the histogram threshold.
+static const int kMinRequiredHits = 10; // |candidate_hits| has to exceed this.
+static const int kMaxHitsWhenPossiblyNonCausal = 10; // Slow-change hit limit if candidate_delay < last_delay.
+static const int kMaxHitsWhenPossiblyCausal = 1000; // Slow-change hit limit otherwise.
+static const float kQ14Scaling = 1.f / (1 << 14); // Scaling by 2^14 to get Q0.
+static const float kFractionSlope = 0.05f; // Change of |fraction| per step of delay difference.
+static const float kMinFractionWhenPossiblyCausal = 0.5f; // Lower bound of |fraction| if delay_difference > allowed_offset.
+static const float kMinFractionWhenPossiblyNonCausal = 0.25f; // Value |fraction| starts from if delay_difference < 0.
+
+// Returns the population count (number of set bits) of the 32-bit |u32|.
+static int BitCount(uint32_t u32) {
+  // Each 3-bit group becomes the number of bits that were set in it.
+  uint32_t sum = u32 - ((u32 >> 1) & 0xDB6DB6DB) - ((u32 >> 2) & 0x49249249);
+  // Add neighboring groups so every 6-bit field carries one partial count.
+  sum = (sum + (sum >> 3)) & 0xC71C71C7;
+  // Fold the 6-bit fields together; the total (at most 32) fits in 6 bits.
+  sum += sum >> 6;
+  return (int) ((sum + (sum >> 12) + (sum >> 24)) & 0x3F);
+}
+
+// Measures how much |binary_vector| differs from each row of |binary_matrix|.
+// For every row the two words are XOR-ed and the set bits of the result, i.e.
+// the positions where the row and the vector disagree, are counted.
+//
+// Inputs:
+//    - binary_vector     : binary "vector" packed into one 32-bit word
+//    - binary_matrix     : binary "matrix" with one 32-bit word per row
+//    - matrix_size       : number of rows in |binary_matrix|
+//
+// Output:
+//    - bit_counts        : array of |matrix_size| entries; entry k receives
+//                          the number of bit positions in which row k and
+//                          |binary_vector| differ
+static void BitCountComparison(uint32_t binary_vector,
+                               const uint32_t* binary_matrix,
+                               int matrix_size,
+                               int32_t* bit_counts) {
+  int row;
+
+  for (row = 0; row < matrix_size; ++row) {
+    const uint32_t mismatch = binary_vector ^ binary_matrix[row];
+    bit_counts[row] = (int32_t) BitCount(mismatch);
+  }
+}
+
+// Gathers the statistics that HistogramBasedValidation() relies on; it must
+// therefore be called before HistogramBasedValidation(). The state that is
+// maintained here consists of:
+//  1. |candidate_hits|, the number of consecutive calls that have reported the
+//     same |candidate_delay|
+//  2. the |histogram| of candidate delays over time. Its updates are weighted
+//     by a reliability measure, and bins away from the candidate are reduced
+//     on every call so that the histogram can follow delay shifts.
+// The update rules are described in detail inside the function body.
+//
+// Inputs:
+//  - candidate_delay   : The delay to validate.
+//  - valley_depth_q14  : The cost function has a valley/minimum at the
+//                        |candidate_delay| location. |valley_depth_q14| is the
+//                        cost function difference between the minimum and
+//                        maximum locations. The value is in the Q14 domain.
+//  - valley_level_q14  : The cost function value at the minimum, in Q14.
+static void UpdateRobustValidationStatistics(BinaryDelayEstimator* self,
+                                             int candidate_delay,
+                                             int32_t valley_depth_q14,
+                                             int32_t valley_level_q14) {
+  const float depth = valley_depth_q14 * kQ14Scaling;
+  float last_set_decay = depth;
+  int slow_change_limit = kMaxHitsWhenPossiblyCausal;
+  int bin = 0;
+
+  // A candidate below |last_delay| gets the smaller hit limit.
+  if (candidate_delay < self->last_delay) {
+    slow_change_limit = kMaxHitsWhenPossiblyNonCausal;
+  }
+  assert(self->history_size == self->farend->history_size);
+  // Restart the hit counter whenever the candidate changes.
+  if (self->last_candidate_delay != candidate_delay) {
+    self->last_candidate_delay = candidate_delay;
+    self->candidate_hits = 0;
+  }
+  self->candidate_hits += 1;
+  // Rule 1: the bin of |candidate_delay| grows by |depth|, a simple measure
+  // of how reliable the candidate is, and saturates at |kHistogramMax|.
+  self->histogram[candidate_delay] += depth;
+  if (self->histogram[candidate_delay] > kHistogramMax) {
+    self->histogram[candidate_delay] = kHistogramMax;
+  }
+  // Rule 2: bins next to the candidate, candidate_delay + {-2, -1, 1}, are
+  // left untouched unless rule 3 applies to them.
+  // Rule 3: bins next to |last_delay|, last_delay + {-2, -1, 0, 1}, except
+  // the candidate bin itself, shrink by |last_set_decay|. While the candidate
+  // has fewer than |slow_change_limit| consecutive hits, that amount is the
+  // difference between the mean bit count at |compare_delay| and the valley
+  // level; afterwards |depth| is used instead.
+  if (self->candidate_hits < slow_change_limit) {
+    last_set_decay = (self->mean_bit_counts[self->compare_delay] -
+        valley_level_q14) * kQ14Scaling;
+  }
+  // Rules 4 and 5: all other bins shrink by |depth|; no bin goes below zero.
+  // TODO(bjornv): Investigate how to make this loop more efficient.
+  for (bin = 0; bin < self->history_size; ++bin) {
+    const int near_last = (bin != candidate_delay) &&
+        (bin >= self->last_delay - 2) && (bin <= self->last_delay + 1);
+    const int near_candidate = (bin >= candidate_delay - 2) &&
+        (bin <= candidate_delay + 1);
+    float decay = 0.f;
+    if (near_last) {
+      decay = last_set_decay;
+    } else if (!near_candidate) {
+      decay = depth;
+    }
+    self->histogram[bin] -= decay;
+    if (self->histogram[bin] < 0) {
+      self->histogram[bin] = 0;
+    }
+  }
+}
+
+// Validates the |candidate_delay| estimated in WebRtc_ProcessBinarySpectrum()
+// by combining a count of concurring hits with a modified histogram of recent
+// delay estimates. In short, a candidate is valid (1 is returned) when it is
+// the most likely one according to the histogram, with a few exceptions:
+//  1. If |candidate_delay| < |last_delay| we may be in a non-causal state,
+//     which can break an echo control algorithm. A quicker change is then
+//     allowed, even if |candidate_delay| is not the most likely delay
+//     according to the histogram.
+//  2. The candidate needs more than |kMinRequiredHits| hits and its histogram
+//     value has to have reached |kMinHistogramThreshold| to be valid.
+//  3. The outcome also depends on the filter length used for echo control.
+//     If the delay difference is larger than what the filter can capture, a
+//     change is likewise made more quickly.
+// Details are given by the comments in the function body.
+//
+// Input:
+//  - candidate_delay   : The delay to validate.
+//
+// Return value:
+//  - is_histogram_valid  : 1 - The |candidate_delay| is valid.
+//                          0 - Otherwise.
+static int HistogramBasedValidation(const BinaryDelayEstimator* self,
+                                    int candidate_delay) {
+  const int delay_difference = candidate_delay - self->last_delay;
+  float fraction = 1.f;
+  float threshold = 0.f;
+
+  // The bin of |candidate_delay| is compared with a threshold that equals a
+  // |fraction| of the |histogram| value at bin |compare_delay|. The |fraction|
+  // is a piecewise linear function of |delay_difference|, the distance from
+  // |last_delay| to |candidate_delay|. It can fall below 1, which permits a
+  // quicker move, if
+  //  i) the candidate is more than |allowed_offset| above |last_delay|; a
+  //     potential echo control filter can not handle such large differences.
+  //  ii) the candidate is below |last_delay|; holding on to |last_delay|
+  //      could force an echo control into a non-causal state.
+  // TODO(bjornv): How much can we gain by turning the fraction calculation
+  // into tables?
+  if (delay_difference > self->allowed_offset) {
+    fraction = 1.f - kFractionSlope * (delay_difference - self->allowed_offset);
+    if (fraction <= kMinFractionWhenPossiblyCausal) {
+      fraction = kMinFractionWhenPossiblyCausal;
+    }
+  } else if (delay_difference < 0) {
+    fraction = kMinFractionWhenPossiblyNonCausal -
+        kFractionSlope * delay_difference;
+    if (fraction > 1.f) {
+      fraction = 1.f;
+    }
+  }
+
+  // Scale the reference bin and keep the result at or above the minimum
+  // histogram value, |kMinHistogramThreshold|.
+  threshold = self->histogram[self->compare_delay] * fraction;
+  if (threshold <= kMinHistogramThreshold) {
+    threshold = kMinHistogramThreshold;
+  }
+
+  // Spurious values are removed by requiring more than |kMinRequiredHits|
+  // hits for the candidate.
+  if (self->candidate_hits <= kMinRequiredHits) {
+    return 0;
+  }
+  // Enough hits: the candidate is valid if its bin reaches the threshold.
+  return self->histogram[candidate_delay] >= threshold;
+}
+
+// Performs the robust validation of the |candidate_delay| estimated in
+// WebRtc_ProcessBinarySpectrum() by merging |is_instantaneous_valid| and
+// |is_histogram_valid| into one decision. HistogramBasedValidation() has to
+// be called before this function, since it provides |is_histogram_valid|.
+// How the two inputs are combined is explained in the function body.
+//
+// Inputs:
+//  - candidate_delay         : The delay to validate.
+//  - is_instantaneous_valid  : The instantaneous validation performed in
+//                              WebRtc_ProcessBinarySpectrum().
+//  - is_histogram_valid      : The histogram based validation.
+//
+// Return value:
+//  - is_robust               : 1 - The |candidate_delay| is valid according
+//                                  to a combination of the two inputs.
+//                            : 0 - Otherwise.
+static int RobustValidation(const BinaryDelayEstimator* self,
+                            int candidate_delay,
+                            int is_instantaneous_valid,
+                            int is_histogram_valid) {
+  const int no_estimate_yet = (self->last_delay < 0);
+
+  // The decision rests on two algorithms: 1) the instantaneous validation
+  // and 2) the histogram based validation.
+  if (!is_histogram_valid) {
+    // Without support from the histogram, the instantaneous validation alone
+    // suffices only while there is no valid estimate, i.e., while
+    // |last_delay| is negative.
+    return no_estimate_yet && is_instantaneous_valid;
+  }
+
+  // With support from the histogram the candidate is accepted if
+  //   i) there is no valid estimate yet, in which case either algorithm is
+  //      enough on its own,
+  //  ii) the instantaneous validation agrees, so that both algorithms are
+  //      certain, or
+  // iii) the histogram is significantly strong, i.e., the bin of the
+  //      candidate exceeds |last_delay_histogram|. The histogram then
+  //      overrules the instantaneous validation.
+  return no_estimate_yet || is_instantaneous_valid ||
+      (self->histogram[candidate_delay] > self->last_delay_histogram);
+}
+
+// Releases a far-end instance together with its two history buffers. Passing
+// NULL is allowed and does nothing, so this can also be used to clean up after
+// a partially failed creation.
+void WebRtc_FreeBinaryDelayEstimatorFarend(BinaryDelayEstimatorFarend* self) {
+  // Like free(), releasing a NULL instance is a no-op.
+  if (self != NULL) {
+    // Release both history buffers before the instance that owns them.
+    free(self->far_bit_counts);
+    self->far_bit_counts = NULL;
+    free(self->binary_far_history);
+    self->binary_far_history = NULL;
+    free(self);
+  }
+}
+
+BinaryDelayEstimatorFarend* WebRtc_CreateBinaryDelayEstimatorFarend(
+    int history_size) {
+  BinaryDelayEstimatorFarend* farend = NULL;
+  // Sanity condition: the history must hold at least two entries.
+  if (history_size <= 1) {
+    return NULL;
+  }
+  farend = malloc(sizeof(*farend));
+  if (farend == NULL) {
+    return NULL;
+  }
+  // Start out empty: the allocation below realloc()s these buffers.
+  farend->far_bit_counts = NULL;
+  farend->binary_far_history = NULL;
+  farend->history_size = 0;
+  if (WebRtc_AllocateFarendBufferMemory(farend, history_size) != 0) {
+    return farend;
+  }
+  WebRtc_FreeBinaryDelayEstimatorFarend(farend);
+  return NULL;
+}
+
+// (Re-)allocates both far-end history buffers to hold |history_size| entries.
+// Returns the resulting |self->history_size|, which is 0 on failure.
+int WebRtc_AllocateFarendBufferMemory(BinaryDelayEstimatorFarend* self,
+                                      int history_size) {
+  void *history = NULL, *bit_counts = NULL;
+  assert(self != NULL);
+  // A failed realloc() keeps the old block; storing its NULL would leak it.
+  if (history_size > 0) {  // A size of 0 or less is handled as a failure.
+    history = realloc(self->binary_far_history,
+                      history_size * sizeof(*self->binary_far_history));
+    bit_counts = realloc(self->far_bit_counts,
+                         history_size * sizeof(*self->far_bit_counts));
+  }
+  if (history != NULL) self->binary_far_history = history;
+  if (bit_counts != NULL) self->far_bit_counts = bit_counts;
+  if ((history == NULL) || (bit_counts == NULL)) history_size = 0;  // Failed.
+  if (history_size > self->history_size) {  // Grown: zero the new entries.
+    int size_diff = history_size - self->history_size;
+    memset(&self->binary_far_history[self->history_size], 0,
+           sizeof(*self->binary_far_history) * size_diff);
+    memset(&self->far_bit_counts[self->history_size], 0,
+           sizeof(*self->far_bit_counts) * size_diff);
+  }
+  self->history_size = history_size;
+  return self->history_size;
+}
+
+ // Resets the far-end state by zeroing all |history_size| entries of both the
+ // bit count buffer and the binary spectrum history.
+ void WebRtc_InitBinaryDelayEstimatorFarend(BinaryDelayEstimatorFarend* self) {
+   assert(self != NULL);
+   memset(self->far_bit_counts,
+          0,
+          self->history_size * sizeof(*self->far_bit_counts));
+   memset(self->binary_far_history,
+          0,
+          self->history_size * sizeof(*self->binary_far_history));
+ }
+
+ // Shifts both far-end history buffers by |delay_shift| entries and zero pads
+ // the vacated entries. A positive shift moves entries towards higher indices
+ // (padding at the start); a negative shift moves them towards lower indices
+ // (padding at the end). A zero shift leaves the buffers untouched.
+ // |abs(delay_shift)| must be smaller than the history size (asserted).
+ void WebRtc_SoftResetBinaryDelayEstimatorFarend(
+     BinaryDelayEstimatorFarend* self, int delay_shift) {
+   const int abs_shift = abs(delay_shift);
+   int shift_size = 0;
+   int dest_index = 0;
+   int src_index = 0;
+   int padding_index = 0;
+   uint32_t* history = NULL;
+   int* bit_counts = NULL;
+
+   assert(self != NULL);
+   shift_size = self->history_size - abs_shift;
+   assert(shift_size > 0);
+   if (delay_shift == 0) {
+     return;
+   }
+   if (delay_shift > 0) {
+     dest_index = abs_shift;
+   } else {
+     src_index = abs_shift;
+     padding_index = shift_size;
+   }
+
+   // Shift and zero pad the binary spectrum history.
+   history = self->binary_far_history;
+   memmove(history + dest_index, history + src_index,
+           sizeof(*history) * shift_size);
+   memset(history + padding_index, 0, sizeof(*history) * abs_shift);
+
+   // Apply the same shift and padding to the bit counts.
+   bit_counts = self->far_bit_counts;
+   memmove(bit_counts + dest_index, bit_counts + src_index,
+           sizeof(*bit_counts) * shift_size);
+   memset(bit_counts + padding_index, 0, sizeof(*bit_counts) * abs_shift);
+ }
+
+ // Pushes |binary_far_spectrum| onto the far-end history. Every stored entry
+ // moves one slot towards the end (the oldest one is dropped) and the new
+ // spectrum, together with its bit count, is stored at index 0.
+ void WebRtc_AddBinaryFarSpectrum(BinaryDelayEstimatorFarend* handle,
+                                  uint32_t binary_far_spectrum) {
+   assert(handle != NULL);
+   // Make room at the front of the spectrum history and insert the new
+   // spectrum there.
+   memmove(handle->binary_far_history + 1, handle->binary_far_history,
+           (handle->history_size - 1) * sizeof(*handle->binary_far_history));
+   handle->binary_far_history[0] = binary_far_spectrum;
+
+   // Keep the bit count history aligned with the spectrum history.
+   memmove(handle->far_bit_counts + 1, handle->far_bit_counts,
+           (handle->history_size - 1) * sizeof(*handle->far_bit_counts));
+   handle->far_bit_counts[0] = BitCount(binary_far_spectrum);
+ }
+
+ // Releases a binary delay estimator together with the buffers it owns.
+ // Passing NULL is a no-op. The far-end instance is only referenced, never
+ // owned, so it is left intact for the user to free separately.
+ void WebRtc_FreeBinaryDelayEstimator(BinaryDelayEstimator* self) {
+   if (self != NULL) {
+     free(self->mean_bit_counts);
+     self->mean_bit_counts = NULL;
+
+     free(self->bit_counts);
+     self->bit_counts = NULL;
+
+     free(self->binary_near_history);
+     self->binary_near_history = NULL;
+
+     free(self->histogram);
+     self->histogram = NULL;
+
+     // Only drop the reference; |farend| belongs to the caller.
+     self->farend = NULL;
+
+     free(self);
+   }
+ }
+
+ // Creates a binary delay estimator that uses |farend| as its far-end
+ // reference and keeps |max_lookahead| + 1 near-end spectra. Returns NULL if
+ // |farend| is NULL, if |max_lookahead| is negative or if an allocation fails.
+ // The instance must be initialized separately before use.
+ BinaryDelayEstimator* WebRtc_CreateBinaryDelayEstimator(
+     BinaryDelayEstimatorFarend* farend, int max_lookahead) {
+   BinaryDelayEstimator* estimator = NULL;
+
+   // Sanity check of the inputs.
+   if ((farend == NULL) || (max_lookahead < 0)) {
+     return NULL;
+   }
+   estimator = malloc(sizeof(*estimator));
+   if (estimator == NULL) {
+     return NULL;
+   }
+
+   estimator->farend = farend;
+   estimator->near_history_size = max_lookahead + 1;
+   estimator->history_size = 0;
+   estimator->robust_validation_enabled = 0;  // Disabled by default.
+   estimator->allowed_offset = 0;
+
+   estimator->lookahead = max_lookahead;
+
+   // The history size dependent buffers start out empty and are created by
+   // the (re-)allocation routine below.
+   estimator->mean_bit_counts = NULL;
+   estimator->bit_counts = NULL;
+   estimator->histogram = NULL;
+   estimator->binary_near_history =
+       malloc((max_lookahead + 1) * sizeof(*estimator->binary_near_history));
+   if ((estimator->binary_near_history != NULL) &&
+       (WebRtc_AllocateHistoryBufferMemory(estimator,
+                                           farend->history_size) != 0)) {
+     return estimator;
+   }
+
+   // Some allocation failed; release whatever was set up so far.
+   WebRtc_FreeBinaryDelayEstimator(estimator);
+   return NULL;
+ }
+
+ // Resizes |buffer| to |count| elements of |element_size| bytes each. Unlike a
+ // bare realloc(), the old buffer is released when the request fails or when
+ // |count| is not positive, so the caller can store the result directly in its
+ // pointer without leaking and without relying on realloc(ptr, 0).
+ static void* ResizeHistoryBuffer(void* buffer,
+                                  int count,
+                                  size_t element_size) {
+   void* resized = NULL;
+
+   if (count > 0) {
+     resized = realloc(buffer, count * element_size);
+   }
+   if (resized == NULL) {
+     free(buffer);
+   }
+   return resized;
+ }
+
+ // (Re-)allocates the |history_size| dependent buffers of |self|. The far-end
+ // buffers are resized as well if their size differs from |history_size|; the
+ // size they end up with is the one used here. Entries added by an expansion
+ // are zeroed.
+ //
+ // Returns the history size now in effect. 0 is returned if any buffer could
+ // not be allocated; the affected pointer is then NULL and its old memory has
+ // been released.
+ int WebRtc_AllocateHistoryBufferMemory(BinaryDelayEstimator* self,
+                                        int history_size) {
+   BinaryDelayEstimatorFarend* far = self->farend;
+   // (Re-)Allocate memory for spectrum and history buffers.
+   if (history_size != far->history_size) {
+     // Only update far-end buffers if we need.
+     history_size = WebRtc_AllocateFarendBufferMemory(far, history_size);
+   }
+   // The extra array element in |mean_bit_counts| and |histogram| is a dummy
+   // element only used while |last_delay| == -2, i.e., before we have a valid
+   // estimate.
+   self->mean_bit_counts = ResizeHistoryBuffer(
+       self->mean_bit_counts, history_size + 1, sizeof(*self->mean_bit_counts));
+   self->bit_counts = ResizeHistoryBuffer(
+       self->bit_counts, history_size, sizeof(*self->bit_counts));
+   self->histogram = ResizeHistoryBuffer(
+       self->histogram, history_size + 1, sizeof(*self->histogram));
+
+   if ((self->mean_bit_counts == NULL) ||
+       (self->bit_counts == NULL) ||
+       (self->histogram == NULL)) {
+     history_size = 0;
+   }
+   // Fill with zeros if we have expanded the buffers.
+   if (history_size > self->history_size) {
+     int size_diff = history_size - self->history_size;
+     memset(&self->mean_bit_counts[self->history_size],
+            0,
+            sizeof(*self->mean_bit_counts) * size_diff);
+     memset(&self->bit_counts[self->history_size],
+            0,
+            sizeof(*self->bit_counts) * size_diff);
+     memset(&self->histogram[self->history_size],
+            0,
+            sizeof(*self->histogram) * size_diff);
+   }
+   self->history_size = history_size;
+
+   return self->history_size;
+ }
+
+ // Puts the estimator in its initial state: histories and histogram cleared,
+ // all mean bit counts at 20 (Q9), probability levels at their maximum and the
+ // delay marked as not yet estimated (-2).
+ void WebRtc_InitBinaryDelayEstimator(BinaryDelayEstimator* self) {
+   int delay = 0;
+   assert(self != NULL);
+
+   memset(self->binary_near_history,
+          0,
+          self->near_history_size * sizeof(*self->binary_near_history));
+   memset(self->bit_counts, 0, self->history_size * sizeof(*self->bit_counts));
+   // Note the inclusive upper bound: |mean_bit_counts| and |histogram| carry
+   // one element more than |history_size|.
+   for (delay = 0; delay <= self->history_size; ++delay) {
+     self->histogram[delay] = 0.f;
+     self->mean_bit_counts[delay] = (20 << 9);  // 20 in Q9.
+   }
+   self->minimum_probability = kMaxBitCountsQ9;  // 32 in Q9.
+   self->last_delay_probability = (int) kMaxBitCountsQ9;  // 32 in Q9.
+
+   // -2 means "no estimate available yet"; -1 is reserved for errors.
+   self->last_delay = -2;
+   self->last_candidate_delay = -2;
+
+   self->compare_delay = self->history_size;
+   self->candidate_hits = 0;
+   self->last_delay_histogram = 0.f;
+ }
+
+ // Lowers the lookahead by |delay_shift| blocks, limited to the range
+ // [0, |near_history_size| - 1]. Returns the shift that was actually applied,
+ // i.e. the old lookahead minus the new one.
+ int WebRtc_SoftResetBinaryDelayEstimator(BinaryDelayEstimator* self,
+                                          int delay_shift) {
+   int old_lookahead = 0;
+   int new_lookahead = 0;
+   int max_lookahead = 0;
+
+   assert(self != NULL);
+   old_lookahead = self->lookahead;
+   max_lookahead = self->near_history_size - 1;
+
+   new_lookahead = old_lookahead - delay_shift;
+   if (new_lookahead < 0) {
+     new_lookahead = 0;
+   }
+   if (new_lookahead > max_lookahead) {
+     new_lookahead = max_lookahead;
+   }
+   self->lookahead = new_lookahead;
+
+   return old_lookahead - new_lookahead;
+ }
+
+ // Processes one near-end binary spectrum and updates the delay estimate.
+ //
+ // The (possibly lookahead delayed) near-end spectrum is compared with every
+ // far-end spectrum in the history, the per-delay bit counts are smoothed into
+ // |mean_bit_counts|, and the delay with the lowest smoothed value becomes the
+ // candidate. The candidate replaces |last_delay| only if it passes the
+ // validation below.
+ //
+ // Returns |last_delay| after the update (-2 as long as no candidate has been
+ // accepted since initialization), or -1 if the far-end history size does not
+ // match the history size of |self|.
+ int WebRtc_ProcessBinarySpectrum(BinaryDelayEstimator* self,
+ uint32_t binary_near_spectrum) {
+ int i = 0;
+ // Stays -1 if no |mean_bit_counts| entry is below |kMaxBitCountsQ9|.
+ int candidate_delay = -1;
+ int valid_candidate = 0;
+
+ int32_t value_best_candidate = kMaxBitCountsQ9;
+ int32_t value_worst_candidate = 0;
+ int32_t valley_depth = 0;
+
+ assert(self != NULL);
+ if (self->farend->history_size != self->history_size) {
+ // Non matching history sizes.
+ return -1;
+ }
+ if (self->near_history_size > 1) {
+ // If we apply lookahead, shift near-end binary spectrum history. Insert
+ // current |binary_near_spectrum| and pull out the delayed one.
+ memmove(&(self->binary_near_history[1]), &(self->binary_near_history[0]),
+ (self->near_history_size - 1) * sizeof(uint32_t));
+ self->binary_near_history[0] = binary_near_spectrum;
+ binary_near_spectrum = self->binary_near_history[self->lookahead];
+ }
+
+ // Compare with delayed spectra and store the |bit_counts| for each delay.
+ BitCountComparison(binary_near_spectrum, self->farend->binary_far_history,
+ self->history_size, self->bit_counts);
+
+ // Update |mean_bit_counts|, which is the smoothed version of |bit_counts|.
+ for (i = 0; i < self->history_size; i++) {
+ // |bit_counts| is constrained to [0, 32], meaning we can smooth with a
+ // factor up to 2^26. We use Q9.
+ int32_t bit_count = (self->bit_counts[i] << 9); // Q9.
+
+ // Update |mean_bit_counts| only when far-end signal has something to
+ // contribute. If |far_bit_counts| is zero the far-end signal is weak and
+ // we likely have a poor echo condition, hence don't update.
+ if (self->farend->far_bit_counts[i] > 0) {
+ // Make number of right shifts piecewise linear w.r.t. |far_bit_counts|.
+ int shifts = kShiftsAtZero;
+ shifts -= (kShiftsLinearSlope * self->farend->far_bit_counts[i]) >> 4;
+ WebRtc_MeanEstimatorFix(bit_count, shifts, &(self->mean_bit_counts[i]));
+ }
+ }
+
+ // Find |candidate_delay|, |value_best_candidate| and |value_worst_candidate|
+ // of |mean_bit_counts|.
+ for (i = 0; i < self->history_size; i++) {
+ if (self->mean_bit_counts[i] < value_best_candidate) {
+ value_best_candidate = self->mean_bit_counts[i];
+ candidate_delay = i;
+ }
+ if (self->mean_bit_counts[i] > value_worst_candidate) {
+ value_worst_candidate = self->mean_bit_counts[i];
+ }
+ }
+ valley_depth = value_worst_candidate - value_best_candidate;
+
+ // The |value_best_candidate| is a good indicator on the probability of
+ // |candidate_delay| being an accurate delay (a small |value_best_candidate|
+ // means a good binary match). In the following sections we make a decision
+ // whether to update |last_delay| or not.
+ // 1) If the difference bit counts between the best and the worst delay
+ // candidates is too small we consider the situation to be unreliable and
+ // don't update |last_delay|.
+ // 2) If the situation is reliable we update |last_delay| if the value of the
+ // best candidate delay has a value less than
+ // i) an adaptive threshold |minimum_probability|, or
+ // ii) this corresponding value |last_delay_probability|, but updated at
+ // this time instant.
+
+ // Update |minimum_probability|.
+ if ((self->minimum_probability > kProbabilityLowerLimit) &&
+ (valley_depth > kProbabilityMinSpread)) {
+ // The "hard" threshold can't be lower than 17 (in Q9).
+ // The valley in the curve also has to be distinct, i.e., the
+ // difference between |value_worst_candidate| and |value_best_candidate| has
+ // to be large enough.
+ int32_t threshold = value_best_candidate + kProbabilityOffset;
+ if (threshold < kProbabilityLowerLimit) {
+ threshold = kProbabilityLowerLimit;
+ }
+ if (self->minimum_probability > threshold) {
+ self->minimum_probability = threshold;
+ }
+ }
+ // Update |last_delay_probability|.
+ // We use a Markov type model, i.e., a slowly increasing level over time.
+ self->last_delay_probability++;
+ // Validate |candidate_delay|. We have a reliable instantaneous delay
+ // estimate if
+ // 1) The valley is distinct enough (|valley_depth| > |kProbabilityOffset|)
+ // and
+ // 2) The depth of the valley is deep enough
+ // (|value_best_candidate| < |minimum_probability|)
+ // or deeper than the best estimate so far
+ // (|value_best_candidate| < |last_delay_probability|)
+ valid_candidate = ((valley_depth > kProbabilityOffset) &&
+ ((value_best_candidate < self->minimum_probability) ||
+ (value_best_candidate < self->last_delay_probability)));
+
+ // The robust validation statistics are updated on every call, whether or not
+ // robust validation is enabled.
+ UpdateRobustValidationStatistics(self, candidate_delay, valley_depth,
+ value_best_candidate);
+ if (self->robust_validation_enabled) {
+ // With robust validation the instantaneous decision above is only one of
+ // the inputs; RobustValidation() makes the final call.
+ int is_histogram_valid = HistogramBasedValidation(self, candidate_delay);
+ valid_candidate = RobustValidation(self, candidate_delay, valid_candidate,
+ is_histogram_valid);
+
+ }
+ if (valid_candidate) {
+ if (candidate_delay != self->last_delay) {
+ // The estimate changes; remember the (capped) histogram height at the new
+ // delay.
+ self->last_delay_histogram =
+ (self->histogram[candidate_delay] > kLastHistogramMax ?
+ kLastHistogramMax : self->histogram[candidate_delay]);
+ // Adjust the histogram if we made a change to |last_delay|, though it was
+ // not the most likely one according to the histogram.
+ if (self->histogram[candidate_delay] <
+ self->histogram[self->compare_delay]) {
+ self->histogram[self->compare_delay] = self->histogram[candidate_delay];
+ }
+ }
+ self->last_delay = candidate_delay;
+ // |last_delay_probability| tracks the lowest cost seen for the accepted
+ // delay; it was incremented above, so it drifts upwards between updates.
+ if (value_best_candidate < self->last_delay_probability) {
+ self->last_delay_probability = value_best_candidate;
+ }
+ self->compare_delay = self->last_delay;
+ }
+
+ return self->last_delay;
+ }
+
+ // Accessor for the most recently accepted delay estimate. The value is -2
+ // until an estimate has been accepted after initialization.
+ int WebRtc_binary_last_delay(BinaryDelayEstimator* self) {
+   int delay = 0;
+
+   assert(self != NULL);
+   delay = self->last_delay;
+   return delay;
+ }
+
+ // Returns a quality measure for the last delay estimate.
+ // With robust validation enabled it is the histogram height at
+ // |compare_delay| relative to |kHistogramMax|. Otherwise it is derived from
+ // |last_delay_probability| and never negative.
+ float WebRtc_binary_last_delay_quality(BinaryDelayEstimator* self) {
+   float quality = 0;
+   assert(self != NULL);
+
+   if (!self->robust_validation_enabled) {
+     // |last_delay_probability| measures how deep the minimum of the cost
+     // function is, i.e. it is closer to an error probability; invert it.
+     quality = (float) (kMaxBitCountsQ9 - self->last_delay_probability) /
+         kMaxBitCountsQ9;
+     return (quality < 0) ? 0 : quality;
+   }
+
+   // Simply a linear function of the histogram height at delay estimate.
+   quality = self->histogram[self->compare_delay] / kHistogramMax;
+   return quality;
+ }
+
+ // Moves |*mean_value| towards |new_value| by 2^-|factor| of their difference:
+ //   mean_new = mean_value + ((new_value - mean_value) >> factor)
+ // The shift is applied to the magnitude of the difference, so the step is
+ // rounded towards zero for negative as well as positive differences.
+ void WebRtc_MeanEstimatorFix(int32_t new_value,
+                              int factor,
+                              int32_t* mean_value) {
+   int32_t step = new_value - *mean_value;
+
+   step = (step < 0) ? -((-step) >> factor) : (step >> factor);
+   *mean_value += step;
+ }
diff --git a/third_party/webrtc/src/webrtc/modules/audio_processing/utility/delay_estimator.h b/third_party/webrtc/src/webrtc/modules/audio_processing/utility/delay_estimator.h
new file mode 100644
index 00000000..65c3f034
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/modules/audio_processing/utility/delay_estimator.h
@@ -0,0 +1,251 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+// Performs delay estimation on binary converted spectra.
+// The return value is 0 - OK and -1 - Error, unless otherwise stated.
+
+#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_UTILITY_DELAY_ESTIMATOR_H_
+#define WEBRTC_MODULES_AUDIO_PROCESSING_UTILITY_DELAY_ESTIMATOR_H_
+
+#include "webrtc/typedefs.h"
+
+static const int32_t kMaxBitCountsQ9 = (32 << 9); // 32 matching bits in Q9.
+
+ // Far-end state of the binary delay estimation. A BinaryDelayEstimator refers
+ // to an instance of this type but does not own it.
+ typedef struct {
+ // Pointer to bit counts.
+ // One entry per stored far-end spectrum (|history_size| entries); entry 0
+ // belongs to the most recently added spectrum.
+ int* far_bit_counts;
+ // Binary history variables.
+ // |binary_far_history| holds |history_size| binary spectra with the most
+ // recently added one at index 0.
+ uint32_t* binary_far_history;
+ int history_size;
+ } BinaryDelayEstimatorFarend;
+
+ // Near-end state of the binary delay estimation, including the bookkeeping
+ // used to validate delay candidates.
+ typedef struct {
+ // Pointer to bit counts.
+ // Smoothed bit counts in Q9, one per delay, plus one extra dummy element
+ // (|history_size| + 1 entries in total).
+ int32_t* mean_bit_counts;
+ // Array only used locally in ProcessBinarySpectrum() but whose size is
+ // determined at run-time.
+ int32_t* bit_counts;
+
+ // Binary history variables.
+ // |binary_near_history| holds |near_history_size| spectra, which is the
+ // maximum lookahead + 1. |history_size| has to match the history size of
+ // |farend| for the estimator to process spectra.
+ uint32_t* binary_near_history;
+ int near_history_size;
+ int history_size;
+
+ // Delay estimation variables.
+ // Both are in Q9 and start at |kMaxBitCountsQ9|.
+ int32_t minimum_probability;
+ int last_delay_probability;
+
+ // Delay memory.
+ // -2 until a delay candidate has been accepted after initialization.
+ int last_delay;
+
+ // Robust validation
+ // |histogram| has |history_size| + 1 entries; |robust_validation_enabled|
+ // is 0 (disabled) after creation.
+ int robust_validation_enabled;
+ int allowed_offset;
+ int last_candidate_delay;
+ int compare_delay;
+ int candidate_hits;
+ float* histogram;
+ float last_delay_histogram;
+
+ // For dynamically changing the lookahead when using SoftReset...().
+ int lookahead;
+
+ // Far-end binary spectrum history buffer etc.
+ // Not owned; it has to be freed separately by the user.
+ BinaryDelayEstimatorFarend* farend;
+ } BinaryDelayEstimator;
+
+// Releases the memory allocated by
+// WebRtc_CreateBinaryDelayEstimatorFarend(...).
+// Input:
+// - self : Pointer to the binary delay estimation far-end
+// instance which is the return value of
+// WebRtc_CreateBinaryDelayEstimatorFarend().
+//
+void WebRtc_FreeBinaryDelayEstimatorFarend(BinaryDelayEstimatorFarend* self);
+
+// Allocates the memory needed by the far-end part of the binary delay
+// estimation. The memory needs to be initialized separately through
+// WebRtc_InitBinaryDelayEstimatorFarend(...).
+//
+// Inputs:
+// - history_size : Size of the far-end binary spectrum history.
+//
+// Return value:
+// - BinaryDelayEstimatorFarend*
+// : Created |handle|. If the memory can't be allocated
+// or if any of the input parameters are invalid NULL
+// is returned.
+//
+BinaryDelayEstimatorFarend* WebRtc_CreateBinaryDelayEstimatorFarend(
+ int history_size);
+
+// Re-allocates the buffers.
+//
+// Inputs:
+// - self : Pointer to the binary estimation far-end instance
+// which is the return value of
+// WebRtc_CreateBinaryDelayEstimatorFarend().
+// - history_size : Size of the far-end binary spectrum history.
+//
+// Return value:
+// - history_size : The history size allocated.
+int WebRtc_AllocateFarendBufferMemory(BinaryDelayEstimatorFarend* self,
+ int history_size);
+
+// Initializes the delay estimation far-end instance created with
+// WebRtc_CreateBinaryDelayEstimatorFarend(...).
+//
+// Input:
+// - self : Pointer to the delay estimation far-end instance.
+//
+// Output:
+// - self : Initialized far-end instance.
+//
+void WebRtc_InitBinaryDelayEstimatorFarend(BinaryDelayEstimatorFarend* self);
+
+// Soft resets the delay estimation far-end instance created with
+// WebRtc_CreateBinaryDelayEstimatorFarend(...).
+//
+ // Inputs:
+ // - self : Pointer to the delay estimation far-end instance.
+ // - delay_shift : The amount of blocks to shift history buffers.
+//
+void WebRtc_SoftResetBinaryDelayEstimatorFarend(
+ BinaryDelayEstimatorFarend* self, int delay_shift);
+
+// Adds the binary far-end spectrum to the internal far-end history buffer. This
+// spectrum is used as reference when calculating the delay using
+// WebRtc_ProcessBinarySpectrum().
+//
+// Inputs:
+// - self : Pointer to the delay estimation far-end
+// instance.
+// - binary_far_spectrum : Far-end binary spectrum.
+//
+// Output:
+// - self : Updated far-end instance.
+//
+void WebRtc_AddBinaryFarSpectrum(BinaryDelayEstimatorFarend* self,
+ uint32_t binary_far_spectrum);
+
+// Releases the memory allocated by WebRtc_CreateBinaryDelayEstimator(...).
+//
+// Note that BinaryDelayEstimator utilizes BinaryDelayEstimatorFarend, but does
+// not take ownership of it, hence the BinaryDelayEstimator has to be torn down
+// before the far-end.
+//
+// Input:
+// - self : Pointer to the binary delay estimation instance
+// which is the return value of
+// WebRtc_CreateBinaryDelayEstimator().
+//
+void WebRtc_FreeBinaryDelayEstimator(BinaryDelayEstimator* self);
+
+// Allocates the memory needed by the binary delay estimation. The memory needs
+// to be initialized separately through WebRtc_InitBinaryDelayEstimator(...).
+//
+// See WebRtc_CreateDelayEstimator(..) in delay_estimator_wrapper.c for detailed
+// description.
+BinaryDelayEstimator* WebRtc_CreateBinaryDelayEstimator(
+ BinaryDelayEstimatorFarend* farend, int max_lookahead);
+
+// Re-allocates |history_size| dependent buffers. The far-end buffers will be
+// updated at the same time if needed.
+//
+// Input:
+// - self : Pointer to the binary estimation instance which is
+// the return value of
+// WebRtc_CreateBinaryDelayEstimator().
+// - history_size : Size of the history buffers.
+//
+// Return value:
+// - history_size : The history size allocated.
+int WebRtc_AllocateHistoryBufferMemory(BinaryDelayEstimator* self,
+ int history_size);
+
+// Initializes the delay estimation instance created with
+// WebRtc_CreateBinaryDelayEstimator(...).
+//
+// Input:
+// - self : Pointer to the delay estimation instance.
+//
+// Output:
+// - self : Initialized instance.
+//
+void WebRtc_InitBinaryDelayEstimator(BinaryDelayEstimator* self);
+
+// Soft resets the delay estimation instance created with
+// WebRtc_CreateBinaryDelayEstimator(...).
+//
+ // Inputs:
+ // - self : Pointer to the delay estimation instance.
+ // - delay_shift : The amount of blocks to shift history buffers.
+//
+// Return value:
+// - actual_shifts : The actual number of shifts performed.
+//
+int WebRtc_SoftResetBinaryDelayEstimator(BinaryDelayEstimator* self,
+ int delay_shift);
+
+// Estimates and returns the delay between the binary far-end and binary near-
+// end spectra. It is assumed the binary far-end spectrum has been added using
+// WebRtc_AddBinaryFarSpectrum() prior to this call. The value will be offset by
+// the lookahead (i.e. the lookahead should be subtracted from the returned
+// value).
+//
+// Inputs:
+// - self : Pointer to the delay estimation instance.
+// - binary_near_spectrum : Near-end binary spectrum of the current block.
+//
+// Output:
+// - self : Updated instance.
+//
+ // Return value:
+ // - delay : >= 0 - Calculated delay value.
+ // -1 - Error (the far-end and near-end history sizes
+ // do not match).
+ // -2 - Insufficient data for estimation.
+//
+int WebRtc_ProcessBinarySpectrum(BinaryDelayEstimator* self,
+ uint32_t binary_near_spectrum);
+
+// Returns the last calculated delay updated by the function
+// WebRtc_ProcessBinarySpectrum(...).
+//
+// Input:
+// - self : Pointer to the delay estimation instance.
+//
+// Return value:
+// - delay : >= 0 - Last calculated delay value
+// -2 - Insufficient data for estimation.
+//
+int WebRtc_binary_last_delay(BinaryDelayEstimator* self);
+
+// Returns the estimation quality of the last calculated delay updated by the
+// function WebRtc_ProcessBinarySpectrum(...). The estimation quality is a value
+// in the interval [0, 1]. The higher the value, the better the quality.
+//
+// Return value:
+// - delay_quality : >= 0 - Estimation quality of last calculated
+// delay value.
+float WebRtc_binary_last_delay_quality(BinaryDelayEstimator* self);
+
+// Updates the |mean_value| recursively with a step size of 2^-|factor|. This
+// function is used internally in the Binary Delay Estimator as well as the
+// Fixed point wrapper.
+//
+// Inputs:
+// - new_value : The new value the mean should be updated with.
+// - factor : The step size, in number of right shifts.
+//
+// Input/Output:
+// - mean_value : Pointer to the mean value.
+//
+void WebRtc_MeanEstimatorFix(int32_t new_value,
+ int factor,
+ int32_t* mean_value);
+
+#endif // WEBRTC_MODULES_AUDIO_PROCESSING_UTILITY_DELAY_ESTIMATOR_H_
diff --git a/third_party/webrtc/src/webrtc/modules/audio_processing/utility/delay_estimator_internal.h b/third_party/webrtc/src/webrtc/modules/audio_processing/utility/delay_estimator_internal.h
new file mode 100644
index 00000000..fd11028f
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/modules/audio_processing/utility/delay_estimator_internal.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+// Header file including the delay estimator handle used for testing.
+
+#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_UTILITY_DELAY_ESTIMATOR_INTERNAL_H_
+#define WEBRTC_MODULES_AUDIO_PROCESSING_UTILITY_DELAY_ESTIMATOR_INTERNAL_H_
+
+#include "webrtc/modules/audio_processing/utility/delay_estimator.h"
+#include "webrtc/typedefs.h"
+
+ // A single spectrum value stored either as a float or as a 32-bit integer;
+ // both members share the same storage.
+ // NOTE(review): presumably selected by the float vs. fixed-point wrapper API
+ // - confirm against delay_estimator_wrapper.c.
+ typedef union {
+ float float_;
+ int32_t int32_;
+ } SpectrumType;
+
+ // Far-end handle of the delay estimator wrapper. It is exposed in this header
+ // so that tests can inspect the fields.
+ typedef struct {
+ // Pointers to mean values of spectrum.
+ SpectrumType* mean_far_spectrum;
+ // |mean_far_spectrum| initialization indicator.
+ int far_spectrum_initialized;
+
+ // NOTE(review): presumably the number of elements in |mean_far_spectrum| -
+ // confirm against the wrapper implementation.
+ int spectrum_size;
+
+ // Far-end part of binary spectrum based delay estimation.
+ BinaryDelayEstimatorFarend* binary_farend;
+ } DelayEstimatorFarend;
+
+ // Near-end handle of the delay estimator wrapper. It is exposed in this header
+ // so that tests can inspect the fields.
+ typedef struct {
+ // Pointers to mean values of spectrum.
+ SpectrumType* mean_near_spectrum;
+ // |mean_near_spectrum| initialization indicator.
+ int near_spectrum_initialized;
+
+ // NOTE(review): presumably the number of elements in |mean_near_spectrum| -
+ // confirm against the wrapper implementation.
+ int spectrum_size;
+
+ // Binary spectrum based delay estimator
+ BinaryDelayEstimator* binary_handle;
+ } DelayEstimator;
+
+#endif // WEBRTC_MODULES_AUDIO_PROCESSING_UTILITY_DELAY_ESTIMATOR_INTERNAL_H_
diff --git a/third_party/webrtc/src/webrtc/modules/audio_processing/utility/delay_estimator_unittest.cc b/third_party/webrtc/src/webrtc/modules/audio_processing/utility/delay_estimator_unittest.cc
new file mode 100644
index 00000000..4ebe0e61
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/modules/audio_processing/utility/delay_estimator_unittest.cc
@@ -0,0 +1,621 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "testing/gtest/include/gtest/gtest.h"
+
+extern "C" {
+#include "webrtc/modules/audio_processing/utility/delay_estimator.h"
+#include "webrtc/modules/audio_processing/utility/delay_estimator_internal.h"
+#include "webrtc/modules/audio_processing/utility/delay_estimator_wrapper.h"
+}
+#include "webrtc/typedefs.h"
+
+namespace {
+
+ // Number of elements in the dummy far-end and near-end spectra.
+ enum { kSpectrumSize = 65 };
+ // Delay history sizes.
+ enum { kMaxDelay = 100 };
+ enum { kLookahead = 10 };
+ // The history has to cover both the maximum delay and the lookahead.
+ enum { kHistorySize = kMaxDelay + kLookahead };
+ // Length of binary spectrum sequence.
+ enum { kSequenceLength = 400 };
+
+ // Alternative sizes that differ from the defaults above.
+ const int kDifferentHistorySize = 3;
+ const int kDifferentLookahead = 1;
+
+ // Flag values (disabled, enabled) and the number of them.
+ const int kEnable[] = { 0, 1 };
+ const size_t kSizeEnable = sizeof(kEnable) / sizeof(*kEnable);
+
+ // Test fixture holding one wrapper level estimator (|handle_| with
+ // |farend_handle_|) and one binary estimator (|binary_| with
+ // |binary_farend_|), plus the dummy input data fed to them.
+ class DelayEstimatorTest : public ::testing::Test {
+ protected:
+ DelayEstimatorTest();
+ virtual void SetUp();
+ virtual void TearDown();
+
+ // Initialize the wrapper level and the binary instances, respectively, and
+ // verify their initial state.
+ void Init();
+ void InitBinary();
+ // Checks that |binary_handle| reports |delay| and, unless |delay| is -2, that
+ // |delay| equals |offset|.
+ void VerifyDelay(BinaryDelayEstimator* binary_handle, int offset, int delay);
+ // Feeds the binary spectrum sequence to both estimators and verifies the
+ // resulting delays.
+ void RunBinarySpectra(BinaryDelayEstimator* binary1,
+ BinaryDelayEstimator* binary2,
+ int near_offset, int lookahead_offset, int far_offset);
+ // Runs RunBinarySpectra() over a range of far-end offsets against a second,
+ // temporary estimator.
+ void RunBinarySpectraTest(int near_offset, int lookahead_offset,
+ int ref_robust_validation, int robust_validation);
+
+ // Wrapper level instances; |self_| and |farend_self_| are typed views of
+ // |handle_| and |farend_handle_|.
+ void* handle_;
+ DelayEstimator* self_;
+ void* farend_handle_;
+ DelayEstimatorFarend* farend_self_;
+ // Binary level instances.
+ BinaryDelayEstimator* binary_;
+ BinaryDelayEstimatorFarend* binary_farend_;
+ int spectrum_size_;
+ // Dummy input spectra.
+ float far_f_[kSpectrumSize];
+ float near_f_[kSpectrumSize];
+ uint16_t far_u16_[kSpectrumSize];
+ uint16_t near_u16_[kSpectrumSize];
+ // Binary spectrum sequence used to verify delay estimates.
+ uint32_t binary_spectrum_[kSequenceLength + kHistorySize];
+ };
+
+ // Clears all handles and fills the dummy input data.
+ DelayEstimatorTest::DelayEstimatorTest()
+     : handle_(NULL),
+       self_(NULL),
+       farend_handle_(NULL),
+       farend_self_(NULL),
+       binary_(NULL),
+       binary_farend_(NULL),
+       spectrum_size_(kSpectrumSize) {
+   // The dummy spectra only need to be non-zero; every byte is set to the
+   // same small value.
+   memset(far_f_, 1, sizeof(far_f_));
+   memset(near_f_, 2, sizeof(near_f_));
+   memset(far_u16_, 1, sizeof(far_u16_));
+   memset(near_u16_, 2, sizeof(near_u16_));
+   // Build the binary spectrum sequence 1, 3, 9, ... (each element is three
+   // times the previous one, wrapping in 32 bits). |kSequenceLength| has to be
+   // long enough for the delay estimation to leave the initialized state.
+   const int kNumSpectra = kSequenceLength + kHistorySize;
+   uint32_t spectrum = 1;
+   for (int i = 0; i < kNumSpectra; ++i) {
+     binary_spectrum_[i] = spectrum;
+     spectrum *= 3;
+   }
+ }
+
+ // Creates the wrapper level and the binary level instances. In each pair the
+ // far-end is created first since the estimator is created from it. The test
+ // is aborted if any creation fails.
+ void DelayEstimatorTest::SetUp() {
+ farend_handle_ = WebRtc_CreateDelayEstimatorFarend(kSpectrumSize,
+ kHistorySize);
+ ASSERT_TRUE(farend_handle_ != NULL);
+ farend_self_ = reinterpret_cast<DelayEstimatorFarend*>(farend_handle_);
+ handle_ = WebRtc_CreateDelayEstimator(farend_handle_, kLookahead);
+ ASSERT_TRUE(handle_ != NULL);
+ self_ = reinterpret_cast<DelayEstimator*>(handle_);
+ binary_farend_ = WebRtc_CreateBinaryDelayEstimatorFarend(kHistorySize);
+ ASSERT_TRUE(binary_farend_ != NULL);
+ binary_ = WebRtc_CreateBinaryDelayEstimator(binary_farend_, kLookahead);
+ ASSERT_TRUE(binary_ != NULL);
+ }
+
+ // Releases everything created in SetUp(). In each pair the estimator is torn
+ // down before the far-end it refers to.
+ void DelayEstimatorTest::TearDown() {
+   // Binary level instances.
+   WebRtc_FreeBinaryDelayEstimator(binary_);
+   binary_ = NULL;
+   WebRtc_FreeBinaryDelayEstimatorFarend(binary_farend_);
+   binary_farend_ = NULL;
+
+   // Wrapper level instances.
+   WebRtc_FreeDelayEstimator(handle_);
+   handle_ = NULL;
+   self_ = NULL;
+   WebRtc_FreeDelayEstimatorFarend(farend_handle_);
+   farend_handle_ = NULL;
+   farend_self_ = NULL;
+ }
+
+ // Initializes the wrapper level instances and verifies the state they are
+ // expected to have directly afterwards.
+ void DelayEstimatorTest::Init() {
+ // Initialize Delay Estimator
+ EXPECT_EQ(0, WebRtc_InitDelayEstimatorFarend(farend_handle_));
+ EXPECT_EQ(0, WebRtc_InitDelayEstimator(handle_));
+ // Verify initialization.
+ EXPECT_EQ(0, farend_self_->far_spectrum_initialized);
+ EXPECT_EQ(0, self_->near_spectrum_initialized);
+ EXPECT_EQ(-2, WebRtc_last_delay(handle_)); // Delay in initial state.
+ EXPECT_FLOAT_EQ(0, WebRtc_last_delay_quality(handle_)); // Zero quality.
+ }
+
+ // Initializes the binary level instances and verifies that no delay estimate
+ // is available afterwards.
+ void DelayEstimatorTest::InitBinary() {
+ // Initialize Binary Delay Estimator (far-end part).
+ WebRtc_InitBinaryDelayEstimatorFarend(binary_farend_);
+ // Initialize Binary Delay Estimator
+ WebRtc_InitBinaryDelayEstimator(binary_);
+ // Verify initialization. This does not guarantee a complete check, since
+ // |last_delay| may be equal to -2 before initialization if done on the fly.
+ EXPECT_EQ(-2, binary_->last_delay);
+ }
+
+ // Checks that |binary_handle| reports |delay| as its last delay and, once an
+ // estimate is available (|delay| != -2), that it equals the expected
+ // |offset|.
+ void DelayEstimatorTest::VerifyDelay(BinaryDelayEstimator* binary_handle,
+ int offset, int delay) {
+ // Verify that WebRtc_binary_last_delay() returns the correct delay.
+ EXPECT_EQ(delay, WebRtc_binary_last_delay(binary_handle));
+
+ if (delay != -2) {
+ // Verify correct delay estimate. In the non-causal case the true delay
+ // is equivalent with the |offset|.
+ EXPECT_EQ(offset, delay);
+ }
+ }
+
+ // Runs the whole binary spectrum sequence through |binary1| and |binary2|,
+ // which share |binary_farend_|, and verifies the delay estimates after every
+ // block.
+ //
+ // |far_offset| is the offset of the far-end sequence relative to the near-end
+ // sequence fed to |binary1|. |binary2| gets a near-end sequence that is
+ // additionally shifted by |near_offset|, and |lookahead_offset| is the
+ // expected contribution of its different lookahead.
+ void DelayEstimatorTest::RunBinarySpectra(BinaryDelayEstimator* binary1,
+ BinaryDelayEstimator* binary2,
+ int near_offset,
+ int lookahead_offset,
+ int far_offset) {
+ // Non-zero if exactly one of the estimators has robust validation enabled.
+ int different_validations = binary1->robust_validation_enabled ^
+ binary2->robust_validation_enabled;
+ WebRtc_InitBinaryDelayEstimatorFarend(binary_farend_);
+ WebRtc_InitBinaryDelayEstimator(binary1);
+ WebRtc_InitBinaryDelayEstimator(binary2);
+ // Verify initialization. This does not guarantee a complete check, since
+ // |last_delay| may be equal to -2 before initialization if done on the fly.
+ EXPECT_EQ(-2, binary1->last_delay);
+ EXPECT_EQ(-2, binary2->last_delay);
+ // Start at |kLookahead| so that |i + far_offset| stays non-negative for
+ // far-end offsets down to -|kLookahead|.
+ for (int i = kLookahead; i < (kSequenceLength + kLookahead); i++) {
+ WebRtc_AddBinaryFarSpectrum(binary_farend_,
+ binary_spectrum_[i + far_offset]);
+ int delay_1 = WebRtc_ProcessBinarySpectrum(binary1, binary_spectrum_[i]);
+ int delay_2 =
+ WebRtc_ProcessBinarySpectrum(binary2,
+ binary_spectrum_[i - near_offset]);
+
+ VerifyDelay(binary1, far_offset + kLookahead, delay_1);
+ VerifyDelay(binary2,
+ far_offset + kLookahead + lookahead_offset + near_offset,
+ delay_2);
+ // Expect the two delay estimates to be offset by |lookahead_offset| +
+ // |near_offset| when we have left the initial state.
+ if ((delay_1 != -2) && (delay_2 != -2)) {
+ EXPECT_EQ(delay_1, delay_2 - lookahead_offset - near_offset);
+ }
+ // For the case of identical signals |delay_1| and |delay_2| should match
+ // all the time, unless one of them has robust validation turned on. In
+ // that case the robust validation leaves the initial state faster.
+ if ((near_offset == 0) && (lookahead_offset == 0)) {
+ if (!different_validations) {
+ EXPECT_EQ(delay_1, delay_2);
+ } else {
+ if (binary1->robust_validation_enabled) {
+ EXPECT_GE(delay_1, delay_2);
+ } else {
+ EXPECT_GE(delay_2, delay_1);
+ }
+ }
+ }
+ }
+ // Verify that we have left the initialized state.
+ EXPECT_NE(-2, WebRtc_binary_last_delay(binary1));
+ EXPECT_LT(0, WebRtc_binary_last_delay_quality(binary1));
+ EXPECT_NE(-2, WebRtc_binary_last_delay(binary2));
+ EXPECT_LT(0, WebRtc_binary_last_delay_quality(binary2));
+ }
+
+ // Compares the reference estimator |binary_| with a second, temporary
+ // estimator that shares |binary_farend_| but uses a lookahead of
+ // |kLookahead| + |lookahead_offset|.
+ //
+ // |ref_robust_validation| and |robust_validation| enable (non-zero) or disable
+ // robust validation in the reference and in the second estimator,
+ // respectively. |near_offset| is the shift of the near-end sequence fed to
+ // the second estimator. The temporary estimator is freed and the reference is
+ // restored to disabled robust validation before returning.
+ void DelayEstimatorTest::RunBinarySpectraTest(int near_offset,
+                                               int lookahead_offset,
+                                               int ref_robust_validation,
+                                               int robust_validation) {
+   BinaryDelayEstimator* binary2 =
+       WebRtc_CreateBinaryDelayEstimator(binary_farend_,
+                                         kLookahead + lookahead_offset);
+   // Creation can fail (NULL); abort this test instead of dereferencing a NULL
+   // pointer below. Nothing has been modified yet, so no cleanup is needed.
+   ASSERT_TRUE(binary2 != NULL);
+   // Verify the delay for both causal and non-causal systems. For causal systems
+   // the delay is equivalent with a positive |offset| of the far-end sequence.
+   // For non-causal systems the delay is equivalent with a negative |offset| of
+   // the far-end sequence.
+   binary_->robust_validation_enabled = ref_robust_validation;
+   binary2->robust_validation_enabled = robust_validation;
+   for (int offset = -kLookahead;
+        offset < kMaxDelay - lookahead_offset - near_offset;
+        offset++) {
+     RunBinarySpectra(binary_, binary2, near_offset, lookahead_offset, offset);
+   }
+   WebRtc_FreeBinaryDelayEstimator(binary2);
+   binary2 = NULL;
+   binary_->robust_validation_enabled = 0;  // Reset reference.
+ }
+
+TEST_F(DelayEstimatorTest, CorrectErrorReturnsOfWrapper) {
+ // In this test we verify correct error returns on invalid API calls.
+
+ // WebRtc_CreateDelayEstimatorFarend() and WebRtc_CreateDelayEstimator()
+ // should return a NULL pointer on invalid input values.
+ // Make sure we have a non-NULL value at start, so we can detect NULL after
+ // create failure.
+ void* handle = farend_handle_;
+ handle = WebRtc_CreateDelayEstimatorFarend(33, kHistorySize);
+ EXPECT_TRUE(handle == NULL);
+ handle = WebRtc_CreateDelayEstimatorFarend(kSpectrumSize, 1);
+ EXPECT_TRUE(handle == NULL);
+
+ handle = handle_;
+ handle = WebRtc_CreateDelayEstimator(NULL, kLookahead);
+ EXPECT_TRUE(handle == NULL);
+ handle = WebRtc_CreateDelayEstimator(farend_handle_, -1);
+ EXPECT_TRUE(handle == NULL);
+
+ // WebRtc_InitDelayEstimatorFarend() and WebRtc_InitDelayEstimator() should
+ // return -1 if we have a NULL pointer as |handle|.
+ EXPECT_EQ(-1, WebRtc_InitDelayEstimatorFarend(NULL));
+ EXPECT_EQ(-1, WebRtc_InitDelayEstimator(NULL));
+
+ // WebRtc_AddFarSpectrumFloat() should return -1 if we have:
+ // 1) NULL pointer as |handle|.
+ // 2) NULL pointer as far-end spectrum.
+ // 3) Incorrect spectrum size.
+ EXPECT_EQ(-1, WebRtc_AddFarSpectrumFloat(NULL, far_f_, spectrum_size_));
+ // Use |farend_handle_| which is properly created at SetUp().
+ EXPECT_EQ(-1, WebRtc_AddFarSpectrumFloat(farend_handle_, NULL,
+ spectrum_size_));
+ EXPECT_EQ(-1, WebRtc_AddFarSpectrumFloat(farend_handle_, far_f_,
+ spectrum_size_ + 1));
+
+ // WebRtc_AddFarSpectrumFix() should return -1 if we have:
+ // 1) NULL pointer as |handle|.
+ // 2) NULL pointer as far-end spectrum.
+ // 3) Incorrect spectrum size.
+ // 4) Too high precision in far-end spectrum (Q-domain > 15).
+ EXPECT_EQ(-1, WebRtc_AddFarSpectrumFix(NULL, far_u16_, spectrum_size_, 0));
+ EXPECT_EQ(-1, WebRtc_AddFarSpectrumFix(farend_handle_, NULL, spectrum_size_,
+ 0));
+ EXPECT_EQ(-1, WebRtc_AddFarSpectrumFix(farend_handle_, far_u16_,
+ spectrum_size_ + 1, 0));
+ EXPECT_EQ(-1, WebRtc_AddFarSpectrumFix(farend_handle_, far_u16_,
+ spectrum_size_, 16));
+
+ // WebRtc_set_history_size() should return -1 if:
+ // 1) |handle| is a NULL.
+ // 2) |history_size| <= 1.
+ EXPECT_EQ(-1, WebRtc_set_history_size(NULL, 1));
+ EXPECT_EQ(-1, WebRtc_set_history_size(handle_, 1));
+ // WebRtc_history_size() should return -1 if:
+ // 1) NULL pointer input.
+ EXPECT_EQ(-1, WebRtc_history_size(NULL));
+ // 2) there is a mismatch between history size.
+ void* tmp_handle = WebRtc_CreateDelayEstimator(farend_handle_, kHistorySize);
+ EXPECT_EQ(0, WebRtc_InitDelayEstimator(tmp_handle));
+ EXPECT_EQ(kDifferentHistorySize,
+ WebRtc_set_history_size(tmp_handle, kDifferentHistorySize));
+ EXPECT_EQ(kDifferentHistorySize, WebRtc_history_size(tmp_handle));
+ EXPECT_EQ(kHistorySize, WebRtc_set_history_size(handle_, kHistorySize));
+ EXPECT_EQ(-1, WebRtc_history_size(tmp_handle));
+
+ // WebRtc_set_lookahead() should return -1 if we try a value outside the
+ // buffer.
+ EXPECT_EQ(-1, WebRtc_set_lookahead(handle_, kLookahead + 1));
+ EXPECT_EQ(-1, WebRtc_set_lookahead(handle_, -1));
+
+ // WebRtc_set_allowed_offset() should return -1 if we have:
+ // 1) NULL pointer as |handle|.
+ // 2) |allowed_offset| < 0.
+ EXPECT_EQ(-1, WebRtc_set_allowed_offset(NULL, 0));
+ EXPECT_EQ(-1, WebRtc_set_allowed_offset(handle_, -1));
+
+ EXPECT_EQ(-1, WebRtc_get_allowed_offset(NULL));
+
+ // WebRtc_enable_robust_validation() should return -1 if we have:
+ // 1) NULL pointer as |handle|.
+ // 2) Incorrect |enable| value (not 0 or 1).
+ EXPECT_EQ(-1, WebRtc_enable_robust_validation(NULL, kEnable[0]));
+ EXPECT_EQ(-1, WebRtc_enable_robust_validation(handle_, -1));
+ EXPECT_EQ(-1, WebRtc_enable_robust_validation(handle_, 2));
+
+ // WebRtc_is_robust_validation_enabled() should return -1 if we have NULL
+ // pointer as |handle|.
+ EXPECT_EQ(-1, WebRtc_is_robust_validation_enabled(NULL));
+
+ // WebRtc_DelayEstimatorProcessFloat() should return -1 if we have:
+ // 1) NULL pointer as |handle|.
+ // 2) NULL pointer as near-end spectrum.
+ // 3) Incorrect spectrum size.
+ // 4) Non matching history sizes if multiple delay estimators using the same
+ // far-end reference.
+ EXPECT_EQ(-1, WebRtc_DelayEstimatorProcessFloat(NULL, near_f_,
+ spectrum_size_));
+ // Use |handle_| which is properly created at SetUp().
+ EXPECT_EQ(-1, WebRtc_DelayEstimatorProcessFloat(handle_, NULL,
+ spectrum_size_));
+ EXPECT_EQ(-1, WebRtc_DelayEstimatorProcessFloat(handle_, near_f_,
+ spectrum_size_ + 1));
+ // |tmp_handle| is already in a non-matching state.
+ EXPECT_EQ(-1, WebRtc_DelayEstimatorProcessFloat(tmp_handle,
+ near_f_,
+ spectrum_size_));
+
+ // WebRtc_DelayEstimatorProcessFix() should return -1 if we have:
+ // 1) NULL pointer as |handle|.
+ // 2) NULL pointer as near-end spectrum.
+ // 3) Incorrect spectrum size.
+ // 4) Too high precision in near-end spectrum (Q-domain > 15).
+ // 5) Non matching history sizes if multiple delay estimators using the same
+ // far-end reference.
+ EXPECT_EQ(-1, WebRtc_DelayEstimatorProcessFix(NULL, near_u16_, spectrum_size_,
+ 0));
+ EXPECT_EQ(-1, WebRtc_DelayEstimatorProcessFix(handle_, NULL, spectrum_size_,
+ 0));
+ EXPECT_EQ(-1, WebRtc_DelayEstimatorProcessFix(handle_, near_u16_,
+ spectrum_size_ + 1, 0));
+ EXPECT_EQ(-1, WebRtc_DelayEstimatorProcessFix(handle_, near_u16_,
+ spectrum_size_, 16));
+ // |tmp_handle| is already in a non-matching state.
+ EXPECT_EQ(-1, WebRtc_DelayEstimatorProcessFix(tmp_handle,
+ near_u16_,
+ spectrum_size_,
+ 0));
+ WebRtc_FreeDelayEstimator(tmp_handle);
+
+ // WebRtc_last_delay() should return -1 if we have a NULL pointer as |handle|.
+ EXPECT_EQ(-1, WebRtc_last_delay(NULL));
+
+ // Free any local memory if needed.
+ WebRtc_FreeDelayEstimator(handle);
+}
+
+TEST_F(DelayEstimatorTest, VerifyAllowedOffset) {
+ // Is set to zero by default.
+ EXPECT_EQ(0, WebRtc_get_allowed_offset(handle_));
+ for (int i = 1; i >= 0; i--) {
+ EXPECT_EQ(0, WebRtc_set_allowed_offset(handle_, i));
+ EXPECT_EQ(i, WebRtc_get_allowed_offset(handle_));
+ Init();
+ // Unaffected over a reset.
+ EXPECT_EQ(i, WebRtc_get_allowed_offset(handle_));
+ }
+}
+
+TEST_F(DelayEstimatorTest, VerifyEnableRobustValidation) {
+ // Disabled by default.
+ EXPECT_EQ(0, WebRtc_is_robust_validation_enabled(handle_));
+ for (size_t i = 0; i < kSizeEnable; ++i) {
+ EXPECT_EQ(0, WebRtc_enable_robust_validation(handle_, kEnable[i]));
+ EXPECT_EQ(kEnable[i], WebRtc_is_robust_validation_enabled(handle_));
+ Init();
+ // Unaffected over a reset.
+ EXPECT_EQ(kEnable[i], WebRtc_is_robust_validation_enabled(handle_));
+ }
+}
+
+TEST_F(DelayEstimatorTest, InitializedSpectrumAfterProcess) {
+ // In this test we verify that the mean spectra are initialized after first
+ // time we call WebRtc_AddFarSpectrum() and Process() respectively. The test
+ // also verifies the state is not left for zero spectra.
+ const float kZerosFloat[kSpectrumSize] = { 0.0 };
+ const uint16_t kZerosU16[kSpectrumSize] = { 0 };
+
+ // For floating point operations, process one frame and verify initialization
+ // flag.
+ Init();
+ EXPECT_EQ(0, WebRtc_AddFarSpectrumFloat(farend_handle_, kZerosFloat,
+ spectrum_size_));
+ EXPECT_EQ(0, farend_self_->far_spectrum_initialized);
+ EXPECT_EQ(0, WebRtc_AddFarSpectrumFloat(farend_handle_, far_f_,
+ spectrum_size_));
+ EXPECT_EQ(1, farend_self_->far_spectrum_initialized);
+ EXPECT_EQ(-2, WebRtc_DelayEstimatorProcessFloat(handle_, kZerosFloat,
+ spectrum_size_));
+ EXPECT_EQ(0, self_->near_spectrum_initialized);
+ EXPECT_EQ(-2, WebRtc_DelayEstimatorProcessFloat(handle_, near_f_,
+ spectrum_size_));
+ EXPECT_EQ(1, self_->near_spectrum_initialized);
+
+ // For fixed point operations, process one frame and verify initialization
+ // flag.
+ Init();
+ EXPECT_EQ(0, WebRtc_AddFarSpectrumFix(farend_handle_, kZerosU16,
+ spectrum_size_, 0));
+ EXPECT_EQ(0, farend_self_->far_spectrum_initialized);
+ EXPECT_EQ(0, WebRtc_AddFarSpectrumFix(farend_handle_, far_u16_,
+ spectrum_size_, 0));
+ EXPECT_EQ(1, farend_self_->far_spectrum_initialized);
+ EXPECT_EQ(-2, WebRtc_DelayEstimatorProcessFix(handle_, kZerosU16,
+ spectrum_size_, 0));
+ EXPECT_EQ(0, self_->near_spectrum_initialized);
+ EXPECT_EQ(-2, WebRtc_DelayEstimatorProcessFix(handle_, near_u16_,
+ spectrum_size_, 0));
+ EXPECT_EQ(1, self_->near_spectrum_initialized);
+}
+
+TEST_F(DelayEstimatorTest, CorrectLastDelay) {
+ // In this test we verify that we get the correct last delay upon valid call.
+ // We simply process the same data until we leave the initialized state
+ // (|last_delay| = -2). Then we compare the Process() output with the
+ // last_delay() call.
+
+ // TODO(bjornv): Update quality values for robust validation.
+ int last_delay = 0;
+ // Floating point operations.
+ Init();
+ for (int i = 0; i < 200; i++) {
+ EXPECT_EQ(0, WebRtc_AddFarSpectrumFloat(farend_handle_, far_f_,
+ spectrum_size_));
+ last_delay = WebRtc_DelayEstimatorProcessFloat(handle_, near_f_,
+ spectrum_size_);
+ if (last_delay != -2) {
+ EXPECT_EQ(last_delay, WebRtc_last_delay(handle_));
+ if (!WebRtc_is_robust_validation_enabled(handle_)) {
+ EXPECT_FLOAT_EQ(7203.f / kMaxBitCountsQ9,
+ WebRtc_last_delay_quality(handle_));
+ }
+ break;
+ }
+ }
+ // Verify that we have left the initialized state.
+ EXPECT_NE(-2, WebRtc_last_delay(handle_));
+ EXPECT_LT(0, WebRtc_last_delay_quality(handle_));
+
+ // Fixed point operations.
+ Init();
+ for (int i = 0; i < 200; i++) {
+ EXPECT_EQ(0, WebRtc_AddFarSpectrumFix(farend_handle_, far_u16_,
+ spectrum_size_, 0));
+ last_delay = WebRtc_DelayEstimatorProcessFix(handle_, near_u16_,
+ spectrum_size_, 0);
+ if (last_delay != -2) {
+ EXPECT_EQ(last_delay, WebRtc_last_delay(handle_));
+ if (!WebRtc_is_robust_validation_enabled(handle_)) {
+ EXPECT_FLOAT_EQ(7203.f / kMaxBitCountsQ9,
+ WebRtc_last_delay_quality(handle_));
+ }
+ break;
+ }
+ }
+ // Verify that we have left the initialized state.
+ EXPECT_NE(-2, WebRtc_last_delay(handle_));
+ EXPECT_LT(0, WebRtc_last_delay_quality(handle_));
+}
+
+ TEST_F(DelayEstimatorTest, CorrectErrorReturnsOfBinaryEstimatorFarend) {
+ // In this test we verify correct output on invalid API calls to the Binary
+ // Delay Estimator (far-end part).
+
+ BinaryDelayEstimatorFarend* binary = binary_farend_;
+ // WebRtc_CreateBinaryDelayEstimatorFarend() should return NULL if the input
+ // history size is less than 2. This is to make sure the buffer shifting
+ // applies properly.
+ // Make sure we have a non-NULL value at start, so we can detect NULL after
+ // create failure.
+ binary = WebRtc_CreateBinaryDelayEstimatorFarend(1);
+ EXPECT_TRUE(binary == NULL);
+ }
+
+ TEST_F(DelayEstimatorTest, CorrectErrorReturnsOfBinaryEstimator) {
+ // In this test we verify correct output on invalid API calls to the Binary
+ // Delay Estimator.
+
+ BinaryDelayEstimator* binary_handle = binary_;
+ // WebRtc_CreateBinaryDelayEstimator() should return NULL if we have a NULL
+ // pointer as |binary_farend| or invalid input values. Upon failure, the
+ // |binary_handle| should be NULL.
+ // Make sure we have a non-NULL value at start, so we can detect NULL after
+ // create failure.
+ binary_handle = WebRtc_CreateBinaryDelayEstimator(NULL, kLookahead);
+ EXPECT_TRUE(binary_handle == NULL);
+ binary_handle = WebRtc_CreateBinaryDelayEstimator(binary_farend_, -1);
+ EXPECT_TRUE(binary_handle == NULL);
+ }
+
+TEST_F(DelayEstimatorTest, MeanEstimatorFix) {
+ // In this test we verify that we update the mean value in correct direction
+ // only. With "direction" we mean increase or decrease.
+
+ int32_t mean_value = 4000;
+ int32_t mean_value_before = mean_value;
+ int32_t new_mean_value = mean_value * 2;
+
+ // Increasing |mean_value|.
+ WebRtc_MeanEstimatorFix(new_mean_value, 10, &mean_value);
+ EXPECT_LT(mean_value_before, mean_value);
+ EXPECT_GT(new_mean_value, mean_value);
+
+ // Decreasing |mean_value|.
+ new_mean_value = mean_value / 2;
+ mean_value_before = mean_value;
+ WebRtc_MeanEstimatorFix(new_mean_value, 10, &mean_value);
+ EXPECT_GT(mean_value_before, mean_value);
+ EXPECT_LT(new_mean_value, mean_value);
+}
+
+TEST_F(DelayEstimatorTest, ExactDelayEstimateMultipleNearSameSpectrum) {
+ // In this test we verify that we get the correct delay estimates if we shift
+ // the signal accordingly. We create two Binary Delay Estimators and feed them
+ // with the same signals, so they should output the same results.
+ // We verify both causal and non-causal delays.
+ // For these noise free signals, the robust validation should not have an
+ // impact, hence we turn robust validation on/off for both reference and
+ // delayed near end.
+
+ for (size_t i = 0; i < kSizeEnable; ++i) {
+ for (size_t j = 0; j < kSizeEnable; ++j) {
+ RunBinarySpectraTest(0, 0, kEnable[i], kEnable[j]);
+ }
+ }
+}
+
+TEST_F(DelayEstimatorTest, ExactDelayEstimateMultipleNearDifferentSpectrum) {
+ // In this test we use the same setup as above, but we now feed the two Binary
+ // Delay Estimators with different signals, so they should output different
+ // results.
+ // For these noise free signals, the robust validation should not have an
+ // impact, hence we turn robust validation on/off for both reference and
+ // delayed near end.
+
+ const int kNearOffset = 1;
+ for (size_t i = 0; i < kSizeEnable; ++i) {
+ for (size_t j = 0; j < kSizeEnable; ++j) {
+ RunBinarySpectraTest(kNearOffset, 0, kEnable[i], kEnable[j]);
+ }
+ }
+}
+
+TEST_F(DelayEstimatorTest, ExactDelayEstimateMultipleNearDifferentLookahead) {
+ // In this test we use the same setup as above, feeding the two Binary
+ // Delay Estimators with the same signals. The difference is that we create
+ // them with different lookahead.
+ // For these noise free signals, the robust validation should not have an
+ // impact, hence we turn robust validation on/off for both reference and
+ // delayed near end.
+
+ const int kLookaheadOffset = 1;
+ for (size_t i = 0; i < kSizeEnable; ++i) {
+ for (size_t j = 0; j < kSizeEnable; ++j) {
+ RunBinarySpectraTest(0, kLookaheadOffset, kEnable[i], kEnable[j]);
+ }
+ }
+}
+
+TEST_F(DelayEstimatorTest, AllowedOffsetNoImpactWhenRobustValidationDisabled) {
+ // The same setup as in ExactDelayEstimateMultipleNearSameSpectrum with the
+ // difference that |allowed_offset| is set for the reference binary delay
+ // estimator.
+
+ binary_->allowed_offset = 10;
+ RunBinarySpectraTest(0, 0, 0, 0);
+ binary_->allowed_offset = 0; // Reset reference.
+}
+
+TEST_F(DelayEstimatorTest, VerifyLookaheadAtCreate) {
+ void* farend_handle = WebRtc_CreateDelayEstimatorFarend(kSpectrumSize,
+ kMaxDelay);
+ ASSERT_TRUE(farend_handle != NULL);
+ void* handle = WebRtc_CreateDelayEstimator(farend_handle, kLookahead);
+ ASSERT_TRUE(handle != NULL);
+ EXPECT_EQ(kLookahead, WebRtc_lookahead(handle));
+ WebRtc_FreeDelayEstimator(handle);
+ WebRtc_FreeDelayEstimatorFarend(farend_handle);
+}
+
+TEST_F(DelayEstimatorTest, VerifyLookaheadIsSetAndKeptAfterInit) {
+ EXPECT_EQ(kLookahead, WebRtc_lookahead(handle_));
+ EXPECT_EQ(kDifferentLookahead,
+ WebRtc_set_lookahead(handle_, kDifferentLookahead));
+ EXPECT_EQ(kDifferentLookahead, WebRtc_lookahead(handle_));
+ EXPECT_EQ(0, WebRtc_InitDelayEstimatorFarend(farend_handle_));
+ EXPECT_EQ(kDifferentLookahead, WebRtc_lookahead(handle_));
+ EXPECT_EQ(0, WebRtc_InitDelayEstimator(handle_));
+ EXPECT_EQ(kDifferentLookahead, WebRtc_lookahead(handle_));
+}
+
+TEST_F(DelayEstimatorTest, VerifyHistorySizeAtCreate) {
+ EXPECT_EQ(kHistorySize, WebRtc_history_size(handle_));
+}
+
+TEST_F(DelayEstimatorTest, VerifyHistorySizeIsSetAndKeptAfterInit) {
+ EXPECT_EQ(kHistorySize, WebRtc_history_size(handle_));
+ EXPECT_EQ(kDifferentHistorySize,
+ WebRtc_set_history_size(handle_, kDifferentHistorySize));
+ EXPECT_EQ(kDifferentHistorySize, WebRtc_history_size(handle_));
+ EXPECT_EQ(0, WebRtc_InitDelayEstimator(handle_));
+ EXPECT_EQ(kDifferentHistorySize, WebRtc_history_size(handle_));
+ EXPECT_EQ(0, WebRtc_InitDelayEstimatorFarend(farend_handle_));
+ EXPECT_EQ(kDifferentHistorySize, WebRtc_history_size(handle_));
+}
+
+// TODO(bjornv): Add tests for SoftReset...(...).
+
+} // namespace
diff --git a/third_party/webrtc/src/webrtc/modules/audio_processing/utility/delay_estimator_wrapper.c b/third_party/webrtc/src/webrtc/modules/audio_processing/utility/delay_estimator_wrapper.c
new file mode 100644
index 00000000..270588f3
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/modules/audio_processing/utility/delay_estimator_wrapper.c
@@ -0,0 +1,485 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/modules/audio_processing/utility/delay_estimator_wrapper.h"
+
+#include <assert.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "webrtc/modules/audio_processing/utility/delay_estimator.h"
+#include "webrtc/modules/audio_processing/utility/delay_estimator_internal.h"
+#include "webrtc/system_wrappers/interface/compile_assert_c.h"
+
+ // Only bit |kBandFirst| through bit |kBandLast| are processed and
+ // |kBandLast| - |kBandFirst| must be < 32.
+enum { kBandFirst = 12 };
+enum { kBandLast = 43 };
+
+ static __inline uint32_t SetBit(uint32_t in, int pos) {
+   // Returns |in| with bit |pos| (0..31) set. The shift is done on an unsigned
+   // operand since |pos| can be 31 (kBandLast - kBandFirst), for which
+   // (1 << pos) on a signed int is undefined behavior.
+   return in | ((uint32_t) 1 << pos);
+ }
+
+// Calculates the mean recursively. Same version as WebRtc_MeanEstimatorFix(),
+// but for float.
+//
+// Inputs:
+// - new_value : New additional value.
+// - scale : Scale for smoothing (should be less than 1.0).
+//
+// Input/Output:
+// - mean_value : Pointer to the mean value for updating.
+//
+static void MeanEstimatorFloat(float new_value,
+ float scale,
+ float* mean_value) {
+ assert(scale < 1.0f);
+ *mean_value += (new_value - *mean_value) * scale;
+}
+
+// Computes the binary spectrum by comparing the input |spectrum| with a
+// |threshold_spectrum|. Float and fixed point versions.
+//
+// Inputs:
+// - spectrum : Spectrum of which the binary spectrum should be
+// calculated.
+// - threshold_spectrum : Threshold spectrum with which the input
+// spectrum is compared.
+// Return:
+// - out : Binary spectrum.
+//
+static uint32_t BinarySpectrumFix(const uint16_t* spectrum,
+ SpectrumType* threshold_spectrum,
+ int q_domain,
+ int* threshold_initialized) {
+ int i = kBandFirst;
+ uint32_t out = 0;
+
+ assert(q_domain < 16);
+
+ if (!(*threshold_initialized)) {
+ // Set the |threshold_spectrum| to half the input |spectrum| as starting
+ // value. This speeds up the convergence.
+ for (i = kBandFirst; i <= kBandLast; i++) {
+ if (spectrum[i] > 0) {
+ // Convert input spectrum from Q(|q_domain|) to Q15.
+ int32_t spectrum_q15 = ((int32_t) spectrum[i]) << (15 - q_domain);
+ threshold_spectrum[i].int32_ = (spectrum_q15 >> 1);
+ *threshold_initialized = 1;
+ }
+ }
+ }
+ for (i = kBandFirst; i <= kBandLast; i++) {
+ // Convert input spectrum from Q(|q_domain|) to Q15.
+ int32_t spectrum_q15 = ((int32_t) spectrum[i]) << (15 - q_domain);
+ // Update the |threshold_spectrum|.
+ WebRtc_MeanEstimatorFix(spectrum_q15, 6, &(threshold_spectrum[i].int32_));
+ // Convert |spectrum| at current frequency bin to a binary value.
+ if (spectrum_q15 > threshold_spectrum[i].int32_) {
+ out = SetBit(out, i - kBandFirst);
+ }
+ }
+
+ return out;
+}
+
+static uint32_t BinarySpectrumFloat(const float* spectrum,
+ SpectrumType* threshold_spectrum,
+ int* threshold_initialized) {
+ int i = kBandFirst;
+ uint32_t out = 0;
+ const float kScale = 1 / 64.0;
+
+ if (!(*threshold_initialized)) {
+ // Set the |threshold_spectrum| to half the input |spectrum| as starting
+ // value. This speeds up the convergence.
+ for (i = kBandFirst; i <= kBandLast; i++) {
+ if (spectrum[i] > 0.0f) {
+ threshold_spectrum[i].float_ = (spectrum[i] / 2);
+ *threshold_initialized = 1;
+ }
+ }
+ }
+
+ for (i = kBandFirst; i <= kBandLast; i++) {
+ // Update the |threshold_spectrum|.
+ MeanEstimatorFloat(spectrum[i], kScale, &(threshold_spectrum[i].float_));
+ // Convert |spectrum| at current frequency bin to a binary value.
+ if (spectrum[i] > threshold_spectrum[i].float_) {
+ out = SetBit(out, i - kBandFirst);
+ }
+ }
+
+ return out;
+}
+
+void WebRtc_FreeDelayEstimatorFarend(void* handle) {
+ DelayEstimatorFarend* self = (DelayEstimatorFarend*) handle;
+
+ if (handle == NULL) {
+ return;
+ }
+
+ free(self->mean_far_spectrum);
+ self->mean_far_spectrum = NULL;
+
+ WebRtc_FreeBinaryDelayEstimatorFarend(self->binary_farend);
+ self->binary_farend = NULL;
+
+ free(self);
+}
+
+ void* WebRtc_CreateDelayEstimatorFarend(int spectrum_size, int history_size) {
+   DelayEstimatorFarend* self = NULL;
+
+   // Check if the sub band used in the delay estimation is small enough to fit
+   // the binary spectra in a uint32_t.
+   COMPILE_ASSERT(kBandLast - kBandFirst < 32);
+   // Bins |kBandFirst| through |kBandLast| (inclusive) are indexed in the
+   // spectrum buffers, so they must hold at least |kBandLast| + 1 values.
+   if (spectrum_size > kBandLast) {
+     self = malloc(sizeof(DelayEstimatorFarend));
+   }
+
+   if (self != NULL) {
+     int memory_fail = 0;
+     // Allocate memory for the binary far-end spectrum handling.
+     self->binary_farend = WebRtc_CreateBinaryDelayEstimatorFarend(history_size);
+     memory_fail |= (self->binary_farend == NULL);
+
+     // Allocate memory for spectrum buffers.
+     self->mean_far_spectrum = malloc(spectrum_size * sizeof(SpectrumType));
+     memory_fail |= (self->mean_far_spectrum == NULL);
+
+     self->spectrum_size = spectrum_size;
+
+     // On any partial failure release everything and report NULL.
+     if (memory_fail) {
+       WebRtc_FreeDelayEstimatorFarend(self);
+       self = NULL;
+     }
+   }
+   return self;
+ }
+
+ int WebRtc_InitDelayEstimatorFarend(void* handle) {
+ DelayEstimatorFarend* self = (DelayEstimatorFarend*) handle;
+
+ if (self == NULL) {
+ return -1;
+ }
+
+ // Initialize far-end part of binary delay estimator.
+ WebRtc_InitBinaryDelayEstimatorFarend(self->binary_farend);
+
+ // Set averaged far-end spectrum to zero.
+ memset(self->mean_far_spectrum, 0,
+ sizeof(SpectrumType) * self->spectrum_size);
+ // Reset initialization indicator.
+ self->far_spectrum_initialized = 0;
+
+ return 0;
+ }
+
+void WebRtc_SoftResetDelayEstimatorFarend(void* handle, int delay_shift) {
+ DelayEstimatorFarend* self = (DelayEstimatorFarend*) handle;
+ assert(self != NULL);
+ WebRtc_SoftResetBinaryDelayEstimatorFarend(self->binary_farend, delay_shift);
+}
+
+int WebRtc_AddFarSpectrumFix(void* handle,
+ const uint16_t* far_spectrum,
+ int spectrum_size,
+ int far_q) {
+ DelayEstimatorFarend* self = (DelayEstimatorFarend*) handle;
+ uint32_t binary_spectrum = 0;
+
+ if (self == NULL) {
+ return -1;
+ }
+ if (far_spectrum == NULL) {
+ // Empty far end spectrum.
+ return -1;
+ }
+ if (spectrum_size != self->spectrum_size) {
+ // Data sizes don't match.
+ return -1;
+ }
+ if (far_q > 15) {
+ // If |far_q| is larger than 15 we cannot guarantee no wrap around.
+ return -1;
+ }
+
+ // Get binary spectrum.
+ binary_spectrum = BinarySpectrumFix(far_spectrum, self->mean_far_spectrum,
+ far_q, &(self->far_spectrum_initialized));
+ WebRtc_AddBinaryFarSpectrum(self->binary_farend, binary_spectrum);
+
+ return 0;
+}
+
+int WebRtc_AddFarSpectrumFloat(void* handle,
+ const float* far_spectrum,
+ int spectrum_size) {
+ DelayEstimatorFarend* self = (DelayEstimatorFarend*) handle;
+ uint32_t binary_spectrum = 0;
+
+ if (self == NULL) {
+ return -1;
+ }
+ if (far_spectrum == NULL) {
+ // Empty far end spectrum.
+ return -1;
+ }
+ if (spectrum_size != self->spectrum_size) {
+ // Data sizes don't match.
+ return -1;
+ }
+
+ // Get binary spectrum.
+ binary_spectrum = BinarySpectrumFloat(far_spectrum, self->mean_far_spectrum,
+ &(self->far_spectrum_initialized));
+ WebRtc_AddBinaryFarSpectrum(self->binary_farend, binary_spectrum);
+
+ return 0;
+}
+
+void WebRtc_FreeDelayEstimator(void* handle) {
+ DelayEstimator* self = (DelayEstimator*) handle;
+
+ if (handle == NULL) {
+ return;
+ }
+
+ free(self->mean_near_spectrum);
+ self->mean_near_spectrum = NULL;
+
+ WebRtc_FreeBinaryDelayEstimator(self->binary_handle);
+ self->binary_handle = NULL;
+
+ free(self);
+}
+
+ void* WebRtc_CreateDelayEstimator(void* farend_handle, int max_lookahead) {
+ DelayEstimator* self = NULL;
+ DelayEstimatorFarend* farend = (DelayEstimatorFarend*) farend_handle;
+
+ if (farend_handle != NULL) {
+ self = malloc(sizeof(DelayEstimator));
+ }
+
+ if (self != NULL) {
+ int memory_fail = 0;
+
+ // Create the binary delay estimator on top of the far-end binary part.
+ self->binary_handle =
+ WebRtc_CreateBinaryDelayEstimator(farend->binary_farend, max_lookahead);
+ memory_fail |= (self->binary_handle == NULL);
+
+ // Allocate memory for the near-end mean spectrum buffer.
+ self->mean_near_spectrum = malloc(farend->spectrum_size *
+ sizeof(SpectrumType));
+ memory_fail |= (self->mean_near_spectrum == NULL);
+
+ self->spectrum_size = farend->spectrum_size;
+
+ if (memory_fail) {
+ WebRtc_FreeDelayEstimator(self);
+ self = NULL;
+ }
+ }
+
+ return self;
+ }
+
+ int WebRtc_InitDelayEstimator(void* handle) {
+ DelayEstimator* self = (DelayEstimator*) handle;
+
+ if (self == NULL) {
+ return -1;
+ }
+
+ // Initialize binary delay estimator.
+ WebRtc_InitBinaryDelayEstimator(self->binary_handle);
+
+ // Set averaged near-end spectrum to zero.
+ memset(self->mean_near_spectrum, 0,
+ sizeof(SpectrumType) * self->spectrum_size);
+ // Reset initialization indicator.
+ self->near_spectrum_initialized = 0;
+
+ return 0;
+ }
+
+int WebRtc_SoftResetDelayEstimator(void* handle, int delay_shift) {
+ DelayEstimator* self = (DelayEstimator*) handle;
+ assert(self != NULL);
+ return WebRtc_SoftResetBinaryDelayEstimator(self->binary_handle, delay_shift);
+}
+
+int WebRtc_set_history_size(void* handle, int history_size) {
+ DelayEstimator* self = handle;
+
+ if ((self == NULL) || (history_size <= 1)) {
+ return -1;
+ }
+ return WebRtc_AllocateHistoryBufferMemory(self->binary_handle, history_size);
+}
+
+int WebRtc_history_size(const void* handle) {
+ const DelayEstimator* self = handle;
+
+ if (self == NULL) {
+ return -1;
+ }
+ if (self->binary_handle->farend->history_size !=
+ self->binary_handle->history_size) {
+ // Non matching history sizes.
+ return -1;
+ }
+ return self->binary_handle->history_size;
+}
+
+int WebRtc_set_lookahead(void* handle, int lookahead) {
+ DelayEstimator* self = (DelayEstimator*) handle;
+ assert(self != NULL);
+ assert(self->binary_handle != NULL);
+ if ((lookahead > self->binary_handle->near_history_size - 1) ||
+ (lookahead < 0)) {
+ return -1;
+ }
+ self->binary_handle->lookahead = lookahead;
+ return self->binary_handle->lookahead;
+}
+
+int WebRtc_lookahead(void* handle) {
+ DelayEstimator* self = (DelayEstimator*) handle;
+ assert(self != NULL);
+ assert(self->binary_handle != NULL);
+ return self->binary_handle->lookahead;
+}
+
+int WebRtc_set_allowed_offset(void* handle, int allowed_offset) {
+ DelayEstimator* self = (DelayEstimator*) handle;
+
+ if ((self == NULL) || (allowed_offset < 0)) {
+ return -1;
+ }
+ self->binary_handle->allowed_offset = allowed_offset;
+ return 0;
+}
+
+int WebRtc_get_allowed_offset(const void* handle) {
+ const DelayEstimator* self = (const DelayEstimator*) handle;
+
+ if (self == NULL) {
+ return -1;
+ }
+ return self->binary_handle->allowed_offset;
+}
+
+int WebRtc_enable_robust_validation(void* handle, int enable) {
+ DelayEstimator* self = (DelayEstimator*) handle;
+
+ if (self == NULL) {
+ return -1;
+ }
+ if ((enable < 0) || (enable > 1)) {
+ return -1;
+ }
+ assert(self->binary_handle != NULL);
+ self->binary_handle->robust_validation_enabled = enable;
+ return 0;
+}
+
+int WebRtc_is_robust_validation_enabled(const void* handle) {
+ const DelayEstimator* self = (const DelayEstimator*) handle;
+
+ if (self == NULL) {
+ return -1;
+ }
+ return self->binary_handle->robust_validation_enabled;
+}
+
+int WebRtc_DelayEstimatorProcessFix(void* handle,
+ const uint16_t* near_spectrum,
+ int spectrum_size,
+ int near_q) {
+ DelayEstimator* self = (DelayEstimator*) handle;
+ uint32_t binary_spectrum = 0;
+
+ if (self == NULL) {
+ return -1;
+ }
+ if (near_spectrum == NULL) {
+ // Empty near end spectrum.
+ return -1;
+ }
+ if (spectrum_size != self->spectrum_size) {
+ // Data sizes don't match.
+ return -1;
+ }
+ if (near_q > 15) {
+ // If |near_q| is larger than 15 we cannot guarantee no wrap around.
+ return -1;
+ }
+
+ // Get binary spectra.
+ binary_spectrum = BinarySpectrumFix(near_spectrum,
+ self->mean_near_spectrum,
+ near_q,
+ &(self->near_spectrum_initialized));
+
+ return WebRtc_ProcessBinarySpectrum(self->binary_handle, binary_spectrum);
+}
+
+int WebRtc_DelayEstimatorProcessFloat(void* handle,
+ const float* near_spectrum,
+ int spectrum_size) {
+ DelayEstimator* self = (DelayEstimator*) handle;
+ uint32_t binary_spectrum = 0;
+
+ if (self == NULL) {
+ return -1;
+ }
+ if (near_spectrum == NULL) {
+ // Empty near end spectrum.
+ return -1;
+ }
+ if (spectrum_size != self->spectrum_size) {
+ // Data sizes don't match.
+ return -1;
+ }
+
+ // Get binary spectrum.
+ binary_spectrum = BinarySpectrumFloat(near_spectrum, self->mean_near_spectrum,
+ &(self->near_spectrum_initialized));
+
+ return WebRtc_ProcessBinarySpectrum(self->binary_handle, binary_spectrum);
+}
+
+int WebRtc_last_delay(void* handle) {
+ DelayEstimator* self = (DelayEstimator*) handle;
+
+ if (self == NULL) {
+ return -1;
+ }
+
+ return WebRtc_binary_last_delay(self->binary_handle);
+}
+
+float WebRtc_last_delay_quality(void* handle) {
+ DelayEstimator* self = (DelayEstimator*) handle;
+ assert(self != NULL);
+ return WebRtc_binary_last_delay_quality(self->binary_handle);
+}
diff --git a/third_party/webrtc/src/webrtc/modules/audio_processing/utility/delay_estimator_wrapper.h b/third_party/webrtc/src/webrtc/modules/audio_processing/utility/delay_estimator_wrapper.h
new file mode 100644
index 00000000..fdadebeb
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/modules/audio_processing/utility/delay_estimator_wrapper.h
@@ -0,0 +1,244 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+// Performs delay estimation on block by block basis.
+// The return value is 0 - OK and -1 - Error, unless otherwise stated.
+
+#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_UTILITY_DELAY_ESTIMATOR_WRAPPER_H_
+#define WEBRTC_MODULES_AUDIO_PROCESSING_UTILITY_DELAY_ESTIMATOR_WRAPPER_H_
+
+#include "webrtc/typedefs.h"
+
+// Releases the memory allocated by WebRtc_CreateDelayEstimatorFarend(...)
+void WebRtc_FreeDelayEstimatorFarend(void* handle);
+
+// Allocates the memory needed by the far-end part of the delay estimation. The
+// memory needs to be initialized separately through
+// WebRtc_InitDelayEstimatorFarend(...).
+//
+// Inputs:
+// - spectrum_size : Size of the spectrum used both in far-end and
+// near-end. Used to allocate memory for spectrum
+// specific buffers.
+// - history_size : The far-end history buffer size. A change in buffer
+// size can be forced with WebRtc_set_history_size().
+// Note that the maximum delay which can be estimated is
+// determined together with WebRtc_set_lookahead().
+//
+// Return value:
+// - void* : Created |handle|. If the memory can't be allocated or
+// if any of the input parameters are invalid NULL is
+// returned.
+void* WebRtc_CreateDelayEstimatorFarend(int spectrum_size, int history_size);
+
+// Initializes the far-end part of the delay estimation instance returned by
+// WebRtc_CreateDelayEstimatorFarend(...)
+int WebRtc_InitDelayEstimatorFarend(void* handle);
+
+// Soft resets the far-end part of the delay estimation instance returned by
+// WebRtc_CreateDelayEstimatorFarend(...).
+// Input:
+// - delay_shift : The number of blocks to shift the history buffers by.
+void WebRtc_SoftResetDelayEstimatorFarend(void* handle, int delay_shift);
+
+// Adds the far-end spectrum to the far-end history buffer. This spectrum is
+// used as reference when calculating the delay using
+// WebRtc_DelayEstimatorProcessFix() or WebRtc_DelayEstimatorProcessFloat().
+//
+// Inputs:
+// - far_spectrum : Far-end spectrum.
+// - spectrum_size : The size of the data arrays (same for both far- and
+// near-end).
+// - far_q : The Q-domain of the far-end data.
+//
+// Output:
+// - handle : Updated far-end instance.
+//
+int WebRtc_AddFarSpectrumFix(void* handle,
+ const uint16_t* far_spectrum,
+ int spectrum_size,
+ int far_q);
+
+// See WebRtc_AddFarSpectrumFix() for description.
+int WebRtc_AddFarSpectrumFloat(void* handle,
+ const float* far_spectrum,
+ int spectrum_size);
+
+// Releases the memory allocated by WebRtc_CreateDelayEstimator(...)
+void WebRtc_FreeDelayEstimator(void* handle);
+
+// Allocates the memory needed by the delay estimation. The memory needs to be
+// initialized separately through WebRtc_InitDelayEstimator(...).
+//
+// Inputs:
+// - farend_handle : Pointer to the far-end part of the delay estimation
+// instance created prior to this call using
+// WebRtc_CreateDelayEstimatorFarend().
+//
+// Note that WebRtc_CreateDelayEstimator does not take
+// ownership of |farend_handle|, which has to be torn
+// down properly after this instance.
+//
+// - max_lookahead : Maximum amount of non-causal lookahead allowed. The
+// actual amount of lookahead used can be controlled by
+// WebRtc_set_lookahead(...). The default |lookahead| is
+// set to |max_lookahead| at create time. Use
+// WebRtc_set_lookahead(...) before start if a different
+// value is desired.
+//
+// Using lookahead can detect cases in which a near-end
+// signal occurs before the corresponding far-end signal.
+// It will delay the estimate for the current block by an
+// equal amount, and the returned values will be offset
+// by it.
+//
+// A value of zero is the typical no-lookahead case.
+// This also represents the minimum delay which can be
+// estimated.
+//
+// Note that the effective range of delay estimates is
+// [-|lookahead|,... ,|history_size|-|lookahead|)
+// where |history_size| is set through
+// WebRtc_set_history_size().
+//
+// Return value:
+// - void* : Created |handle|. If the memory can't be allocated or
+// if any of the input parameters are invalid NULL is
+// returned.
+void* WebRtc_CreateDelayEstimator(void* farend_handle, int max_lookahead);
+
+// Initializes the delay estimation instance returned by
+// WebRtc_CreateDelayEstimator(...)
+int WebRtc_InitDelayEstimator(void* handle);
+
+// Soft resets the delay estimation instance returned by
+// WebRtc_CreateDelayEstimator(...).
+// Input:
+// - delay_shift : The number of blocks to shift the history buffers by.
+//
+// Return value:
+// - actual_shifts : The actual number of shifts performed.
+int WebRtc_SoftResetDelayEstimator(void* handle, int delay_shift);
+
+// Sets the effective |history_size| used. Valid values are 2 and above, since
+// at least two delays are needed for a comparison to produce an estimate. If
+// |history_size| is changed, buffers are reallocated, zero-filled if necessary.
+// Note that changing the |history_size| affects both buffers in far-end and
+// near-end. Hence it is important to change all DelayEstimators that use the
+// same reference far-end to the same |history_size| value.
+// Inputs:
+// - handle : Pointer to the delay estimation instance.
+// - history_size : Effective history size to be used.
+// Return value:
+// - new_history_size : The new history size used. If the memory could not be
+// allocated, 0 is returned.
+int WebRtc_set_history_size(void* handle, int history_size);
+
+// Returns the history_size currently used.
+// Input:
+// - handle : Pointer to the delay estimation instance.
+int WebRtc_history_size(const void* handle);
+
+// Sets the amount of |lookahead| to use. Valid values are [0, max_lookahead]
+// where |max_lookahead| was set at create time through
+// WebRtc_CreateDelayEstimator(...).
+//
+// Input:
+// - handle : Pointer to the delay estimation instance.
+// - lookahead : The amount of lookahead to be used.
+//
+// Return value:
+// - new_lookahead : The actual amount of lookahead set, unless |handle| is
+// a NULL pointer or |lookahead| is invalid, for which an
+// error is returned.
+int WebRtc_set_lookahead(void* handle, int lookahead);
+
+// Returns the amount of lookahead we currently use.
+// Input:
+// - handle : Pointer to the delay estimation instance.
+int WebRtc_lookahead(void* handle);
+
+// Sets the |allowed_offset| used in the robust validation scheme. If the
+// delay estimator is used in an echo control component, this parameter is
+// related to the filter length. In principle |allowed_offset| should be set to
+// the echo control filter length minus the expected echo duration, i.e., the
+// delay offset the echo control can handle without quality regression. The
+// default value, used if not set manually, is zero. Note that |allowed_offset|
+// has to be non-negative.
+// Inputs:
+// - handle : Pointer to the delay estimation instance.
+// - allowed_offset : The amount of delay offset, measured in partitions,
+// the echo control filter can handle.
+int WebRtc_set_allowed_offset(void* handle, int allowed_offset);
+
+// Returns the |allowed_offset| in number of partitions.
+int WebRtc_get_allowed_offset(const void* handle);
+
+// Enables/Disables a robust validation functionality in the delay estimation.
+// This is by default set to disabled at create time. The state is preserved
+// over a reset.
+// Inputs:
+// - handle : Pointer to the delay estimation instance.
+// - enable : Enable (1) or disable (0) this feature.
+int WebRtc_enable_robust_validation(void* handle, int enable);
+
+// Returns 1 if robust validation is enabled and 0 if disabled.
+int WebRtc_is_robust_validation_enabled(const void* handle);
+
+// Estimates and returns the delay between the far-end and near-end blocks. The
+// value will be offset by the lookahead (i.e. the lookahead should be
+// subtracted from the returned value).
+// Inputs:
+// - handle : Pointer to the delay estimation instance.
+// - near_spectrum : Pointer to the near-end spectrum data of the current
+// block.
+// - spectrum_size : The size of the data arrays (same for both far- and
+// near-end).
+// - near_q : The Q-domain of the near-end data. Must not exceed 15.
+//
+// Output:
+// - handle : Updated instance.
+//
+// Return value:
+// - delay : >= 0 - Calculated delay value.
+// -1 - Error (e.g. NULL argument, size mismatch or |near_q| > 15).
+// -2 - Insufficient data for estimation.
+int WebRtc_DelayEstimatorProcessFix(void* handle,
+ const uint16_t* near_spectrum,
+ int spectrum_size,
+ int near_q);
+
+// See WebRtc_DelayEstimatorProcessFix() for description.
+int WebRtc_DelayEstimatorProcessFloat(void* handle,
+ const float* near_spectrum,
+ int spectrum_size);
+
+// Returns the last delay calculated by WebRtc_DelayEstimatorProcessFix() or
+// WebRtc_DelayEstimatorProcessFloat().
+//
+// Input:
+// - handle : Pointer to the delay estimation instance.
+//
+// Return value:
+// - delay : >= 0 - Last calculated delay value.
+// -1 - Error.
+// -2 - Insufficient data for estimation.
+int WebRtc_last_delay(void* handle);
+
+// Returns the estimation quality/probability of the last delay calculated by
+// WebRtc_DelayEstimatorProcessFix() or WebRtc_DelayEstimatorProcessFloat().
+// The quality is a value in the interval [0, 1]; higher is better.
+// |handle| must not be NULL: the implementation only asserts on it.
+//
+// Return value:
+// - delay_quality : >= 0 - Estimation quality of last calculated delay.
+float WebRtc_last_delay_quality(void* handle);
+
+#endif // WEBRTC_MODULES_AUDIO_PROCESSING_UTILITY_DELAY_ESTIMATOR_WRAPPER_H_
diff --git a/third_party/webrtc/src/webrtc/system_wrappers/interface/compile_assert_c.h b/third_party/webrtc/src/webrtc/system_wrappers/interface/compile_assert_c.h
new file mode 100644
index 00000000..dbb5292d
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/system_wrappers/interface/compile_assert_c.h
@@ -0,0 +1,24 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_SYSTEM_WRAPPERS_INTERFACE_COMPILE_ASSERT_C_H_
+#define WEBRTC_SYSTEM_WRAPPERS_INTERFACE_COMPILE_ASSERT_C_H_
+
+#ifdef __cplusplus
+#error "Only use this for C files. For C++, use static_assert."
+#endif
+
+// Use this macro to verify at compile time that certain restrictions are met.
+// The argument is the boolean expression to evaluate; when it is false (0)
+// the two |case 0| labels collide and compilation fails.
+// Example (statement context only): COMPILE_ASSERT(sizeof(foo) < 128);
+#define COMPILE_ASSERT(expression) switch (0) {case 0: case expression:;}
+
+#endif // WEBRTC_SYSTEM_WRAPPERS_INTERFACE_COMPILE_ASSERT_C_H_
diff --git a/third_party/webrtc/src/webrtc/system_wrappers/interface/cpu_features_wrapper.h b/third_party/webrtc/src/webrtc/system_wrappers/interface/cpu_features_wrapper.h
new file mode 100644
index 00000000..5697c491
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/system_wrappers/interface/cpu_features_wrapper.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_SYSTEM_WRAPPERS_INTERFACE_CPU_FEATURES_WRAPPER_H_
+#define WEBRTC_SYSTEM_WRAPPERS_INTERFACE_CPU_FEATURES_WRAPPER_H_
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+#include "webrtc/typedefs.h"
+
+// List of features in x86; a value is passed to a WebRtc_CPUInfo query.
+typedef enum {
+ kSSE2,
+ kSSE3
+} CPUFeature;
+
+// List of features in ARM; each one is a separate bit of a bitmask.
+enum {
+ kCPUFeatureARMv7 = (1 << 0),
+ kCPUFeatureVFPv3 = (1 << 1),
+ kCPUFeatureNEON = (1 << 2),
+ kCPUFeatureLDREXSTREX = (1 << 3)
+};
+
+typedef int (*WebRtc_CPUInfo)(CPUFeature feature);  // Feature query signature.
+
+// Function pointer: returns nonzero if the CPU supports |feature|.
+extern WebRtc_CPUInfo WebRtc_GetCPUInfo;
+
+// Function pointer that reports no feature as available => straight C path.
+extern WebRtc_CPUInfo WebRtc_GetCPUInfoNoASM;
+
+// Returns the features of an ARM device.
+// It detects the features in the hardware platform, and returns the supported
+// ones as a bitmask of the kCPUFeature* values defined above.
+extern uint64_t WebRtc_GetCPUFeaturesARM(void);
+
+#if defined(__cplusplus) || defined(c_plusplus)
+} // extern "C"
+#endif
+
+#endif // WEBRTC_SYSTEM_WRAPPERS_INTERFACE_CPU_FEATURES_WRAPPER_H_
diff --git a/third_party/webrtc/src/webrtc/system_wrappers/source/cpu_features.cc b/third_party/webrtc/src/webrtc/system_wrappers/source/cpu_features.cc
new file mode 100644
index 00000000..b924d773
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/system_wrappers/source/cpu_features.cc
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+// Parts of this file derived from Chromium's base/cpu.cc.
+
+#include "webrtc/system_wrappers/interface/cpu_features_wrapper.h"
+
+#if defined(WEBRTC_ARCH_X86_FAMILY) && defined(_MSC_VER)
+#include <intrin.h>
+#endif
+
+#include "webrtc/typedefs.h"
+
+// Reports every CPU feature as unavailable => straight C path.
+int GetCPUInfoNoASM(CPUFeature feature) {  // NOTE(review): not static, unlike GetCPUInfo() - confirm intended.
+ (void)feature;  // Parameter is intentionally unused.
+ return 0;
+}
+
+#if defined(WEBRTC_ARCH_X86_FAMILY)
+#ifndef _MSC_VER
+// Stand-in for the "cpuid" intrinsic: fills cpu_info with eax, ebx, ecx, edx.
+#if defined(__pic__) && defined(__i386__)  // Presumably ebx is reserved for PIC here - confirm.
+static inline void __cpuid(int cpu_info[4], int info_type) {
+ __asm__ volatile(
+ "mov %%ebx, %%edi\n"  // Stash the caller's ebx in edi.
+ "cpuid\n"
+ "xchg %%edi, %%ebx\n"  // Restore ebx; edi now holds cpuid's ebx result.
+ : "=a"(cpu_info[0]), "=D"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3])
+ : "a"(info_type));
+}
+#else
+static inline void __cpuid(int cpu_info[4], int info_type) {
+ __asm__ volatile(
+ "cpuid\n"  // ebx can be named as an output directly in this variant.
+ : "=a"(cpu_info[0]), "=b"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3])
+ : "a"(info_type));
+}
+#endif
+#endif // _MSC_VER
+#endif // WEBRTC_ARCH_X86_FAMILY
+
+#if defined(WEBRTC_ARCH_X86_FAMILY)
+// Actual feature detection for x86; returns 1 if |feature| is present, else 0.
+static int GetCPUInfo(CPUFeature feature) {
+ int cpu_info[4];
+ __cpuid(cpu_info, 1);  // Leaf 1: cpu_info[2] = ecx, cpu_info[3] = edx.
+ if (feature == kSSE2) {
+ return 0 != (cpu_info[3] & 0x04000000);  // edx bit 26.
+ }
+ if (feature == kSSE3) {
+ return 0 != (cpu_info[2] & 0x00000001);  // ecx bit 0.
+ }
+ return 0;  // Any other feature value is reported as unsupported.
+}
+#else
+// Default to straight C for other platforms: every feature is reported absent.
+static int GetCPUInfo(CPUFeature feature) {
+ (void)feature;  // Parameter is intentionally unused.
+ return 0;
+}
+#endif
+
+WebRtc_CPUInfo WebRtc_GetCPUInfo = GetCPUInfo;  // Variant chosen by the #if above.
+WebRtc_CPUInfo WebRtc_GetCPUInfoNoASM = GetCPUInfoNoASM;  // Always returns 0.
diff --git a/third_party/webrtc/src/webrtc/system_wrappers/source/cpu_features_android.c b/third_party/webrtc/src/webrtc/system_wrappers/source/cpu_features_android.c
new file mode 100644
index 00000000..0cb3a6c5
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/system_wrappers/source/cpu_features_android.c
@@ -0,0 +1,15 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <cpu-features.h>
+
+uint64_t WebRtc_GetCPUFeaturesARM(void) {  // Thin wrapper; adds no logic of its own.
+ return android_getCpuFeatures();  // Presumably declared by <cpu-features.h>.
+}  // NOTE(review): cpu_features_wrapper.h is not included, so the prototype is unchecked here.
diff --git a/third_party/webrtc/src/webrtc/typedefs.h b/third_party/webrtc/src/webrtc/typedefs.h
new file mode 100644
index 00000000..3034c7e7
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/typedefs.h
@@ -0,0 +1,122 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+// This file contains platform-specific typedefs and defines.
+// Much of it is derived from Chromium's build/build_config.h.
+
+#ifndef WEBRTC_TYPEDEFS_H_
+#define WEBRTC_TYPEDEFS_H_
+
+// Processor architecture detection. For more info on what's defined, see:
+// http://msdn.microsoft.com/en-us/library/b0084kay.aspx
+// http://www.agner.org/optimize/calling_conventions.pdf
+// or with gcc, run: "echo | gcc -E -dM -"
+#if defined(_M_X64) || defined(__x86_64__)  // 64-bit x86.
+#define WEBRTC_ARCH_X86_FAMILY
+#define WEBRTC_ARCH_X86_64
+#define WEBRTC_ARCH_64_BITS
+#define WEBRTC_ARCH_LITTLE_ENDIAN
+#elif defined(__aarch64__)  // 64-bit ARM; no ARM-specific macro is set here.
+#define WEBRTC_ARCH_64_BITS
+#define WEBRTC_ARCH_LITTLE_ENDIAN
+#elif defined(_M_IX86) || defined(__i386__)  // 32-bit x86.
+#define WEBRTC_ARCH_X86_FAMILY
+#define WEBRTC_ARCH_X86
+#define WEBRTC_ARCH_32_BITS
+#define WEBRTC_ARCH_LITTLE_ENDIAN
+#elif defined(__ARMEL__)  // Little-endian 32-bit ARM.
+// TODO(ajm): We'd prefer to control platform defines here, but this is
+// currently provided by the Android makefiles. Commented to avoid duplicate
+// definition warnings.
+//#define WEBRTC_ARCH_ARM
+// TODO(ajm): Chromium uses the following two defines. Should we switch?
+//#define WEBRTC_ARCH_ARM_FAMILY
+//#define WEBRTC_ARCH_ARMEL
+#define WEBRTC_ARCH_32_BITS
+#define WEBRTC_ARCH_LITTLE_ENDIAN
+#elif defined(__MIPSEL__)  // Little-endian MIPS.
+#define WEBRTC_ARCH_32_BITS
+#define WEBRTC_ARCH_LITTLE_ENDIAN
+#elif defined(__pnacl__)  // Portable Native Client.
+#define WEBRTC_ARCH_32_BITS
+#define WEBRTC_ARCH_LITTLE_ENDIAN
+#else
+#error Please add support for your architecture in typedefs.h
+#endif
+// Exactly one endianness macro must be set; no branch above sets BIG_ENDIAN.
+#if !(defined(WEBRTC_ARCH_LITTLE_ENDIAN) ^ defined(WEBRTC_ARCH_BIG_ENDIAN))
+#error Define either WEBRTC_ARCH_LITTLE_ENDIAN or WEBRTC_ARCH_BIG_ENDIAN
+#endif
+
+// TODO(zhongwei.yao): WEBRTC_CPU_DETECTION is only used in one place; we should
+// probably just remove it.
+#if (defined(WEBRTC_ARCH_X86_FAMILY) && !defined(__SSE2__)) || \
+ defined(WEBRTC_DETECT_NEON)
+#define WEBRTC_CPU_DETECTION  // x86 without __SSE2__, or WEBRTC_DETECT_NEON set.
+#endif
+
+#if !defined(_MSC_VER)
+#include <stdint.h>
+#else
+// Define C99 equivalent types, since pre-2010 MSVC doesn't provide stdint.h.
+typedef signed char int8_t;
+typedef signed short int16_t;
+typedef signed int int32_t;
+typedef __int64 int64_t;
+typedef unsigned char uint8_t;
+typedef unsigned short uint16_t;
+typedef unsigned int uint32_t;
+typedef unsigned __int64 uint64_t;
+#endif
+
+// Annotate a function indicating the caller must examine the return value.
+// Use like:
+// int foo() WARN_UNUSED_RESULT;
+// TODO(ajm): Hack to avoid multiple definitions until the base/ of webrtc and
+// libjingle are merged.
+#if !defined(WARN_UNUSED_RESULT)
+#if defined(__GNUC__)
+#define WARN_UNUSED_RESULT __attribute__((warn_unused_result))
+#else
+#define WARN_UNUSED_RESULT
+#endif
+#endif // WARN_UNUSED_RESULT
+
+// Put after a variable that might not be used, to prevent compiler warnings:
+// int result ATTRIBUTE_UNUSED = DoSomething();
+// assert(result == 17);
+#ifndef ATTRIBUTE_UNUSED
+#if defined(__GNUC__) || defined(__clang__)
+#define ATTRIBUTE_UNUSED __attribute__((unused))
+#else
+#define ATTRIBUTE_UNUSED
+#endif
+#endif
+
+// Macro to be used for switch-case fallthrough (required for enabling
+// -Wimplicit-fallthrough warning on Clang). Defined only if not already set.
+#ifndef FALLTHROUGH
+#if defined(__clang__)
+#define FALLTHROUGH() [[clang::fallthrough]]  // NOTE(review): confirm Clang accepts this in C mode.
+#else
+#define FALLTHROUGH() do { } while (0)  // Empty statement on other compilers.
+#endif
+#endif
+
+// Annotate a function that will not return control flow to the caller.
+#if defined(_MSC_VER)  // NOTE(review): no #ifndef NO_RETURN guard, unlike the macros above.
+#define NO_RETURN __declspec(noreturn)
+#elif defined(__GNUC__)
+#define NO_RETURN __attribute__((noreturn))
+#else
+#define NO_RETURN  // Expands to nothing on other compilers.
+#endif
+
+#endif // WEBRTC_TYPEDEFS_H_